/*
 * Copyright (c) 2002-2005 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*-
 * Copyright (c) 1997 Berkeley Software Design, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Berkeley Software Design Inc's name may not be used to endorse or
 *    promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from BSDI nfs_lock.c,v 2.4 1998/12/14 23:49:56 jch Exp
 */

#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/fcntl.h>
#include <sys/kernel.h>		/* for hz */
#include <sys/file_internal.h>
#include <sys/malloc.h>
#include <sys/lockf.h>		/* Must come after sys/malloc.h */
#include <sys/kpi_mbuf.h>
#include <sys/mount_internal.h>
#include <sys/proc_internal.h>	/* for p_start */
#include <sys/kauth.h>
#include <sys/resourcevar.h>
#include <sys/socket.h>
#include <sys/unistd.h>
#include <sys/user.h>
#include <sys/vnode_internal.h>

#include <kern/thread.h>

#include <machine/limits.h>

#include <net/if.h>

#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
#include <nfs/nfsmount.h>
#include <nfs/nfsnode.h>
#include <nfs/nfs_lock.h>

#define OFF_MAX QUAD_MAX

/*
 * globals for managing the lockd fifo
 */
vnode_t nfslockdvnode = 0;
int nfslockdwaiting = 0;
time_t nfslockdstarttimeout = 0;
int nfslockdfifolock = 0;
#define NFSLOCKDFIFOLOCK_LOCKED	1
#define NFSLOCKDFIFOLOCK_WANT	2
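/*
 * nfslockdfifolock is a simple sleep lock protecting writes to the lockd
 * fifo: a writer sets NFSLOCKDFIFOLOCK_LOCKED, contenders set
 * NFSLOCKDFIFOLOCK_WANT and tsleep() on &nfslockdfifolock, and the holder
 * clears the LOCKED bit and wakes any waiters when it is done
 * (see the fifo write in nfs_dolock()).
 */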

/*
 * pending lock request messages are kept in this queue, which is
 * kept sorted by transaction ID (xid).
 */
uint64_t nfs_lockxid = 0;
LOCKD_MSG_QUEUE nfs_pendlockq;

/*
 * This structure is used to identify processes which have acquired NFS locks.
 * Knowing which processes have ever acquired locks allows us to short-circuit
 * unlock requests for processes that have never had an NFS file lock, thus
 * avoiding a costly and unnecessary lockd request.
 */
struct nfs_lock_pid {
	TAILQ_ENTRY(nfs_lock_pid) lp_lru;	/* LRU list */
	LIST_ENTRY(nfs_lock_pid) lp_hash;	/* hash chain */
	int lp_valid;				/* valid entry? */
	int lp_time;				/* last time seen valid */
	pid_t lp_pid;				/* The process ID. */
	struct timeval lp_pid_start;		/* Start time of process id */
};

#define NFS_LOCK_PID_HASH_SIZE		64	// XXX tune me
#define NFS_LOCK_PID_HASH(pid)	\
	(&nfs_lock_pid_hash_tbl[(pid) & nfs_lock_pid_hash])
LIST_HEAD(, nfs_lock_pid) *nfs_lock_pid_hash_tbl;
TAILQ_HEAD(, nfs_lock_pid) nfs_lock_pid_lru;
u_long nfs_lock_pid_hash;
int nfs_lock_pid_lock;
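/*
 * Entries are hashed by pid, but a hash hit alone isn't trusted: pids get
 * recycled, so nfs_lock_pid_check() also compares lp_pid_start against the
 * process's current start time and treats a mismatch as a stale entry.
 * nfs_lock_pid_lock serializes access to the hash table and LRU list.
 */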


/*
 * initialize global nfs lock state
 */
void
nfs_lockinit(void)
{
	TAILQ_INIT(&nfs_pendlockq);
	nfs_lock_pid_lock = 0;
	nfs_lock_pid_hash_tbl = hashinit(NFS_LOCK_PID_HASH_SIZE,
	    M_TEMP, &nfs_lock_pid_hash);
	TAILQ_INIT(&nfs_lock_pid_lru);
}

/*
 * insert a lock request message into the pending queue
 */
static inline void
nfs_lockdmsg_enqueue(LOCKD_MSG_REQUEST *msgreq)
{
	LOCKD_MSG_REQUEST *mr;

	mr = TAILQ_LAST(&nfs_pendlockq, nfs_lock_msg_queue);
	if (!mr || (msgreq->lmr_msg.lm_xid > mr->lmr_msg.lm_xid)) {
		/* fast path: empty queue or new largest xid */
		TAILQ_INSERT_TAIL(&nfs_pendlockq, msgreq, lmr_next);
		return;
	}
	/* slow path: need to walk list to find insertion point */
	while (mr && (msgreq->lmr_msg.lm_xid < mr->lmr_msg.lm_xid)) {
		mr = TAILQ_PREV(mr, nfs_lock_msg_queue, lmr_next);
	}
	if (mr) {
		TAILQ_INSERT_AFTER(&nfs_pendlockq, mr, msgreq, lmr_next);
	} else {
		TAILQ_INSERT_HEAD(&nfs_pendlockq, msgreq, lmr_next);
	}
}

/*
 * remove a lock request message from the pending queue
 */
static inline void
nfs_lockdmsg_dequeue(LOCKD_MSG_REQUEST *msgreq)
{
	TAILQ_REMOVE(&nfs_pendlockq, msgreq, lmr_next);
}

/*
 * find a pending lock request message by xid
 *
 * We search from the head of the list assuming that the message we're
 * looking for is for an older request (because we have an answer to it).
 * This assumes that lock requests will be answered primarily in FIFO order.
 * However, this may not be the case if there are blocked requests. We may
 * want to move blocked requests to a separate queue (but that'll complicate
 * duplicate xid checking).
 */
static inline LOCKD_MSG_REQUEST *
nfs_lockdmsg_find_by_xid(uint64_t lockxid)
{
	LOCKD_MSG_REQUEST *mr;

	TAILQ_FOREACH(mr, &nfs_pendlockq, lmr_next) {
		if (mr->lmr_msg.lm_xid == lockxid)
			return mr;
		if (mr->lmr_msg.lm_xid > lockxid)
			return NULL;
	}
	return mr;
}

/*
 * Because we can't depend on nlm_granted messages containing the same
 * cookie we sent with the original lock request, we need code to test if
 * an nlm_granted answer matches the lock request. We also need code
 * that can find a lockd message based solely on the nlm_granted answer.
 */

/*
 * compare lockd message to answer
 *
 * returns 0 on equality and 1 if different
 */
static inline int
nfs_lockdmsg_compare_to_answer(LOCKD_MSG_REQUEST *msgreq, struct lockd_ans *ansp)
{
	if (!(ansp->la_flags & LOCKD_ANS_LOCK_INFO))
		return 1;
	if (msgreq->lmr_msg.lm_fl.l_pid != ansp->la_pid)
		return 1;
	if (msgreq->lmr_msg.lm_fl.l_start != ansp->la_start)
		return 1;
	if (msgreq->lmr_msg.lm_fl.l_len != ansp->la_len)
		return 1;
	if (msgreq->lmr_msg.lm_fh_len != ansp->la_fh_len)
		return 1;
	if (bcmp(msgreq->lmr_msg.lm_fh, ansp->la_fh, ansp->la_fh_len))
		return 1;
	return 0;
}

/*
 * find a pending lock request message based on the lock info provided
 * in the lockd_ans/nlm_granted data. We need this because we can't
 * depend on nlm_granted messages containing the same cookie we sent
 * with the original lock request.
 *
 * We search from the head of the list assuming that the message we're
 * looking for is for an older request (because we have an answer to it).
 * This assumes that lock requests will be answered primarily in FIFO order.
 * However, this may not be the case if there are blocked requests. We may
 * want to move blocked requests to a separate queue (but that'll complicate
 * duplicate xid checking).
 */
static inline LOCKD_MSG_REQUEST *
nfs_lockdmsg_find_by_answer(struct lockd_ans *ansp)
{
	LOCKD_MSG_REQUEST *mr;

	if (!(ansp->la_flags & LOCKD_ANS_LOCK_INFO))
		return NULL;
	TAILQ_FOREACH(mr, &nfs_pendlockq, lmr_next) {
		if (!nfs_lockdmsg_compare_to_answer(mr, ansp))
			break;
	}
	return mr;
}

/*
 * return the next unique lock request transaction ID
 */
static inline uint64_t
nfs_lockxid_get(void)
{
	LOCKD_MSG_REQUEST *mr;

	/* derive initial lock xid from system time */
	if (!nfs_lockxid) {
		/*
		 * Note: it's OK if this code inits nfs_lockxid to 0 (for example,
		 * due to a broken clock) because we immediately increment it
		 * and we guarantee to never use xid 0. So, nfs_lockxid should only
		 * ever be 0 the first time this function is called.
		 */
		struct timeval tv;
		microtime(&tv);
		nfs_lockxid = (uint64_t)tv.tv_sec << 12;
	}

	/* make sure we get a unique xid */
	do {
		/* Skip zero xid if it should ever happen. */
		if (++nfs_lockxid == 0)
			nfs_lockxid++;
		if (!(mr = TAILQ_LAST(&nfs_pendlockq, nfs_lock_msg_queue)) ||
		    (mr->lmr_msg.lm_xid < nfs_lockxid)) {
			/* fast path: empty queue or new largest xid */
			break;
		}
		/* check if xid is already in use */
	} while (nfs_lockdmsg_find_by_xid(nfs_lockxid));

	return nfs_lockxid;
}


/*
 * Check the nfs_lock_pid hash table for an entry and, if requested,
 * add the entry if it is not found.
 *
 * (Also, if adding, try to clean up some stale entries.)
 */
static int
nfs_lock_pid_check(proc_t p, int addflag, vnode_t vp)
{
	struct nfs_lock_pid *lp, *lplru, *lplru_next;
	proc_t plru;
	int error = 0;
	struct timeval now;

	/* lock hash */
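	/*
	 * nfs_lock_pid_lock is a simple sleep lock on the pid table:
	 * 0 = free, 1 = held, -1 = held with waiters sleeping on its
	 * address.  The holder wakes any waiters when it unlocks below.
	 */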
loop:
	if (nfs_lock_pid_lock) {
		struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
		while (nfs_lock_pid_lock) {
			nfs_lock_pid_lock = -1;
			tsleep(&nfs_lock_pid_lock, PCATCH, "nfslockpid", 0);
			if ((error = nfs_sigintr(nmp, NULL, p)))
				return (error);
		}
		goto loop;
	}
	nfs_lock_pid_lock = 1;

	/* Search hash chain */
	error = ENOENT;
	lp = NFS_LOCK_PID_HASH(proc_pid(p))->lh_first;
	for (; lp != NULL; lp = lp->lp_hash.le_next)
		if (lp->lp_pid == proc_pid(p)) {
			/* found pid... */
			if (timevalcmp(&lp->lp_pid_start, &p->p_stats->p_start, ==)) {
				/* ...and it's valid */
				/* move to tail of LRU */
				TAILQ_REMOVE(&nfs_lock_pid_lru, lp, lp_lru);
				microuptime(&now);
				lp->lp_time = now.tv_sec;
				TAILQ_INSERT_TAIL(&nfs_lock_pid_lru, lp, lp_lru);
				error = 0;
				break;
			}
			/* ...but it's no longer valid */
			/* remove from hash, invalidate, and move to lru head */
			LIST_REMOVE(lp, lp_hash);
			lp->lp_valid = 0;
			TAILQ_REMOVE(&nfs_lock_pid_lru, lp, lp_lru);
			TAILQ_INSERT_HEAD(&nfs_lock_pid_lru, lp, lp_lru);
			lp = NULL;
			break;
		}

	/* if we didn't find it (valid) and we've been asked to add it */
	if ((error == ENOENT) && addflag) {
		/* scan lru list for invalid, stale entries to reuse/free */
		int lrucnt = 0;
		microuptime(&now);
		for (lplru = TAILQ_FIRST(&nfs_lock_pid_lru); lplru; lplru = lplru_next) {
			lplru_next = TAILQ_NEXT(lplru, lp_lru);
			if (lplru->lp_valid && (lplru->lp_time >= (now.tv_sec - 2))) {
				/*
				 * If the oldest LRU entry is relatively new, then don't
				 * bother scanning any further.
				 */
				break;
			}
			/* remove entry from LRU, and check if it's still in use */
			TAILQ_REMOVE(&nfs_lock_pid_lru, lplru, lp_lru);
			if (!lplru->lp_valid || !(plru = pfind(lplru->lp_pid)) ||
			    timevalcmp(&lplru->lp_pid_start, &plru->p_stats->p_start, !=)) {
				/* no longer in use */
				LIST_REMOVE(lplru, lp_hash);
				if (!lp) {
					/* we'll reuse this one */
					lp = lplru;
				} else {
					/* we can free this one */
					FREE(lplru, M_TEMP);
				}
			} else {
				/* still in use */
				lplru->lp_time = now.tv_sec;
				TAILQ_INSERT_TAIL(&nfs_lock_pid_lru, lplru, lp_lru);
			}
			/* don't check too many entries at once */
			if (++lrucnt > 8)
				break;
		}
		if (!lp) {
			/* we need to allocate a new one */
			MALLOC(lp, struct nfs_lock_pid *, sizeof(struct nfs_lock_pid),
			    M_TEMP, M_WAITOK | M_ZERO);
		}
		if (!lp) {
			error = ENOMEM;
		} else {
			/* (re)initialize nfs_lock_pid info */
			lp->lp_pid = proc_pid(p);
			lp->lp_pid_start = p->p_stats->p_start;
			/* insert pid in hash */
			LIST_INSERT_HEAD(NFS_LOCK_PID_HASH(lp->lp_pid), lp, lp_hash);
			lp->lp_valid = 1;
			lp->lp_time = now.tv_sec;
			TAILQ_INSERT_TAIL(&nfs_lock_pid_lru, lp, lp_lru);
			error = 0;
		}
	}

	/* unlock hash */
	if (nfs_lock_pid_lock < 0) {
		nfs_lock_pid_lock = 0;
		wakeup(&nfs_lock_pid_lock);
	} else
		nfs_lock_pid_lock = 0;

	return (error);
}


/*
 * nfs_advlock --
 *	NFS advisory byte-level locks.
 */
int
nfs_dolock(struct vnop_advlock_args *ap)
/* struct vnop_advlock_args {
	struct vnodeop_desc *a_desc;
	vnode_t a_vp;
	caddr_t a_id;
	int a_op;
	struct flock *a_fl;
	int a_flags;
	vfs_context_t a_context;
}; */
{
	LOCKD_MSG_REQUEST msgreq;
	LOCKD_MSG *msg;
	vnode_t vp, wvp;
	struct nfsnode *np;
	int error, error1;
	struct flock *fl;
	int fmode, ioflg;
	struct nfsmount *nmp;
	struct nfs_vattr nvattr;
	off_t start, end;
	struct timeval now;
	int timeo, endtime, lastmsg, wentdown = 0;
	int lockpidcheck;
	kauth_cred_t cred;
	proc_t p;
	struct sockaddr *saddr;

	p = vfs_context_proc(ap->a_context);
	cred = vfs_context_ucred(ap->a_context);

	vp = ap->a_vp;
	fl = ap->a_fl;
	np = VTONFS(vp);

	nmp = VFSTONFS(vnode_mount(vp));
	if (!nmp)
		return (ENXIO);
	if (nmp->nm_flag & NFSMNT_NOLOCKS)
		return (ENOTSUP);

	/*
	 * The NLM protocol doesn't allow the server to return an error
	 * on ranges, so we do it. Pre LFS (Large File Summit)
	 * standards required EINVAL for the range errors. More recent
	 * standards use EOVERFLOW, but their EINVAL wording still
	 * encompasses these errors.
	 * Any code sensitive to this is either:
	 * 1) written pre-LFS and so can handle only EINVAL, or
	 * 2) written post-LFS and thus ought to be tolerant of pre-LFS
	 *    implementations.
	 * Since returning EOVERFLOW certainly breaks 1), we return EINVAL.
	 */
	if (fl->l_whence != SEEK_END) {
		if ((fl->l_whence != SEEK_CUR && fl->l_whence != SEEK_SET) ||
		    fl->l_start < 0 ||
		    (fl->l_len > 0 && fl->l_len - 1 > OFF_MAX - fl->l_start) ||
		    (fl->l_len < 0 && fl->l_start + fl->l_len < 0))
			return (EINVAL);
	}
	/*
	 * If the lock daemon is running, take a ref on its fifo vnode
	 */
	if (!(wvp = nfslockdvnode)) {
		if (!nfslockdwaiting && !nfslockdstarttimeout)
			return (ENOTSUP);
		/*
		 * Don't wake lock daemon if it hasn't been started yet and
		 * this is an unlock request (since we couldn't possibly
		 * actually have a lock on the file). This could be an
		 * uninformed unlock request due to closef()'s behavior of doing
		 * unlocks on all files if a process has had a lock on ANY file.
		 */
		if (!nfslockdvnode && (fl->l_type == F_UNLCK))
			return (EINVAL);
		microuptime(&now);
		if (nfslockdwaiting) {
			/* wake up lock daemon */
			nfslockdstarttimeout = now.tv_sec + 60;
			(void)wakeup((void *)&nfslockdwaiting);
		}
		/* wait on nfslockdvnode for a while to allow daemon to start */
		while (!nfslockdvnode && (now.tv_sec < nfslockdstarttimeout)) {
			error = tsleep((void *)&nfslockdvnode, PCATCH | PUSER, "lockdstart", 2*hz);
			if (error && (error != EWOULDBLOCK))
				return (error);
			/* check that we still have our mount... */
			/* ...and that we still support locks */
			nmp = VFSTONFS(vnode_mount(vp));
			if (!nmp)
				return (ENXIO);
			if (nmp->nm_flag & NFSMNT_NOLOCKS)
				return (ENOTSUP);
			if (!error)
				break;
			microuptime(&now);
		}
		/*
		 * check for nfslockdvnode
		 * If it hasn't started by now, there's a problem.
		 */
		if (!(wvp = nfslockdvnode))
			return (ENOTSUP);
	}
	error = vnode_getwithref(wvp);
	if (error)
		return (ENOTSUP);
	error = vnode_ref(wvp);
	if (error) {
		vnode_put(wvp);
		return (ENOTSUP);
	}

	/*
	 * Need to check if this process has successfully acquired an NFS lock before.
	 * If not, and this is an unlock request, we can simply return success here.
	 */
	lockpidcheck = nfs_lock_pid_check(p, 0, vp);
	if (lockpidcheck) {
		if (lockpidcheck != ENOENT) {
			vnode_rele(wvp);
			vnode_put(wvp);
			return (lockpidcheck);
		}
		if (ap->a_op == F_UNLCK) {
			vnode_rele(wvp);
			vnode_put(wvp);
			return (0);
		}
	}

	/*
	 * The NFS Lock Manager protocol doesn't directly handle
	 * negative lengths or SEEK_END, so we need to normalize
	 * things here where we have all the info.
	 * (Note: SEEK_CUR is already adjusted for at this point)
	 */
	/* Convert the flock structure into a start and end. */
	switch (fl->l_whence) {
	case SEEK_SET:
	case SEEK_CUR:
		/*
		 * Caller is responsible for adding any necessary offset
		 * to fl->l_start when SEEK_CUR is used.
		 */
		start = fl->l_start;
		break;
	case SEEK_END:
		/* need to flush, and refetch attributes to make */
		/* sure we have the correct end of file offset */
		if (np->n_flag & NMODIFIED) {
			NATTRINVALIDATE(np);
			error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
			if (error) {
				vnode_rele(wvp);
				vnode_put(wvp);
				return (error);
			}
		}
		NATTRINVALIDATE(np);

		error = nfs_getattr(vp, &nvattr, cred, p);
		if (error) {
			vnode_rele(wvp);
			vnode_put(wvp);
			return (error);
		}
		start = np->n_size + fl->l_start;
		break;
	default:
		vnode_rele(wvp);
		vnode_put(wvp);
		return (EINVAL);
	}
	if (fl->l_len == 0)
		end = -1;
	else if (fl->l_len > 0)
		end = start + fl->l_len - 1;
	else { /* l_len is negative */
		end = start - 1;
		start += fl->l_len;
	}
	if (start < 0) {
		vnode_rele(wvp);
		vnode_put(wvp);
		return (EINVAL);
	}
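	/*
	 * NFSv2 mounts use the older NLM protocol, which only carries
	 * 32-bit offsets; ranges at or beyond 2^31 are rejected here so
	 * the request can't overflow what the protocol can represent.
	 */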
	if (!NFS_ISV3(vp) &&
	    ((start >= 0x80000000) || (end >= 0x80000000))) {
		vnode_rele(wvp);
		vnode_put(wvp);
		return (EINVAL);
	}

	/*
	 * Fill in the information structure.
	 */
	msgreq.lmr_answered = 0;
	msgreq.lmr_errno = 0;
	msgreq.lmr_saved_errno = 0;
	msg = &msgreq.lmr_msg;
	msg->lm_version = LOCKD_MSG_VERSION;
	msg->lm_flags = 0;

	msg->lm_fl = *fl;
	msg->lm_fl.l_start = start;
	if (end != -1)
		msg->lm_fl.l_len = end - start + 1;
	msg->lm_fl.l_pid = proc_pid(p);

	if (ap->a_flags & F_WAIT)
		msg->lm_flags |= LOCKD_MSG_BLOCK;
	if (ap->a_op == F_GETLK)
		msg->lm_flags |= LOCKD_MSG_TEST;

	nmp = VFSTONFS(vnode_mount(vp));
	if (!nmp) {
		vnode_rele(wvp);
		vnode_put(wvp);
		return (ENXIO);
	}

	saddr = mbuf_data(nmp->nm_nam);
	bcopy(saddr, &msg->lm_addr, min(sizeof msg->lm_addr, saddr->sa_len));
	msg->lm_fh_len = NFS_ISV3(vp) ? VTONFS(vp)->n_fhsize : NFSX_V2FH;
	bcopy(VTONFS(vp)->n_fhp, msg->lm_fh, msg->lm_fh_len);
	if (NFS_ISV3(vp))
		msg->lm_flags |= LOCKD_MSG_NFSV3;
	cru2x(cred, &msg->lm_cred);

	microuptime(&now);
	lastmsg = now.tv_sec - ((nmp->nm_tprintf_delay) - (nmp->nm_tprintf_initial_delay));

	fmode = FFLAGS(O_WRONLY);
	if ((error = VNOP_OPEN(wvp, fmode, ap->a_context))) {
		vnode_rele(wvp);
		vnode_put(wvp);
		return (error);
	}
	vnode_lock(wvp);
	++wvp->v_writecount;
	vnode_unlock(wvp);

	/* allocate unique xid */
	msg->lm_xid = nfs_lockxid_get();
	nfs_lockdmsg_enqueue(&msgreq);

	timeo = 2*hz;
#define IO_NOMACCHECK 0
	ioflg = IO_UNIT | IO_NOMACCHECK;
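	/*
	 * Each pass of this loop writes the request to the lockd fifo and
	 * then sleeps on &msgreq until nfslockdans() marks it answered,
	 * retrying, cancelling, or bailing out on timeouts and mount state
	 * changes along the way.
	 */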
	for (;;) {
		error = 0;
		while (nfslockdfifolock & NFSLOCKDFIFOLOCK_LOCKED) {
			nfslockdfifolock |= NFSLOCKDFIFOLOCK_WANT;
			error = tsleep((void *)&nfslockdfifolock,
			    PCATCH | PUSER, "lockdfifo", 20*hz);
			if (error)
				break;
		}
		if (error)
			break;
		nfslockdfifolock |= NFSLOCKDFIFOLOCK_LOCKED;

		error = vn_rdwr(UIO_WRITE, wvp, (caddr_t)msg, sizeof(*msg), 0,
		    UIO_SYSSPACE32, ioflg, proc_ucred(kernproc), NULL, p);

		nfslockdfifolock &= ~NFSLOCKDFIFOLOCK_LOCKED;
		if (nfslockdfifolock & NFSLOCKDFIFOLOCK_WANT) {
			nfslockdfifolock &= ~NFSLOCKDFIFOLOCK_WANT;
			wakeup((void *)&nfslockdfifolock);
		}

		if (error && (((ioflg & IO_NDELAY) == 0) || error != EAGAIN)) {
			break;
		}

		/*
		 * Always wait for an answer. Not waiting for unlocks could
		 * cause a lock to be left if the unlock request gets dropped.
		 */

		/*
		 * Retry if it takes too long to get a response.
		 *
		 * The timeout numbers were picked out of thin air... they start
		 * at 2 and double each timeout with a max of 60 seconds.
		 *
		 * In order to maintain responsiveness, we pass a small timeout
		 * to tsleep and calculate the timeouts ourselves. This allows
		 * us to pick up on mount changes quicker.
		 */
wait_for_granted:
		error = EWOULDBLOCK;
		microuptime(&now);
		if ((timeo/hz) > 0)
			endtime = now.tv_sec + timeo/hz;
		else
			endtime = now.tv_sec + 1;
		while (now.tv_sec < endtime) {
			error = tsleep((void *)&msgreq, PCATCH | PUSER, "lockd", 2*hz);
			if (msgreq.lmr_answered) {
				/*
				 * Note: it's possible to have a lock granted at
				 * essentially the same time that we get interrupted.
				 * Since the lock may be granted, we can't return an
				 * error from this request or we might not unlock the
				 * lock that's been granted.
				 */
				error = 0;
				break;
			}
			if (error != EWOULDBLOCK)
				break;
			/* check that we still have our mount... */
			/* ...and that we still support locks */
			nmp = VFSTONFS(vnode_mount(vp));
			if (!nmp || (nmp->nm_flag & NFSMNT_NOLOCKS))
				break;
			/*
			 * If the mount is hung and we've requested not to hang
			 * on remote filesystems, then bail now.
			 */
			if ((p != NULL) && ((proc_noremotehang(p)) != 0) &&
			    ((nmp->nm_state & (NFSSTA_TIMEO|NFSSTA_LOCKTIMEO)) != 0)) {
				if (fl->l_type == F_UNLCK)
					printf("nfs_dolock: aborting unlock request "
					    "due to timeout (noremotehang)\n");
				error = EIO;
				break;
			}
			microuptime(&now);
		}
		if (error) {
			/* check that we still have our mount... */
			nmp = VFSTONFS(vnode_mount(vp));
			if (!nmp) {
				if (error == EWOULDBLOCK)
					error = ENXIO;
				break;
			}
			/* ...and that we still support locks */
			if (nmp->nm_flag & NFSMNT_NOLOCKS) {
				if (error == EWOULDBLOCK)
					error = ENOTSUP;
				break;
			}
			if ((error == ENOTSUP) &&
			    (nmp->nm_state & NFSSTA_LOCKSWORK)) {
				/*
				 * We have evidence that locks work, yet lockd
				 * returned ENOTSUP. This is probably because
				 * it was unable to contact the server's lockd to
				 * send it the request.
				 *
				 * Because we know locks work, we'll consider
				 * this failure to be a timeout.
				 */
				error = EWOULDBLOCK;
			}
			if (error != EWOULDBLOCK) {
				/*
				 * We're going to bail on this request.
				 * If we were a blocked lock request, send a cancel.
				 */
				if ((msgreq.lmr_errno == EINPROGRESS) &&
				    !(msg->lm_flags & LOCKD_MSG_CANCEL)) {
					/* set this request up as a cancel */
					msg->lm_flags |= LOCKD_MSG_CANCEL;
					nfs_lockdmsg_dequeue(&msgreq);
					msg->lm_xid = nfs_lockxid_get();
					nfs_lockdmsg_enqueue(&msgreq);
					msgreq.lmr_saved_errno = error;
					msgreq.lmr_errno = 0;
					msgreq.lmr_answered = 0;
					/* reset timeout */
					timeo = 2*hz;
					/* send cancel request */
					continue;
				}
				break;
			}

			/*
			 * If the mount is hung and we've requested not to hang
			 * on remote filesystems, then bail now.
			 */
			if ((p != NULL) && ((proc_noremotehang(p)) != 0) &&
			    ((nmp->nm_state & (NFSSTA_TIMEO|NFSSTA_LOCKTIMEO)) != 0)) {
				if (fl->l_type == F_UNLCK)
					printf("nfs_dolock: aborting unlock request "
					    "due to timeout (noremotehang)\n");
				error = EIO;
				break;
			}
			/* warn if we're not getting any response */
			microuptime(&now);
			if ((msgreq.lmr_errno != EINPROGRESS) &&
			    (nmp->nm_tprintf_initial_delay != 0) &&
			    ((lastmsg + nmp->nm_tprintf_delay) < now.tv_sec)) {
				lastmsg = now.tv_sec;
				nfs_down(nmp, p, 0, NFSSTA_LOCKTIMEO, "lockd not responding");
				wentdown = 1;
			}
			if (msgreq.lmr_errno == EINPROGRESS) {
				/*
				 * We've got a blocked lock request that we are
				 * going to retry. First, we'll want to try to
				 * send a cancel for the previous request.
				 *
				 * Clear errno so if we don't get a response
				 * to the resend we'll call nfs_down().
				 * Also reset timeout because we'll expect a
				 * quick response to the cancel/resend (even if
				 * it is NLM_BLOCKED).
				 */
				msg->lm_flags |= LOCKD_MSG_CANCEL;
				nfs_lockdmsg_dequeue(&msgreq);
				msg->lm_xid = nfs_lockxid_get();
				nfs_lockdmsg_enqueue(&msgreq);
				msgreq.lmr_saved_errno = msgreq.lmr_errno;
				msgreq.lmr_errno = 0;
				msgreq.lmr_answered = 0;
				timeo = 2*hz;
				/* send cancel then resend request */
				continue;
			}
			/*
			 * We timed out, so we will rewrite the request
			 * to the fifo, but only if it isn't already full.
			 */
			ioflg |= IO_NDELAY;
			timeo *= 2;
			if (timeo > 60*hz)
				timeo = 60*hz;
			/* resend request */
			continue;
		}

		/* we got a response, so the server's lockd is OK */
		nfs_up(VFSTONFS(vnode_mount(vp)), p, NFSSTA_LOCKTIMEO,
		    wentdown ? "lockd alive again" : NULL);
		wentdown = 0;

		if (msgreq.lmr_errno == EINPROGRESS) {
			/* got NLM_BLOCKED response */
			/* need to wait for NLM_GRANTED */
			timeo = 60*hz;
			msgreq.lmr_answered = 0;
			goto wait_for_granted;
		}

		if ((msg->lm_flags & LOCKD_MSG_CANCEL) &&
		    (msgreq.lmr_saved_errno == EINPROGRESS)) {
			/*
			 * We just got a successful reply to the
			 * cancel of the previous blocked lock request.
			 * Now, go ahead and resend the request.
			 */
			msg->lm_flags &= ~LOCKD_MSG_CANCEL;
			nfs_lockdmsg_dequeue(&msgreq);
			msg->lm_xid = nfs_lockxid_get();
			nfs_lockdmsg_enqueue(&msgreq);
			msgreq.lmr_saved_errno = 0;
			msgreq.lmr_errno = 0;
			msgreq.lmr_answered = 0;
			timeo = 2*hz;
			/* resend request */
			continue;
		}

		if ((msg->lm_flags & LOCKD_MSG_TEST) && msgreq.lmr_errno == 0) {
			if (msg->lm_fl.l_type != F_UNLCK) {
				fl->l_type = msg->lm_fl.l_type;
				fl->l_pid = msg->lm_fl.l_pid;
				fl->l_start = msg->lm_fl.l_start;
				fl->l_len = msg->lm_fl.l_len;
				fl->l_whence = SEEK_SET;
			} else {
				fl->l_type = F_UNLCK;
			}
		}

		/*
		 * If the blocked lock request was cancelled, restore the
		 * error condition from when we originally bailed on the
		 * request.
		 */
		if (msg->lm_flags & LOCKD_MSG_CANCEL) {
			msg->lm_flags &= ~LOCKD_MSG_CANCEL;
			error = msgreq.lmr_saved_errno;
		} else
			error = msgreq.lmr_errno;

		if (!error) {
			/* record that NFS file locking has worked on this mount */
			nmp = VFSTONFS(vnode_mount(vp));
			if (nmp && !(nmp->nm_state & NFSSTA_LOCKSWORK))
				nmp->nm_state |= NFSSTA_LOCKSWORK;
			/*
			 * If we successfully acquired a lock, make sure this pid
			 * is in the nfs_lock_pid hash table so we know we can't
			 * short-circuit unlock requests.
			 */
			if ((lockpidcheck == ENOENT) &&
			    ((ap->a_op == F_SETLK) || (ap->a_op == F_SETLKW)))
				nfs_lock_pid_check(p, 1, vp);

		}
		break;
	}

	nfs_lockdmsg_dequeue(&msgreq);

	error1 = VNOP_CLOSE(wvp, FWRITE, ap->a_context);
	vnode_rele(wvp);
	vnode_put(wvp);
	/* prefer any previous 'error' to our VNOP_CLOSE 'error1'. */
	return (error != 0 ? error : error1);
}

/*
 * nfslockdans --
 *	NFS advisory byte-level locks answer from the lock daemon.
 */
int
nfslockdans(proc_t p, struct lockd_ans *ansp)
{
	LOCKD_MSG_REQUEST *msgreq;
	int error;

	/* Let root make this call. */
	error = proc_suser(p);
	if (error)
		return (error);

	/* the version should match, or we're out of sync */
	if (ansp->la_version != LOCKD_ANS_VERSION)
		return (EINVAL);

	/* try to find the lockd message by transaction id (cookie) */
	msgreq = nfs_lockdmsg_find_by_xid(ansp->la_xid);
	if (ansp->la_flags & LOCKD_ANS_GRANTED) {
		/*
		 * We can't depend on the granted message having our cookie,
		 * so we check the answer against the lockd message found.
		 * If no message was found or it doesn't match the answer,
		 * we look for the lockd message by the answer's lock info.
		 */
		if (!msgreq || nfs_lockdmsg_compare_to_answer(msgreq, ansp))
			msgreq = nfs_lockdmsg_find_by_answer(ansp);
		/*
		 * We need to make sure this request isn't being cancelled.
		 * If it is, we don't want to accept the granted message.
		 */
		if (msgreq && (msgreq->lmr_msg.lm_flags & LOCKD_MSG_CANCEL))
			msgreq = NULL;
	}
	if (!msgreq)
		return (EPIPE);

	msgreq->lmr_errno = ansp->la_errno;
	if ((msgreq->lmr_msg.lm_flags & LOCKD_MSG_TEST) && msgreq->lmr_errno == 0) {
		if (ansp->la_flags & LOCKD_ANS_LOCK_INFO) {
			if (ansp->la_flags & LOCKD_ANS_LOCK_EXCL)
				msgreq->lmr_msg.lm_fl.l_type = F_WRLCK;
			else
				msgreq->lmr_msg.lm_fl.l_type = F_RDLCK;
			msgreq->lmr_msg.lm_fl.l_pid = ansp->la_pid;
			msgreq->lmr_msg.lm_fl.l_start = ansp->la_start;
			msgreq->lmr_msg.lm_fl.l_len = ansp->la_len;
		} else {
			msgreq->lmr_msg.lm_fl.l_type = F_UNLCK;
		}
	}

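	/*
	 * Record the answer and wake the thread sleeping on this request
	 * in nfs_dolock(); it checks lmr_answered when its tsleep returns.
	 */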
	msgreq->lmr_answered = 1;
	(void)wakeup((void *)msgreq);

	return (0);
}

/*
 * nfslockdfd --
 *	NFS advisory byte-level locks: fifo file# from the lock daemon.
 */
int
nfslockdfd(proc_t p, int fd)
{
	int error;
	vnode_t vp, oldvp;

	error = proc_suser(p);
	if (error)
		return (error);
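	/*
	 * A negative fd clears the registered fifo vnode.  Otherwise look up
	 * the fifo vnode, take a long-term usecount reference on it for
	 * nfslockdvnode via vnode_ref(), and drop the iocount once it has
	 * been published.  Threads waiting in nfs_dolock() for the daemon
	 * to start are woken via &nfslockdvnode.
	 */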
	if (fd < 0) {
		vp = NULL;
	} else {
		error = file_vnode(fd, &vp);
		if (error)
			return (error);
		error = vnode_getwithref(vp);
		if (error)
			return (error);
		error = vnode_ref(vp);
		if (error) {
			vnode_put(vp);
			return (error);
		}
	}
	oldvp = nfslockdvnode;
	nfslockdvnode = vp;
	if (oldvp) {
		vnode_rele(oldvp);
	}
	(void)wakeup((void *)&nfslockdvnode);
	if (vp) {
		vnode_put(vp);
	}
	return (0);
}

/*
 * nfslockdwait --
 *	lock daemon waiting for lock request
 */
int
nfslockdwait(proc_t p)
{
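	/*
	 * Called by the lock daemon to block until a lock request needs
	 * servicing.  nfs_dolock() wakes this sleep (and arms
	 * nfslockdstarttimeout) when a request arrives and no fifo vnode
	 * has been registered yet.
	 */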
	int error;

	error = proc_suser(p);
	if (error)
		return (error);
	if (nfslockdwaiting || nfslockdvnode)
		return (EBUSY);

	nfslockdstarttimeout = 0;
	nfslockdwaiting = 1;
	tsleep((void *)&nfslockdwaiting, PCATCH | PUSER, "lockd", 0);
	nfslockdwaiting = 0;

	return (0);
}