2 * Copyright (c) 2002-2005 Apple Computer, Inc. All rights reserved.
4 * @APPLE_LICENSE_OSREFERENCE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the
10 * License may not be used to create, or enable the creation or
11 * redistribution of, unlawful or unlicensed copies of an Apple operating
12 * system, or to circumvent, violate, or enable the circumvention or
13 * violation of, any terms of an Apple operating system software license
16 * Please obtain a copy of the License at
17 * http://www.opensource.apple.com/apsl/ and read it before using this
20 * The Original Code and all software distributed under the License are
21 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
22 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
23 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
24 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
25 * Please see the License for the specific language governing rights and
26 * limitations under the License.
28 * @APPLE_LICENSE_OSREFERENCE_HEADER_END@
31 * Copyright (c) 1997 Berkeley Software Design, Inc. All rights reserved.
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
36 * 1. Redistributions of source code must retain the above copyright
37 * notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 * notice, this list of conditions and the following disclaimer in the
40 * documentation and/or other materials provided with the distribution.
41 * 3. Berkeley Software Design Inc's name may not be used to endorse or
42 * promote products derived from this software without specific prior
45 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
46 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
47 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
48 * ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
49 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
50 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
51 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
52 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
53 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
54 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
57 * from BSDI nfs_lock.c,v 2.4 1998/12/14 23:49:56 jch Exp
60 #include <sys/cdefs.h>
61 #include <sys/param.h>
62 #include <sys/systm.h>
63 #include <sys/fcntl.h>
64 #include <sys/kernel.h> /* for hz */
65 #include <sys/file_internal.h>
66 #include <sys/malloc.h>
67 #include <sys/lockf.h> /* for hz */ /* Must come after sys/malloc.h */
68 #include <sys/kpi_mbuf.h>
69 #include <sys/mount_internal.h>
70 #include <sys/proc_internal.h> /* for p_start */
71 #include <sys/kauth.h>
72 #include <sys/resourcevar.h>
73 #include <sys/socket.h>
74 #include <sys/unistd.h>
76 #include <sys/vnode_internal.h>
78 #include <kern/thread.h>
80 #include <machine/limits.h>
84 #include <nfs/rpcv2.h>
85 #include <nfs/nfsproto.h>
87 #include <nfs/nfsmount.h>
88 #include <nfs/nfsnode.h>
89 #include <nfs/nfs_lock.h>
91 #define OFF_MAX QUAD_MAX
94 * globals for managing the lockd fifo
96 vnode_t nfslockdvnode
= 0;
97 int nfslockdwaiting
= 0;
98 time_t nfslockdstarttimeout
= 0;
99 int nfslockdfifolock
= 0;
100 #define NFSLOCKDFIFOLOCK_LOCKED 1
101 #define NFSLOCKDFIFOLOCK_WANT 2
104 * pending lock request messages are kept in this queue which is
105 * kept sorted by transaction ID (xid).
107 uint64_t nfs_lockxid
= 0;
108 LOCKD_MSG_QUEUE nfs_pendlockq
;
111 * This structure is used to identify processes which have acquired NFS locks.
112 * Knowing which processes have ever acquired locks allows us to short-circuit
113 * unlock requests for processes that have never had an NFS file lock, thus
114 * avoiding a costly and unnecessary lockd request.
116 struct nfs_lock_pid
{
117 TAILQ_ENTRY(nfs_lock_pid
) lp_lru
; /* LRU list */
118 LIST_ENTRY(nfs_lock_pid
) lp_hash
; /* hash chain */
119 int lp_valid
; /* valid entry? */
120 int lp_time
; /* last time seen valid */
121 pid_t lp_pid
; /* The process ID. */
122 struct timeval lp_pid_start
; /* Start time of process id */
125 #define NFS_LOCK_PID_HASH_SIZE 64 // XXX tune me
126 #define NFS_LOCK_PID_HASH(pid) \
127 (&nfs_lock_pid_hash_tbl[(pid) & nfs_lock_pid_hash])
128 LIST_HEAD(, nfs_lock_pid
) *nfs_lock_pid_hash_tbl
;
129 TAILQ_HEAD(, nfs_lock_pid
) nfs_lock_pid_lru
;
130 u_long nfs_lock_pid_hash
;
131 int nfs_lock_pid_lock
;
135 * initialize global nfs lock state
140 TAILQ_INIT(&nfs_pendlockq
);
141 nfs_lock_pid_lock
= 0;
142 nfs_lock_pid_hash_tbl
= hashinit(NFS_LOCK_PID_HASH_SIZE
,
143 M_TEMP
, &nfs_lock_pid_hash
);
144 TAILQ_INIT(&nfs_lock_pid_lru
);
148 * insert a lock request message into the pending queue
151 nfs_lockdmsg_enqueue(LOCKD_MSG_REQUEST
*msgreq
)
153 LOCKD_MSG_REQUEST
*mr
;
155 mr
= TAILQ_LAST(&nfs_pendlockq
, nfs_lock_msg_queue
);
156 if (!mr
|| (msgreq
->lmr_msg
.lm_xid
> mr
->lmr_msg
.lm_xid
)) {
157 /* fast path: empty queue or new largest xid */
158 TAILQ_INSERT_TAIL(&nfs_pendlockq
, msgreq
, lmr_next
);
161 /* slow path: need to walk list to find insertion point */
162 while (mr
&& (msgreq
->lmr_msg
.lm_xid
> mr
->lmr_msg
.lm_xid
)) {
163 mr
= TAILQ_PREV(mr
, nfs_lock_msg_queue
, lmr_next
);
166 TAILQ_INSERT_AFTER(&nfs_pendlockq
, mr
, msgreq
, lmr_next
);
168 TAILQ_INSERT_HEAD(&nfs_pendlockq
, msgreq
, lmr_next
);
173 * remove a lock request message from the pending queue
176 nfs_lockdmsg_dequeue(LOCKD_MSG_REQUEST
*msgreq
)
178 TAILQ_REMOVE(&nfs_pendlockq
, msgreq
, lmr_next
);
182 * find a pending lock request message by xid
184 * We search from the head of the list assuming that the message we're
185 * looking for is for an older request (because we have an answer to it).
186 * This assumes that lock requests will be answered primarily in FIFO order.
187 * However, this may not be the case if there are blocked requests. We may
188 * want to move blocked requests to a separate queue (but that'll complicate
189 * duplicate xid checking).
191 static inline LOCKD_MSG_REQUEST
*
192 nfs_lockdmsg_find_by_xid(uint64_t lockxid
)
194 LOCKD_MSG_REQUEST
*mr
;
196 TAILQ_FOREACH(mr
, &nfs_pendlockq
, lmr_next
) {
197 if (mr
->lmr_msg
.lm_xid
== lockxid
)
199 if (mr
->lmr_msg
.lm_xid
> lockxid
)
206 * Because we can't depend on nlm_granted messages containing the same
207 * cookie we sent with the original lock request, we need code to test if
208 * an nlm_granted answer matches the lock request. We also need code
209 * that can find a lockd message based solely on the nlm_granted answer.
213 * compare lockd message to answer
215 * returns 0 on equality and 1 if different
218 nfs_lockdmsg_compare_to_answer(LOCKD_MSG_REQUEST
*msgreq
, struct lockd_ans
*ansp
)
220 if (!(ansp
->la_flags
& LOCKD_ANS_LOCK_INFO
))
222 if (msgreq
->lmr_msg
.lm_fl
.l_pid
!= ansp
->la_pid
)
224 if (msgreq
->lmr_msg
.lm_fl
.l_start
!= ansp
->la_start
)
226 if (msgreq
->lmr_msg
.lm_fl
.l_len
!= ansp
->la_len
)
228 if (msgreq
->lmr_msg
.lm_fh_len
!= ansp
->la_fh_len
)
230 if (bcmp(msgreq
->lmr_msg
.lm_fh
, ansp
->la_fh
, ansp
->la_fh_len
))
236 * find a pending lock request message based on the lock info provided
237 * in the lockd_ans/nlm_granted data. We need this because we can't
238 * depend on nlm_granted messages containing the same cookie we sent
239 * with the original lock request.
241 * We search from the head of the list assuming that the message we're
242 * looking for is for an older request (because we have an answer to it).
243 * This assumes that lock requests will be answered primarily in FIFO order.
244 * However, this may not be the case if there are blocked requests. We may
245 * want to move blocked requests to a separate queue (but that'll complicate
246 * duplicate xid checking).
248 static inline LOCKD_MSG_REQUEST
*
249 nfs_lockdmsg_find_by_answer(struct lockd_ans
*ansp
)
251 LOCKD_MSG_REQUEST
*mr
;
253 if (!(ansp
->la_flags
& LOCKD_ANS_LOCK_INFO
))
255 TAILQ_FOREACH(mr
, &nfs_pendlockq
, lmr_next
) {
256 if (!nfs_lockdmsg_compare_to_answer(mr
, ansp
))
263 * return the next unique lock request transaction ID
265 static inline uint64_t
266 nfs_lockxid_get(void)
268 LOCKD_MSG_REQUEST
*mr
;
270 /* derive initial lock xid from system time */
273 * Note: it's OK if this code inits nfs_lockxid to 0 (for example,
274 * due to a broken clock) because we immediately increment it
275 * and we guarantee to never use xid 0. So, nfs_lockxid should only
276 * ever be 0 the first time this function is called.
280 nfs_lockxid
= (uint64_t)tv
.tv_sec
<< 12;
283 /* make sure we get a unique xid */
285 /* Skip zero xid if it should ever happen. */
286 if (++nfs_lockxid
== 0)
288 if (!(mr
= TAILQ_LAST(&nfs_pendlockq
, nfs_lock_msg_queue
)) ||
289 (mr
->lmr_msg
.lm_xid
< nfs_lockxid
)) {
290 /* fast path: empty queue or new largest xid */
293 /* check if xid is already in use */
294 } while (nfs_lockdmsg_find_by_xid(nfs_lockxid
));
301 * Check the nfs_lock_pid hash table for an entry and, if requested,
302 * add the entry if it is not found.
304 * (Also, if adding, try to clean up some stale entries.)
307 nfs_lock_pid_check(proc_t p
, int addflag
, vnode_t vp
)
309 struct nfs_lock_pid
*lp
, *lplru
, *lplru_next
;
316 if (nfs_lock_pid_lock
) {
317 struct nfsmount
*nmp
= VFSTONFS(vnode_mount(vp
));
318 while (nfs_lock_pid_lock
) {
319 nfs_lock_pid_lock
= -1;
320 tsleep(&nfs_lock_pid_lock
, PCATCH
, "nfslockpid", 0);
321 if ((error
= nfs_sigintr(nmp
, NULL
, p
)))
326 nfs_lock_pid_lock
= 1;
328 /* Search hash chain */
330 lp
= NFS_LOCK_PID_HASH(proc_pid(p
))->lh_first
;
331 for (; lp
!= NULL
; lp
= lp
->lp_hash
.le_next
)
332 if (lp
->lp_pid
== proc_pid(p
)) {
334 if (timevalcmp(&lp
->lp_pid_start
, &p
->p_stats
->p_start
, ==)) {
335 /* ...and it's valid */
336 /* move to tail of LRU */
337 TAILQ_REMOVE(&nfs_lock_pid_lru
, lp
, lp_lru
);
339 lp
->lp_time
= now
.tv_sec
;
340 TAILQ_INSERT_TAIL(&nfs_lock_pid_lru
, lp
, lp_lru
);
344 /* ...but it's no longer valid */
345 /* remove from hash, invalidate, and move to lru head */
346 LIST_REMOVE(lp
, lp_hash
);
348 TAILQ_REMOVE(&nfs_lock_pid_lru
, lp
, lp_lru
);
349 TAILQ_INSERT_HEAD(&nfs_lock_pid_lru
, lp
, lp_lru
);
354 /* if we didn't find it (valid) and we've been asked to add it */
355 if ((error
== ENOENT
) && addflag
) {
356 /* scan lru list for invalid, stale entries to reuse/free */
359 for (lplru
= TAILQ_FIRST(&nfs_lock_pid_lru
); lplru
; lplru
= lplru_next
) {
360 lplru_next
= TAILQ_NEXT(lplru
, lp_lru
);
361 if (lplru
->lp_valid
&& (lplru
->lp_time
>= (now
.tv_sec
- 2))) {
363 * If the oldest LRU entry is relatively new, then don't
364 * bother scanning any further.
368 /* remove entry from LRU, and check if it's still in use */
369 TAILQ_REMOVE(&nfs_lock_pid_lru
, lplru
, lp_lru
);
370 if (!lplru
->lp_valid
|| !(plru
= pfind(lplru
->lp_pid
)) ||
371 timevalcmp(&lplru
->lp_pid_start
, &plru
->p_stats
->p_start
, !=)) {
372 /* no longer in use */
373 LIST_REMOVE(lplru
, lp_hash
);
375 /* we'll reuse this one */
378 /* we can free this one */
383 lplru
->lp_time
= now
.tv_sec
;
384 TAILQ_INSERT_TAIL(&nfs_lock_pid_lru
, lplru
, lp_lru
);
386 /* don't check too many entries at once */
391 /* we need to allocate a new one */
392 MALLOC(lp
, struct nfs_lock_pid
*, sizeof(struct nfs_lock_pid
),
393 M_TEMP
, M_WAITOK
| M_ZERO
);
398 /* (re)initialize nfs_lock_pid info */
399 lp
->lp_pid
= proc_pid(p
);
400 lp
->lp_pid_start
= p
->p_stats
->p_start
;
401 /* insert pid in hash */
402 LIST_INSERT_HEAD(NFS_LOCK_PID_HASH(lp
->lp_pid
), lp
, lp_hash
);
404 lp
->lp_time
= now
.tv_sec
;
405 TAILQ_INSERT_TAIL(&nfs_lock_pid_lru
, lp
, lp_lru
);
411 if (nfs_lock_pid_lock
< 0) {
412 nfs_lock_pid_lock
= 0;
413 wakeup(&nfs_lock_pid_lock
);
415 nfs_lock_pid_lock
= 0;
423 * NFS advisory byte-level locks.
426 nfs_dolock(struct vnop_advlock_args
*ap
)
427 /* struct vnop_advlock_args {
428 struct vnodeop_desc *a_desc;
434 vfs_context_t a_context;
437 LOCKD_MSG_REQUEST msgreq
;
444 struct nfsmount
*nmp
;
445 struct nfs_vattr nvattr
;
448 int timeo
, endtime
, lastmsg
, wentdown
= 0;
452 struct sockaddr
*saddr
;
454 p
= vfs_context_proc(ap
->a_context
);
455 cred
= vfs_context_ucred(ap
->a_context
);
461 nmp
= VFSTONFS(vnode_mount(vp
));
464 if (nmp
->nm_flag
& NFSMNT_NOLOCKS
)
468 * The NLM protocol doesn't allow the server to return an error
469 * on ranges, so we do it. Pre LFS (Large File Summit)
470 * standards required EINVAL for the range errors. More recent
471 * standards use EOVERFLOW, but their EINVAL wording still
472 * encompasses these errors.
473 * Any code sensitive to this is either:
474 * 1) written pre-LFS and so can handle only EINVAL, or
475 * 2) written post-LFS and thus ought to be tolerant of pre-LFS
477 * Since returning EOVERFLOW certainly breaks 1), we return EINVAL.
479 if (fl
->l_whence
!= SEEK_END
) {
480 if ((fl
->l_whence
!= SEEK_CUR
&& fl
->l_whence
!= SEEK_SET
) ||
482 (fl
->l_len
> 0 && fl
->l_len
- 1 > OFF_MAX
- fl
->l_start
) ||
483 (fl
->l_len
< 0 && fl
->l_start
+ fl
->l_len
< 0))
487 * If daemon is running take a ref on its fifo vnode
489 if (!(wvp
= nfslockdvnode
)) {
490 if (!nfslockdwaiting
&& !nfslockdstarttimeout
)
493 * Don't wake lock daemon if it hasn't been started yet and
494 * this is an unlock request (since we couldn't possibly
495 * actually have a lock on the file). This could be an
496 * uninformed unlock request due to closef()'s behavior of doing
497 * unlocks on all files if a process has had a lock on ANY file.
499 if (!nfslockdvnode
&& (fl
->l_type
== F_UNLCK
))
502 if (nfslockdwaiting
) {
503 /* wake up lock daemon */
504 nfslockdstarttimeout
= now
.tv_sec
+ 60;
505 (void)wakeup((void *)&nfslockdwaiting
);
507 /* wait on nfslockdvnode for a while to allow daemon to start */
508 while (!nfslockdvnode
&& (now
.tv_sec
< nfslockdstarttimeout
)) {
509 error
= tsleep((void *)&nfslockdvnode
, PCATCH
| PUSER
, "lockdstart", 2*hz
);
510 if (error
&& (error
!= EWOULDBLOCK
))
512 /* check that we still have our mount... */
513 /* ...and that we still support locks */
514 nmp
= VFSTONFS(vnode_mount(vp
));
517 if (nmp
->nm_flag
& NFSMNT_NOLOCKS
)
524 * check for nfslockdvnode
525 * If it hasn't started by now, there's a problem.
527 if (!(wvp
= nfslockdvnode
))
530 error
= vnode_getwithref(wvp
);
533 error
= vnode_ref(wvp
);
540 * Need to check if this process has successfully acquired an NFS lock before.
541 * If not, and this is an unlock request we can simply return success here.
543 lockpidcheck
= nfs_lock_pid_check(p
, 0, vp
);
545 if (lockpidcheck
!= ENOENT
) {
548 return (lockpidcheck
);
550 if (ap
->a_op
== F_UNLCK
) {
558 * The NFS Lock Manager protocol doesn't directly handle
559 * negative lengths or SEEK_END, so we need to normalize
560 * things here where we have all the info.
561 * (Note: SEEK_CUR is already adjusted for at this point)
563 /* Convert the flock structure into a start and end. */
564 switch (fl
->l_whence
) {
568 * Caller is responsible for adding any necessary offset
569 * to fl->l_start when SEEK_CUR is used.
574 /* need to flush, and refetch attributes to make */
575 /* sure we have the correct end of file offset */
576 if (np
->n_flag
& NMODIFIED
) {
578 error
= nfs_vinvalbuf(vp
, V_SAVE
, cred
, p
, 1);
587 error
= nfs_getattr(vp
, &nvattr
, cred
, p
);
593 start
= np
->n_size
+ fl
->l_start
;
602 else if (fl
->l_len
> 0)
603 end
= start
+ fl
->l_len
- 1;
604 else { /* l_len is negative */
614 ((start
>= 0x80000000) || (end
>= 0x80000000))) {
621 * Fill in the information structure.
623 msgreq
.lmr_answered
= 0;
624 msgreq
.lmr_errno
= 0;
625 msgreq
.lmr_saved_errno
= 0;
626 msg
= &msgreq
.lmr_msg
;
627 msg
->lm_version
= LOCKD_MSG_VERSION
;
631 msg
->lm_fl
.l_start
= start
;
633 msg
->lm_fl
.l_len
= end
- start
+ 1;
634 msg
->lm_fl
.l_pid
= proc_pid(p
);
636 if (ap
->a_flags
& F_WAIT
)
637 msg
->lm_flags
|= LOCKD_MSG_BLOCK
;
638 if (ap
->a_op
== F_GETLK
)
639 msg
->lm_flags
|= LOCKD_MSG_TEST
;
641 nmp
= VFSTONFS(vnode_mount(vp
));
648 saddr
= mbuf_data(nmp
->nm_nam
);
649 bcopy(saddr
, &msg
->lm_addr
, min(sizeof msg
->lm_addr
, saddr
->sa_len
));
650 msg
->lm_fh_len
= NFS_ISV3(vp
) ? VTONFS(vp
)->n_fhsize
: NFSX_V2FH
;
651 bcopy(VTONFS(vp
)->n_fhp
, msg
->lm_fh
, msg
->lm_fh_len
);
653 msg
->lm_flags
|= LOCKD_MSG_NFSV3
;
654 cru2x(cred
, &msg
->lm_cred
);
657 lastmsg
= now
.tv_sec
- ((nmp
->nm_tprintf_delay
) - (nmp
->nm_tprintf_initial_delay
));
659 fmode
= FFLAGS(O_WRONLY
);
660 if ((error
= VNOP_OPEN(wvp
, fmode
, ap
->a_context
))) {
669 /* allocate unique xid */
670 msg
->lm_xid
= nfs_lockxid_get();
671 nfs_lockdmsg_enqueue(&msgreq
);
674 #define IO_NOMACCHECK 0;
675 ioflg
= IO_UNIT
| IO_NOMACCHECK
;
678 while (nfslockdfifolock
& NFSLOCKDFIFOLOCK_LOCKED
) {
679 nfslockdfifolock
|= NFSLOCKDFIFOLOCK_WANT
;
680 error
= tsleep((void *)&nfslockdfifolock
,
681 PCATCH
| PUSER
, "lockdfifo", 20*hz
);
687 nfslockdfifolock
|= NFSLOCKDFIFOLOCK_LOCKED
;
689 error
= vn_rdwr(UIO_WRITE
, wvp
, (caddr_t
)msg
, sizeof(*msg
), 0,
690 UIO_SYSSPACE32
, ioflg
, proc_ucred(kernproc
), NULL
, p
);
692 nfslockdfifolock
&= ~NFSLOCKDFIFOLOCK_LOCKED
;
693 if (nfslockdfifolock
& NFSLOCKDFIFOLOCK_WANT
) {
694 nfslockdfifolock
&= ~NFSLOCKDFIFOLOCK_WANT
;
695 wakeup((void *)&nfslockdfifolock
);
698 if (error
&& (((ioflg
& IO_NDELAY
) == 0) || error
!= EAGAIN
)) {
703 * Always wait for an answer. Not waiting for unlocks could
704 * cause a lock to be left if the unlock request gets dropped.
708 * Retry if it takes too long to get a response.
710 * The timeout numbers were picked out of thin air... they start
711 * at 2 and double each timeout with a max of 60 seconds.
713 * In order to maintain responsiveness, we pass a small timeout
714 * to tsleep and calculate the timeouts ourselves. This allows
715 * us to pick up on mount changes quicker.
721 endtime
= now
.tv_sec
+ timeo
/hz
;
723 endtime
= now
.tv_sec
+ 1;
724 while (now
.tv_sec
< endtime
) {
725 error
= tsleep((void *)&msgreq
, PCATCH
| PUSER
, "lockd", 2*hz
);
726 if (msgreq
.lmr_answered
) {
728 * Note: it's possible to have a lock granted at
729 * essentially the same time that we get interrupted.
730 * Since the lock may be granted, we can't return an
731 * error from this request or we might not unlock the
732 * lock that's been granted.
737 if (error
!= EWOULDBLOCK
)
739 /* check that we still have our mount... */
740 /* ...and that we still support locks */
741 nmp
= VFSTONFS(vnode_mount(vp
));
742 if (!nmp
|| (nmp
->nm_flag
& NFSMNT_NOLOCKS
))
745 * If the mount is hung and we've requested not to hang
746 * on remote filesystems, then bail now.
748 if ((p
!= NULL
) && ((proc_noremotehang(p
)) != 0) &&
749 ((nmp
->nm_state
& (NFSSTA_TIMEO
|NFSSTA_LOCKTIMEO
)) != 0)) {
750 if (fl
->l_type
== F_UNLCK
)
751 printf("nfs_dolock: aborting unlock request "
752 "due to timeout (noremotehang)\n");
759 /* check that we still have our mount... */
760 nmp
= VFSTONFS(vnode_mount(vp
));
762 if (error
== EWOULDBLOCK
)
766 /* ...and that we still support locks */
767 if (nmp
->nm_flag
& NFSMNT_NOLOCKS
) {
768 if (error
== EWOULDBLOCK
)
772 if ((error
== ENOTSUP
) &&
773 (nmp
->nm_state
& NFSSTA_LOCKSWORK
)) {
775 * We have evidence that locks work, yet lockd
776 * returned ENOTSUP. This is probably because
777 * it was unable to contact the server's lockd to
778 * send it the request.
780 * Because we know locks work, we'll consider
781 * this failure to be a timeout.
785 if (error
!= EWOULDBLOCK
) {
787 * We're going to bail on this request.
788 * If we were a blocked lock request, send a cancel.
790 if ((msgreq
.lmr_errno
== EINPROGRESS
) &&
791 !(msg
->lm_flags
& LOCKD_MSG_CANCEL
)) {
792 /* set this request up as a cancel */
793 msg
->lm_flags
|= LOCKD_MSG_CANCEL
;
794 nfs_lockdmsg_dequeue(&msgreq
);
795 msg
->lm_xid
= nfs_lockxid_get();
796 nfs_lockdmsg_enqueue(&msgreq
);
797 msgreq
.lmr_saved_errno
= error
;
798 msgreq
.lmr_errno
= 0;
799 msgreq
.lmr_answered
= 0;
802 /* send cancel request */
809 * If the mount is hung and we've requested not to hang
810 * on remote filesystems, then bail now.
812 if ((p
!= NULL
) && ((proc_noremotehang(p
)) != 0) &&
813 ((nmp
->nm_state
& (NFSSTA_TIMEO
|NFSSTA_LOCKTIMEO
)) != 0)) {
814 if (fl
->l_type
== F_UNLCK
)
815 printf("nfs_dolock: aborting unlock request "
816 "due to timeout (noremotehang)\n");
820 /* warn if we're not getting any response */
822 if ((msgreq
.lmr_errno
!= EINPROGRESS
) &&
823 (nmp
->nm_tprintf_initial_delay
!= 0) &&
824 ((lastmsg
+ nmp
->nm_tprintf_delay
) < now
.tv_sec
)) {
825 lastmsg
= now
.tv_sec
;
826 nfs_down(nmp
, p
, 0, NFSSTA_LOCKTIMEO
, "lockd not responding");
829 if (msgreq
.lmr_errno
== EINPROGRESS
) {
831 * We've got a blocked lock request that we are
832 * going to retry. First, we'll want to try to
833 * send a cancel for the previous request.
835 * Clear errno so if we don't get a response
836 * to the resend we'll call nfs_down().
837 * Also reset timeout because we'll expect a
838 * quick response to the cancel/resend (even if
839 * it is NLM_BLOCKED).
841 msg
->lm_flags
|= LOCKD_MSG_CANCEL
;
842 nfs_lockdmsg_dequeue(&msgreq
);
843 msg
->lm_xid
= nfs_lockxid_get();
844 nfs_lockdmsg_enqueue(&msgreq
);
845 msgreq
.lmr_saved_errno
= msgreq
.lmr_errno
;
846 msgreq
.lmr_errno
= 0;
847 msgreq
.lmr_answered
= 0;
849 /* send cancel then resend request */
853 * We timed out, so we will rewrite the request
854 * to the fifo, but only if it isn't already full.
864 /* we got a response, so the server's lockd is OK */
865 nfs_up(VFSTONFS(vnode_mount(vp
)), p
, NFSSTA_LOCKTIMEO
,
866 wentdown
? "lockd alive again" : NULL
);
869 if (msgreq
.lmr_errno
== EINPROGRESS
) {
870 /* got NLM_BLOCKED response */
871 /* need to wait for NLM_GRANTED */
873 msgreq
.lmr_answered
= 0;
874 goto wait_for_granted
;
877 if ((msg
->lm_flags
& LOCKD_MSG_CANCEL
) &&
878 (msgreq
.lmr_saved_errno
== EINPROGRESS
)) {
880 * We just got a successful reply to the
881 * cancel of the previous blocked lock request.
882 * Now, go ahead and resend the request.
884 msg
->lm_flags
&= ~LOCKD_MSG_CANCEL
;
885 nfs_lockdmsg_dequeue(&msgreq
);
886 msg
->lm_xid
= nfs_lockxid_get();
887 nfs_lockdmsg_enqueue(&msgreq
);
888 msgreq
.lmr_saved_errno
= 0;
889 msgreq
.lmr_errno
= 0;
890 msgreq
.lmr_answered
= 0;
896 if ((msg
->lm_flags
& LOCKD_MSG_TEST
) && msgreq
.lmr_errno
== 0) {
897 if (msg
->lm_fl
.l_type
!= F_UNLCK
) {
898 fl
->l_type
= msg
->lm_fl
.l_type
;
899 fl
->l_pid
= msg
->lm_fl
.l_pid
;
900 fl
->l_start
= msg
->lm_fl
.l_start
;
901 fl
->l_len
= msg
->lm_fl
.l_len
;
902 fl
->l_whence
= SEEK_SET
;
904 fl
->l_type
= F_UNLCK
;
909 * If the blocked lock request was cancelled,
910 * restore the error condition from when we
911 * originally bailed on the request.
913 if (msg
->lm_flags
& LOCKD_MSG_CANCEL
) {
914 msg
->lm_flags
&= ~LOCKD_MSG_CANCEL
;
915 error
= msgreq
.lmr_saved_errno
;
917 error
= msgreq
.lmr_errno
;
920 /* record that NFS file locking has worked on this mount */
921 nmp
= VFSTONFS(vnode_mount(vp
));
922 if (nmp
&& !(nmp
->nm_state
& NFSSTA_LOCKSWORK
))
923 nmp
->nm_state
|= NFSSTA_LOCKSWORK
;
925 * If we successfully acquired a lock, make sure this pid
926 * is in the nfs_lock_pid hash table so we know we can't
927 * short-circuit unlock requests.
929 if ((lockpidcheck
== ENOENT
) &&
930 ((ap
->a_op
== F_SETLK
) || (ap
->a_op
== F_SETLKW
)))
931 nfs_lock_pid_check(p
, 1, vp
);
937 nfs_lockdmsg_dequeue(&msgreq
);
939 error1
= VNOP_CLOSE(wvp
, FWRITE
, ap
->a_context
);
942 /* prefer any previous 'error' to our vn_close 'error1'. */
943 return (error
!= 0 ? error
: error1
);
948 * NFS advisory byte-level locks answer from the lock daemon.
951 nfslockdans(proc_t p
, struct lockd_ans
*ansp
)
953 LOCKD_MSG_REQUEST
*msgreq
;
956 /* Let root make this call. */
957 error
= proc_suser(p
);
961 /* the version should match, or we're out of sync */
962 if (ansp
->la_version
!= LOCKD_ANS_VERSION
)
965 /* try to find the lockd message by transaction id (cookie) */
966 msgreq
= nfs_lockdmsg_find_by_xid(ansp
->la_xid
);
967 if (ansp
->la_flags
& LOCKD_ANS_GRANTED
) {
969 * We can't depend on the granted message having our cookie,
970 * so we check the answer against the lockd message found.
971 * If no message was found or it doesn't match the answer,
972 * we look for the lockd message by the answer's lock info.
974 if (!msgreq
|| nfs_lockdmsg_compare_to_answer(msgreq
, ansp
))
975 msgreq
= nfs_lockdmsg_find_by_answer(ansp
);
977 * We need to make sure this request isn't being cancelled.
978 * If it is, we don't want to accept the granted message.
980 if (msgreq
&& (msgreq
->lmr_msg
.lm_flags
& LOCKD_MSG_CANCEL
))
986 msgreq
->lmr_errno
= ansp
->la_errno
;
987 if ((msgreq
->lmr_msg
.lm_flags
& LOCKD_MSG_TEST
) && msgreq
->lmr_errno
== 0) {
988 if (ansp
->la_flags
& LOCKD_ANS_LOCK_INFO
) {
989 if (ansp
->la_flags
& LOCKD_ANS_LOCK_EXCL
)
990 msgreq
->lmr_msg
.lm_fl
.l_type
= F_WRLCK
;
992 msgreq
->lmr_msg
.lm_fl
.l_type
= F_RDLCK
;
993 msgreq
->lmr_msg
.lm_fl
.l_pid
= ansp
->la_pid
;
994 msgreq
->lmr_msg
.lm_fl
.l_start
= ansp
->la_start
;
995 msgreq
->lmr_msg
.lm_fl
.l_len
= ansp
->la_len
;
997 msgreq
->lmr_msg
.lm_fl
.l_type
= F_UNLCK
;
1001 msgreq
->lmr_answered
= 1;
1002 (void)wakeup((void *)msgreq
);
1009 * NFS advisory byte-level locks: fifo file# from the lock daemon.
1012 nfslockdfd(proc_t p
, int fd
)
1017 error
= proc_suser(p
);
1023 error
= file_vnode(fd
, &vp
);
1026 error
= vnode_getwithref(vp
);
1029 error
= vnode_ref(vp
);
1035 oldvp
= nfslockdvnode
;
1040 (void)wakeup((void *)&nfslockdvnode
);
1049 * lock daemon waiting for lock request
1052 nfslockdwait(proc_t p
)
1056 error
= proc_suser(p
);
1059 if (nfslockdwaiting
|| nfslockdvnode
)
1062 nfslockdstarttimeout
= 0;
1063 nfslockdwaiting
= 1;
1064 tsleep((void *)&nfslockdwaiting
, PCATCH
| PUSER
, "lockd", 0);
1065 nfslockdwaiting
= 0;