/*
 * Copyright (c) 2002-2008 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Copyright (c) 1997 Berkeley Software Design, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Berkeley Software Design Inc's name may not be used to endorse or
 *    promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from BSDI nfs_lock.c,v 2.4 1998/12/14 23:49:56 jch Exp
 */
#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/fcntl.h>
#include <sys/kernel.h>		/* for hz */
#include <sys/file_internal.h>
#include <sys/malloc.h>
#include <sys/lockf.h>		/* for hz */ /* Must come after sys/malloc.h */
#include <sys/kpi_mbuf.h>
#include <sys/mount_internal.h>
#include <sys/proc_internal.h>	/* for p_start */
#include <sys/kauth.h>
#include <sys/resourcevar.h>
#include <sys/socket.h>
#include <sys/unistd.h>
#include <sys/user.h>
#include <sys/vnode_internal.h>

#include <kern/thread.h>
#include <kern/host.h>

#include <machine/limits.h>

#include <net/if.h>

#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
#include <nfs/nfs_gss.h>
#include <nfs/nfsmount.h>
#include <nfs/nfsnode.h>
#include <nfs/nfs_lock.h>

#include <mach/host_priv.h>
#include <mach/mig_errors.h>
#include <mach/host_special_ports.h>
#include <lockd/lockd_mach.h>
extern void ipc_port_release_send(ipc_port_t);

#define OFF_MAX QUAD_MAX
/*
 * pending lock request messages are kept in this queue which is
 * kept sorted by transaction ID (xid).
 */
static uint64_t nfs_lockxid = 0;
static LOCKD_MSG_QUEUE nfs_pendlockq;
/*
 * This structure is used to identify processes which have acquired NFS locks.
 * Knowing which processes have ever acquired locks allows us to short-circuit
 * unlock requests for processes that have never had an NFS file lock, thus
 * avoiding a costly and unnecessary lockd request.
 */
struct nfs_lock_pid {
	TAILQ_ENTRY(nfs_lock_pid) lp_lru;	/* LRU list */
	LIST_ENTRY(nfs_lock_pid) lp_hash;	/* hash chain */
	int lp_valid;				/* valid entry? */
	int lp_time;				/* last time seen valid */
	pid_t lp_pid;				/* The process ID. */
	struct timeval lp_pid_start;		/* Start time of process id */
};
#define NFS_LOCK_PID_HASH_SIZE	64	// XXX tune me
#define NFS_LOCK_PID_HASH(pid)	\
	(&nfs_lock_pid_hash_tbl[(pid) & nfs_lock_pid_hash])
static LIST_HEAD(, nfs_lock_pid) *nfs_lock_pid_hash_tbl;
static TAILQ_HEAD(, nfs_lock_pid) nfs_lock_pid_lru;
static u_long nfs_lock_pid_hash;
static uint32_t nfs_lock_pid_hash_trusted;

static lck_grp_t *nfs_lock_lck_grp;
static lck_mtx_t *nfs_lock_mutex;
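/*
 * Note: hashinit() sizes the table to a power of two and returns the
 * corresponding index mask (here NFS_LOCK_PID_HASH_SIZE - 1 = 63) through
 * its third argument, so NFS_LOCK_PID_HASH(pid) picks a bucket from the
 * low bits of the pid; e.g. pid 1234 (0x4d2) lands in bucket 18 (0x12).
 */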
void nfs_lockdmsg_enqueue(LOCKD_MSG_REQUEST *);
void nfs_lockdmsg_dequeue(LOCKD_MSG_REQUEST *);
int nfs_lockdmsg_compare_to_answer(LOCKD_MSG_REQUEST *, struct lockd_ans *);
LOCKD_MSG_REQUEST *nfs_lockdmsg_find_by_answer(struct lockd_ans *);
LOCKD_MSG_REQUEST *nfs_lockdmsg_find_by_xid(uint64_t);
uint64_t nfs_lockxid_get(void);
int nfs_lock_pid_check(proc_t, int);
int nfs_lockd_send_request(LOCKD_MSG *, int);
/*
 * initialize global nfs lock state
 */
void
nfs_lock_init(void)
{
	TAILQ_INIT(&nfs_pendlockq);
	nfs_lock_pid_hash_trusted = 1;
	nfs_lock_pid_hash_tbl = hashinit(NFS_LOCK_PID_HASH_SIZE,
					 M_TEMP, &nfs_lock_pid_hash);
	TAILQ_INIT(&nfs_lock_pid_lru);

	nfs_lock_lck_grp = lck_grp_alloc_init("nfs_lock", LCK_GRP_ATTR_NULL);
	nfs_lock_mutex = lck_mtx_alloc_init(nfs_lock_lck_grp, LCK_ATTR_NULL);
}
/*
 * change the count of NFS mounts that may need to make lockd requests
 *
 * If the mount count drops to zero, then send a shutdown request to
 * lockd if we've sent any requests to it.
 */
void
nfs_lockd_mount_change(int i)
{
	mach_port_t lockd_port = IPC_PORT_NULL;
	kern_return_t kr;
	int send_shutdown;

	lck_mtx_lock(nfs_lock_mutex);

	nfs_lockd_mounts += i;

	/* send a shutdown request if there are no more lockd mounts */
	send_shutdown = ((nfs_lockd_mounts == 0) && nfs_lockd_request_sent);
	if (send_shutdown)
		nfs_lockd_request_sent = 0;

	lck_mtx_unlock(nfs_lock_mutex);

	if (!send_shutdown)
		return;

	/*
	 * Let lockd know that it is no longer needed for any NFS mounts
	 */
	kr = host_get_lockd_port(host_priv_self(), &lockd_port);
	if ((kr != KERN_SUCCESS) || !IPC_PORT_VALID(lockd_port)) {
		printf("nfs_lockd_mount_change: shutdown couldn't get port, kr %d, port %s\n",
			kr, (lockd_port == IPC_PORT_NULL) ? "NULL" :
			(lockd_port == IPC_PORT_DEAD) ? "DEAD" : "VALID");
		return;
	}

	kr = lockd_shutdown(lockd_port);
	if (kr != KERN_SUCCESS)
		printf("nfs_lockd_mount_change: shutdown %d\n", kr);

	ipc_port_release_send(lockd_port);
}
/*
 * insert a lock request message into the pending queue
 * (nfs_lock_mutex must be held)
 */
void
nfs_lockdmsg_enqueue(LOCKD_MSG_REQUEST *msgreq)
{
	LOCKD_MSG_REQUEST *mr;

	mr = TAILQ_LAST(&nfs_pendlockq, nfs_lock_msg_queue);
	if (!mr || (msgreq->lmr_msg.lm_xid > mr->lmr_msg.lm_xid)) {
		/* fast path: empty queue or new largest xid */
		TAILQ_INSERT_TAIL(&nfs_pendlockq, msgreq, lmr_next);
		return;
	}
	/* slow path: need to walk list to find insertion point */
	while (mr && (msgreq->lmr_msg.lm_xid < mr->lmr_msg.lm_xid)) {
		mr = TAILQ_PREV(mr, nfs_lock_msg_queue, lmr_next);
	}
	if (mr)
		TAILQ_INSERT_AFTER(&nfs_pendlockq, mr, msgreq, lmr_next);
	else
		TAILQ_INSERT_HEAD(&nfs_pendlockq, msgreq, lmr_next);
}
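/*
 * Example: with pending xids [3, 5, 9], enqueueing xid 7 fails the fast
 * path (7 < 9), walks back from the tail past 9, and is inserted after 5,
 * preserving the ascending-xid order that the early-out in
 * nfs_lockdmsg_find_by_xid() below relies on.
 */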
/*
 * remove a lock request message from the pending queue
 * (nfs_lock_mutex must be held)
 */
void
nfs_lockdmsg_dequeue(LOCKD_MSG_REQUEST *msgreq)
{
	TAILQ_REMOVE(&nfs_pendlockq, msgreq, lmr_next);
}
/*
 * find a pending lock request message by xid
 *
 * We search from the head of the list assuming that the message we're
 * looking for is for an older request (because we have an answer to it).
 * This assumes that lock requests will be answered primarily in FIFO order.
 * However, this may not be the case if there are blocked requests. We may
 * want to move blocked requests to a separate queue (but that'll complicate
 * duplicate xid checking).
 *
 * (nfs_lock_mutex must be held)
 */
inline LOCKD_MSG_REQUEST *
nfs_lockdmsg_find_by_xid(uint64_t lockxid)
{
	LOCKD_MSG_REQUEST *mr;

	TAILQ_FOREACH(mr, &nfs_pendlockq, lmr_next) {
		if (mr->lmr_msg.lm_xid == lockxid)
			return (mr);
		if (mr->lmr_msg.lm_xid > lockxid)
			return (NULL);
	}
	return (NULL);
}
/*
 * Because we can't depend on nlm_granted messages containing the same
 * cookie we sent with the original lock request, we need code to test if
 * an nlm_granted answer matches the lock request.  We also need code
 * that can find a lockd message based solely on the nlm_granted answer.
 */

/*
 * compare lockd message to answer
 *
 * returns 0 on equality and 1 if different
 */
int
nfs_lockdmsg_compare_to_answer(LOCKD_MSG_REQUEST *msgreq, struct lockd_ans *ansp)
{
	if (!(ansp->la_flags & LOCKD_ANS_LOCK_INFO))
		return (1);
	if (msgreq->lmr_msg.lm_fl.l_pid != ansp->la_pid)
		return (1);
	if (msgreq->lmr_msg.lm_fl.l_start != ansp->la_start)
		return (1);
	if (msgreq->lmr_msg.lm_fl.l_len != ansp->la_len)
		return (1);
	if (msgreq->lmr_msg.lm_fh_len != ansp->la_fh_len)
		return (1);
	if (bcmp(msgreq->lmr_msg.lm_fh, ansp->la_fh, ansp->la_fh_len))
		return (1);
	return (0);
}
/*
 * find a pending lock request message based on the lock info provided
 * in the lockd_ans/nlm_granted data.  We need this because we can't
 * depend on nlm_granted messages containing the same cookie we sent
 * with the original lock request.
 *
 * We search from the head of the list assuming that the message we're
 * looking for is for an older request (because we have an answer to it).
 * This assumes that lock requests will be answered primarily in FIFO order.
 * However, this may not be the case if there are blocked requests. We may
 * want to move blocked requests to a separate queue (but that'll complicate
 * duplicate xid checking).
 *
 * (nfs_lock_mutex must be held)
 */
inline LOCKD_MSG_REQUEST *
nfs_lockdmsg_find_by_answer(struct lockd_ans *ansp)
{
	LOCKD_MSG_REQUEST *mr;

	if (!(ansp->la_flags & LOCKD_ANS_LOCK_INFO))
		return (NULL);
	TAILQ_FOREACH(mr, &nfs_pendlockq, lmr_next) {
		if (!nfs_lockdmsg_compare_to_answer(mr, ansp))
			break;
	}
	return (mr);
}
/*
 * return the next unique lock request transaction ID
 * (nfs_lock_mutex must be held)
 */
inline uint64_t
nfs_lockxid_get(void)
{
	LOCKD_MSG_REQUEST *mr;

	/* derive initial lock xid from system time */
	if (!nfs_lockxid) {
		/*
		 * Note: it's OK if this code inits nfs_lockxid to 0 (for example,
		 * due to a broken clock) because we immediately increment it
		 * and we guarantee to never use xid 0.  So, nfs_lockxid should only
		 * ever be 0 the first time this function is called.
		 */
		struct timeval tv;
		microtime(&tv);
		nfs_lockxid = (uint64_t)tv.tv_sec << 12;
	}

	/* make sure we get a unique xid */
	do {
		/* Skip zero xid if it should ever happen.  */
		if (++nfs_lockxid == 0)
			nfs_lockxid++;
		if (!(mr = TAILQ_LAST(&nfs_pendlockq, nfs_lock_msg_queue)) ||
		     (mr->lmr_msg.lm_xid < nfs_lockxid)) {
			/* fast path: empty queue or new largest xid */
			break;
		}
		/* check if xid is already in use */
	} while (nfs_lockdmsg_find_by_xid(nfs_lockxid));

	return (nfs_lockxid);
}
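/*
 * Example: if tv_sec reads 1000000 when the first xid is generated, the
 * initial value is 1000000 << 12 = 4096000000; a client restarted one
 * second later starts 4096 ids higher, so there is room for roughly 4096
 * requests per second before xids from the two runs could overlap.
 */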
/*
 * Check the nfs_lock_pid hash table for an entry and, if requested,
 * add the entry if it is not found.
 *
 * (Also, if adding, try to clean up some stale entries.)
 * (nfs_lock_mutex must be held)
 */
int
nfs_lock_pid_check(proc_t p, int addflag)
{
	struct nfs_lock_pid *lp, *lplru, *lplru_next, *mlp;
	TAILQ_HEAD(, nfs_lock_pid) nfs_lock_pid_free;
	proc_t plru = PROC_NULL;
	pid_t pid;
	int error = 0;
	struct timeval now;

	TAILQ_INIT(&nfs_lock_pid_free);
	mlp = NULL;

loop:
	/* Search hash chain */
	pid = proc_pid(p);
	error = ENOENT;
	lp = NFS_LOCK_PID_HASH(pid)->lh_first;
	for (; lp != NULL; lp = lp->lp_hash.le_next)
		if (lp->lp_pid == pid) {
			/* found pid... */
			if (timevalcmp(&lp->lp_pid_start, &p->p_start, ==)) {
				/* ...and it's valid */
				/* move to tail of LRU */
				TAILQ_REMOVE(&nfs_lock_pid_lru, lp, lp_lru);
				microuptime(&now);
				lp->lp_time = now.tv_sec;
				TAILQ_INSERT_TAIL(&nfs_lock_pid_lru, lp, lp_lru);
				error = 0;
			} else {
				/* ...but it's no longer valid */
				/* remove from hash, invalidate, and move to lru head */
				LIST_REMOVE(lp, lp_hash);
				lp->lp_valid = 0;
				TAILQ_REMOVE(&nfs_lock_pid_lru, lp, lp_lru);
				TAILQ_INSERT_HEAD(&nfs_lock_pid_lru, lp, lp_lru);
				lp = NULL;
			}
			break;
		}

	/* if we didn't find it (valid), use any newly allocated one */
	if (!lp)
		lp = mlp;

	/* if we don't have an lp and we've been asked to add it */
	if ((error == ENOENT) && addflag && !lp) {
		/* scan lru list for invalid, stale entries to reuse/free */
		int lrucnt = 0;
		microuptime(&now);
		for (lplru = TAILQ_FIRST(&nfs_lock_pid_lru); lplru; lplru = lplru_next) {
			lplru_next = TAILQ_NEXT(lplru, lp_lru);
			if (lplru->lp_valid && (lplru->lp_time >= (now.tv_sec - 2))) {
				/*
				 * If the oldest LRU entry is relatively new, then don't
				 * bother scanning any further.
				 */
				break;
			}
			/* remove entry from LRU, and check if it's still in use */
			TAILQ_REMOVE(&nfs_lock_pid_lru, lplru, lp_lru);
			if (!lplru->lp_valid || !(plru = proc_find(lplru->lp_pid)) ||
			    timevalcmp(&lplru->lp_pid_start, &plru->p_start, !=)) {
				if (plru != PROC_NULL) {
					proc_rele(plru);
					plru = PROC_NULL;
				}
				/* no longer in use */
				LIST_REMOVE(lplru, lp_hash);
				if (!lp) {
					/* we'll reuse this one */
					lp = lplru;
				} else {
					/* queue it up for freeing */
					TAILQ_INSERT_HEAD(&nfs_lock_pid_free, lplru, lp_lru);
				}
			} else {
				/* still in use */
				if (plru != PROC_NULL) {
					proc_rele(plru);
					plru = PROC_NULL;
				}
				lplru->lp_time = now.tv_sec;
				TAILQ_INSERT_TAIL(&nfs_lock_pid_lru, lplru, lp_lru);
			}
			/* don't check too many entries at once */
			if (++lrucnt > 8)
				break;
		}
		if (!lp) {
			/* we need to allocate a new one */
			lck_mtx_unlock(nfs_lock_mutex);
			MALLOC(mlp, struct nfs_lock_pid *, sizeof(struct nfs_lock_pid),
				M_TEMP, M_WAITOK | M_ZERO);
			lck_mtx_lock(nfs_lock_mutex);
			if (mlp) /* make sure somebody hasn't already added this guy */
				goto loop;
			error = ENOMEM;
		}
	}
	if ((error == ENOENT) && addflag && lp) {
		/* (re)initialize nfs_lock_pid info */
		lp->lp_pid = pid;
		lp->lp_pid_start = p->p_start;
		/* insert pid in hash */
		LIST_INSERT_HEAD(NFS_LOCK_PID_HASH(lp->lp_pid), lp, lp_hash);
		lp->lp_valid = 1;
		lp->lp_time = now.tv_sec;
		TAILQ_INSERT_TAIL(&nfs_lock_pid_lru, lp, lp_lru);
		error = 0;
	}

	if ((mlp && (lp != mlp)) || TAILQ_FIRST(&nfs_lock_pid_free)) {
		lck_mtx_unlock(nfs_lock_mutex);
		if (mlp && (lp != mlp)) {
			/* we didn't need this one, so we can free it */
			FREE(mlp, M_TEMP);
		}
		/* free up any stale entries */
		while ((lp = TAILQ_FIRST(&nfs_lock_pid_free))) {
			TAILQ_REMOVE(&nfs_lock_pid_free, lp, lp_lru);
			FREE(lp, M_TEMP);
		}
		lck_mtx_lock(nfs_lock_mutex);
	}

	return (error);
}
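/*
 * Usage note: nfs_vnop_advlock() below calls nfs_lock_pid_check(p, 0) to
 * probe the table (ENOENT means this process has never taken an NFS lock,
 * so an unlock request can be short-circuited) and nfs_lock_pid_check(p, 1)
 * to record the pid once a lock has been granted.
 */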
#define MACH_MAX_TRIES 3

int
nfs_lockd_send_request(LOCKD_MSG *msg, int interruptable)
{
	kern_return_t kr;
	int retries = 0;
	mach_port_t lockd_port = IPC_PORT_NULL;

	kr = host_get_lockd_port(host_priv_self(), &lockd_port);
	if (kr != KERN_SUCCESS || !IPC_PORT_VALID(lockd_port))
		return (ENOTSUP);

	do {
		/* In the kernel all mach messaging is interruptable */
		do {
			kr = lockd_request(
				lockd_port,
				msg->lm_version,
				msg->lm_flags,
				msg->lm_xid,
				msg->lm_fl.l_start,
				msg->lm_fl.l_len,
				msg->lm_fl.l_pid,
				msg->lm_fl.l_type,
				msg->lm_fl.l_whence,
				(uint32_t *)&msg->lm_addr,
				(uint32_t *)&msg->lm_cred,
				msg->lm_fh_len,
				msg->lm_fh);
			if (kr != KERN_SUCCESS)
				printf("lockd_request received %d!\n", kr);
		} while (!interruptable && kr == MACH_SEND_INTERRUPTED);
	} while (kr == MIG_SERVER_DIED && retries++ < MACH_MAX_TRIES);

	ipc_port_release_send(lockd_port);
	switch (kr) {
	case KERN_SUCCESS:
		return (0);
	case MACH_SEND_INTERRUPTED:
		return (EINTR);
	default:
		/*
		 * Other MACH or MIG errors we will retry. Eventually
		 * we will call nfs_down and allow the user to disable
		 * locking.
		 */
		return (EAGAIN);
	}
}
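/*
 * Note the two retry levels above: the inner loop re-sends while a send
 * was interrupted (MACH_SEND_INTERRUPTED) on a mount that isn't
 * interruptable, and the outer loop re-sends up to MACH_MAX_TRIES times
 * when the MIG server died mid-request (MIG_SERVER_DIED).
 */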
/*
 * NFS advisory byte-level locks (client)
 */
int
nfs3_vnop_advlock(
	struct vnop_advlock_args /* {
		struct vnodeop_desc *a_desc;
		vnode_t a_vp;
		caddr_t a_id;
		int a_op;
		struct flock *a_fl;
		int a_flags;
		vfs_context_t a_context;
	} */ *ap)
{
	vnode_t vp = ap->a_vp;
	nfsnode_t np = VTONFS(ap->a_vp);
	vfs_context_t ctx = ap->a_context;
	proc_t p;
	LOCKD_MSG_REQUEST msgreq;
	LOCKD_MSG *msg;
	struct flock *fl = ap->a_fl;
	int error, error2;
	int interruptable, modified;
	off_t start, end;
	struct nfsmount *nmp;
	struct nfs_vattr nvattr;
	struct timespec ts;
	struct timeval now;
	int timeo, endtime, lastmsg, wentdown = 0;
	int lockpidcheck, nfsvers;
	struct sockaddr *saddr;

	p = vfs_context_proc(ctx);

	nmp = VTONMP(vp);
	if (!nmp)
		return (ENXIO);

	lck_mtx_lock(&nmp->nm_lock);
	if (nmp->nm_flag & NFSMNT_NOLOCKS) {
		lck_mtx_unlock(&nmp->nm_lock);
		return (ENOTSUP);
	}
	nfsvers = nmp->nm_vers;
	lck_mtx_unlock(&nmp->nm_lock);
	/*
	 * The NLM protocol doesn't allow the server to return an error
	 * on ranges, so we do it.  Pre LFS (Large File Summit)
	 * standards required EINVAL for the range errors.  More recent
	 * standards use EOVERFLOW, but their EINVAL wording still
	 * encompasses these errors.
	 * Any code sensitive to this is either:
	 *  1) written pre-LFS and so can handle only EINVAL, or
	 *  2) written post-LFS and thus ought to be tolerant of pre-LFS
	 *     implementations.
	 * Since returning EOVERFLOW certainly breaks 1), we return EINVAL.
	 */
	if (fl->l_whence != SEEK_END) {
		if ((fl->l_whence != SEEK_CUR && fl->l_whence != SEEK_SET) ||
		    fl->l_start < 0 ||
		    (fl->l_len > 0 && fl->l_len - 1 > OFF_MAX - fl->l_start) ||
		    (fl->l_len < 0 && fl->l_start + fl->l_len < 0))
			return (EINVAL);
	}
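	/*
	 * Example: l_start = OFF_MAX with l_len = 2 describes a byte past the
	 * maximum file offset (l_len - 1 > OFF_MAX - l_start), and l_start = 0
	 * with l_len = -1 describes a negative range (l_start + l_len < 0);
	 * both draw EINVAL per the pre-LFS wording described above.
	 */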
	lck_mtx_lock(nfs_lock_mutex);

	/*
	 * Need to check if this process has successfully acquired an NFS lock before.
	 * If not, and this is an unlock request we can simply return success here.
	 */
	lockpidcheck = nfs_lock_pid_check(p, 0);
	lck_mtx_unlock(nfs_lock_mutex);
	if (lockpidcheck) {
		if (lockpidcheck != ENOENT)
			return (lockpidcheck);
		if ((ap->a_op == F_UNLCK) && nfs_lock_pid_hash_trusted)
			return (0);
	}
	/*
	 * The NFS Lock Manager protocol doesn't directly handle
	 * negative lengths or SEEK_END, so we need to normalize
	 * things here where we have all the info.
	 * (Note: SEEK_CUR is already adjusted for at this point)
	 */
	/* Convert the flock structure into a start and end. */
	switch (fl->l_whence) {
	case SEEK_SET:
	case SEEK_CUR:
		/*
		 * Caller is responsible for adding any necessary offset
		 * to fl->l_start when SEEK_CUR is used.
		 */
		start = fl->l_start;
		break;
	case SEEK_END:
		/* need to flush, and refetch attributes to make */
		/* sure we have the correct end of file offset */
		if ((error = nfs_node_lock(np)))
			return (error);
		modified = (np->n_flag & NMODIFIED);
		nfs_node_unlock(np);
		if (modified && ((error = nfs_vinvalbuf(vp, V_SAVE, ctx, 1))))
			return (error);
		if ((error = nfs_getattr(np, &nvattr, ctx, NGA_UNCACHED)))
			return (error);
		nfs_data_lock(np, NFS_DATA_LOCK_SHARED);
		start = np->n_size + fl->l_start;
		nfs_data_unlock(np);
		break;
	default:
		return (EINVAL);
	}
	if (fl->l_len == 0)
		end = -1;
	else if (fl->l_len > 0)
		end = start + fl->l_len - 1;
	else { /* l_len is negative */
		end = start - 1;
		start += fl->l_len;
	}
	if (start < 0)
		return (EINVAL);

	if ((nfsvers == NFS_VER2) &&
	    ((start >= 0x80000000) || (end >= 0x80000000)))
		return (EINVAL);
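	/*
	 * Example of the normalization above: l_whence = SEEK_SET,
	 * l_start = 100, l_len = -10 yields the byte range [90, 99]
	 * (end = start - 1, then start += l_len), while l_len = 0 locks
	 * from l_start through end-of-file (end = -1).
	 */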
	/*
	 * Fill in the information structure.
	 * We set all values to zero with bzero to clear
	 * out any information in the sockaddr_storage
	 * and nfs_filehandle contained in msgreq so that
	 * we will not leak extraneous information out of
	 * the kernel when calling up to lockd via our mig
	 * generated interface.
	 */
	bzero(&msgreq, sizeof(msgreq));
	msg = &msgreq.lmr_msg;
	msg->lm_version = LOCKD_MSG_VERSION;
	msg->lm_flags = 0;

	msg->lm_fl = *fl;
	msg->lm_fl.l_start = start;
	if (end != -1)
		msg->lm_fl.l_len = end - start + 1;
	msg->lm_fl.l_pid = vfs_context_pid(ctx);

	if (ap->a_flags & F_WAIT)
		msg->lm_flags |= LOCKD_MSG_BLOCK;
	if (ap->a_op == F_GETLK)
		msg->lm_flags |= LOCKD_MSG_TEST;
	lck_mtx_lock(&nmp->nm_lock);
	saddr = mbuf_data(nmp->nm_nam);
	bcopy(saddr, &msg->lm_addr, min(sizeof msg->lm_addr, saddr->sa_len));
	msg->lm_fh_len = (nfsvers == NFS_VER2) ? NFSX_V2FH : np->n_fhsize;
	bcopy(np->n_fhp, msg->lm_fh, msg->lm_fh_len);
	if (nfsvers == NFS_VER3)
		msg->lm_flags |= LOCKD_MSG_NFSV3;
	cru2x(vfs_context_ucred(ctx), &msg->lm_cred);

	microuptime(&now);
	lastmsg = now.tv_sec - ((nmp->nm_tprintf_delay) - (nmp->nm_tprintf_initial_delay));
	interruptable = nmp->nm_flag & NFSMNT_INT;
	lck_mtx_unlock(&nmp->nm_lock);

	lck_mtx_lock(nfs_lock_mutex);

	/* allocate unique xid */
	msg->lm_xid = nfs_lockxid_get();
	nfs_lockdmsg_enqueue(&msgreq);

	timeo = 2;

	for (;;) {
		nfs_lockd_request_sent = 1;

		/* need to drop nfs_lock_mutex while calling nfs_lockd_send_request() */
		lck_mtx_unlock(nfs_lock_mutex);
		error = nfs_lockd_send_request(msg, interruptable);
		lck_mtx_lock(nfs_lock_mutex);
		if (error && (error != EAGAIN))
			break;

		/*
		 * Always wait for an answer.  Not waiting for unlocks could
		 * cause a lock to be left if the unlock request gets dropped.
		 */

		/*
		 * Retry if it takes too long to get a response.
		 *
		 * The timeout numbers were picked out of thin air... they start
		 * at 2 and double each timeout with a max of 60 seconds.
		 *
		 * In order to maintain responsiveness, we pass a small timeout
		 * to msleep and calculate the timeouts ourselves.  This allows
		 * us to pick up on mount changes quicker.
		 */
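		/*
		 * Example: timeo starts at 2 and doubles on each timeout with a
		 * cap of 60, so an unanswered request is re-sent after 2, 4, 8,
		 * 16, 32, and then every 60 seconds.
		 */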
wait_for_granted:
		error = EWOULDBLOCK;
		ts.tv_sec = 2;
		ts.tv_nsec = 0;
		microuptime(&now);
		endtime = now.tv_sec + timeo;
		while (now.tv_sec < endtime) {
			error = error2 = 0;
			if (!msgreq.lmr_answered)
				error = msleep(&msgreq, nfs_lock_mutex, PCATCH | PUSER, "lockd", &ts);
			if (msgreq.lmr_answered) {
				/*
				 * Note: it's possible to have a lock granted at
				 * essentially the same time that we get interrupted.
				 * Since the lock may be granted, we can't return an
				 * error from this request or we might not unlock the
				 * lock that's been granted.
				 */
				nmp = VTONMP(vp);
				if ((msgreq.lmr_errno == ENOTSUP) && nmp &&
				    (nmp->nm_state & NFSSTA_LOCKSWORK)) {
					/*
					 * We have evidence that locks work, yet lockd
					 * returned ENOTSUP.  This is probably because
					 * it was unable to contact the server's lockd
					 * to send it the request.
					 *
					 * Because we know locks work, we'll consider
					 * this failure to be a timeout.
					 */
					error = EWOULDBLOCK;
				} else {
					error = 0;
				}
				break;
			}
			if (error != EWOULDBLOCK)
				break;
			/* check that we still have our mount... */
			/* ...and that we still support locks */
			nmp = VTONMP(vp);
			if ((error2 = nfs_sigintr(nmp, NULL, vfs_context_thread(ctx), 0))) {
				error = error2;
				if (fl->l_type == F_UNLCK)
					printf("nfs_vnop_advlock: aborting unlock request, error %d\n", error);
				break;
			}
			lck_mtx_lock(&nmp->nm_lock);
			if (nmp->nm_flag & NFSMNT_NOLOCKS) {
				lck_mtx_unlock(&nmp->nm_lock);
				break;
			}
			interruptable = nmp->nm_flag & NFSMNT_INT;
			lck_mtx_unlock(&nmp->nm_lock);
			microuptime(&now);
		}
		if (error) {
			/* check that we still have our mount... */
			nmp = VTONMP(vp);
			if ((error2 = nfs_sigintr(nmp, NULL, vfs_context_thread(ctx), 0))) {
				error = error2;
				if (error2 != EINTR) {
					if (fl->l_type == F_UNLCK)
						printf("nfs_vnop_advlock: aborting unlock request, error %d\n", error);
				}
				break;
			}
			/* ...and that we still support locks */
			lck_mtx_lock(&nmp->nm_lock);
			if (nmp->nm_flag & NFSMNT_NOLOCKS) {
				if (error == EWOULDBLOCK)
					error = ENOTSUP;
				lck_mtx_unlock(&nmp->nm_lock);
				break;
			}
			interruptable = nmp->nm_flag & NFSMNT_INT;
			if (error != EWOULDBLOCK) {
				lck_mtx_unlock(&nmp->nm_lock);
				/*
				 * We're going to bail on this request.
				 * If we were a blocked lock request, send a cancel.
				 */
				if ((msgreq.lmr_errno == EINPROGRESS) &&
				    !(msg->lm_flags & LOCKD_MSG_CANCEL)) {
					/* set this request up as a cancel */
					msg->lm_flags |= LOCKD_MSG_CANCEL;
					nfs_lockdmsg_dequeue(&msgreq);
					msg->lm_xid = nfs_lockxid_get();
					nfs_lockdmsg_enqueue(&msgreq);
					msgreq.lmr_saved_errno = error;
					msgreq.lmr_errno = 0;
					msgreq.lmr_answered = 0;
					/* reset timeout */
					timeo = 2;
					/* send cancel request */
					continue;
				}
				break;
			}
			/* warn if we're not getting any response */
			microuptime(&now);
			if ((msgreq.lmr_errno != EINPROGRESS) &&
			    !(msg->lm_flags & LOCKD_MSG_DENIED_GRACE) &&
			    (nmp->nm_tprintf_initial_delay != 0) &&
			    ((lastmsg + nmp->nm_tprintf_delay) < now.tv_sec)) {
				lck_mtx_unlock(&nmp->nm_lock);
				lastmsg = now.tv_sec;
				nfs_down(nmp, vfs_context_thread(ctx), 0, NFSSTA_LOCKTIMEO, "lockd not responding");
				wentdown = 1;
			} else
				lck_mtx_unlock(&nmp->nm_lock);
			if (msgreq.lmr_errno == EINPROGRESS) {
				/*
				 * We've got a blocked lock request that we are
				 * going to retry.  First, we'll want to try to
				 * send a cancel for the previous request.
				 *
				 * Clear errno so if we don't get a response
				 * to the resend we'll call nfs_down().
				 * Also reset timeout because we'll expect a
				 * quick response to the cancel/resend (even if
				 * it is NLM_BLOCKED).
				 */
				msg->lm_flags |= LOCKD_MSG_CANCEL;
				nfs_lockdmsg_dequeue(&msgreq);
				msg->lm_xid = nfs_lockxid_get();
				nfs_lockdmsg_enqueue(&msgreq);
				msgreq.lmr_saved_errno = msgreq.lmr_errno;
				msgreq.lmr_errno = 0;
				msgreq.lmr_answered = 0;
				timeo = 2;
				/* send cancel then resend request */
				continue;
			}
			if (msg->lm_flags & LOCKD_MSG_DENIED_GRACE) {
				/*
				 * Time to resend a request previously denied due to a grace period.
				 */
				msg->lm_flags &= ~LOCKD_MSG_DENIED_GRACE;
				nfs_lockdmsg_dequeue(&msgreq);
				msg->lm_xid = nfs_lockxid_get();
				nfs_lockdmsg_enqueue(&msgreq);
				msgreq.lmr_saved_errno = 0;
				msgreq.lmr_errno = 0;
				msgreq.lmr_answered = 0;
				timeo = 2;
				/* resend request */
				continue;
			}
			/*
			 * We timed out, so we will resend the request.
			 */
			timeo *= 2;
			if (timeo > 60)
				timeo = 60;
			/* resend request */
			continue;
		}

		/* we got a response, so the server's lockd is OK */
		nfs_up(VTONMP(vp), vfs_context_thread(ctx), NFSSTA_LOCKTIMEO,
			wentdown ? "lockd alive again" : NULL);
		wentdown = 0;
		if (msgreq.lmr_answered && (msg->lm_flags & LOCKD_MSG_DENIED_GRACE)) {
			/*
			 * The lock request was denied because the server lockd is
			 * still in its grace period.  So, we need to try the
			 * request again in a little bit.
			 */
			timeo = 4;
			msgreq.lmr_answered = 0;
			goto wait_for_granted;
		}

		if (msgreq.lmr_errno == EINPROGRESS) {
			/* got NLM_BLOCKED response */
			/* need to wait for NLM_GRANTED */
			timeo = 60;
			msgreq.lmr_answered = 0;
			goto wait_for_granted;
		}
		if ((msg->lm_flags & LOCKD_MSG_CANCEL) &&
		    (msgreq.lmr_saved_errno == EINPROGRESS)) {
			/*
			 * We just got a successful reply to the
			 * cancel of the previous blocked lock request.
			 * Now, go ahead and resend the request.
			 */
			msg->lm_flags &= ~LOCKD_MSG_CANCEL;
			nfs_lockdmsg_dequeue(&msgreq);
			msg->lm_xid = nfs_lockxid_get();
			nfs_lockdmsg_enqueue(&msgreq);
			msgreq.lmr_saved_errno = 0;
			msgreq.lmr_errno = 0;
			msgreq.lmr_answered = 0;
			timeo = 2;
			/* resend request */
			continue;
		}
		if ((msg->lm_flags & LOCKD_MSG_TEST) && msgreq.lmr_errno == 0) {
			if (msg->lm_fl.l_type != F_UNLCK) {
				fl->l_type = msg->lm_fl.l_type;
				fl->l_pid = msg->lm_fl.l_pid;
				fl->l_start = msg->lm_fl.l_start;
				fl->l_len = msg->lm_fl.l_len;
				fl->l_whence = SEEK_SET;
			} else
				fl->l_type = F_UNLCK;
		}
		/*
		 * If the blocked lock request was cancelled,
		 * restore the error condition from when we
		 * originally bailed on the request.
		 */
		if (msg->lm_flags & LOCKD_MSG_CANCEL) {
			msg->lm_flags &= ~LOCKD_MSG_CANCEL;
			error = msgreq.lmr_saved_errno;
		} else
			error = msgreq.lmr_errno;
		nmp = VTONMP(vp);
		if ((error == ENOTSUP) && nmp && !(nmp->nm_state & NFSSTA_LOCKSWORK)) {
			/*
			 * We have NO evidence that locks work and lockd
			 * returned ENOTSUP.  Let's take this as a hint
			 * that locks aren't supported and disable them
			 * for this mount.
			 */
			lck_mtx_lock(&nmp->nm_lock);
			nmp->nm_flag |= NFSMNT_NOLOCKS;
			nmp->nm_state &= ~NFSSTA_LOCKTIMEO;
			lck_mtx_unlock(&nmp->nm_lock);
			printf("lockd returned ENOTSUP, disabling locks for nfs server: %s\n",
				vfs_statfs(nmp->nm_mountp)->f_mntfromname);
		}
		if (!error) {
			/* record that NFS file locking has worked on this mount */
			if (nmp) {
				lck_mtx_lock(&nmp->nm_lock);
				if (!(nmp->nm_state & NFSSTA_LOCKSWORK))
					nmp->nm_state |= NFSSTA_LOCKSWORK;
				lck_mtx_unlock(&nmp->nm_lock);
			}
			/*
			 * If we successfully acquired a lock, make sure this pid
			 * is in the nfs_lock_pid hash table so we know we can't
			 * short-circuit unlock requests.
			 */
			if ((lockpidcheck == ENOENT) &&
			    ((ap->a_op == F_SETLK) || (ap->a_op == F_SETLKW))) {
				error = nfs_lock_pid_check(p, 1);
				if (error) {
					/*
					 * We couldn't add the pid to the table,
					 * so we can no longer trust that a pid
					 * not in the table has no locks.
					 */
					nfs_lock_pid_hash_trusted = 0;
					printf("nfs_vnop_advlock: pid add failed - no longer trusted\n");
				}
			}
		}
		break;
	}

	nfs_lockdmsg_dequeue(&msgreq);

	lck_mtx_unlock(nfs_lock_mutex);

	return (error);
}
/*
 * nfslockdans --
 *	NFS advisory byte-level locks answer from the lock daemon.
 */
int
nfslockdans(proc_t p, struct lockd_ans *ansp)
{
	LOCKD_MSG_REQUEST *msgreq;
	int error;

	/* Let root make this call. */
	error = proc_suser(p);
	if (error)
		return (error);
	/* the version should match, or we're out of sync */
	if (ansp->la_version != LOCKD_ANS_VERSION)
		return (EINVAL);

	lck_mtx_lock(nfs_lock_mutex);

	/* try to find the lockd message by transaction id (cookie) */
	msgreq = nfs_lockdmsg_find_by_xid(ansp->la_xid);
	if (ansp->la_flags & LOCKD_ANS_GRANTED) {
		/*
		 * We can't depend on the granted message having our cookie,
		 * so we check the answer against the lockd message found.
		 * If no message was found or it doesn't match the answer,
		 * we look for the lockd message by the answer's lock info.
		 */
		if (!msgreq || nfs_lockdmsg_compare_to_answer(msgreq, ansp))
			msgreq = nfs_lockdmsg_find_by_answer(ansp);
		/*
		 * We need to make sure this request isn't being cancelled.
		 * If it is, we don't want to accept the granted message.
		 */
		if (msgreq && (msgreq->lmr_msg.lm_flags & LOCKD_MSG_CANCEL))
			msgreq = NULL;
	}
	if (!msgreq) {
		lck_mtx_unlock(nfs_lock_mutex);
		return (EPIPE);
	}
	msgreq->lmr_errno = ansp->la_errno;
	if ((msgreq->lmr_msg.lm_flags & LOCKD_MSG_TEST) && msgreq->lmr_errno == 0) {
		if (ansp->la_flags & LOCKD_ANS_LOCK_INFO) {
			if (ansp->la_flags & LOCKD_ANS_LOCK_EXCL)
				msgreq->lmr_msg.lm_fl.l_type = F_WRLCK;
			else
				msgreq->lmr_msg.lm_fl.l_type = F_RDLCK;
			msgreq->lmr_msg.lm_fl.l_pid = ansp->la_pid;
			msgreq->lmr_msg.lm_fl.l_start = ansp->la_start;
			msgreq->lmr_msg.lm_fl.l_len = ansp->la_len;
		} else {
			msgreq->lmr_msg.lm_fl.l_type = F_UNLCK;
		}
	}
	if (ansp->la_flags & LOCKD_ANS_DENIED_GRACE)
		msgreq->lmr_msg.lm_flags |= LOCKD_MSG_DENIED_GRACE;

	msgreq->lmr_answered = 1;
	lck_mtx_unlock(nfs_lock_mutex);
	wakeup(msgreq);

	return (0);
}