2 * Copyright (c) 2002-2003 Apple Computer, Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
8 * This file contains Original Code and/or Modifications of Original Code
9 * as defined in and that are subject to the Apple Public Source License
10 * Version 2.0 (the 'License'). You may not use this file except in
11 * compliance with the License. Please obtain a copy of the License at
12 * http://www.opensource.apple.com/apsl/ and read it before using this
15 * The Original Code and all software distributed under the License are
16 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
17 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
18 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
20 * Please see the License for the specific language governing rights and
21 * limitations under the License.
23 * @APPLE_LICENSE_HEADER_END@
26 * Copyright (c) 1997 Berkeley Software Design, Inc. All rights reserved.
28 * Redistribution and use in source and binary forms, with or without
29 * modification, are permitted provided that the following conditions
31 * 1. Redistributions of source code must retain the above copyright
32 * notice, this list of conditions and the following disclaimer.
33 * 2. Redistributions in binary form must reproduce the above copyright
34 * notice, this list of conditions and the following disclaimer in the
35 * documentation and/or other materials provided with the distribution.
36 * 3. Berkeley Software Design Inc's name may not be used to endorse or
37 * promote products derived from this software without specific prior
40 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
41 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
42 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
43 * ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
44 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
45 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
46 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
48 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
49 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
52 * from BSDI nfs_lock.c,v 2.4 1998/12/14 23:49:56 jch Exp
55 #include <sys/cdefs.h>
56 #include <sys/param.h>
57 #include <sys/systm.h>
58 #include <sys/fcntl.h>
59 #include <sys/kernel.h> /* for hz */
62 #include <sys/malloc.h>
63 #include <sys/lockf.h> /* for hz */ /* Must come after sys/malloc.h */
65 #include <sys/mount.h>
66 #include <sys/namei.h>
68 #include <sys/resourcevar.h>
69 #include <sys/socket.h>
70 #include <sys/socket.h>
71 #include <sys/unistd.h>
73 #include <sys/vnode.h>
75 #include <kern/thread_act.h>
77 #include <machine/limits.h>
81 #include <nfs/rpcv2.h>
82 #include <nfs/nfsproto.h>
84 #include <nfs/nfsmount.h>
85 #include <nfs/nfsnode.h>
86 #include <nfs/nfs_lock.h>
87 #include <nfs/nlminfo.h>
/* Largest file offset accepted by the range check in nfs_dolock. */
89 #define OFF_MAX QUAD_MAX
/*
 * Lock statistics. nfs_dolock keeps the longest single request time in
 * nfsadvlock_longest and adds every elapsed time to nfsadvlocks_time.
 * NOTE(review): nfsadvlocks is not referenced in the visible code;
 * presumably a request counter -- confirm.
 */
91 uint64_t nfsadvlocks
= 0;
92 struct timeval nfsadvlock_longest
= {0, 0};
93 struct timeval nfsadvlocks_time
= {0, 0};
/* Pid recorded by nfslockdfd for the lock daemon; 0 when no fifo file is set. */
95 pid_t nfslockdpid
= 0;
/* Open file of the daemon fifo; nfs_dolock takes the fifo vnode from its f_data. */
96 struct file
*nfslockdfp
= 0;
/* Tested by nfs_dolock; its address is the channel the daemon sleeps on in nfslockdwait. */
97 int nfslockdwaiting
= 0;
/* Set to 1 by nfs_dolock after writing a request; cleared by nfslockdwait. */
98 int nfslockdfifowritten
= 0;
/* Sleep lock word serializing writes to the fifo; uses the two bits defined below. */
99 int nfslockdfifolock
= 0;
/* LOCKED: a writer currently owns the fifo. WANT: another writer is sleeping for it. */
100 #define NFSLOCKDFIFOLOCK_LOCKED 1
101 #define NFSLOCKDFIFOLOCK_WANT 2
105 * We have to let the process know if the call succeeded. I'm using an extra
106 * field in the uu_nlminfo field in the uthread structure, as it is already for
112 * NFS advisory byte-level locks.
/*
 * nfs_dolock: hand an advisory byte-range lock request (ap) for an NFS vnode
 * to the user-level lock daemon and collect its answer.
 *
 * Visible steps: validate the flock range, obtain the vnode of the daemon
 * fifo from nfslockdfp (waking the daemon and sleeping up to 60*hz on
 * &nfslockdfp if it is not there yet), allocate the per-thread nlminfo on
 * first use, fill in a lockd message, write it to the fifo while holding the
 * nfslockdfifolock sleep lock, wake the daemon, then sleep on the nlminfo
 * (20*hz per attempt) until nfslockdans posts a retcode.
 *
 * The final return yields error when it is nonzero, otherwise error1, the
 * result of vn_close() on the fifo vnode.
 *
 * NOTE(review): this listing omits lines of the function (several
 * declarations, return statements, closing braces), so the control flow
 * between the visible statements cannot be confirmed from here.
 * NOTE(review): IO_NOMACCHECK is defined in the body as 0 followed by a
 * semicolon; the single use expands to a harmless empty statement, but the
 * stray semicolon should be dropped from the macro.
 */
115 nfs_dolock(struct vop_advlock_args
*ap
)
116 /* struct vop_advlock_args {
117 struct vnodeop_desc *a_desc;
127 struct vnode
*vp
, *wvp
;
134 struct timeval elapsed
;
135 struct nfsmount
*nmp
;
139 ut
= get_bsdthread_info(current_act());
146 nmp
= VFSTONFS(vp
->v_mount
);
149 if (nmp
->nm_flag
& NFSMNT_NOLOCKS
)
153 * The NLM protocol doesn't allow the server to return an error
154 * on ranges, so we do it. Pre LFS (Large File Summit)
155 * standards required EINVAL for the range errors. More recent
156 * standards use EOVERFLOW, but their EINVAL wording still
157 * encompasses these errors.
158 * Any code sensitive to this is either:
159 * 1) written pre-LFS and so can handle only EINVAL, or
160 * 2) written post-LFS and thus ought to be tolerant of pre-LFS
162 * Since returning EOVERFLOW certainly breaks 1), we return EINVAL.
164 if (fl
->l_whence
!= SEEK_END
) {
165 if ((fl
->l_whence
!= SEEK_CUR
&& fl
->l_whence
!= SEEK_SET
) ||
167 (fl
->l_len
> 0 && fl
->l_len
- 1 > OFF_MAX
- fl
->l_start
) ||
168 (fl
->l_len
< 0 && fl
->l_start
+ fl
->l_len
< 0))
172 * If daemon is running take a ref on its fifo
174 if (!nfslockdfp
|| !(wvp
= (struct vnode
*)nfslockdfp
->f_data
)) {
175 if (!nfslockdwaiting
)
178 * Don't wake lock daemon if it hasn't been started yet and
179 * this is an unlock request (since we couldn't possibly
180 * actually have a lock on the file). This could be an
181 * uninformed unlock request due to closef()'s behavior of doing
182 * unlocks on all files if a process has had a lock on ANY file.
184 if (!nfslockdfp
&& (fl
->l_type
== F_UNLCK
))
186 /* wake up lock daemon */
187 (void)wakeup((void *)&nfslockdwaiting
);
188 /* wait on nfslockdfp for a while to allow daemon to start */
189 tsleep((void *)&nfslockdfp
, PCATCH
| PUSER
, "lockd", 60*hz
);
190 /* check for nfslockdfp and f_data */
191 if (!nfslockdfp
|| !(wvp
= (struct vnode
*)nfslockdfp
->f_data
))
196 * if there is no nfsowner table yet, allocate one.
198 if (ut
->uu_nlminfo
== NULL
) {
199 if (ap
->a_op
== F_UNLCK
) {
/* M_WAITOK | M_ZERO: the allocation may sleep and the new nlminfo starts zero-filled. */
203 MALLOC(ut
->uu_nlminfo
, struct nlminfo
*,
204 sizeof(struct nlminfo
), M_LOCKF
, M_WAITOK
| M_ZERO
);
/* Remember the process start time; nfslockdans compares it with the answer to detect pid reuse. */
205 ut
->uu_nlminfo
->pid_start
= p
->p_stats
->p_start
;
208 * Fill in the information structure.
210 msg
.lm_version
= LOCKD_MSG_VERSION
;
211 msg
.lm_msg_ident
.pid
= p
->p_pid
;
212 msg
.lm_msg_ident
.ut
= ut
;
213 msg
.lm_msg_ident
.pid_start
= ut
->uu_nlminfo
->pid_start
;
/* Pre-increment the per-thread sequence number; nfslockdans matches it against the answer. */
214 msg
.lm_msg_ident
.msg_seq
= ++(ut
->uu_nlminfo
->msg_seq
);
217 * The NFS Lock Manager protocol doesn't directly handle
218 * negative lengths or SEEK_END, so we need to normalize
219 * things here where we have all the info.
220 * (Note: SEEK_CUR is already adjusted for at this point)
222 /* Convert the flock structure into a start and end. */
223 switch (fl
->l_whence
) {
227 * Caller is responsible for adding any necessary offset
228 * to fl->l_start when SEEK_CUR is used.
233 /* need to flush, and refetch attributes to make */
234 /* sure we have the correct end of file offset */
235 if (np
->n_flag
& NMODIFIED
) {
237 error
= nfs_vinvalbuf(vp
, V_SAVE
, p
->p_ucred
, p
, 1);
244 error
= VOP_GETATTR(vp
, &vattr
, p
->p_ucred
, p
);
249 start
= np
->n_size
+ fl
->l_start
;
257 else if (fl
->l_len
> 0)
258 end
= start
+ fl
->l_len
- 1;
259 else { /* l_len is negative */
269 msg
.lm_fl
.l_start
= start
;
271 msg
.lm_fl
.l_len
= end
- start
+ 1;
273 msg
.lm_wait
= ap
->a_flags
& F_WAIT
;
274 msg
.lm_getlk
= ap
->a_op
== F_GETLK
;
276 nmp
= VFSTONFS(vp
->v_mount
);
282 bcopy(mtod(nmp
->nm_nam
, struct sockaddr
*), &msg
.lm_addr
,
283 min(sizeof msg
.lm_addr
,
284 mtod(nmp
->nm_nam
, struct sockaddr
*)->sa_len
));
285 msg
.lm_fh_len
= NFS_ISV3(vp
) ? VTONFS(vp
)->n_fhsize
: NFSX_V2FH
;
286 bcopy(VTONFS(vp
)->n_fhp
, msg
.lm_fh
, msg
.lm_fh_len
);
287 msg
.lm_nfsv3
= NFS_ISV3(vp
);
288 cru2x(p
->p_ucred
, &msg
.lm_cred
);
290 microuptime(&ut
->uu_nlminfo
->nlm_lockstart
);
292 fmode
= FFLAGS(O_WRONLY
);
293 if ((error
= VOP_OPEN(wvp
, fmode
, kernproc
->p_ucred
, p
))) {
299 #define IO_NOMACCHECK 0;
300 ioflg
= IO_UNIT
| IO_NOMACCHECK
;
302 VOP_LEASE(wvp
, p
, kernproc
->p_ucred
, LEASE_WRITE
);
304 while (nfslockdfifolock
& NFSLOCKDFIFOLOCK_LOCKED
) {
305 nfslockdfifolock
|= NFSLOCKDFIFOLOCK_WANT
;
306 if (tsleep((void *)&nfslockdfifolock
, PCATCH
| PUSER
, "lockdfifo", 20*hz
))
309 nfslockdfifolock
|= NFSLOCKDFIFOLOCK_LOCKED
;
311 error
= vn_rdwr(UIO_WRITE
, wvp
, (caddr_t
)&msg
, sizeof(msg
), 0,
312 UIO_SYSSPACE
, ioflg
, kernproc
->p_ucred
, NULL
, p
);
314 nfslockdfifowritten
= 1;
316 nfslockdfifolock
&= ~NFSLOCKDFIFOLOCK_LOCKED
;
317 if (nfslockdfifolock
& NFSLOCKDFIFOLOCK_WANT
) {
318 nfslockdfifolock
&= ~NFSLOCKDFIFOLOCK_WANT
;
319 wakeup((void *)&nfslockdfifolock
);
321 /* wake up lock daemon */
323 (void)wakeup((void *)&nfslockdwaiting
);
325 if (error
&& (((ioflg
& IO_NDELAY
) == 0) || error
!= EAGAIN
)) {
329 * If we're locking a file, wait for an answer. Unlocks succeed
332 if (fl
->l_type
== F_UNLCK
)
334 * XXX this isn't exactly correct. The client side
335 * needs to continue sending it's unlock until
336 * it gets a response back.
341 * retry after 20 seconds if we haven't gotten a response yet.
342 * This number was picked out of thin air... but is longer
343 * then even a reasonably loaded system should take (at least
344 * on a local network). XXX Probably should use a back-off
347 if ((error
= tsleep((void *)ut
->uu_nlminfo
,
348 PCATCH
| PUSER
, "lockd", 20*hz
)) != 0) {
349 if (error
== EWOULDBLOCK
) {
351 * We timed out, so we rewrite the request
352 * to the fifo, but only if it isn't already
/* F_GETLK answered with retcode 0: copy the lock described in the answer back into fl. */
362 if (msg
.lm_getlk
&& ut
->uu_nlminfo
->retcode
== 0) {
363 if (ut
->uu_nlminfo
->set_getlk
) {
364 fl
->l_pid
= ut
->uu_nlminfo
->getlk_pid
;
365 fl
->l_start
= ut
->uu_nlminfo
->getlk_start
;
366 fl
->l_len
= ut
->uu_nlminfo
->getlk_len
;
367 fl
->l_whence
= SEEK_SET
;
/* NOTE(review): presumably the branch for set_getlk == 0; the else line is not visible here. */
369 fl
->l_type
= F_UNLCK
;
/* The errno posted by nfslockdans becomes the result of the request. */
372 error
= ut
->uu_nlminfo
->retcode
;
/* Fold the time elapsed since nlm_lockstart into the global statistics, then reset the stamp. */
378 microuptime(&elapsed
);
379 timevalsub(&elapsed
, &ut
->uu_nlminfo
->nlm_lockstart
);
380 if (timevalcmp(&elapsed
, &nfsadvlock_longest
, >))
381 nfsadvlock_longest
= elapsed
;
382 timevaladd(&nfsadvlocks_time
, &elapsed
);
383 timerclear(&ut
->uu_nlminfo
->nlm_lockstart
);
/* Close the fifo vnode; the close status is reported only when no earlier error exists. */
385 error1
= vn_close(wvp
, FWRITE
, kernproc
->p_ucred
, p
);
386 /* prefer any previous 'error' to our vn_close 'error1'. */
387 return (error
!= 0 ? error
: error1
);
392 * NFS advisory byte-level locks answer from the lock daemon.
/*
 * nfslockdans: deliver an answer (ansp) from the lock daemon to the thread
 * that issued the matching request in nfs_dolock.
 *
 * Visible steps: check the privilege of the calling process p, check the
 * answer version against LOCKD_ANS_VERSION, look up the target process with
 * pfind(), validate the thread pointer, pid_start and msg_seq carried in the
 * answer, store la_errno and the getlk fields in the uu_nlminfo of the
 * target thread, and wake that thread.
 *
 * NOTE(review): the return statements, the loop body and the closing braces
 * are missing from this listing, so the value returned on each path cannot
 * be confirmed from here.
 */
395 nfslockdans(struct proc
*p
, struct lockd_ans
*ansp
)
397 struct proc
*targetp
;
398 struct uthread
*targetut
, *uth
;
402 * Let root, or someone who once was root (lockd generally
403 * switches to the daemon uid once it is done setting up) make
406 * XXX This authorization check is probably not right.
/* The failure branch is taken only when suser() fails AND the saved uid (p_svuid) is nonzero. */
408 if ((error
= suser(p
->p_ucred
, &p
->p_acflag
)) != 0 &&
409 p
->p_cred
->p_svuid
!= 0)
412 /* the version should match, or we're out of sync */
413 if (ansp
->la_vers
!= LOCKD_ANS_VERSION
)
416 /* Find the process & thread */
417 if ((targetp
= pfind(ansp
->la_msg_ident
.pid
)) == NULL
)
/* The thread pointer comes from the answer; nfs_dolock stores its own ut in the request identity. */
419 targetut
= ansp
->la_msg_ident
.ut
;
/* Walk the thread list of targetp; the loop body is not visible in this listing. */
420 TAILQ_FOREACH(uth
, &targetp
->p_uthlist
, uu_list
) {
425 * Verify the pid hasn't been reused (if we can), and it isn't waiting
426 * for an answer from a more recent request. We return an EPIPE if
427 * the match fails, because we've already used ESRCH above, and this
428 * is sort of like writing on a pipe after the reader has closed it.
429 * If only the seq# is off, don't return an error just return. It could
430 * just be a response to a retransmitted request.
432 if (uth
== NULL
|| uth
!= targetut
|| targetut
->uu_nlminfo
== NULL
)
/* A msg_seq of -1 in the answer bypasses both the pid_start and the sequence comparison. */
434 if (ansp
->la_msg_ident
.msg_seq
!= -1) {
435 if (timevalcmp(&targetut
->uu_nlminfo
->pid_start
,
436 &ansp
->la_msg_ident
.pid_start
, !=))
438 if (targetut
->uu_nlminfo
->msg_seq
!= ansp
->la_msg_ident
.msg_seq
)
442 /* Found the thread, so set its return errno and wake it up. */
444 targetut
->uu_nlminfo
->retcode
= ansp
->la_errno
;
445 targetut
->uu_nlminfo
->set_getlk
= ansp
->la_getlk_set
;
446 targetut
->uu_nlminfo
->getlk_pid
= ansp
->la_getlk_pid
;
447 targetut
->uu_nlminfo
->getlk_start
= ansp
->la_getlk_start
;
448 targetut
->uu_nlminfo
->getlk_len
= ansp
->la_getlk_len
;
/* nfs_dolock sleeps on the nlminfo address; wake it now that the result is stored. */
450 (void)wakeup((void *)targetut
->uu_nlminfo
);
457 * NFS advisory byte-level locks: fifo file# from the lock daemon.
/*
 * nfslockdfd: take the fifo file descriptor fd from the calling process p
 * (the lock daemon).
 *
 * Visible steps: check the caller with suser(), resolve fd with getvnode(),
 * record the daemon pid in nfslockdpid and wake threads sleeping on
 * &nfslockdfp.
 *
 * NOTE(review): the statements that assign nfslockdfp and use ofp, and all
 * return statements, are missing from this listing.
 */
460 nfslockdfd(struct proc
*p
, int fd
)
463 struct file
*fp
, *ofp
;
/* Privilege check on the calling process. */
465 error
= suser(p
->p_ucred
, &p
->p_acflag
);
/* Resolve fd to its struct file. */
471 error
= getvnode(p
, fd
, &fp
);
/* The caller becomes the recorded daemon only while a fifo file is set; otherwise the pid is cleared. */
480 nfslockdpid
= nfslockdfp
? p
->p_pid
: 0;
/* nfs_dolock sleeps on &nfslockdfp while it waits for the daemon to start. */
481 (void)wakeup((void *)&nfslockdfp
);
487 * lock daemon waiting for lock request
/*
 * nfslockdwait: called by the lock daemon (process p) to wait for the next
 * lock request.
 *
 * Visible steps: a caller whose pid differs from nfslockdpid is checked with
 * suser(); a pending nfslockdfifowritten flag is cleared; otherwise the
 * caller sleeps on &nfslockdwaiting, the channel nfs_dolock wakes.
 *
 * NOTE(review): the return statements and closing braces are missing from
 * this listing, and the function may continue past the last visible line.
 */
490 nfslockdwait(struct proc
*p
)
493 struct file
*fp
, *ofp
;
/* Only callers other than the recorded daemon pid go through the suser() check. */
495 if (p
->p_pid
!= nfslockdpid
) {
496 error
= suser(p
->p_ucred
, &p
->p_acflag
);
/* nfs_dolock sets this flag after writing a request to the fifo; consume it here. */
502 if (nfslockdfifowritten
) {
503 nfslockdfifowritten
= 0;
/* Timeout argument 0: sleep until wakeup() on &nfslockdwaiting; PCATCH allows a signal to interrupt. */
508 tsleep((void *)&nfslockdwaiting
, PCATCH
| PUSER
, "lockd", 0);