]>
Commit | Line | Data |
---|---|---|
55e303ae A |
1 | /* |
2 | * Copyright (c) 2002-2003 Apple Computer, Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved. | |
7 | * | |
8 | * This file contains Original Code and/or Modifications of Original Code | |
9 | * as defined in and that are subject to the Apple Public Source License | |
10 | * Version 2.0 (the 'License'). You may not use this file except in | |
11 | * compliance with the License. Please obtain a copy of the License at | |
12 | * http://www.opensource.apple.com/apsl/ and read it before using this | |
13 | * file. | |
14 | * | |
15 | * The Original Code and all software distributed under the License are | |
16 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
17 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
18 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
19 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
20 | * Please see the License for the specific language governing rights and | |
21 | * limitations under the License. | |
22 | * | |
23 | * @APPLE_LICENSE_HEADER_END@ | |
24 | */ | |
25 | /*- | |
26 | * Copyright (c) 1997 Berkeley Software Design, Inc. All rights reserved. | |
27 | * | |
28 | * Redistribution and use in source and binary forms, with or without | |
29 | * modification, are permitted provided that the following conditions | |
30 | * are met: | |
31 | * 1. Redistributions of source code must retain the above copyright | |
32 | * notice, this list of conditions and the following disclaimer. | |
33 | * 2. Redistributions in binary form must reproduce the above copyright | |
34 | * notice, this list of conditions and the following disclaimer in the | |
35 | * documentation and/or other materials provided with the distribution. | |
36 | * 3. Berkeley Software Design Inc's name may not be used to endorse or | |
37 | * promote products derived from this software without specific prior | |
38 | * written permission. | |
39 | * | |
40 | * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND | |
41 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
42 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
43 | * ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE | |
44 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
45 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
46 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
47 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
48 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
49 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
50 | * SUCH DAMAGE. | |
51 | * | |
52 | * from BSDI nfs_lock.c,v 2.4 1998/12/14 23:49:56 jch Exp | |
53 | */ | |
54 | ||
55 | #include <sys/cdefs.h> | |
56 | #include <sys/param.h> | |
57 | #include <sys/systm.h> | |
58 | #include <sys/fcntl.h> | |
59 | #include <sys/kernel.h> /* for hz */ | |
60 | #include <sys/file.h> | |
61 | #include <sys/lock.h> | |
62 | #include <sys/malloc.h> | |
63 | #include <sys/lockf.h> /* for hz */ /* Must come after sys/malloc.h */ | |
64 | #include <sys/mbuf.h> | |
65 | #include <sys/mount.h> | |
66 | #include <sys/namei.h> | |
67 | #include <sys/proc.h> | |
68 | #include <sys/resourcevar.h> | |
69 | #include <sys/socket.h> | |
70 | #include <sys/socket.h> | |
71 | #include <sys/unistd.h> | |
72 | #include <sys/user.h> | |
73 | #include <sys/vnode.h> | |
74 | ||
75 | #include <kern/thread_act.h> | |
76 | ||
77 | #include <machine/limits.h> | |
78 | ||
79 | #include <net/if.h> | |
80 | ||
81 | #include <nfs/rpcv2.h> | |
82 | #include <nfs/nfsproto.h> | |
83 | #include <nfs/nfs.h> | |
84 | #include <nfs/nfsmount.h> | |
85 | #include <nfs/nfsnode.h> | |
86 | #include <nfs/nfs_lock.h> | |
87 | #include <nfs/nlminfo.h> | |
88 | ||
/* Largest file offset accepted when range-checking a lock request. */
#define OFF_MAX QUAD_MAX

/* Bits kept in nfslockdfifolock, which serializes writers on the lockd fifo. */
#define NFSLOCKDFIFOLOCK_LOCKED 1
#define NFSLOCKDFIFOLOCK_WANT 2

/*
 * Advisory lock statistics: number of requests handled, the longest single
 * request, and the accumulated time spent in all requests.
 */
uint64_t nfsadvlocks = 0;
struct timeval nfsadvlock_longest = { 0, 0 };
struct timeval nfsadvlocks_time = { 0, 0 };

/*
 * Lock daemon state: its pid, the file for its request fifo, whether it is
 * currently asleep in nfslockdwait(), whether a request has been written to
 * the fifo since it last checked, and the fifo writer lock word.
 */
pid_t nfslockdpid = 0;
struct file *nfslockdfp = NULL;
int nfslockdwaiting = 0;
int nfslockdfifowritten = 0;
int nfslockdfifolock = 0;
102 | ||
103 | /* | |
104 | * XXX | |
105 | * We have to let the process know if the call succeeded. I'm using an extra | |
106 | * field in the uthread's uu_nlminfo structure, since that structure is | |
107 | * already used for lockd state. | |
108 | */ | |
109 | ||
110 | /* | |
111 | * nfs_advlock -- | |
112 | * NFS advisory byte-level locks. | |
113 | */ | |
114 | int | |
115 | nfs_dolock(struct vop_advlock_args *ap) | |
116 | /* struct vop_advlock_args { | |
117 | struct vnodeop_desc *a_desc; | |
118 | struct vnode *a_vp; | |
119 | caddr_t a_id; | |
120 | int a_op; | |
121 | struct flock *a_fl; | |
122 | int a_flags; | |
123 | }; */ | |
124 | { | |
125 | LOCKD_MSG msg; | |
126 | struct nameidata nd; | |
127 | struct vnode *vp, *wvp; | |
128 | struct nfsnode *np; | |
129 | int error, error1; | |
130 | struct flock *fl; | |
131 | int fmode, ioflg; | |
132 | struct proc *p; | |
133 | struct uthread *ut; | |
134 | struct timeval elapsed; | |
135 | struct nfsmount *nmp; | |
136 | struct vattr vattr; | |
137 | off_t start, end; | |
138 | ||
139 | ut = get_bsdthread_info(current_act()); | |
140 | p = current_proc(); | |
141 | ||
142 | vp = ap->a_vp; | |
143 | fl = ap->a_fl; | |
144 | np = VTONFS(vp); | |
145 | ||
146 | nmp = VFSTONFS(vp->v_mount); | |
147 | if (!nmp) | |
148 | return (ENXIO); | |
149 | if (nmp->nm_flag & NFSMNT_NOLOCKS) | |
150 | return (EOPNOTSUPP); | |
151 | ||
152 | /* | |
153 | * The NLM protocol doesn't allow the server to return an error | |
154 | * on ranges, so we do it. Pre LFS (Large File Summit) | |
155 | * standards required EINVAL for the range errors. More recent | |
156 | * standards use EOVERFLOW, but their EINVAL wording still | |
157 | * encompasses these errors. | |
158 | * Any code sensitive to this is either: | |
159 | * 1) written pre-LFS and so can handle only EINVAL, or | |
160 | * 2) written post-LFS and thus ought to be tolerant of pre-LFS | |
161 | * implementations. | |
162 | * Since returning EOVERFLOW certainly breaks 1), we return EINVAL. | |
163 | */ | |
164 | if (fl->l_whence != SEEK_END) { | |
165 | if ((fl->l_whence != SEEK_CUR && fl->l_whence != SEEK_SET) || | |
166 | fl->l_start < 0 || | |
167 | (fl->l_len > 0 && fl->l_len - 1 > OFF_MAX - fl->l_start) || | |
168 | (fl->l_len < 0 && fl->l_start + fl->l_len < 0)) | |
169 | return (EINVAL); | |
170 | } | |
171 | /* | |
172 | * If daemon is running take a ref on its fifo | |
173 | */ | |
174 | if (!nfslockdfp || !(wvp = (struct vnode *)nfslockdfp->f_data)) { | |
175 | if (!nfslockdwaiting) | |
176 | return (EOPNOTSUPP); | |
177 | /* | |
178 | * Don't wake lock daemon if it hasn't been started yet and | |
179 | * this is an unlock request (since we couldn't possibly | |
180 | * actually have a lock on the file). This could be an | |
181 | * uninformed unlock request due to closef()'s behavior of doing | |
182 | * unlocks on all files if a process has had a lock on ANY file. | |
183 | */ | |
184 | if (!nfslockdfp && (fl->l_type == F_UNLCK)) | |
185 | return (EINVAL); | |
186 | /* wake up lock daemon */ | |
187 | (void)wakeup((void *)&nfslockdwaiting); | |
188 | /* wait on nfslockdfp for a while to allow daemon to start */ | |
189 | tsleep((void *)&nfslockdfp, PCATCH | PUSER, "lockd", 60*hz); | |
190 | /* check for nfslockdfp and f_data */ | |
191 | if (!nfslockdfp || !(wvp = (struct vnode *)nfslockdfp->f_data)) | |
192 | return (EOPNOTSUPP); | |
193 | } | |
194 | VREF(wvp); | |
195 | /* | |
196 | * if there is no nfsowner table yet, allocate one. | |
197 | */ | |
198 | if (ut->uu_nlminfo == NULL) { | |
199 | if (ap->a_op == F_UNLCK) { | |
200 | vrele(wvp); | |
201 | return (0); | |
202 | } | |
203 | MALLOC(ut->uu_nlminfo, struct nlminfo *, | |
204 | sizeof(struct nlminfo), M_LOCKF, M_WAITOK | M_ZERO); | |
205 | ut->uu_nlminfo->pid_start = p->p_stats->p_start; | |
206 | } | |
207 | /* | |
208 | * Fill in the information structure. | |
209 | */ | |
210 | msg.lm_version = LOCKD_MSG_VERSION; | |
211 | msg.lm_msg_ident.pid = p->p_pid; | |
212 | msg.lm_msg_ident.ut = ut; | |
213 | msg.lm_msg_ident.pid_start = ut->uu_nlminfo->pid_start; | |
214 | msg.lm_msg_ident.msg_seq = ++(ut->uu_nlminfo->msg_seq); | |
215 | ||
216 | /* | |
217 | * The NFS Lock Manager protocol doesn't directly handle | |
218 | * negative lengths or SEEK_END, so we need to normalize | |
219 | * things here where we have all the info. | |
220 | * (Note: SEEK_CUR is already adjusted for at this point) | |
221 | */ | |
222 | /* Convert the flock structure into a start and end. */ | |
223 | switch (fl->l_whence) { | |
224 | case SEEK_SET: | |
225 | case SEEK_CUR: | |
226 | /* | |
227 | * Caller is responsible for adding any necessary offset | |
228 | * to fl->l_start when SEEK_CUR is used. | |
229 | */ | |
230 | start = fl->l_start; | |
231 | break; | |
232 | case SEEK_END: | |
233 | /* need to flush, and refetch attributes to make */ | |
234 | /* sure we have the correct end of file offset */ | |
235 | if (np->n_flag & NMODIFIED) { | |
236 | np->n_attrstamp = 0; | |
237 | error = nfs_vinvalbuf(vp, V_SAVE, p->p_ucred, p, 1); | |
238 | if (error) { | |
239 | vrele(wvp); | |
240 | return (error); | |
241 | } | |
242 | } | |
243 | np->n_attrstamp = 0; | |
244 | error = VOP_GETATTR(vp, &vattr, p->p_ucred, p); | |
245 | if (error) { | |
246 | vrele(wvp); | |
247 | return (error); | |
248 | } | |
249 | start = np->n_size + fl->l_start; | |
250 | break; | |
251 | default: | |
252 | vrele(wvp); | |
253 | return (EINVAL); | |
254 | } | |
255 | if (fl->l_len == 0) | |
256 | end = -1; | |
257 | else if (fl->l_len > 0) | |
258 | end = start + fl->l_len - 1; | |
259 | else { /* l_len is negative */ | |
260 | end = start - 1; | |
261 | start += fl->l_len; | |
262 | } | |
263 | if (start < 0) { | |
264 | vrele(wvp); | |
265 | return (EINVAL); | |
266 | } | |
267 | ||
268 | msg.lm_fl = *fl; | |
269 | msg.lm_fl.l_start = start; | |
270 | if (end != -1) | |
271 | msg.lm_fl.l_len = end - start + 1; | |
272 | ||
273 | msg.lm_wait = ap->a_flags & F_WAIT; | |
274 | msg.lm_getlk = ap->a_op == F_GETLK; | |
275 | ||
276 | nmp = VFSTONFS(vp->v_mount); | |
277 | if (!nmp) { | |
278 | vrele(wvp); | |
279 | return (ENXIO); | |
280 | } | |
281 | ||
282 | bcopy(mtod(nmp->nm_nam, struct sockaddr *), &msg.lm_addr, | |
283 | min(sizeof msg.lm_addr, | |
284 | mtod(nmp->nm_nam, struct sockaddr *)->sa_len)); | |
285 | msg.lm_fh_len = NFS_ISV3(vp) ? VTONFS(vp)->n_fhsize : NFSX_V2FH; | |
286 | bcopy(VTONFS(vp)->n_fhp, msg.lm_fh, msg.lm_fh_len); | |
287 | msg.lm_nfsv3 = NFS_ISV3(vp); | |
288 | cru2x(p->p_ucred, &msg.lm_cred); | |
289 | ||
290 | microuptime(&ut->uu_nlminfo->nlm_lockstart); | |
291 | ||
292 | fmode = FFLAGS(O_WRONLY); | |
293 | if ((error = VOP_OPEN(wvp, fmode, kernproc->p_ucred, p))) { | |
294 | vrele(wvp); | |
295 | return (error); | |
296 | } | |
297 | ++wvp->v_writecount; | |
298 | ||
299 | #define IO_NOMACCHECK 0; | |
300 | ioflg = IO_UNIT | IO_NOMACCHECK; | |
301 | for (;;) { | |
302 | VOP_LEASE(wvp, p, kernproc->p_ucred, LEASE_WRITE); | |
303 | ||
304 | while (nfslockdfifolock & NFSLOCKDFIFOLOCK_LOCKED) { | |
305 | nfslockdfifolock |= NFSLOCKDFIFOLOCK_WANT; | |
306 | if (tsleep((void *)&nfslockdfifolock, PCATCH | PUSER, "lockdfifo", 20*hz)) | |
307 | break; | |
308 | } | |
309 | nfslockdfifolock |= NFSLOCKDFIFOLOCK_LOCKED; | |
310 | ||
311 | error = vn_rdwr(UIO_WRITE, wvp, (caddr_t)&msg, sizeof(msg), 0, | |
312 | UIO_SYSSPACE, ioflg, kernproc->p_ucred, NULL, p); | |
313 | ||
314 | nfslockdfifowritten = 1; | |
315 | ||
316 | nfslockdfifolock &= ~NFSLOCKDFIFOLOCK_LOCKED; | |
317 | if (nfslockdfifolock & NFSLOCKDFIFOLOCK_WANT) { | |
318 | nfslockdfifolock &= ~NFSLOCKDFIFOLOCK_WANT; | |
319 | wakeup((void *)&nfslockdfifolock); | |
320 | } | |
321 | /* wake up lock daemon */ | |
322 | if (nfslockdwaiting) | |
323 | (void)wakeup((void *)&nfslockdwaiting); | |
324 | ||
325 | if (error && (((ioflg & IO_NDELAY) == 0) || error != EAGAIN)) { | |
326 | break; | |
327 | } | |
328 | /* | |
329 | * If we're locking a file, wait for an answer. Unlocks succeed | |
330 | * immediately. | |
331 | */ | |
332 | if (fl->l_type == F_UNLCK) | |
333 | /* | |
334 | * XXX this isn't exactly correct. The client side | |
335 | * needs to continue sending it's unlock until | |
336 | * it gets a response back. | |
337 | */ | |
338 | break; | |
339 | ||
340 | /* | |
341 | * retry after 20 seconds if we haven't gotten a response yet. | |
342 | * This number was picked out of thin air... but is longer | |
343 | * then even a reasonably loaded system should take (at least | |
344 | * on a local network). XXX Probably should use a back-off | |
345 | * scheme. | |
346 | */ | |
347 | if ((error = tsleep((void *)ut->uu_nlminfo, | |
348 | PCATCH | PUSER, "lockd", 20*hz)) != 0) { | |
349 | if (error == EWOULDBLOCK) { | |
350 | /* | |
351 | * We timed out, so we rewrite the request | |
352 | * to the fifo, but only if it isn't already | |
353 | * full. | |
354 | */ | |
355 | ioflg |= IO_NDELAY; | |
356 | continue; | |
357 | } | |
358 | ||
359 | break; | |
360 | } | |
361 | ||
362 | if (msg.lm_getlk && ut->uu_nlminfo->retcode == 0) { | |
363 | if (ut->uu_nlminfo->set_getlk) { | |
364 | fl->l_pid = ut->uu_nlminfo->getlk_pid; | |
365 | fl->l_start = ut->uu_nlminfo->getlk_start; | |
366 | fl->l_len = ut->uu_nlminfo->getlk_len; | |
367 | fl->l_whence = SEEK_SET; | |
368 | } else { | |
369 | fl->l_type = F_UNLCK; | |
370 | } | |
371 | } | |
372 | error = ut->uu_nlminfo->retcode; | |
373 | break; | |
374 | } | |
375 | ||
376 | /* XXX stats */ | |
377 | nfsadvlocks++; | |
378 | microuptime(&elapsed); | |
379 | timevalsub(&elapsed, &ut->uu_nlminfo->nlm_lockstart); | |
380 | if (timevalcmp(&elapsed, &nfsadvlock_longest, >)) | |
381 | nfsadvlock_longest = elapsed; | |
382 | timevaladd(&nfsadvlocks_time, &elapsed); | |
383 | timerclear(&ut->uu_nlminfo->nlm_lockstart); | |
384 | ||
385 | error1 = vn_close(wvp, FWRITE, kernproc->p_ucred, p); | |
386 | /* prefer any previous 'error' to our vn_close 'error1'. */ | |
387 | return (error != 0 ? error : error1); | |
388 | } | |
389 | ||
390 | /* | |
391 | * nfslockdans -- | |
392 | * NFS advisory byte-level locks answer from the lock daemon. | |
393 | */ | |
394 | int | |
395 | nfslockdans(struct proc *p, struct lockd_ans *ansp) | |
396 | { | |
397 | struct proc *targetp; | |
398 | struct uthread *targetut, *uth; | |
399 | int error; | |
400 | ||
401 | /* | |
402 | * Let root, or someone who once was root (lockd generally | |
403 | * switches to the daemon uid once it is done setting up) make | |
404 | * this call. | |
405 | * | |
406 | * XXX This authorization check is probably not right. | |
407 | */ | |
408 | if ((error = suser(p->p_ucred, &p->p_acflag)) != 0 && | |
409 | p->p_cred->p_svuid != 0) | |
410 | return (error); | |
411 | ||
412 | /* the version should match, or we're out of sync */ | |
413 | if (ansp->la_vers != LOCKD_ANS_VERSION) | |
414 | return (EINVAL); | |
415 | ||
416 | /* Find the process & thread */ | |
417 | if ((targetp = pfind(ansp->la_msg_ident.pid)) == NULL) | |
418 | return (ESRCH); | |
419 | targetut = ansp->la_msg_ident.ut; | |
420 | TAILQ_FOREACH(uth, &targetp->p_uthlist, uu_list) { | |
421 | if (uth == targetut) | |
422 | break; | |
423 | } | |
424 | /* | |
425 | * Verify the pid hasn't been reused (if we can), and it isn't waiting | |
426 | * for an answer from a more recent request. We return an EPIPE if | |
427 | * the match fails, because we've already used ESRCH above, and this | |
428 | * is sort of like writing on a pipe after the reader has closed it. | |
429 | * If only the seq# is off, don't return an error just return. It could | |
430 | * just be a response to a retransmitted request. | |
431 | */ | |
432 | if (uth == NULL || uth != targetut || targetut->uu_nlminfo == NULL) | |
433 | return (EPIPE); | |
434 | if (ansp->la_msg_ident.msg_seq != -1) { | |
435 | if (timevalcmp(&targetut->uu_nlminfo->pid_start, | |
436 | &ansp->la_msg_ident.pid_start, !=)) | |
437 | return (EPIPE); | |
438 | if (targetut->uu_nlminfo->msg_seq != ansp->la_msg_ident.msg_seq) | |
439 | return (0); | |
440 | } | |
441 | ||
442 | /* Found the thread, so set its return errno and wake it up. */ | |
443 | ||
444 | targetut->uu_nlminfo->retcode = ansp->la_errno; | |
445 | targetut->uu_nlminfo->set_getlk = ansp->la_getlk_set; | |
446 | targetut->uu_nlminfo->getlk_pid = ansp->la_getlk_pid; | |
447 | targetut->uu_nlminfo->getlk_start = ansp->la_getlk_start; | |
448 | targetut->uu_nlminfo->getlk_len = ansp->la_getlk_len; | |
449 | ||
450 | (void)wakeup((void *)targetut->uu_nlminfo); | |
451 | ||
452 | return (0); | |
453 | } | |
454 | ||
455 | /* | |
456 | * nfslockdfd -- | |
457 | * NFS advisory byte-level locks: fifo file# from the lock daemon. | |
458 | */ | |
459 | int | |
460 | nfslockdfd(struct proc *p, int fd) | |
461 | { | |
462 | int error; | |
463 | struct file *fp, *ofp; | |
464 | ||
465 | error = suser(p->p_ucred, &p->p_acflag); | |
466 | if (error) | |
467 | return (error); | |
468 | if (fd < 0) { | |
469 | fp = 0; | |
470 | } else { | |
471 | error = getvnode(p, fd, &fp); | |
472 | if (error) | |
473 | return (error); | |
474 | (void)fref(fp); | |
475 | } | |
476 | ofp = nfslockdfp; | |
477 | nfslockdfp = fp; | |
478 | if (ofp) | |
479 | (void)frele(ofp); | |
480 | nfslockdpid = nfslockdfp ? p->p_pid : 0; | |
481 | (void)wakeup((void *)&nfslockdfp); | |
482 | return (0); | |
483 | } | |
484 | ||
485 | /* | |
486 | * nfslockdwait -- | |
487 | * lock daemon waiting for lock request | |
488 | */ | |
489 | int | |
490 | nfslockdwait(struct proc *p) | |
491 | { | |
492 | int error; | |
493 | struct file *fp, *ofp; | |
494 | ||
495 | if (p->p_pid != nfslockdpid) { | |
496 | error = suser(p->p_ucred, &p->p_acflag); | |
497 | if (error) | |
498 | return (error); | |
499 | } | |
500 | if (nfslockdwaiting) | |
501 | return (EBUSY); | |
502 | if (nfslockdfifowritten) { | |
503 | nfslockdfifowritten = 0; | |
504 | return (0); | |
505 | } | |
506 | ||
507 | nfslockdwaiting = 1; | |
508 | tsleep((void *)&nfslockdwaiting, PCATCH | PUSER, "lockd", 0); | |
509 | nfslockdwaiting = 0; | |
510 | ||
511 | return (0); | |
512 | } |