/*
 * Copyright (c) 2002-2007 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*-
 * Copyright (c) 1997 Berkeley Software Design, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Berkeley Software Design Inc's name may not be used to endorse or
 *    promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from BSDI nfs_lock.c,v 2.4 1998/12/14 23:49:56 jch Exp
 */

#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/fcntl.h>
#include <sys/kernel.h>		/* for hz */
#include <sys/file_internal.h>
#include <sys/malloc.h>
#include <sys/lockf.h>		/* for hz */ /* Must come after sys/malloc.h */
#include <sys/kpi_mbuf.h>
#include <sys/mount_internal.h>
#include <sys/proc_internal.h>	/* for p_start */
#include <sys/kauth.h>
#include <sys/resourcevar.h>
#include <sys/socket.h>
#include <sys/unistd.h>
#include <sys/user.h>
#include <sys/vnode_internal.h>

#include <kern/thread.h>
#include <kern/host.h>

#include <machine/limits.h>

#include <net/if.h>

#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
#include <nfs/nfs_gss.h>
#include <nfs/nfsmount.h>
#include <nfs/nfsnode.h>
#include <nfs/nfs_lock.h>

#include <mach/host_priv.h>
#include <mach/mig_errors.h>
#include <mach/host_special_ports.h>
#include <lockd/lockd_mach.h>

extern void ipc_port_release_send(ipc_port_t);

#define OFF_MAX QUAD_MAX

/*
 * Pending lock request messages are kept in this queue, which is
 * kept sorted by transaction ID (xid).
 */
static uint64_t nfs_lockxid = 0;
static LOCKD_MSG_QUEUE nfs_pendlockq;

/*
 * This structure is used to identify processes which have acquired NFS locks.
 * Knowing which processes have ever acquired locks allows us to short-circuit
 * unlock requests for processes that have never had an NFS file lock, thus
 * avoiding a costly and unnecessary lockd request.
 */
struct nfs_lock_pid {
	TAILQ_ENTRY(nfs_lock_pid)	lp_lru;		/* LRU list */
	LIST_ENTRY(nfs_lock_pid)	lp_hash;	/* hash chain */
	int				lp_valid;	/* valid entry? */
	int				lp_time;	/* last time seen valid */
	pid_t				lp_pid;		/* The process ID. */
	struct timeval			lp_pid_start;	/* Start time of process id */
};

#define NFS_LOCK_PID_HASH_SIZE	64	// XXX tune me
#define NFS_LOCK_PID_HASH(pid)	\
	(&nfs_lock_pid_hash_tbl[(pid) & nfs_lock_pid_hash])
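
/*
 * For illustration (hypothetical values): hashinit() below creates a table
 * of NFS_LOCK_PID_HASH_SIZE (64) buckets and sets nfs_lock_pid_hash to the
 * mask 63, so NFS_LOCK_PID_HASH(1234) selects bucket 1234 & 63 == 18.
 * Pids that collide on the low bits share a bucket and are told apart by
 * walking the lp_hash chain and comparing lp_pid (and lp_pid_start).
 */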
static LIST_HEAD(, nfs_lock_pid) *nfs_lock_pid_hash_tbl;
static TAILQ_HEAD(, nfs_lock_pid) nfs_lock_pid_lru;
static u_long nfs_lock_pid_hash, nfs_lock_pid_hash_trusted;

static lck_grp_t *nfs_lock_lck_grp;
static lck_mtx_t *nfs_lock_mutex;


/*
 * initialize global nfs lock state
 */
void
nfs_lockinit(void)
{
	TAILQ_INIT(&nfs_pendlockq);
	nfs_lock_pid_hash_trusted = 1;
	nfs_lock_pid_hash_tbl = hashinit(NFS_LOCK_PID_HASH_SIZE,
					 M_TEMP, &nfs_lock_pid_hash);
	TAILQ_INIT(&nfs_lock_pid_lru);

	nfs_lock_lck_grp = lck_grp_alloc_init("nfs_lock", LCK_GRP_ATTR_NULL);
	nfs_lock_mutex = lck_mtx_alloc_init(nfs_lock_lck_grp, LCK_ATTR_NULL);
}

/*
 * change the count of NFS mounts that may need to make lockd requests
 *
 * If the mount count drops to zero, then send a shutdown request to
 * lockd if we've sent any requests to it.
 */
void
nfs_lockd_mount_change(int i)
{
	mach_port_t lockd_port = IPC_PORT_NULL;
	kern_return_t kr;
	int send_shutdown;

	lck_mtx_lock(nfs_lock_mutex);

	nfs_lockd_mounts += i;

	/* send a shutdown request if there are no more lockd mounts */
	send_shutdown = ((nfs_lockd_mounts == 0) && nfs_lockd_request_sent);
	if (send_shutdown)
		nfs_lockd_request_sent = 0;

	lck_mtx_unlock(nfs_lock_mutex);

	if (!send_shutdown)
		return;

	/*
	 * Let lockd know that it is no longer needed for any NFS mounts
	 */
	kr = host_get_lockd_port(host_priv_self(), &lockd_port);
	if ((kr != KERN_SUCCESS) || !IPC_PORT_VALID(lockd_port)) {
		printf("nfs_lockd_mount_change: shutdown couldn't get port, kr %d, port %s\n",
			kr, (lockd_port == IPC_PORT_NULL) ? "NULL" :
			(lockd_port == IPC_PORT_DEAD) ? "DEAD" : "VALID");
		return;
	}

	kr = lockd_shutdown(lockd_port);
	if (kr != KERN_SUCCESS)
		printf("nfs_lockd_mount_change: shutdown %d\n", kr);

	ipc_port_release_send(lockd_port);
}

/*
 * insert a lock request message into the pending queue
 * (nfs_lock_mutex must be held)
 */
static inline void
nfs_lockdmsg_enqueue(LOCKD_MSG_REQUEST *msgreq)
{
	LOCKD_MSG_REQUEST *mr;

	mr = TAILQ_LAST(&nfs_pendlockq, nfs_lock_msg_queue);
	if (!mr || (msgreq->lmr_msg.lm_xid > mr->lmr_msg.lm_xid)) {
		/* fast path: empty queue or new largest xid */
		TAILQ_INSERT_TAIL(&nfs_pendlockq, msgreq, lmr_next);
		return;
	}
	/* slow path: need to walk list to find insertion point */
	while (mr && (msgreq->lmr_msg.lm_xid < mr->lmr_msg.lm_xid)) {
		mr = TAILQ_PREV(mr, nfs_lock_msg_queue, lmr_next);
	}
	if (mr) {
		TAILQ_INSERT_AFTER(&nfs_pendlockq, mr, msgreq, lmr_next);
	} else {
		TAILQ_INSERT_HEAD(&nfs_pendlockq, msgreq, lmr_next);
	}
}
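
/*
 * For illustration, with hypothetical xids: if the queue holds 5, 7, 9 and
 * a request with xid 8 arrives, the fast path fails (8 is not > 9), the
 * walk steps back past 9 and stops at 7, and TAILQ_INSERT_AFTER leaves the
 * queue sorted as 5, 7, 8, 9.
 */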

/*
 * remove a lock request message from the pending queue
 * (nfs_lock_mutex must be held)
 */
static inline void
nfs_lockdmsg_dequeue(LOCKD_MSG_REQUEST *msgreq)
{
	TAILQ_REMOVE(&nfs_pendlockq, msgreq, lmr_next);
}

/*
 * find a pending lock request message by xid
 *
 * We search from the head of the list assuming that the message we're
 * looking for is for an older request (because we have an answer to it).
 * This assumes that lock requests will be answered primarily in FIFO order.
 * However, this may not be the case if there are blocked requests. We may
 * want to move blocked requests to a separate queue (but that'll complicate
 * duplicate xid checking).
 *
 * (nfs_lock_mutex must be held)
 */
static inline LOCKD_MSG_REQUEST *
nfs_lockdmsg_find_by_xid(uint64_t lockxid)
{
	LOCKD_MSG_REQUEST *mr;

	TAILQ_FOREACH(mr, &nfs_pendlockq, lmr_next) {
		if (mr->lmr_msg.lm_xid == lockxid)
			return mr;
		if (mr->lmr_msg.lm_xid > lockxid)
			return NULL;
	}
	return mr;
}

/*
 * Because we can't depend on nlm_granted messages containing the same
 * cookie we sent with the original lock request, we need code to test if
 * an nlm_granted answer matches the lock request. We also need code
 * that can find a lockd message based solely on the nlm_granted answer.
 */

/*
 * compare lockd message to answer
 *
 * returns 0 on equality and 1 if different
 */
static inline int
nfs_lockdmsg_compare_to_answer(LOCKD_MSG_REQUEST *msgreq, struct lockd_ans *ansp)
{
	if (!(ansp->la_flags & LOCKD_ANS_LOCK_INFO))
		return 1;
	if (msgreq->lmr_msg.lm_fl.l_pid != ansp->la_pid)
		return 1;
	if (msgreq->lmr_msg.lm_fl.l_start != ansp->la_start)
		return 1;
	if (msgreq->lmr_msg.lm_fl.l_len != ansp->la_len)
		return 1;
	if (msgreq->lmr_msg.lm_fh_len != ansp->la_fh_len)
		return 1;
	if (bcmp(msgreq->lmr_msg.lm_fh, ansp->la_fh, ansp->la_fh_len))
		return 1;
	return 0;
}

/*
 * find a pending lock request message based on the lock info provided
 * in the lockd_ans/nlm_granted data. We need this because we can't
 * depend on nlm_granted messages containing the same cookie we sent
 * with the original lock request.
 *
 * We search from the head of the list assuming that the message we're
 * looking for is for an older request (because we have an answer to it).
 * This assumes that lock requests will be answered primarily in FIFO order.
 * However, this may not be the case if there are blocked requests. We may
 * want to move blocked requests to a separate queue (but that'll complicate
 * duplicate xid checking).
 *
 * (nfs_lock_mutex must be held)
 */
static inline LOCKD_MSG_REQUEST *
nfs_lockdmsg_find_by_answer(struct lockd_ans *ansp)
{
	LOCKD_MSG_REQUEST *mr;

	if (!(ansp->la_flags & LOCKD_ANS_LOCK_INFO))
		return NULL;
	TAILQ_FOREACH(mr, &nfs_pendlockq, lmr_next) {
		if (!nfs_lockdmsg_compare_to_answer(mr, ansp))
			break;
	}
	return mr;
}

/*
 * return the next unique lock request transaction ID
 * (nfs_lock_mutex must be held)
 */
static inline uint64_t
nfs_lockxid_get(void)
{
	LOCKD_MSG_REQUEST *mr;

	/* derive initial lock xid from system time */
	if (!nfs_lockxid) {
		/*
		 * Note: it's OK if this code inits nfs_lockxid to 0 (for example,
		 * due to a broken clock) because we immediately increment it
		 * and we guarantee to never use xid 0. So, nfs_lockxid should only
		 * ever be 0 the first time this function is called.
		 */
		struct timeval tv;
		microtime(&tv);
		nfs_lockxid = (uint64_t)tv.tv_sec << 12;
	}
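	/*
	 * For illustration: a tv_sec of 1000000000 seeds nfs_lockxid with
	 * 1000000000 << 12 (0x3B9ACA00000).  Keeping the low 12 bits clear
	 * leaves room for 4096 xids per second of clock time, making it
	 * unlikely that xids issued just before a restart collide with
	 * those issued just after it.
	 */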

	/* make sure we get a unique xid */
	do {
		/* Skip zero xid if it should ever happen. */
		if (++nfs_lockxid == 0)
			nfs_lockxid++;
		if (!(mr = TAILQ_LAST(&nfs_pendlockq, nfs_lock_msg_queue)) ||
		     (mr->lmr_msg.lm_xid < nfs_lockxid)) {
			/* fast path: empty queue or new largest xid */
			break;
		}
		/* check if xid is already in use */
	} while (nfs_lockdmsg_find_by_xid(nfs_lockxid));

	return nfs_lockxid;
}


/*
 * Check the nfs_lock_pid hash table for an entry and, if requested,
 * add the entry if it is not found.
 *
 * (Also, if adding, try to clean up some stale entries.)
 * (nfs_lock_mutex must be held)
 */
static int
nfs_lock_pid_check(proc_t p, int addflag)
{
	struct nfs_lock_pid *lp, *lplru, *lplru_next, *mlp;
	TAILQ_HEAD(, nfs_lock_pid) nfs_lock_pid_free;
	proc_t plru = PROC_NULL;
	pid_t pid;
	int error = 0;
	struct timeval now;

	TAILQ_INIT(&nfs_lock_pid_free);
	mlp = NULL;

loop:
	/* Search hash chain */
	pid = proc_pid(p);
	error = ENOENT;
	lp = NFS_LOCK_PID_HASH(pid)->lh_first;
	for (; lp != NULL; lp = lp->lp_hash.le_next)
		if (lp->lp_pid == pid) {
			/* found pid... */
			if (timevalcmp(&lp->lp_pid_start, &p->p_start, ==)) {
				/* ...and it's valid */
				/* move to tail of LRU */
				TAILQ_REMOVE(&nfs_lock_pid_lru, lp, lp_lru);
				microuptime(&now);
				lp->lp_time = now.tv_sec;
				TAILQ_INSERT_TAIL(&nfs_lock_pid_lru, lp, lp_lru);
				error = 0;
				break;
			}
			/* ...but it's no longer valid */
			/* remove from hash, invalidate, and move to lru head */
			LIST_REMOVE(lp, lp_hash);
			lp->lp_valid = 0;
			TAILQ_REMOVE(&nfs_lock_pid_lru, lp, lp_lru);
			TAILQ_INSERT_HEAD(&nfs_lock_pid_lru, lp, lp_lru);
			lp = NULL;
			break;
		}

	/* if we didn't find it (valid), use any newly allocated one */
	if (!lp)
		lp = mlp;

	/* if we don't have an lp and we've been asked to add it */
	if ((error == ENOENT) && addflag && !lp) {
		/* scan lru list for invalid, stale entries to reuse/free */
		int lrucnt = 0;
		microuptime(&now);
		for (lplru = TAILQ_FIRST(&nfs_lock_pid_lru); lplru; lplru = lplru_next) {
			lplru_next = TAILQ_NEXT(lplru, lp_lru);
			if (lplru->lp_valid && (lplru->lp_time >= (now.tv_sec - 2))) {
				/*
				 * If the oldest LRU entry is relatively new, then don't
				 * bother scanning any further.
				 */
				break;
			}
			/* remove entry from LRU, and check if it's still in use */
			TAILQ_REMOVE(&nfs_lock_pid_lru, lplru, lp_lru);
			if (!lplru->lp_valid || !(plru = proc_find(lplru->lp_pid)) ||
			    timevalcmp(&lplru->lp_pid_start, &plru->p_start, !=)) {
				if (plru != PROC_NULL) {
					proc_rele(plru);
					plru = PROC_NULL;
				}
				/* no longer in use */
				LIST_REMOVE(lplru, lp_hash);
				if (!lp) {
					/* we'll reuse this one */
					lp = lplru;
				} else {
					/* queue it up for freeing */
					TAILQ_INSERT_HEAD(&nfs_lock_pid_free, lplru, lp_lru);
				}
			} else {
				/* still in use */
				if (plru != PROC_NULL) {
					proc_rele(plru);
					plru = PROC_NULL;
				}
				lplru->lp_time = now.tv_sec;
				TAILQ_INSERT_TAIL(&nfs_lock_pid_lru, lplru, lp_lru);
			}
			/* don't check too many entries at once */
			if (++lrucnt > 8)
				break;
		}
		if (!lp) {
			/* we need to allocate a new one */
			lck_mtx_unlock(nfs_lock_mutex);
			MALLOC(mlp, struct nfs_lock_pid *, sizeof(struct nfs_lock_pid),
				M_TEMP, M_WAITOK | M_ZERO);
			lck_mtx_lock(nfs_lock_mutex);
			if (mlp) /* make sure somebody hasn't already added this guy */
				goto loop;
			error = ENOMEM;
		}
	}
	if ((error == ENOENT) && addflag && lp) {
		/* (re)initialize nfs_lock_pid info */
		lp->lp_pid = pid;
		lp->lp_pid_start = p->p_start;
		/* insert pid in hash */
		LIST_INSERT_HEAD(NFS_LOCK_PID_HASH(lp->lp_pid), lp, lp_hash);
		lp->lp_valid = 1;
		lp->lp_time = now.tv_sec;
		TAILQ_INSERT_TAIL(&nfs_lock_pid_lru, lp, lp_lru);
		error = 0;
	}

	if ((mlp && (lp != mlp)) || TAILQ_FIRST(&nfs_lock_pid_free)) {
		lck_mtx_unlock(nfs_lock_mutex);
		if (mlp && (lp != mlp)) {
			/* we didn't need this one, so we can free it */
			FREE(mlp, M_TEMP);
		}
		/* free up any stale entries */
		while ((lp = TAILQ_FIRST(&nfs_lock_pid_free))) {
			TAILQ_REMOVE(&nfs_lock_pid_free, lp, lp_lru);
			FREE(lp, M_TEMP);
		}
		lck_mtx_lock(nfs_lock_mutex);
	}

	return (error);
}
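
/*
 * Sketch of the intended usage pattern (mirroring the call sites below):
 * the unlock path calls nfs_lock_pid_check(p, 0) and treats ENOENT as
 * "this process never held an NFS lock", letting it skip the lockd
 * round-trip entirely, while a successful F_SETLK/F_SETLKW calls
 * nfs_lock_pid_check(p, 1) so that later unlocks from this pid are
 * never short-circuited.
 */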

#define MACH_MAX_TRIES 3

static int
send_request(LOCKD_MSG *msg, int interruptable)
{
	kern_return_t kr;
	int retries = 0;
	mach_port_t lockd_port = IPC_PORT_NULL;

	kr = host_get_lockd_port(host_priv_self(), &lockd_port);
	if (kr != KERN_SUCCESS || !IPC_PORT_VALID(lockd_port))
		return (ENOTSUP);

	do {
		/* In the kernel all mach messaging is interruptible */
		do {
			kr = lockd_request(
				lockd_port,
				msg->lm_version,
				msg->lm_flags,
				msg->lm_xid,
				msg->lm_fl.l_start,
				msg->lm_fl.l_len,
				msg->lm_fl.l_pid,
				msg->lm_fl.l_type,
				msg->lm_fl.l_whence,
				(uint32_t *)&msg->lm_addr,
				(uint32_t *)&msg->lm_cred,
				msg->lm_fh_len,
				msg->lm_fh);
			if (kr != KERN_SUCCESS)
				printf("lockd_request received %d!\n", kr);
		} while (!interruptable && kr == MACH_SEND_INTERRUPTED);
	} while (kr == MIG_SERVER_DIED && retries++ < MACH_MAX_TRIES);

	ipc_port_release_send(lockd_port);
	switch (kr) {
	case MACH_SEND_INTERRUPTED:
		return (EINTR);
	default:
		/*
		 * Other MACH or MIG errors we will retry. Eventually
		 * we will call nfs_down and allow the user to disable
		 * locking.
		 */
		return (EAGAIN);
	}
	return (kr);
}
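
/*
 * Summarizing the error mapping above: MACH_SEND_INTERRUPTED is returned
 * as EINTR (reachable only when the caller allows interruption),
 * MIG_SERVER_DIED is retried up to MACH_MAX_TRIES times, and everything
 * else, including KERN_SUCCESS, maps to EAGAIN, which the caller in
 * nfs3_vnop_advlock() treats as "request sent, wait for lockd's answer".
 */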


/*
 * NFS advisory byte-level locks (client)
 */
int
nfs3_vnop_advlock(
	struct vnop_advlock_args /* {
		struct vnodeop_desc *a_desc;
		vnode_t a_vp;
		caddr_t a_id;
		int a_op;
		struct flock *a_fl;
		int a_flags;
		vfs_context_t a_context;
	} */ *ap)
{
	vfs_context_t ctx;
	proc_t p;
	LOCKD_MSG_REQUEST msgreq;
	LOCKD_MSG *msg;
	vnode_t vp;
	nfsnode_t np;
	int error, error2;
	int interruptable;
	struct flock *fl;
	struct nfsmount *nmp;
	struct nfs_vattr nvattr;
	off_t start, end;
	struct timeval now;
	int timeo, endtime, lastmsg, wentdown = 0;
	int lockpidcheck, nfsvers;
	struct sockaddr *saddr;
	struct timespec ts;

	ctx = ap->a_context;
	p = vfs_context_proc(ctx);
	vp = ap->a_vp;
	fl = ap->a_fl;
	np = VTONFS(vp);

	nmp = VTONMP(vp);
	if (!nmp)
		return (ENXIO);
	lck_mtx_lock(&nmp->nm_lock);
	if (nmp->nm_flag & NFSMNT_NOLOCKS) {
		lck_mtx_unlock(&nmp->nm_lock);
		return (ENOTSUP);
	}
	nfsvers = nmp->nm_vers;
	lck_mtx_unlock(&nmp->nm_lock);

	/*
	 * The NLM protocol doesn't allow the server to return an error
	 * on ranges, so we do it. Pre LFS (Large File Summit)
	 * standards required EINVAL for the range errors. More recent
	 * standards use EOVERFLOW, but their EINVAL wording still
	 * encompasses these errors.
	 * Any code sensitive to this is either:
	 * 1) written pre-LFS and so can handle only EINVAL, or
	 * 2) written post-LFS and thus ought to be tolerant of pre-LFS
	 *    implementations.
	 * Since returning EOVERFLOW certainly breaks 1), we return EINVAL.
	 */
	if (fl->l_whence != SEEK_END) {
		if ((fl->l_whence != SEEK_CUR && fl->l_whence != SEEK_SET) ||
		    fl->l_start < 0 ||
		    (fl->l_len > 0 && fl->l_len - 1 > OFF_MAX - fl->l_start) ||
		    (fl->l_len < 0 && fl->l_start + fl->l_len < 0))
			return (EINVAL);
	}
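
	/*
	 * Worked example of the range check (hypothetical values): with
	 * l_whence == SEEK_SET, l_start == OFF_MAX and l_len == 2, the lock
	 * would extend past the maximum file offset; since l_len - 1 == 1 is
	 * greater than OFF_MAX - l_start == 0, we return EINVAL rather than
	 * the post-LFS EOVERFLOW.
	 */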

	lck_mtx_lock(nfs_lock_mutex);

	/*
	 * Need to check if this process has successfully acquired an NFS lock before.
	 * If not, and this is an unlock request, we can simply return success here.
	 */
	lockpidcheck = nfs_lock_pid_check(p, 0);
	lck_mtx_unlock(nfs_lock_mutex);
	if (lockpidcheck) {
		if (lockpidcheck != ENOENT)
			return (lockpidcheck);
		if ((ap->a_op == F_UNLCK) && nfs_lock_pid_hash_trusted)
			return (0);
	}

	/*
	 * The NFS Lock Manager protocol doesn't directly handle
	 * negative lengths or SEEK_END, so we need to normalize
	 * things here where we have all the info.
	 * (Note: SEEK_CUR is already adjusted for at this point)
	 */
	/* Convert the flock structure into a start and end. */
	switch (fl->l_whence) {
	case SEEK_SET:
	case SEEK_CUR:
		/*
		 * Caller is responsible for adding any necessary offset
		 * to fl->l_start when SEEK_CUR is used.
		 */
		start = fl->l_start;
		break;
	case SEEK_END:
		/* need to flush, and refetch attributes to make */
		/* sure we have the correct end of file offset */
		error = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE);
		if (error)
			return (error);
		NATTRINVALIDATE(np);
		if (np->n_flag & NMODIFIED) {
			nfs_unlock(np);
			error = nfs_vinvalbuf(vp, V_SAVE, ctx, 1);
			if (error)
				return (error);
		} else
			nfs_unlock(np);

		error = nfs_getattr(np, &nvattr, ctx, 0);
		nfs_data_lock(np, NFS_NODE_LOCK_SHARED);
		if (!error)
			error = nfs_lock(np, NFS_NODE_LOCK_SHARED);
		if (error) {
			nfs_data_unlock(np);
			return (error);
		}
		start = np->n_size + fl->l_start;
		nfs_unlock(np);
		nfs_data_unlock(np);
		break;
	default:
		return (EINVAL);
	}
	if (fl->l_len == 0)
		end = -1;
	else if (fl->l_len > 0)
		end = start + fl->l_len - 1;
	else { /* l_len is negative */
		end = start - 1;
		start += fl->l_len;
	}
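
	/*
	 * For illustration (hypothetical values): l_start == 100 with
	 * l_len == -10 locks the ten bytes just before offset 100, giving
	 * start == 90 and end == 99, while l_len == 0 yields end == -1,
	 * meaning the lock extends to the end of the file.
	 */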
	if (start < 0)
		return (EINVAL);

	if ((nfsvers == NFS_VER2) &&
	    ((start >= 0x80000000) || (end >= 0x80000000)))
		return (EINVAL);

	/*
	 * Fill in the information structure.
	 */
	msgreq.lmr_answered = 0;
	msgreq.lmr_errno = 0;
	msgreq.lmr_saved_errno = 0;
	msg = &msgreq.lmr_msg;
	msg->lm_version = LOCKD_MSG_VERSION;
	msg->lm_flags = 0;

	msg->lm_fl = *fl;
	msg->lm_fl.l_start = start;
	if (end != -1)
		msg->lm_fl.l_len = end - start + 1;
	msg->lm_fl.l_pid = vfs_context_pid(ctx);

	if (ap->a_flags & F_WAIT)
		msg->lm_flags |= LOCKD_MSG_BLOCK;
	if (ap->a_op == F_GETLK)
		msg->lm_flags |= LOCKD_MSG_TEST;

	nmp = VTONMP(vp);
	if (!nmp)
		return (ENXIO);

	lck_mtx_lock(&nmp->nm_lock);
	saddr = mbuf_data(nmp->nm_nam);
	bcopy(saddr, &msg->lm_addr, min(sizeof msg->lm_addr, saddr->sa_len));
	msg->lm_fh_len = (nfsvers == NFS_VER2) ? NFSX_V2FH : np->n_fhsize;
	bcopy(np->n_fhp, msg->lm_fh, msg->lm_fh_len);
	if (nfsvers == NFS_VER3)
		msg->lm_flags |= LOCKD_MSG_NFSV3;
	cru2x(vfs_context_ucred(ctx), &msg->lm_cred);

	microuptime(&now);
	lastmsg = now.tv_sec - ((nmp->nm_tprintf_delay) - (nmp->nm_tprintf_initial_delay));
	interruptable = nmp->nm_flag & NFSMNT_INT;
	lck_mtx_unlock(&nmp->nm_lock);

	lck_mtx_lock(nfs_lock_mutex);

	/* allocate unique xid */
	msg->lm_xid = nfs_lockxid_get();
	nfs_lockdmsg_enqueue(&msgreq);

	timeo = 2;

	for (;;) {
		nfs_lockd_request_sent = 1;

		/* need to drop nfs_lock_mutex while calling send_request() */
		lck_mtx_unlock(nfs_lock_mutex);
		error = send_request(msg, interruptable);
		lck_mtx_lock(nfs_lock_mutex);
		if (error && error != EAGAIN)
			break;

		/*
		 * Always wait for an answer. Not waiting for unlocks could
		 * cause a lock to be left if the unlock request gets dropped.
		 */

		/*
		 * Retry if it takes too long to get a response.
		 *
		 * The timeout numbers were picked out of thin air... they start
		 * at 2 and double each timeout with a max of 60 seconds.
		 *
		 * In order to maintain responsiveness, we pass a small timeout
		 * to msleep and calculate the timeouts ourselves. This allows
		 * us to pick up on mount changes quicker.
		 */
wait_for_granted:
		error = EWOULDBLOCK;
		ts.tv_sec = 2;
		ts.tv_nsec = 0;
		microuptime(&now);
		endtime = now.tv_sec + timeo;
		while (now.tv_sec < endtime) {
			error = error2 = 0;
			if (!msgreq.lmr_answered)
				error = msleep(&msgreq, nfs_lock_mutex, PCATCH | PUSER, "lockd", &ts);
			if (msgreq.lmr_answered) {
				/*
				 * Note: it's possible to have a lock granted at
				 * essentially the same time that we get interrupted.
				 * Since the lock may be granted, we can't return an
				 * error from this request or we might not unlock the
				 * lock that's been granted.
				 */
				nmp = VTONMP(vp);
				if ((msgreq.lmr_errno == ENOTSUP) && nmp &&
				    (nmp->nm_state & NFSSTA_LOCKSWORK)) {
					/*
					 * We have evidence that locks work, yet lockd
					 * returned ENOTSUP. This is probably because
					 * it was unable to contact the server's lockd
					 * to send it the request.
					 *
					 * Because we know locks work, we'll consider
					 * this failure to be a timeout.
					 */
					error = EWOULDBLOCK;
				} else {
					error = 0;
				}
				break;
			}
			if (error != EWOULDBLOCK)
				break;
			/* check that we still have our mount... */
			/* ...and that we still support locks */
			nmp = VTONMP(vp);
			if ((error2 = nfs_sigintr(nmp, NULL, vfs_context_thread(ctx), 0))) {
				error = error2;
				if (fl->l_type == F_UNLCK)
					printf("nfs_vnop_advlock: aborting unlock request, error %d\n", error);
				break;
			}
			lck_mtx_lock(&nmp->nm_lock);
			if (nmp->nm_flag & NFSMNT_NOLOCKS) {
				lck_mtx_unlock(&nmp->nm_lock);
				break;
			}
			interruptable = nmp->nm_flag & NFSMNT_INT;
			lck_mtx_unlock(&nmp->nm_lock);
			microuptime(&now);
		}
		if (error) {
			/* check that we still have our mount... */
			nmp = VTONMP(vp);
			if ((error2 = nfs_sigintr(nmp, NULL, vfs_context_thread(ctx), 0))) {
				error = error2;
				if (error2 != EINTR) {
					if (fl->l_type == F_UNLCK)
						printf("nfs_vnop_advlock: aborting unlock request, error %d\n", error);
					break;
				}
			}
			/* ...and that we still support locks */
			lck_mtx_lock(&nmp->nm_lock);
			if (nmp->nm_flag & NFSMNT_NOLOCKS) {
				if (error == EWOULDBLOCK)
					error = ENOTSUP;
				lck_mtx_unlock(&nmp->nm_lock);
				break;
			}
			interruptable = nmp->nm_flag & NFSMNT_INT;
			if (error != EWOULDBLOCK) {
				lck_mtx_unlock(&nmp->nm_lock);
				/*
				 * We're going to bail on this request.
				 * If we were a blocked lock request, send a cancel.
				 */
				if ((msgreq.lmr_errno == EINPROGRESS) &&
				    !(msg->lm_flags & LOCKD_MSG_CANCEL)) {
					/* set this request up as a cancel */
					msg->lm_flags |= LOCKD_MSG_CANCEL;
					nfs_lockdmsg_dequeue(&msgreq);
					msg->lm_xid = nfs_lockxid_get();
					nfs_lockdmsg_enqueue(&msgreq);
					msgreq.lmr_saved_errno = error;
					msgreq.lmr_errno = 0;
					msgreq.lmr_answered = 0;
					/* reset timeout */
					timeo = 2;
					/* send cancel request */
					continue;
				}
				break;
			}

			/* warn if we're not getting any response */
			microuptime(&now);
			if ((msgreq.lmr_errno != EINPROGRESS) &&
			    (nmp->nm_tprintf_initial_delay != 0) &&
			    ((lastmsg + nmp->nm_tprintf_delay) < now.tv_sec)) {
				lck_mtx_unlock(&nmp->nm_lock);
				lastmsg = now.tv_sec;
				nfs_down(nmp, vfs_context_thread(ctx), 0, NFSSTA_LOCKTIMEO, "lockd not responding");
				wentdown = 1;
			} else
				lck_mtx_unlock(&nmp->nm_lock);

			if (msgreq.lmr_errno == EINPROGRESS) {
				/*
				 * We've got a blocked lock request that we are
				 * going to retry. First, we'll want to try to
				 * send a cancel for the previous request.
				 *
				 * Clear errno so if we don't get a response
				 * to the resend we'll call nfs_down().
				 * Also reset timeout because we'll expect a
				 * quick response to the cancel/resend (even if
				 * it is NLM_BLOCKED).
				 */
				msg->lm_flags |= LOCKD_MSG_CANCEL;
				nfs_lockdmsg_dequeue(&msgreq);
				msg->lm_xid = nfs_lockxid_get();
				nfs_lockdmsg_enqueue(&msgreq);
				msgreq.lmr_saved_errno = msgreq.lmr_errno;
				msgreq.lmr_errno = 0;
				msgreq.lmr_answered = 0;
				timeo = 2;
				/* send cancel then resend request */
				continue;
			}
			/*
			 * We timed out, so we will resend the request.
			 */
			timeo *= 2;
			if (timeo > 60)
				timeo = 60;
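			/*
			 * e.g. successive resend timeouts run 2, 4, 8, 16, 32,
			 * 60, 60, ... seconds, capped at one minute.
			 */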
			/* resend request */
			continue;
		}

		/* we got a response, so the server's lockd is OK */
		nfs_up(VTONMP(vp), vfs_context_thread(ctx), NFSSTA_LOCKTIMEO,
			wentdown ? "lockd alive again" : NULL);
		wentdown = 0;

		if (msgreq.lmr_errno == EINPROGRESS) {
			/* got NLM_BLOCKED response */
			/* need to wait for NLM_GRANTED */
			timeo = 60;
			msgreq.lmr_answered = 0;
			goto wait_for_granted;
		}

		if ((msg->lm_flags & LOCKD_MSG_CANCEL) &&
		    (msgreq.lmr_saved_errno == EINPROGRESS)) {
			/*
			 * We just got a successful reply to the
			 * cancel of the previous blocked lock request.
			 * Now, go ahead and resend the request.
			 */
			msg->lm_flags &= ~LOCKD_MSG_CANCEL;
			nfs_lockdmsg_dequeue(&msgreq);
			msg->lm_xid = nfs_lockxid_get();
			nfs_lockdmsg_enqueue(&msgreq);
			msgreq.lmr_saved_errno = 0;
			msgreq.lmr_errno = 0;
			msgreq.lmr_answered = 0;
			timeo = 2;
			/* resend request */
			continue;
		}

		if ((msg->lm_flags & LOCKD_MSG_TEST) && msgreq.lmr_errno == 0) {
			if (msg->lm_fl.l_type != F_UNLCK) {
				fl->l_type = msg->lm_fl.l_type;
				fl->l_pid = msg->lm_fl.l_pid;
				fl->l_start = msg->lm_fl.l_start;
				fl->l_len = msg->lm_fl.l_len;
				fl->l_whence = SEEK_SET;
			} else
				fl->l_type = F_UNLCK;
		}

		/*
		 * If the blocked lock request was cancelled, restore
		 * the error condition from when we originally bailed
		 * on the request.
		 */
		if (msg->lm_flags & LOCKD_MSG_CANCEL) {
			msg->lm_flags &= ~LOCKD_MSG_CANCEL;
			error = msgreq.lmr_saved_errno;
		} else
			error = msgreq.lmr_errno;

		nmp = VTONMP(vp);
		if ((error == ENOTSUP) && nmp && !(nmp->nm_state & NFSSTA_LOCKSWORK)) {
			/*
			 * We have NO evidence that locks work and lockd
			 * returned ENOTSUP. Let's take this as a hint
			 * that locks aren't supported and disable them
			 * for this mount.
			 */
			lck_mtx_lock(&nmp->nm_lock);
			nmp->nm_flag |= NFSMNT_NOLOCKS;
			nmp->nm_state &= ~NFSSTA_LOCKTIMEO;
			lck_mtx_unlock(&nmp->nm_lock);
			printf("lockd returned ENOTSUP, disabling locks for nfs server: %s\n",
				vfs_statfs(nmp->nm_mountp)->f_mntfromname);
		}
		if (!error) {
			/* record that NFS file locking has worked on this mount */
			if (nmp) {
				lck_mtx_lock(&nmp->nm_lock);
				if (!(nmp->nm_state & NFSSTA_LOCKSWORK))
					nmp->nm_state |= NFSSTA_LOCKSWORK;
				lck_mtx_unlock(&nmp->nm_lock);
			}
			/*
			 * If we successfully acquired a lock, make sure this pid
			 * is in the nfs_lock_pid hash table so we know we can't
			 * short-circuit unlock requests.
			 */
			if ((lockpidcheck == ENOENT) &&
			    ((ap->a_op == F_SETLK) || (ap->a_op == F_SETLKW))) {
				error = nfs_lock_pid_check(p, 1);
				if (error) {
					/*
					 * We couldn't add the pid to the table,
					 * so we can no longer trust that a pid
					 * not in the table has no locks.
					 */
					nfs_lock_pid_hash_trusted = 0;
					printf("nfs_vnop_advlock: pid add failed - no longer trusted\n");
				}
			}
		}
		break;
	}

	nfs_lockdmsg_dequeue(&msgreq);

	lck_mtx_unlock(nfs_lock_mutex);

	return (error);
}

/*
 * nfslockdans --
 *      NFS advisory byte-level locks answer from the lock daemon.
 */
int
nfslockdans(proc_t p, struct lockd_ans *ansp)
{
	LOCKD_MSG_REQUEST *msgreq;
	int error;

	/* Let root make this call. */
	error = proc_suser(p);
	if (error)
		return (error);

	/* the version should match, or we're out of sync */
	if (ansp->la_version != LOCKD_ANS_VERSION)
		return (EINVAL);

	lck_mtx_lock(nfs_lock_mutex);

	/* try to find the lockd message by transaction id (cookie) */
	msgreq = nfs_lockdmsg_find_by_xid(ansp->la_xid);
	if (ansp->la_flags & LOCKD_ANS_GRANTED) {
		/*
		 * We can't depend on the granted message having our cookie,
		 * so we check the answer against the lockd message found.
		 * If no message was found or it doesn't match the answer,
		 * we look for the lockd message by the answer's lock info.
		 */
		if (!msgreq || nfs_lockdmsg_compare_to_answer(msgreq, ansp))
			msgreq = nfs_lockdmsg_find_by_answer(ansp);
		/*
		 * We need to make sure this request isn't being cancelled.
		 * If it is, we don't want to accept the granted message.
		 */
		if (msgreq && (msgreq->lmr_msg.lm_flags & LOCKD_MSG_CANCEL))
			msgreq = NULL;
	}
	if (!msgreq) {
		lck_mtx_unlock(nfs_lock_mutex);
		return (EPIPE);
	}

	msgreq->lmr_errno = ansp->la_errno;
	if ((msgreq->lmr_msg.lm_flags & LOCKD_MSG_TEST) && msgreq->lmr_errno == 0) {
		if (ansp->la_flags & LOCKD_ANS_LOCK_INFO) {
			if (ansp->la_flags & LOCKD_ANS_LOCK_EXCL)
				msgreq->lmr_msg.lm_fl.l_type = F_WRLCK;
			else
				msgreq->lmr_msg.lm_fl.l_type = F_RDLCK;
			msgreq->lmr_msg.lm_fl.l_pid = ansp->la_pid;
			msgreq->lmr_msg.lm_fl.l_start = ansp->la_start;
			msgreq->lmr_msg.lm_fl.l_len = ansp->la_len;
		} else {
			msgreq->lmr_msg.lm_fl.l_type = F_UNLCK;
		}
	}

	msgreq->lmr_answered = 1;
	lck_mtx_unlock(nfs_lock_mutex);
	wakeup(msgreq);

	return (0);
}