]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * Copyright (c) 2002-2014 Apple Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * This file contains Original Code and/or Modifications of Original Code | |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
14 | * | |
15 | * Please obtain a copy of the License at | |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
25 | * | |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ | |
27 | */ | |
28 | /*- | |
29 | * Copyright (c) 1997 Berkeley Software Design, Inc. All rights reserved. | |
30 | * | |
31 | * Redistribution and use in source and binary forms, with or without | |
32 | * modification, are permitted provided that the following conditions | |
33 | * are met: | |
34 | * 1. Redistributions of source code must retain the above copyright | |
35 | * notice, this list of conditions and the following disclaimer. | |
36 | * 2. Redistributions in binary form must reproduce the above copyright | |
37 | * notice, this list of conditions and the following disclaimer in the | |
38 | * documentation and/or other materials provided with the distribution. | |
39 | * 3. Berkeley Software Design Inc's name may not be used to endorse or | |
40 | * promote products derived from this software without specific prior | |
41 | * written permission. | |
42 | * | |
43 | * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND | |
44 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
45 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
46 | * ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE | |
47 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
48 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
49 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
50 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
51 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
52 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
53 | * SUCH DAMAGE. | |
54 | * | |
55 | * from BSDI nfs_lock.c,v 2.4 1998/12/14 23:49:56 jch Exp | |
56 | */ | |
57 | ||
58 | #include <sys/cdefs.h> | |
59 | #include <sys/param.h> | |
60 | #include <sys/systm.h> | |
61 | #include <sys/fcntl.h> | |
62 | #include <sys/kernel.h> /* for hz */ | |
63 | #include <sys/file_internal.h> | |
64 | #include <sys/malloc.h> | |
65 | #include <sys/lockf.h> /* for hz */ /* Must come after sys/malloc.h */ | |
66 | #include <sys/kpi_mbuf.h> | |
67 | #include <sys/mount_internal.h> | |
68 | #include <sys/proc_internal.h> /* for p_start */ | |
69 | #include <sys/kauth.h> | |
70 | #include <sys/resourcevar.h> | |
71 | #include <sys/socket.h> | |
72 | #include <sys/unistd.h> | |
73 | #include <sys/user.h> | |
74 | #include <sys/vnode_internal.h> | |
75 | ||
76 | #include <kern/thread.h> | |
77 | #include <kern/host.h> | |
78 | ||
79 | #include <machine/limits.h> | |
80 | ||
81 | #include <net/if.h> | |
82 | ||
83 | #include <nfs/rpcv2.h> | |
84 | #include <nfs/nfsproto.h> | |
85 | #include <nfs/nfs.h> | |
86 | #include <nfs/nfs_gss.h> | |
87 | #include <nfs/nfsmount.h> | |
88 | #include <nfs/nfsnode.h> | |
89 | #include <nfs/nfs_lock.h> | |
90 | ||
91 | #include <mach/host_priv.h> | |
92 | #include <mach/mig_errors.h> | |
93 | #include <mach/host_special_ports.h> | |
94 | #include <lockd/lockd_mach.h> | |
95 | ||
/* Declared here rather than via a header; used below to drop the send right on the lockd port. */
extern void ipc_port_release_send(ipc_port_t);

/*
 * pending lock request messages are kept in this queue which is
 * kept sorted by transaction ID (xid).
 */
/* last xid handed out by nfs_lockxid_get(); 0 means "not yet seeded" */
static uint64_t nfs_lockxid = 0;
static LOCKD_MSG_QUEUE nfs_pendlockq;

/* list of mounts that are (potentially) making lockd requests */
TAILQ_HEAD(nfs_lockd_mount_list,nfsmount) nfs_lockd_mount_list;

/*
 * Lock group and mutex allocated in nfs_lockinit().  The mutex is held
 * around accesses to the pending queue, the xid counter and the lockd
 * mount list/count in this file.
 */
static lck_grp_t *nfs_lock_lck_grp;
static lck_mtx_t *nfs_lock_mutex;

/* helpers defined later in this file */
void nfs_lockdmsg_enqueue(LOCKD_MSG_REQUEST *);
void nfs_lockdmsg_dequeue(LOCKD_MSG_REQUEST *);
int nfs_lockdmsg_compare_to_answer(LOCKD_MSG_REQUEST *, struct lockd_ans *);
LOCKD_MSG_REQUEST *nfs_lockdmsg_find_by_answer(struct lockd_ans *);
LOCKD_MSG_REQUEST *nfs_lockdmsg_find_by_xid(uint64_t);
uint64_t nfs_lockxid_get(void);
int nfs_lockd_send_request(LOCKD_MSG *, int);
118 | ||
119 | /* | |
120 | * initialize global nfs lock state | |
121 | */ | |
122 | void | |
123 | nfs_lockinit(void) | |
124 | { | |
125 | TAILQ_INIT(&nfs_pendlockq); | |
126 | TAILQ_INIT(&nfs_lockd_mount_list); | |
127 | ||
128 | nfs_lock_lck_grp = lck_grp_alloc_init("nfs_lock", LCK_GRP_ATTR_NULL); | |
129 | nfs_lock_mutex = lck_mtx_alloc_init(nfs_lock_lck_grp, LCK_ATTR_NULL); | |
130 | } | |
131 | ||
132 | /* | |
133 | * Register a mount as (potentially) making lockd requests. | |
134 | */ | |
135 | void | |
136 | nfs_lockd_mount_register(struct nfsmount *nmp) | |
137 | { | |
138 | lck_mtx_lock(nfs_lock_mutex); | |
139 | TAILQ_INSERT_HEAD(&nfs_lockd_mount_list, nmp, nm_ldlink); | |
140 | nfs_lockd_mounts++; | |
141 | lck_mtx_unlock(nfs_lock_mutex); | |
142 | } | |
143 | ||
144 | /* | |
145 | * Unregister a mount as (potentially) making lockd requests. | |
146 | * | |
147 | * When the lockd mount count drops to zero, then send a shutdown request to | |
148 | * lockd if we've sent any requests to it. | |
149 | */ | |
150 | void | |
151 | nfs_lockd_mount_unregister(struct nfsmount *nmp) | |
152 | { | |
153 | int send_shutdown; | |
154 | mach_port_t lockd_port = IPC_PORT_NULL; | |
155 | kern_return_t kr; | |
156 | ||
157 | lck_mtx_lock(nfs_lock_mutex); | |
158 | if (nmp->nm_ldlink.tqe_next == NFSNOLIST) { | |
159 | lck_mtx_unlock(nfs_lock_mutex); | |
160 | return; | |
161 | } | |
162 | ||
163 | TAILQ_REMOVE(&nfs_lockd_mount_list, nmp, nm_ldlink); | |
164 | nmp->nm_ldlink.tqe_next = NFSNOLIST; | |
165 | ||
166 | nfs_lockd_mounts--; | |
167 | ||
168 | /* send a shutdown request if there are no more lockd mounts */ | |
169 | send_shutdown = ((nfs_lockd_mounts == 0) && nfs_lockd_request_sent); | |
170 | if (send_shutdown) | |
171 | nfs_lockd_request_sent = 0; | |
172 | ||
173 | lck_mtx_unlock(nfs_lock_mutex); | |
174 | ||
175 | if (!send_shutdown) | |
176 | return; | |
177 | ||
178 | /* | |
179 | * Let lockd know that it is no longer needed for any NFS mounts | |
180 | */ | |
181 | kr = host_get_lockd_port(host_priv_self(), &lockd_port); | |
182 | if ((kr != KERN_SUCCESS) || !IPC_PORT_VALID(lockd_port)) { | |
183 | printf("nfs_lockd_mount_change: shutdown couldn't get port, kr %d, port %s\n", | |
184 | kr, (lockd_port == IPC_PORT_NULL) ? "NULL" : | |
185 | (lockd_port == IPC_PORT_DEAD) ? "DEAD" : "VALID"); | |
186 | return; | |
187 | } | |
188 | ||
189 | kr = lockd_shutdown(lockd_port); | |
190 | if (kr != KERN_SUCCESS) | |
191 | printf("nfs_lockd_mount_change: shutdown %d\n", kr); | |
192 | ||
193 | ipc_port_release_send(lockd_port); | |
194 | } | |
195 | ||
196 | /* | |
197 | * insert a lock request message into the pending queue | |
198 | * (nfs_lock_mutex must be held) | |
199 | */ | |
200 | void | |
201 | nfs_lockdmsg_enqueue(LOCKD_MSG_REQUEST *msgreq) | |
202 | { | |
203 | LOCKD_MSG_REQUEST *mr; | |
204 | ||
205 | mr = TAILQ_LAST(&nfs_pendlockq, nfs_lock_msg_queue); | |
206 | if (!mr || (msgreq->lmr_msg.lm_xid > mr->lmr_msg.lm_xid)) { | |
207 | /* fast path: empty queue or new largest xid */ | |
208 | TAILQ_INSERT_TAIL(&nfs_pendlockq, msgreq, lmr_next); | |
209 | return; | |
210 | } | |
211 | /* slow path: need to walk list to find insertion point */ | |
212 | while (mr && (msgreq->lmr_msg.lm_xid > mr->lmr_msg.lm_xid)) { | |
213 | mr = TAILQ_PREV(mr, nfs_lock_msg_queue, lmr_next); | |
214 | } | |
215 | if (mr) { | |
216 | TAILQ_INSERT_AFTER(&nfs_pendlockq, mr, msgreq, lmr_next); | |
217 | } else { | |
218 | TAILQ_INSERT_HEAD(&nfs_pendlockq, msgreq, lmr_next); | |
219 | } | |
220 | } | |
221 | ||
222 | /* | |
223 | * remove a lock request message from the pending queue | |
224 | * (nfs_lock_mutex must be held) | |
225 | */ | |
226 | void | |
227 | nfs_lockdmsg_dequeue(LOCKD_MSG_REQUEST *msgreq) | |
228 | { | |
229 | TAILQ_REMOVE(&nfs_pendlockq, msgreq, lmr_next); | |
230 | } | |
231 | ||
232 | /* | |
233 | * find a pending lock request message by xid | |
234 | * | |
235 | * We search from the head of the list assuming that the message we're | |
236 | * looking for is for an older request (because we have an answer to it). | |
237 | * This assumes that lock request will be answered primarily in FIFO order. | |
238 | * However, this may not be the case if there are blocked requests. We may | |
239 | * want to move blocked requests to a separate queue (but that'll complicate | |
240 | * duplicate xid checking). | |
241 | * | |
242 | * (nfs_lock_mutex must be held) | |
243 | */ | |
244 | LOCKD_MSG_REQUEST * | |
245 | nfs_lockdmsg_find_by_xid(uint64_t lockxid) | |
246 | { | |
247 | LOCKD_MSG_REQUEST *mr; | |
248 | ||
249 | TAILQ_FOREACH(mr, &nfs_pendlockq, lmr_next) { | |
250 | if (mr->lmr_msg.lm_xid == lockxid) | |
251 | return mr; | |
252 | if (mr->lmr_msg.lm_xid > lockxid) | |
253 | return NULL; | |
254 | } | |
255 | return mr; | |
256 | } | |
257 | ||
258 | /* | |
259 | * Because we can't depend on nlm_granted messages containing the same | |
260 | * cookie we sent with the original lock request, we need code to test | |
261 | * if an nlm_granted answer matches the lock request. We also need code | |
262 | * that can find a lockd message based solely on the nlm_granted answer. | |
263 | */ | |
264 | ||
265 | /* | |
266 | * compare lockd message to answer | |
267 | * | |
268 | * returns 0 on equality and 1 if different | |
269 | */ | |
270 | int | |
271 | nfs_lockdmsg_compare_to_answer(LOCKD_MSG_REQUEST *msgreq, struct lockd_ans *ansp) | |
272 | { | |
273 | if (!(ansp->la_flags & LOCKD_ANS_LOCK_INFO)) | |
274 | return 1; | |
275 | if (msgreq->lmr_msg.lm_fl.l_pid != ansp->la_pid) | |
276 | return 1; | |
277 | if (msgreq->lmr_msg.lm_fl.l_start != ansp->la_start) | |
278 | return 1; | |
279 | if (msgreq->lmr_msg.lm_fl.l_len != ansp->la_len) | |
280 | return 1; | |
281 | if (msgreq->lmr_msg.lm_fh_len != ansp->la_fh_len) | |
282 | return 1; | |
283 | if (bcmp(msgreq->lmr_msg.lm_fh, ansp->la_fh, ansp->la_fh_len)) | |
284 | return 1; | |
285 | return 0; | |
286 | } | |
287 | ||
288 | /* | |
289 | * find a pending lock request message based on the lock info provided | |
290 | * in the lockd_ans/nlm_granted data. We need this because we can't | |
291 | * depend on nlm_granted messages containing the same cookie we sent | |
292 | * with the original lock request. | |
293 | * | |
294 | * We search from the head of the list assuming that the message we're | |
295 | * looking for is for an older request (because we have an answer to it). | |
296 | * This assumes that lock request will be answered primarily in FIFO order. | |
297 | * However, this may not be the case if there are blocked requests. We may | |
298 | * want to move blocked requests to a separate queue (but that'll complicate | |
299 | * duplicate xid checking). | |
300 | * | |
301 | * (nfs_lock_mutex must be held) | |
302 | */ | |
303 | LOCKD_MSG_REQUEST * | |
304 | nfs_lockdmsg_find_by_answer(struct lockd_ans *ansp) | |
305 | { | |
306 | LOCKD_MSG_REQUEST *mr; | |
307 | ||
308 | if (!(ansp->la_flags & LOCKD_ANS_LOCK_INFO)) | |
309 | return NULL; | |
310 | TAILQ_FOREACH(mr, &nfs_pendlockq, lmr_next) { | |
311 | if (!nfs_lockdmsg_compare_to_answer(mr, ansp)) | |
312 | break; | |
313 | } | |
314 | return mr; | |
315 | } | |
316 | ||
317 | /* | |
318 | * return the next unique lock request transaction ID | |
319 | * (nfs_lock_mutex must be held) | |
320 | */ | |
321 | uint64_t | |
322 | nfs_lockxid_get(void) | |
323 | { | |
324 | LOCKD_MSG_REQUEST *mr; | |
325 | ||
326 | /* derive initial lock xid from system time */ | |
327 | if (!nfs_lockxid) { | |
328 | /* | |
329 | * Note: it's OK if this code inits nfs_lockxid to 0 (for example, | |
330 | * due to a broken clock) because we immediately increment it | |
331 | * and we guarantee to never use xid 0. So, nfs_lockxid should only | |
332 | * ever be 0 the first time this function is called. | |
333 | */ | |
334 | struct timeval tv; | |
335 | microtime(&tv); | |
336 | nfs_lockxid = (uint64_t)tv.tv_sec << 12; | |
337 | } | |
338 | ||
339 | /* make sure we get a unique xid */ | |
340 | do { | |
341 | /* Skip zero xid if it should ever happen. */ | |
342 | if (++nfs_lockxid == 0) | |
343 | nfs_lockxid++; | |
344 | if (!(mr = TAILQ_LAST(&nfs_pendlockq, nfs_lock_msg_queue)) || | |
345 | (mr->lmr_msg.lm_xid < nfs_lockxid)) { | |
346 | /* fast path: empty queue or new largest xid */ | |
347 | break; | |
348 | } | |
349 | /* check if xid is already in use */ | |
350 | } while (nfs_lockdmsg_find_by_xid(nfs_lockxid)); | |
351 | ||
352 | return nfs_lockxid; | |
353 | } | |
354 | ||
355 | #define MACH_MAX_TRIES 3 | |
356 | ||
357 | int | |
358 | nfs_lockd_send_request(LOCKD_MSG *msg, int interruptable) | |
359 | { | |
360 | kern_return_t kr; | |
361 | int retries = 0; | |
362 | mach_port_t lockd_port = IPC_PORT_NULL; | |
363 | ||
364 | kr = host_get_lockd_port(host_priv_self(), &lockd_port); | |
365 | if (kr != KERN_SUCCESS || !IPC_PORT_VALID(lockd_port)) | |
366 | return (ENOTSUP); | |
367 | ||
368 | do { | |
369 | /* In the kernel all mach messaging is interruptable */ | |
370 | do { | |
371 | kr = lockd_request( | |
372 | lockd_port, | |
373 | msg->lm_version, | |
374 | msg->lm_flags, | |
375 | msg->lm_xid, | |
376 | msg->lm_fl.l_start, | |
377 | msg->lm_fl.l_len, | |
378 | msg->lm_fl.l_pid, | |
379 | msg->lm_fl.l_type, | |
380 | msg->lm_fl.l_whence, | |
381 | (uint32_t *)&msg->lm_addr, | |
382 | (uint32_t *)&msg->lm_cred, | |
383 | msg->lm_fh_len, | |
384 | msg->lm_fh); | |
385 | if (kr != KERN_SUCCESS) | |
386 | printf("lockd_request received %d!\n", kr); | |
387 | } while (!interruptable && kr == MACH_SEND_INTERRUPTED); | |
388 | } while (kr == MIG_SERVER_DIED && retries++ < MACH_MAX_TRIES); | |
389 | ||
390 | ipc_port_release_send(lockd_port); | |
391 | switch (kr) { | |
392 | case MACH_SEND_INTERRUPTED: | |
393 | return (EINTR); | |
394 | default: | |
395 | /* | |
396 | * Other MACH or MIG errors we will retry. Eventually | |
397 | * we will call nfs_down and allow the user to disable | |
398 | * locking. | |
399 | */ | |
400 | return (EAGAIN); | |
401 | } | |
402 | return (kr); | |
403 | } | |
404 | ||
405 | ||
406 | /* | |
407 | * NFS advisory byte-level locks (client) | |
408 | */ | |
409 | int | |
410 | nfs3_lockd_request( | |
411 | nfsnode_t np, | |
412 | int type, | |
413 | LOCKD_MSG_REQUEST *msgreq, | |
414 | int flags, | |
415 | thread_t thd) | |
416 | { | |
417 | LOCKD_MSG *msg = &msgreq->lmr_msg; | |
418 | int error, error2; | |
419 | int interruptable, slpflag; | |
420 | struct nfsmount *nmp; | |
421 | struct timeval now; | |
422 | int timeo, starttime, endtime, lastmsg, wentdown = 0; | |
423 | struct timespec ts; | |
424 | struct sockaddr *saddr; | |
425 | ||
426 | nmp = NFSTONMP(np); | |
427 | if (!nmp || !nmp->nm_saddr) | |
428 | return (ENXIO); | |
429 | ||
430 | lck_mtx_lock(&nmp->nm_lock); | |
431 | saddr = nmp->nm_saddr; | |
432 | bcopy(saddr, &msg->lm_addr, min(sizeof msg->lm_addr, saddr->sa_len)); | |
433 | if (nmp->nm_vers == NFS_VER3) | |
434 | msg->lm_flags |= LOCKD_MSG_NFSV3; | |
435 | ||
436 | if (nmp->nm_sotype != SOCK_DGRAM) | |
437 | msg->lm_flags |= LOCKD_MSG_TCP; | |
438 | ||
439 | microuptime(&now); | |
440 | starttime = now.tv_sec; | |
441 | lastmsg = now.tv_sec - ((nmp->nm_tprintf_delay) - (nmp->nm_tprintf_initial_delay)); | |
442 | interruptable = NMFLAG(nmp, INTR); | |
443 | lck_mtx_unlock(&nmp->nm_lock); | |
444 | ||
445 | lck_mtx_lock(nfs_lock_mutex); | |
446 | ||
447 | /* allocate unique xid */ | |
448 | msg->lm_xid = nfs_lockxid_get(); | |
449 | nfs_lockdmsg_enqueue(msgreq); | |
450 | ||
451 | timeo = 4; | |
452 | ||
453 | for (;;) { | |
454 | nfs_lockd_request_sent = 1; | |
455 | ||
456 | /* need to drop nfs_lock_mutex while calling nfs_lockd_send_request() */ | |
457 | lck_mtx_unlock(nfs_lock_mutex); | |
458 | error = nfs_lockd_send_request(msg, interruptable); | |
459 | lck_mtx_lock(nfs_lock_mutex); | |
460 | if (error && error != EAGAIN) | |
461 | break; | |
462 | ||
463 | /* | |
464 | * Always wait for an answer. Not waiting for unlocks could | |
465 | * cause a lock to be left if the unlock request gets dropped. | |
466 | */ | |
467 | ||
468 | /* | |
469 | * Retry if it takes too long to get a response. | |
470 | * | |
471 | * The timeout numbers were picked out of thin air... they start | |
472 | * at 4 and double each timeout with a max of 30 seconds. | |
473 | * | |
474 | * In order to maintain responsiveness, we pass a small timeout | |
475 | * to msleep and calculate the timeouts ourselves. This allows | |
476 | * us to pick up on mount changes quicker. | |
477 | */ | |
478 | wait_for_granted: | |
479 | error = EWOULDBLOCK; | |
480 | slpflag = (interruptable && (type != F_UNLCK)) ? PCATCH : 0; | |
481 | ts.tv_sec = 2; | |
482 | ts.tv_nsec = 0; | |
483 | microuptime(&now); | |
484 | endtime = now.tv_sec + timeo; | |
485 | while (now.tv_sec < endtime) { | |
486 | error = error2 = 0; | |
487 | if (!msgreq->lmr_answered) { | |
488 | error = msleep(msgreq, nfs_lock_mutex, slpflag | PUSER, "lockd", &ts); | |
489 | slpflag = 0; | |
490 | } | |
491 | if (msgreq->lmr_answered) { | |
492 | /* | |
493 | * Note: it's possible to have a lock granted at | |
494 | * essentially the same time that we get interrupted. | |
495 | * Since the lock may be granted, we can't return an | |
496 | * error from this request or we might not unlock the | |
497 | * lock that's been granted. | |
498 | */ | |
499 | nmp = NFSTONMP(np); | |
500 | if ((msgreq->lmr_errno == ENOTSUP) && nmp && | |
501 | (nmp->nm_state & NFSSTA_LOCKSWORK)) { | |
502 | /* | |
503 | * We have evidence that locks work, yet lockd | |
504 | * returned ENOTSUP. This is probably because | |
505 | * it was unable to contact the server's lockd | |
506 | * to send it the request. | |
507 | * | |
508 | * Because we know locks work, we'll consider | |
509 | * this failure to be a timeout. | |
510 | */ | |
511 | error = EWOULDBLOCK; | |
512 | } else { | |
513 | error = 0; | |
514 | } | |
515 | break; | |
516 | } | |
517 | if (error != EWOULDBLOCK) | |
518 | break; | |
519 | /* check that we still have our mount... */ | |
520 | /* ...and that we still support locks */ | |
521 | /* ...and that there isn't a recovery pending */ | |
522 | nmp = NFSTONMP(np); | |
523 | if ((error2 = nfs_sigintr(nmp, NULL, NULL, 0))) { | |
524 | error = error2; | |
525 | if (type == F_UNLCK) | |
526 | printf("nfs3_lockd_request: aborting unlock request, error %d\n", error); | |
527 | break; | |
528 | } | |
529 | lck_mtx_lock(&nmp->nm_lock); | |
530 | if (nmp->nm_lockmode == NFS_LOCK_MODE_DISABLED) { | |
531 | lck_mtx_unlock(&nmp->nm_lock); | |
532 | break; | |
533 | } | |
534 | if ((nmp->nm_state & NFSSTA_RECOVER) && !(flags & R_RECOVER)) { | |
535 | /* recovery pending... return an error that'll get this operation restarted */ | |
536 | error = NFSERR_GRACE; | |
537 | lck_mtx_unlock(&nmp->nm_lock); | |
538 | break; | |
539 | } | |
540 | interruptable = NMFLAG(nmp, INTR); | |
541 | lck_mtx_unlock(&nmp->nm_lock); | |
542 | microuptime(&now); | |
543 | } | |
544 | if (error) { | |
545 | /* check that we still have our mount... */ | |
546 | nmp = NFSTONMP(np); | |
547 | if ((error2 = nfs_sigintr(nmp, NULL, NULL, 0))) { | |
548 | error = error2; | |
549 | if (error2 != EINTR) { | |
550 | if (type == F_UNLCK) | |
551 | printf("nfs3_lockd_request: aborting unlock request, error %d\n", error); | |
552 | break; | |
553 | } | |
554 | } | |
555 | /* ...and that we still support locks */ | |
556 | lck_mtx_lock(&nmp->nm_lock); | |
557 | if (nmp->nm_lockmode == NFS_LOCK_MODE_DISABLED) { | |
558 | if (error == EWOULDBLOCK) | |
559 | error = ENOTSUP; | |
560 | lck_mtx_unlock(&nmp->nm_lock); | |
561 | break; | |
562 | } | |
563 | /* ...and that there isn't a recovery pending */ | |
564 | if ((error == EWOULDBLOCK) && (nmp->nm_state & NFSSTA_RECOVER) && !(flags & R_RECOVER)) { | |
565 | /* recovery pending... return to allow recovery to occur */ | |
566 | error = NFSERR_DENIED; | |
567 | lck_mtx_unlock(&nmp->nm_lock); | |
568 | break; | |
569 | } | |
570 | interruptable = NMFLAG(nmp, INTR); | |
571 | if ((error != EWOULDBLOCK) || | |
572 | ((nmp->nm_state & NFSSTA_RECOVER) && !(flags & R_RECOVER)) || | |
573 | ((flags & R_RECOVER) && ((now.tv_sec - starttime) > 30))) { | |
574 | if ((error == EWOULDBLOCK) && (flags & R_RECOVER)) { | |
575 | /* give up if this is for recovery and taking too long */ | |
576 | error = ETIMEDOUT; | |
577 | } else if ((nmp->nm_state & NFSSTA_RECOVER) && !(flags & R_RECOVER)) { | |
578 | /* recovery pending... return an error that'll get this operation restarted */ | |
579 | error = NFSERR_GRACE; | |
580 | } | |
581 | lck_mtx_unlock(&nmp->nm_lock); | |
582 | /* | |
583 | * We're going to bail on this request. | |
584 | * If we were a blocked lock request, send a cancel. | |
585 | */ | |
586 | if ((msgreq->lmr_errno == EINPROGRESS) && | |
587 | !(msg->lm_flags & LOCKD_MSG_CANCEL)) { | |
588 | /* set this request up as a cancel */ | |
589 | msg->lm_flags |= LOCKD_MSG_CANCEL; | |
590 | nfs_lockdmsg_dequeue(msgreq); | |
591 | msg->lm_xid = nfs_lockxid_get(); | |
592 | nfs_lockdmsg_enqueue(msgreq); | |
593 | msgreq->lmr_saved_errno = error; | |
594 | msgreq->lmr_errno = 0; | |
595 | msgreq->lmr_answered = 0; | |
596 | /* reset timeout */ | |
597 | timeo = 2; | |
598 | /* send cancel request */ | |
599 | continue; | |
600 | } | |
601 | break; | |
602 | } | |
603 | ||
604 | /* warn if we're not getting any response */ | |
605 | microuptime(&now); | |
606 | if ((msgreq->lmr_errno != EINPROGRESS) && | |
607 | !(msg->lm_flags & LOCKD_MSG_DENIED_GRACE) && | |
608 | (nmp->nm_tprintf_initial_delay != 0) && | |
609 | ((lastmsg + nmp->nm_tprintf_delay) < now.tv_sec)) { | |
610 | lck_mtx_unlock(&nmp->nm_lock); | |
611 | lastmsg = now.tv_sec; | |
612 | nfs_down(nmp, thd, 0, NFSSTA_LOCKTIMEO, "lockd not responding", 0); | |
613 | wentdown = 1; | |
614 | } else | |
615 | lck_mtx_unlock(&nmp->nm_lock); | |
616 | ||
617 | if (msgreq->lmr_errno == EINPROGRESS) { | |
618 | /* | |
619 | * We've got a blocked lock request that we are | |
620 | * going to retry. First, we'll want to try to | |
621 | * send a cancel for the previous request. | |
622 | * | |
623 | * Clear errno so if we don't get a response | |
624 | * to the resend we'll call nfs_down(). | |
625 | * Also reset timeout because we'll expect a | |
626 | * quick response to the cancel/resend (even if | |
627 | * it is NLM_BLOCKED). | |
628 | */ | |
629 | msg->lm_flags |= LOCKD_MSG_CANCEL; | |
630 | nfs_lockdmsg_dequeue(msgreq); | |
631 | msg->lm_xid = nfs_lockxid_get(); | |
632 | nfs_lockdmsg_enqueue(msgreq); | |
633 | msgreq->lmr_saved_errno = msgreq->lmr_errno; | |
634 | msgreq->lmr_errno = 0; | |
635 | msgreq->lmr_answered = 0; | |
636 | timeo = 2; | |
637 | /* send cancel then resend request */ | |
638 | continue; | |
639 | } | |
640 | ||
641 | /* | |
642 | * We timed out, so we will resend the request. | |
643 | */ | |
644 | if (!(flags & R_RECOVER)) | |
645 | timeo *= 2; | |
646 | if (timeo > 30) | |
647 | timeo = 30; | |
648 | /* resend request */ | |
649 | continue; | |
650 | } | |
651 | ||
652 | /* we got a reponse, so the server's lockd is OK */ | |
653 | nfs_up(NFSTONMP(np), thd, NFSSTA_LOCKTIMEO, | |
654 | wentdown ? "lockd alive again" : NULL); | |
655 | wentdown = 0; | |
656 | ||
657 | if (msgreq->lmr_answered && (msg->lm_flags & LOCKD_MSG_DENIED_GRACE)) { | |
658 | /* | |
659 | * The lock request was denied because the server lockd is | |
660 | * still in its grace period. So, we need to try the | |
661 | * request again in a little bit. Return the GRACE error so | |
662 | * the higher levels can perform the retry. | |
663 | */ | |
664 | msgreq->lmr_saved_errno = msgreq->lmr_errno = error = NFSERR_GRACE; | |
665 | } | |
666 | ||
667 | if (msgreq->lmr_errno == EINPROGRESS) { | |
668 | /* got NLM_BLOCKED response */ | |
669 | /* need to wait for NLM_GRANTED */ | |
670 | timeo = 30; | |
671 | msgreq->lmr_answered = 0; | |
672 | goto wait_for_granted; | |
673 | } | |
674 | ||
675 | if ((msg->lm_flags & LOCKD_MSG_CANCEL) && | |
676 | (msgreq->lmr_saved_errno == EINPROGRESS)) { | |
677 | /* | |
678 | * We just got a successful reply to the | |
679 | * cancel of the previous blocked lock request. | |
680 | * Now, go ahead and return a DENIED error so the | |
681 | * higher levels can resend the request. | |
682 | */ | |
683 | msg->lm_flags &= ~LOCKD_MSG_CANCEL; | |
684 | nfs_lockdmsg_dequeue(msgreq); | |
685 | error = NFSERR_DENIED; | |
686 | break; | |
687 | } | |
688 | ||
689 | /* | |
690 | * If the blocked lock request was cancelled. | |
691 | * Restore the error condition from when we | |
692 | * originally bailed on the request. | |
693 | */ | |
694 | if (msg->lm_flags & LOCKD_MSG_CANCEL) { | |
695 | msg->lm_flags &= ~LOCKD_MSG_CANCEL; | |
696 | error = msgreq->lmr_saved_errno; | |
697 | } else { | |
698 | error = msgreq->lmr_errno; | |
699 | } | |
700 | ||
701 | nmp = NFSTONMP(np); | |
702 | if ((error == ENOTSUP) && nmp && !(nmp->nm_state & NFSSTA_LOCKSWORK)) { | |
703 | /* | |
704 | * We have NO evidence that locks work and lockd | |
705 | * returned ENOTSUP. Let's take this as a hint | |
706 | * that locks aren't supported and disable them | |
707 | * for this mount. | |
708 | */ | |
709 | nfs_lockdmsg_dequeue(msgreq); | |
710 | lck_mtx_unlock(nfs_lock_mutex); | |
711 | lck_mtx_lock(&nmp->nm_lock); | |
712 | if (nmp->nm_lockmode == NFS_LOCK_MODE_ENABLED) { | |
713 | nmp->nm_lockmode = NFS_LOCK_MODE_DISABLED; | |
714 | nfs_lockd_mount_unregister(nmp); | |
715 | } | |
716 | nmp->nm_state &= ~NFSSTA_LOCKTIMEO; | |
717 | lck_mtx_unlock(&nmp->nm_lock); | |
718 | printf("lockd returned ENOTSUP, disabling locks for nfs server: %s\n", | |
719 | vfs_statfs(nmp->nm_mountp)->f_mntfromname); | |
720 | return (error); | |
721 | } | |
722 | if (!error) { | |
723 | /* record that NFS file locking has worked on this mount */ | |
724 | if (nmp) { | |
725 | lck_mtx_lock(&nmp->nm_lock); | |
726 | if (!(nmp->nm_state & NFSSTA_LOCKSWORK)) | |
727 | nmp->nm_state |= NFSSTA_LOCKSWORK; | |
728 | lck_mtx_unlock(&nmp->nm_lock); | |
729 | } | |
730 | } | |
731 | break; | |
732 | } | |
733 | ||
734 | nfs_lockdmsg_dequeue(msgreq); | |
735 | ||
736 | lck_mtx_unlock(nfs_lock_mutex); | |
737 | ||
738 | return (error); | |
739 | } | |
740 | ||
741 | /* | |
742 | * Send an NLM LOCK message to the server | |
743 | */ | |
744 | int | |
745 | nfs3_setlock_rpc( | |
746 | nfsnode_t np, | |
747 | struct nfs_open_file *nofp, | |
748 | struct nfs_file_lock *nflp, | |
749 | int reclaim, | |
750 | int flags, | |
751 | thread_t thd, | |
752 | kauth_cred_t cred) | |
753 | { | |
754 | struct nfs_lock_owner *nlop = nflp->nfl_owner; | |
755 | struct nfsmount *nmp; | |
756 | int error; | |
757 | LOCKD_MSG_REQUEST msgreq; | |
758 | LOCKD_MSG *msg; | |
759 | ||
760 | nmp = NFSTONMP(np); | |
761 | if (nfs_mount_gone(nmp)) | |
762 | return (ENXIO); | |
763 | ||
764 | if (!nlop->nlo_open_owner) { | |
765 | nfs_open_owner_ref(nofp->nof_owner); | |
766 | nlop->nlo_open_owner = nofp->nof_owner; | |
767 | } | |
768 | if ((error = nfs_lock_owner_set_busy(nlop, thd))) | |
769 | return (error); | |
770 | ||
771 | /* set up lock message request structure */ | |
772 | bzero(&msgreq, sizeof(msgreq)); | |
773 | msg = &msgreq.lmr_msg; | |
774 | msg->lm_version = LOCKD_MSG_VERSION; | |
775 | if ((nflp->nfl_flags & NFS_FILE_LOCK_WAIT) && !reclaim) | |
776 | msg->lm_flags |= LOCKD_MSG_BLOCK; | |
777 | if (reclaim) | |
778 | msg->lm_flags |= LOCKD_MSG_RECLAIM; | |
779 | msg->lm_fh_len = (nmp->nm_vers == NFS_VER2) ? NFSX_V2FH : np->n_fhsize; | |
780 | bcopy(np->n_fhp, msg->lm_fh, msg->lm_fh_len); | |
781 | cru2x(cred, &msg->lm_cred); | |
782 | ||
783 | msg->lm_fl.l_whence = SEEK_SET; | |
784 | msg->lm_fl.l_start = nflp->nfl_start; | |
785 | msg->lm_fl.l_len = NFS_FLOCK_LENGTH(nflp->nfl_start, nflp->nfl_end); | |
786 | msg->lm_fl.l_type = nflp->nfl_type; | |
787 | msg->lm_fl.l_pid = nlop->nlo_pid; | |
788 | ||
789 | error = nfs3_lockd_request(np, 0, &msgreq, flags, thd); | |
790 | ||
791 | nfs_lock_owner_clear_busy(nlop); | |
792 | return (error); | |
793 | } | |
794 | ||
795 | /* | |
796 | * Send an NLM UNLOCK message to the server | |
797 | */ | |
798 | int | |
799 | nfs3_unlock_rpc( | |
800 | nfsnode_t np, | |
801 | struct nfs_lock_owner *nlop, | |
802 | __unused int type, | |
803 | uint64_t start, | |
804 | uint64_t end, | |
805 | int flags, | |
806 | thread_t thd, | |
807 | kauth_cred_t cred) | |
808 | { | |
809 | struct nfsmount *nmp; | |
810 | LOCKD_MSG_REQUEST msgreq; | |
811 | LOCKD_MSG *msg; | |
812 | ||
813 | nmp = NFSTONMP(np); | |
814 | if (!nmp) | |
815 | return (ENXIO); | |
816 | ||
817 | /* set up lock message request structure */ | |
818 | bzero(&msgreq, sizeof(msgreq)); | |
819 | msg = &msgreq.lmr_msg; | |
820 | msg->lm_version = LOCKD_MSG_VERSION; | |
821 | msg->lm_fh_len = (nmp->nm_vers == NFS_VER2) ? NFSX_V2FH : np->n_fhsize; | |
822 | bcopy(np->n_fhp, msg->lm_fh, msg->lm_fh_len); | |
823 | cru2x(cred, &msg->lm_cred); | |
824 | ||
825 | msg->lm_fl.l_whence = SEEK_SET; | |
826 | msg->lm_fl.l_start = start; | |
827 | msg->lm_fl.l_len = NFS_FLOCK_LENGTH(start, end); | |
828 | msg->lm_fl.l_type = F_UNLCK; | |
829 | msg->lm_fl.l_pid = nlop->nlo_pid; | |
830 | ||
831 | return (nfs3_lockd_request(np, F_UNLCK, &msgreq, flags, thd)); | |
832 | } | |
833 | ||
834 | /* | |
835 | * Send an NLM LOCK TEST message to the server | |
836 | */ | |
837 | int | |
838 | nfs3_getlock_rpc( | |
839 | nfsnode_t np, | |
840 | struct nfs_lock_owner *nlop, | |
841 | struct flock *fl, | |
842 | uint64_t start, | |
843 | uint64_t end, | |
844 | vfs_context_t ctx) | |
845 | { | |
846 | struct nfsmount *nmp; | |
847 | int error; | |
848 | LOCKD_MSG_REQUEST msgreq; | |
849 | LOCKD_MSG *msg; | |
850 | ||
851 | nmp = NFSTONMP(np); | |
852 | if (nfs_mount_gone(nmp)) | |
853 | return (ENXIO); | |
854 | ||
855 | /* set up lock message request structure */ | |
856 | bzero(&msgreq, sizeof(msgreq)); | |
857 | msg = &msgreq.lmr_msg; | |
858 | msg->lm_version = LOCKD_MSG_VERSION; | |
859 | msg->lm_flags |= LOCKD_MSG_TEST; | |
860 | msg->lm_fh_len = (nmp->nm_vers == NFS_VER2) ? NFSX_V2FH : np->n_fhsize; | |
861 | bcopy(np->n_fhp, msg->lm_fh, msg->lm_fh_len); | |
862 | cru2x(vfs_context_ucred(ctx), &msg->lm_cred); | |
863 | ||
864 | msg->lm_fl.l_whence = SEEK_SET; | |
865 | msg->lm_fl.l_start = start; | |
866 | msg->lm_fl.l_len = NFS_FLOCK_LENGTH(start, end); | |
867 | msg->lm_fl.l_type = fl->l_type; | |
868 | msg->lm_fl.l_pid = nlop->nlo_pid; | |
869 | ||
870 | error = nfs3_lockd_request(np, 0, &msgreq, 0, vfs_context_thread(ctx)); | |
871 | ||
872 | if (!error && (msg->lm_flags & LOCKD_MSG_TEST) && !msgreq.lmr_errno) { | |
873 | if (msg->lm_fl.l_type != F_UNLCK) { | |
874 | fl->l_type = msg->lm_fl.l_type; | |
875 | fl->l_pid = msg->lm_fl.l_pid; | |
876 | fl->l_start = msg->lm_fl.l_start; | |
877 | fl->l_len = msg->lm_fl.l_len; | |
878 | fl->l_whence = SEEK_SET; | |
879 | } else | |
880 | fl->l_type = F_UNLCK; | |
881 | } | |
882 | ||
883 | return (error); | |
884 | } | |
885 | ||
886 | /* | |
887 | * nfslockdans -- | |
888 | * NFS advisory byte-level locks answer from the lock daemon. | |
889 | */ | |
890 | int | |
891 | nfslockdans(proc_t p, struct lockd_ans *ansp) | |
892 | { | |
893 | LOCKD_MSG_REQUEST *msgreq; | |
894 | int error; | |
895 | ||
896 | /* Let root make this call. */ | |
897 | error = proc_suser(p); | |
898 | if (error) | |
899 | return (error); | |
900 | ||
901 | /* the version should match, or we're out of sync */ | |
902 | if (ansp->la_version != LOCKD_ANS_VERSION) | |
903 | return (EINVAL); | |
904 | ||
905 | lck_mtx_lock(nfs_lock_mutex); | |
906 | ||
907 | /* try to find the lockd message by transaction id (cookie) */ | |
908 | msgreq = nfs_lockdmsg_find_by_xid(ansp->la_xid); | |
909 | if (ansp->la_flags & LOCKD_ANS_GRANTED) { | |
910 | /* | |
911 | * We can't depend on the granted message having our cookie, | |
912 | * so we check the answer against the lockd message found. | |
913 | * If no message was found or it doesn't match the answer, | |
914 | * we look for the lockd message by the answer's lock info. | |
915 | */ | |
916 | if (!msgreq || nfs_lockdmsg_compare_to_answer(msgreq, ansp)) | |
917 | msgreq = nfs_lockdmsg_find_by_answer(ansp); | |
918 | /* | |
919 | * We need to make sure this request isn't being cancelled | |
920 | * If it is, we don't want to accept the granted message. | |
921 | */ | |
922 | if (msgreq && (msgreq->lmr_msg.lm_flags & LOCKD_MSG_CANCEL)) | |
923 | msgreq = NULL; | |
924 | } | |
925 | if (!msgreq) { | |
926 | lck_mtx_unlock(nfs_lock_mutex); | |
927 | return (EPIPE); | |
928 | } | |
929 | ||
930 | msgreq->lmr_errno = ansp->la_errno; | |
931 | if ((msgreq->lmr_msg.lm_flags & LOCKD_MSG_TEST) && msgreq->lmr_errno == 0) { | |
932 | if (ansp->la_flags & LOCKD_ANS_LOCK_INFO) { | |
933 | if (ansp->la_flags & LOCKD_ANS_LOCK_EXCL) | |
934 | msgreq->lmr_msg.lm_fl.l_type = F_WRLCK; | |
935 | else | |
936 | msgreq->lmr_msg.lm_fl.l_type = F_RDLCK; | |
937 | msgreq->lmr_msg.lm_fl.l_pid = ansp->la_pid; | |
938 | msgreq->lmr_msg.lm_fl.l_start = ansp->la_start; | |
939 | msgreq->lmr_msg.lm_fl.l_len = ansp->la_len; | |
940 | } else { | |
941 | msgreq->lmr_msg.lm_fl.l_type = F_UNLCK; | |
942 | } | |
943 | } | |
944 | if (ansp->la_flags & LOCKD_ANS_DENIED_GRACE) | |
945 | msgreq->lmr_msg.lm_flags |= LOCKD_MSG_DENIED_GRACE; | |
946 | ||
947 | msgreq->lmr_answered = 1; | |
948 | lck_mtx_unlock(nfs_lock_mutex); | |
949 | wakeup(msgreq); | |
950 | ||
951 | return (0); | |
952 | } | |
953 | ||
954 | /* | |
955 | * nfslockdnotify -- | |
956 | * NFS host restart notification from the lock daemon. | |
957 | * | |
958 | * Used to initiate reclaiming of held locks when a server we | |
959 | * have mounted reboots. | |
960 | */ | |
961 | int | |
962 | nfslockdnotify(proc_t p, user_addr_t argp) | |
963 | { | |
964 | int error, i, headsize; | |
965 | struct lockd_notify ln; | |
966 | struct nfsmount *nmp; | |
967 | struct sockaddr *saddr; | |
968 | ||
969 | /* Let root make this call. */ | |
970 | error = proc_suser(p); | |
971 | if (error) | |
972 | return (error); | |
973 | ||
974 | headsize = (char*)&ln.ln_addr[0] - (char*)&ln.ln_version; | |
975 | error = copyin(argp, &ln, headsize); | |
976 | if (error) | |
977 | return (error); | |
978 | if (ln.ln_version != LOCKD_NOTIFY_VERSION) | |
979 | return (EINVAL); | |
980 | if ((ln.ln_addrcount < 1) || (ln.ln_addrcount > 128)) | |
981 | return (EINVAL); | |
982 | argp += headsize; | |
983 | saddr = (struct sockaddr *)&ln.ln_addr[0]; | |
984 | ||
985 | lck_mtx_lock(nfs_lock_mutex); | |
986 | ||
987 | for (i=0; i < ln.ln_addrcount; i++) { | |
988 | error = copyin(argp, &ln.ln_addr[0], sizeof(ln.ln_addr[0])); | |
989 | if (error) | |
990 | break; | |
991 | argp += sizeof(ln.ln_addr[0]); | |
992 | /* scan lockd mount list for match to this address */ | |
993 | TAILQ_FOREACH(nmp, &nfs_lockd_mount_list, nm_ldlink) { | |
994 | /* check if address matches this mount's server address */ | |
995 | if (!nmp->nm_saddr || nfs_sockaddr_cmp(saddr, nmp->nm_saddr)) | |
996 | continue; | |
997 | /* We have a match! Mark it as needing recovery. */ | |
998 | lck_mtx_lock(&nmp->nm_lock); | |
999 | nfs_need_recover(nmp, 0); | |
1000 | lck_mtx_unlock(&nmp->nm_lock); | |
1001 | } | |
1002 | } | |
1003 | ||
1004 | lck_mtx_unlock(nfs_lock_mutex); | |
1005 | ||
1006 | return (error); | |
1007 | } | |
1008 |