bsd/nfs/nfs_lock.c

   1 /*
   2  * Copyright (c) 2002-2016 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /*-
  29  * Copyright (c) 1997 Berkeley Software Design, Inc. All rights reserved.
  30  *
  31  * Redistribution and use in source and binary forms, with or without
  32  * modification, are permitted provided that the following conditions
  33  * are met:
  34  * 1. Redistributions of source code must retain the above copyright
  35  *    notice, this list of conditions and the following disclaimer.
  36  * 2. Redistributions in binary form must reproduce the above copyright
  37  *    notice, this list of conditions and the following disclaimer in the
  38  *    documentation and/or other materials provided with the distribution.
  39  * 3. Berkeley Software Design Inc's name may not be used to endorse or
  40  *    promote products derived from this software without specific prior
  41  *    written permission.
  42  *
  43  * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
  44  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  45  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  46  * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
  47  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  48  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  49  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  50  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  51  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  52  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  53  * SUCH DAMAGE.
  54  *
  55  *      from BSDI nfs_lock.c,v 2.4 1998/12/14 23:49:56 jch Exp
  56  */
  57
  58 #include <nfs/nfs_conf.h>
  59 #if CONFIG_NFS_CLIENT
  60
  61 #include <sys/cdefs.h>
  62 #include <sys/param.h>
  63 #include <sys/systm.h>
  64 #include <sys/fcntl.h>
  65 #include <sys/kernel.h>         /* for hz */
  66 #include <sys/file_internal.h>
  67 #include <sys/malloc.h>
  68 #include <sys/lockf.h>          /* for hz */ /* Must come after sys/malloc.h */
  69 #include <sys/kpi_mbuf.h>
  70 #include <sys/mount_internal.h>
  71 #include <sys/proc_internal.h>  /* for p_start */
  72 #include <sys/kauth.h>
  73 #include <sys/resourcevar.h>
  74 #include <sys/socket.h>
  75 #include <sys/unistd.h>
  76 #include <sys/user.h>
  77 #include <sys/vnode_internal.h>
  78
  79 #include <kern/thread.h>
  80 #include <kern/host.h>
  81
  82 #include <machine/limits.h>
  83
  84 #include <net/if.h>
  85
  86 #include <nfs/rpcv2.h>
  87 #include <nfs/nfsproto.h>
  88 #include <nfs/nfs.h>
  89 #include <nfs/nfs_gss.h>
  90 #include <nfs/nfsmount.h>
  91 #include <nfs/nfsnode.h>
  92 #include <nfs/nfs_lock.h>
  93
  94 #include <mach/host_priv.h>
  95 #include <mach/mig_errors.h>
  96 #include <mach/host_special_ports.h>
  97 #include <lockd/lockd_mach.h>
  98
  99 extern void ipc_port_release_send(ipc_port_t);
 100
 101 /*
 102  * pending lock request messages are kept in this queue which is
 103  * kept sorted by transaction ID (xid).
 104  */
 105 static uint64_t nfs_lockxid = 0;
 106 static LOCKD_MSG_QUEUE nfs_pendlockq = TAILQ_HEAD_INITIALIZER(nfs_pendlockq);
 107
 108 /* list of mounts that are (potentially) making lockd requests */
 109 TAILQ_HEAD(nfs_lockd_mount_list, nfsmount) nfs_lockd_mount_list =
 110     TAILQ_HEAD_INITIALIZER(nfs_lockd_mount_list);
 111
 112 static LCK_GRP_DECLARE(nfs_lock_lck_grp, "nfs_lock");
 113 static LCK_MTX_DECLARE(nfs_lock_mutex, &nfs_lock_lck_grp);
 114
 115 void nfs_lockdmsg_enqueue(LOCKD_MSG_REQUEST *);
 116 void nfs_lockdmsg_dequeue(LOCKD_MSG_REQUEST *);
 117 int nfs_lockdmsg_compare_to_answer(LOCKD_MSG_REQUEST *, struct lockd_ans *);
 118 LOCKD_MSG_REQUEST *nfs_lockdmsg_find_by_answer(struct lockd_ans *);
 119 LOCKD_MSG_REQUEST *nfs_lockdmsg_find_by_xid(uint64_t);
 120 uint64_t nfs_lockxid_get(void);
 121 int nfs_lockd_send_request(LOCKD_MSG *, int);
 122
 123 /*
 124  * Register a mount as (potentially) making lockd requests.
 125  */
 126 void
 127 nfs_lockd_mount_register(struct nfsmount *nmp)
 128 {
 129         lck_mtx_lock(&nfs_lock_mutex);
 130         TAILQ_INSERT_HEAD(&nfs_lockd_mount_list, nmp, nm_ldlink);
 131         nfs_lockd_mounts++;
 132         lck_mtx_unlock(&nfs_lock_mutex);
 133 }
 134
 135 /*
 136  * Unregister a mount as (potentially) making lockd requests.
 137  *
 138  * When the lockd mount count drops to zero, then send a shutdown request to
 139  * lockd if we've sent any requests to it.
 140  */
 141 void
 142 nfs_lockd_mount_unregister(struct nfsmount *nmp)
 143 {
 144         int send_shutdown;
 145         mach_port_t lockd_port = IPC_PORT_NULL;
 146         kern_return_t kr;
 147
 148         lck_mtx_lock(&nfs_lock_mutex);
 149         if (nmp->nm_ldlink.tqe_next == NFSNOLIST) {
 150                 lck_mtx_unlock(&nfs_lock_mutex);
 151                 return;
 152         }
 153
 154         TAILQ_REMOVE(&nfs_lockd_mount_list, nmp, nm_ldlink);
 155         nmp->nm_ldlink.tqe_next = NFSNOLIST;
 156
 157         nfs_lockd_mounts--;
 158
 159         /* send a shutdown request if there are no more lockd mounts */
 160         send_shutdown = ((nfs_lockd_mounts == 0) && nfs_lockd_request_sent);
 161         if (send_shutdown) {
 162                 nfs_lockd_request_sent = 0;
 163         }
 164
 165         lck_mtx_unlock(&nfs_lock_mutex);
 166
 167         if (!send_shutdown) {
 168                 return;
 169         }
 170
 171         /*
 172          * Let lockd know that it is no longer needed for any NFS mounts
 173          */
 174         kr = host_get_lockd_port(host_priv_self(), &lockd_port);
 175         if ((kr != KERN_SUCCESS) || !IPC_PORT_VALID(lockd_port)) {
 176                 printf("nfs_lockd_mount_change: shutdown couldn't get port, kr %d, port %s\n",
 177                     kr, (lockd_port == IPC_PORT_NULL) ? "NULL" :
 178                     (lockd_port == IPC_PORT_DEAD) ? "DEAD" : "VALID");
 179                 return;
 180         }
 181
 182         kr = lockd_shutdown(lockd_port);
 183         if (kr != KERN_SUCCESS) {
 184                 printf("nfs_lockd_mount_change: shutdown %d\n", kr);
 185         }
 186
 187         ipc_port_release_send(lockd_port);
 188 }
 189
 190 /*
 191  * insert a lock request message into the pending queue
 192  * (nfs_lock_mutex must be held)
 193  */
 194 void
 195 nfs_lockdmsg_enqueue(LOCKD_MSG_REQUEST *msgreq)
 196 {
 197         LOCKD_MSG_REQUEST *mr;
 198
 199         mr = TAILQ_LAST(&nfs_pendlockq, nfs_lock_msg_queue);
 200         if (!mr || (msgreq->lmr_msg.lm_xid > mr->lmr_msg.lm_xid)) {
 201                 /* fast path: empty queue or new largest xid */
 202                 TAILQ_INSERT_TAIL(&nfs_pendlockq, msgreq, lmr_next);
 203                 return;
 204         }
 205         /* slow path: need to walk list to find insertion point */
 206         while (mr && (msgreq->lmr_msg.lm_xid > mr->lmr_msg.lm_xid)) {
 207                 mr = TAILQ_PREV(mr, nfs_lock_msg_queue, lmr_next);
 208         }
 209         if (mr) {
 210                 TAILQ_INSERT_AFTER(&nfs_pendlockq, mr, msgreq, lmr_next);
 211         } else {
 212                 TAILQ_INSERT_HEAD(&nfs_pendlockq, msgreq, lmr_next);
 213         }
 214 }
 215
 216 /*
 217  * remove a lock request message from the pending queue
 218  * (nfs_lock_mutex must be held)
 219  */
 220 void
 221 nfs_lockdmsg_dequeue(LOCKD_MSG_REQUEST *msgreq)
 222 {
 223         TAILQ_REMOVE(&nfs_pendlockq, msgreq, lmr_next);
 224 }
 225
 226 /*
 227  * find a pending lock request message by xid
 228  *
 229  * We search from the head of the list assuming that the message we're
 230  * looking for is for an older request (because we have an answer to it).
 231  * This assumes that lock request will be answered primarily in FIFO order.
 232  * However, this may not be the case if there are blocked requests.  We may
 233  * want to move blocked requests to a separate queue (but that'll complicate
 234  * duplicate xid checking).
 235  *
 236  * (nfs_lock_mutex must be held)
 237  */
 238 LOCKD_MSG_REQUEST *
 239 nfs_lockdmsg_find_by_xid(uint64_t lockxid)
 240 {
 241         LOCKD_MSG_REQUEST *mr;
 242
 243         TAILQ_FOREACH(mr, &nfs_pendlockq, lmr_next) {
 244                 if (mr->lmr_msg.lm_xid == lockxid) {
 245                         return mr;
 246                 }
 247                 if (mr->lmr_msg.lm_xid > lockxid) {
 248                         return NULL;
 249                 }
 250         }
 251         return mr;
 252 }
 253
 254 /*
 255  * Because we can't depend on nlm_granted messages containing the same
 256  * cookie we sent with the original lock request, we need code to test
 257  * if an nlm_granted answer matches the lock request.  We also need code
 258  * that can find a lockd message based solely on the nlm_granted answer.
 259  */
 260
 261 /*
 262  * compare lockd message to answer
 263  *
 264  * returns 0 on equality and 1 if different
 265  */
 266 int
 267 nfs_lockdmsg_compare_to_answer(LOCKD_MSG_REQUEST *msgreq, struct lockd_ans *ansp)
 268 {
 269         if (!(ansp->la_flags & LOCKD_ANS_LOCK_INFO)) {
 270                 return 1;
 271         }
 272         if (msgreq->lmr_msg.lm_fl.l_pid != ansp->la_pid) {
 273                 return 1;
 274         }
 275         if (msgreq->lmr_msg.lm_fl.l_start != ansp->la_start) {
 276                 return 1;
 277         }
 278         if (msgreq->lmr_msg.lm_fl.l_len != ansp->la_len) {
 279                 return 1;
 280         }
 281         if (msgreq->lmr_msg.lm_fh_len != ansp->la_fh_len) {
 282                 return 1;
 283         }
 284         if (bcmp(msgreq->lmr_msg.lm_fh, ansp->la_fh, ansp->la_fh_len)) {
 285                 return 1;
 286         }
 287         return 0;
 288 }
 289
 290 /*
 291  * find a pending lock request message based on the lock info provided
 292  * in the lockd_ans/nlm_granted data.  We need this because we can't
 293  * depend on nlm_granted messages containing the same cookie we sent
 294  * with the original lock request.
 295  *
 296  * We search from the head of the list assuming that the message we're
 297  * looking for is for an older request (because we have an answer to it).
 298  * This assumes that lock request will be answered primarily in FIFO order.
 299  * However, this may not be the case if there are blocked requests.  We may
 300  * want to move blocked requests to a separate queue (but that'll complicate
 301  * duplicate xid checking).
 302  *
 303  * (nfs_lock_mutex must be held)
 304  */
 305 LOCKD_MSG_REQUEST *
 306 nfs_lockdmsg_find_by_answer(struct lockd_ans *ansp)
 307 {
 308         LOCKD_MSG_REQUEST *mr;
 309
 310         if (!(ansp->la_flags & LOCKD_ANS_LOCK_INFO)) {
 311                 return NULL;
 312         }
 313         TAILQ_FOREACH(mr, &nfs_pendlockq, lmr_next) {
 314                 if (!nfs_lockdmsg_compare_to_answer(mr, ansp)) {
 315                         break;
 316                 }
 317         }
 318         return mr;
 319 }
 320
 321 /*
 322  * return the next unique lock request transaction ID
 323  * (nfs_lock_mutex must be held)
 324  */
 325 uint64_t
 326 nfs_lockxid_get(void)
 327 {
 328         LOCKD_MSG_REQUEST *mr;
 329
 330         /* derive initial lock xid from system time */
 331         if (!nfs_lockxid) {
 332                 /*
 333                  * Note: it's OK if this code inits nfs_lockxid to 0 (for example,
 334                  * due to a broken clock) because we immediately increment it
 335                  * and we guarantee to never use xid 0.  So, nfs_lockxid should only
 336                  * ever be 0 the first time this function is called.
 337                  */
 338                 struct timeval tv;
 339                 microtime(&tv);
 340                 nfs_lockxid = (uint64_t)tv.tv_sec << 12;
 341         }
 342
 343         /* make sure we get a unique xid */
 344         do {
 345                 /* Skip zero xid if it should ever happen.  */
 346                 if (++nfs_lockxid == 0) {
 347                         nfs_lockxid++;
 348                 }
 349                 if (!(mr = TAILQ_LAST(&nfs_pendlockq, nfs_lock_msg_queue)) ||
 350                     (mr->lmr_msg.lm_xid < nfs_lockxid)) {
 351                         /* fast path: empty queue or new largest xid */
 352                         break;
 353                 }
 354                 /* check if xid is already in use */
 355         } while (nfs_lockdmsg_find_by_xid(nfs_lockxid));
 356
 357         return nfs_lockxid;
 358 }
 359
 360 #define MACH_MAX_TRIES 3
 361
 362 int
 363 nfs_lockd_send_request(LOCKD_MSG *msg, int interruptable)
 364 {
 365         kern_return_t kr;
 366         int retries = 0;
 367         mach_port_t lockd_port = IPC_PORT_NULL;
 368
 369         kr = host_get_lockd_port(host_priv_self(), &lockd_port);
 370         if (kr != KERN_SUCCESS || !IPC_PORT_VALID(lockd_port)) {
 371                 return ENOTSUP;
 372         }
 373
 374         do {
 375                 /* In the kernel all mach messaging is interruptable */
 376                 do {
 377                         kr = lockd_request(
 378                                 lockd_port,
 379                                 msg->lm_version,
 380                                 msg->lm_flags,
 381                                 msg->lm_xid,
 382                                 msg->lm_fl.l_start,
 383                                 msg->lm_fl.l_len,
 384                                 msg->lm_fl.l_pid,
 385                                 msg->lm_fl.l_type,
 386                                 msg->lm_fl.l_whence,
 387                                 (uint32_t *)&msg->lm_addr,
 388                                 (uint32_t *)&msg->lm_cred,
 389                                 msg->lm_fh_len,
 390                                 msg->lm_fh);
 391                         if (kr != KERN_SUCCESS) {
 392                                 printf("lockd_request received %d!\n", kr);
 393                         }
 394                 } while (!interruptable && kr == MACH_SEND_INTERRUPTED);
 395         } while (kr == MIG_SERVER_DIED && retries++ < MACH_MAX_TRIES);
 396
 397         ipc_port_release_send(lockd_port);
 398         switch (kr) {
 399         case MACH_SEND_INTERRUPTED:
 400                 return EINTR;
 401         default:
 402                 /*
 403                  * Other MACH or MIG errors we will retry. Eventually
 404                  * we will call nfs_down and allow the user to disable
 405                  * locking.
 406                  */
 407                 return EAGAIN;
 408         }
 409 }
 410
 411 /*
 412  * NFS advisory byte-level locks (client)
 413  */
 414 int
 415 nfs3_lockd_request(
 416         nfsnode_t np,
 417         int type,
 418         LOCKD_MSG_REQUEST *msgreq,
 419         int flags,
 420         thread_t thd)
 421 {
 422         LOCKD_MSG *msg = &msgreq->lmr_msg;
 423         int error, error2;
 424         int interruptable, slpflag;
 425         struct nfsmount *nmp;
 426         struct timeval now;
 427         int timeo, wentdown = 0;
 428         long starttime, endtime, lastmsg;
 429         struct timespec ts;
 430         struct sockaddr *saddr;
 431
 432         nmp = NFSTONMP(np);
 433         if (!nmp || !nmp->nm_saddr) {
 434                 return ENXIO;
 435         }
 436
 437         lck_mtx_lock(&nmp->nm_lock);
 438         saddr = nmp->nm_saddr;
 439         bcopy(saddr, &msg->lm_addr, min(sizeof msg->lm_addr, saddr->sa_len));
 440         if (nmp->nm_vers == NFS_VER3) {
 441                 msg->lm_flags |= LOCKD_MSG_NFSV3;
 442         }
 443
 444         if (nmp->nm_sotype != SOCK_DGRAM) {
 445                 msg->lm_flags |= LOCKD_MSG_TCP;
 446         }
 447
 448         microuptime(&now);
 449         starttime = now.tv_sec;
 450         lastmsg = now.tv_sec - ((nmp->nm_tprintf_delay) - (nmp->nm_tprintf_initial_delay));
 451         interruptable = NMFLAG(nmp, INTR);
 452         lck_mtx_unlock(&nmp->nm_lock);
 453
 454         lck_mtx_lock(&nfs_lock_mutex);
 455
 456         /* allocate unique xid */
 457         msg->lm_xid = nfs_lockxid_get();
 458         nfs_lockdmsg_enqueue(msgreq);
 459
 460         timeo = 4;
 461
 462         for (;;) {
 463                 nfs_lockd_request_sent = 1;
 464
 465                 /* need to drop nfs_lock_mutex while calling nfs_lockd_send_request() */
 466                 lck_mtx_unlock(&nfs_lock_mutex);
 467                 error = nfs_lockd_send_request(msg, interruptable);
 468                 lck_mtx_lock(&nfs_lock_mutex);
 469                 if (error && error != EAGAIN) {
 470                         break;
 471                 }
 472
 473                 /*
 474                  * Always wait for an answer.  Not waiting for unlocks could
 475                  * cause a lock to be left if the unlock request gets dropped.
 476                  */
 477
 478                 /*
 479                  * Retry if it takes too long to get a response.
 480                  *
 481                  * The timeout numbers were picked out of thin air... they start
 482                  * at 4 and double each timeout with a max of 30 seconds.
 483                  *
 484                  * In order to maintain responsiveness, we pass a small timeout
 485                  * to msleep and calculate the timeouts ourselves.  This allows
 486                  * us to pick up on mount changes quicker.
 487                  */
 488 wait_for_granted:
 489                 error = EWOULDBLOCK;
 490                 slpflag = (interruptable && (type != F_UNLCK)) ? PCATCH : 0;
 491                 ts.tv_sec = 2;
 492                 ts.tv_nsec = 0;
 493                 microuptime(&now);
 494                 endtime = now.tv_sec + timeo;
 495                 while (now.tv_sec < endtime) {
 496                         error = error2 = 0;
 497                         if (!msgreq->lmr_answered) {
 498                                 error = msleep(msgreq, &nfs_lock_mutex, slpflag | PUSER, "lockd", &ts);
 499                                 slpflag = 0;
 500                         }
 501                         if (msgreq->lmr_answered) {
 502                                 /*
 503                                  * Note: it's possible to have a lock granted at
 504                                  * essentially the same time that we get interrupted.
 505                                  * Since the lock may be granted, we can't return an
 506                                  * error from this request or we might not unlock the
 507                                  * lock that's been granted.
 508                                  */
 509                                 nmp = NFSTONMP(np);
 510                                 if ((msgreq->lmr_errno == ENOTSUP) && nmp &&
 511                                     (nmp->nm_state & NFSSTA_LOCKSWORK)) {
 512                                         /*
 513                                          * We have evidence that locks work, yet lockd
 514                                          * returned ENOTSUP.  This is probably because
 515                                          * it was unable to contact the server's lockd
 516                                          * to send it the request.
 517                                          *
 518                                          * Because we know locks work, we'll consider
 519                                          * this failure to be a timeout.
 520                                          */
 521                                         error = EWOULDBLOCK;
 522                                 } else {
 523                                         error = 0;
 524                                 }
 525                                 break;
 526                         }
 527                         if (error != EWOULDBLOCK) {
 528                                 break;
 529                         }
 530                         /* check that we still have our mount... */
 531                         /* ...and that we still support locks */
 532                         /* ...and that there isn't a recovery pending */
 533                         nmp = NFSTONMP(np);
 534                         if ((error2 = nfs_sigintr(nmp, NULL, NULL, 0))) {
 535                                 error = error2;
 536                                 if (type == F_UNLCK) {
 537                                         printf("nfs3_lockd_request: aborting unlock request, error %d\n", error);
 538                                 }
 539                                 break;
 540                         }
 541                         lck_mtx_lock(&nmp->nm_lock);
 542                         if (nmp->nm_lockmode == NFS_LOCK_MODE_DISABLED) {
 543                                 lck_mtx_unlock(&nmp->nm_lock);
 544                                 break;
 545                         }
 546                         if ((nmp->nm_state & NFSSTA_RECOVER) && !(flags & R_RECOVER)) {
 547                                 /* recovery pending... return an error that'll get this operation restarted */
 548                                 error = NFSERR_GRACE;
 549                                 lck_mtx_unlock(&nmp->nm_lock);
 550                                 break;
 551                         }
 552                         interruptable = NMFLAG(nmp, INTR);
 553                         lck_mtx_unlock(&nmp->nm_lock);
 554                         microuptime(&now);
 555                 }
 556                 if (error) {
 557                         /* check that we still have our mount... */
 558                         nmp = NFSTONMP(np);
 559                         if ((error2 = nfs_sigintr(nmp, NULL, NULL, 0))) {
 560                                 error = error2;
 561                                 if (error2 != EINTR) {
 562                                         if (type == F_UNLCK) {
 563                                                 printf("nfs3_lockd_request: aborting unlock request, error %d\n", error);
 564                                         }
 565                                         break;
 566                                 }
 567                         }
 568                         /* ...and that we still support locks */
 569                         lck_mtx_lock(&nmp->nm_lock);
 570                         if (nmp->nm_lockmode == NFS_LOCK_MODE_DISABLED) {
 571                                 if (error == EWOULDBLOCK) {
 572                                         error = ENOTSUP;
 573                                 }
 574                                 lck_mtx_unlock(&nmp->nm_lock);
 575                                 break;
 576                         }
 577                         /* ...and that there isn't a recovery pending */
 578                         if ((error == EWOULDBLOCK) && (nmp->nm_state & NFSSTA_RECOVER) && !(flags & R_RECOVER)) {
 579                                 /* recovery pending... return to allow recovery to occur */
 580                                 error = NFSERR_DENIED;
 581                                 lck_mtx_unlock(&nmp->nm_lock);
 582                                 break;
 583                         }
 584                         interruptable = NMFLAG(nmp, INTR);
 585                         if ((error != EWOULDBLOCK) ||
 586                             ((nmp->nm_state & NFSSTA_RECOVER) && !(flags & R_RECOVER)) ||
 587                             ((flags & R_RECOVER) && ((now.tv_sec - starttime) > 30))) {
 588                                 if ((error == EWOULDBLOCK) && (flags & R_RECOVER)) {
 589                                         /* give up if this is for recovery and taking too long */
 590                                         error = ETIMEDOUT;
 591                                 } else if ((nmp->nm_state & NFSSTA_RECOVER) && !(flags & R_RECOVER)) {
 592                                         /* recovery pending... return an error that'll get this operation restarted */
 593                                         error = NFSERR_GRACE;
 594                                 }
 595                                 lck_mtx_unlock(&nmp->nm_lock);
 596                                 /*
 597                                  * We're going to bail on this request.
 598                                  * If we were a blocked lock request, send a cancel.
 599                                  */
 600                                 if ((msgreq->lmr_errno == EINPROGRESS) &&
 601                                     !(msg->lm_flags & LOCKD_MSG_CANCEL)) {
 602                                         /* set this request up as a cancel */
 603                                         msg->lm_flags |= LOCKD_MSG_CANCEL;
 604                                         nfs_lockdmsg_dequeue(msgreq);
 605                                         msg->lm_xid = nfs_lockxid_get();
 606                                         nfs_lockdmsg_enqueue(msgreq);
 607                                         msgreq->lmr_saved_errno = error;
 608                                         msgreq->lmr_errno = 0;
 609                                         msgreq->lmr_answered = 0;
 610                                         /* reset timeout */
 611                                         timeo = 2;
 612                                         /* send cancel request */
 613                                         continue;
 614                                 }
 615                                 break;
 616                         }
 617
 618                         /* warn if we're not getting any response */
 619                         microuptime(&now);
 620                         if ((msgreq->lmr_errno != EINPROGRESS) &&
 621                             !(msg->lm_flags & LOCKD_MSG_DENIED_GRACE) &&
 622                             (nmp->nm_tprintf_initial_delay != 0) &&
 623                             ((lastmsg + nmp->nm_tprintf_delay) < now.tv_sec)) {
 624                                 lck_mtx_unlock(&nmp->nm_lock);
 625                                 lastmsg = now.tv_sec;
 626                                 nfs_down(nmp, thd, 0, NFSSTA_LOCKTIMEO, "lockd not responding", 1);
 627                                 wentdown = 1;
 628                         } else {
 629                                 lck_mtx_unlock(&nmp->nm_lock);
 630                         }
 631
 632                         if (msgreq->lmr_errno == EINPROGRESS) {
 633                                 /*
 634                                  * We've got a blocked lock request that we are
 635                                  * going to retry.  First, we'll want to try to
 636                                  * send a cancel for the previous request.
 637                                  *
 638                                  * Clear errno so if we don't get a response
 639                                  * to the resend we'll call nfs_down().
 640                                  * Also reset timeout because we'll expect a
 641                                  * quick response to the cancel/resend (even if
 642                                  * it is NLM_BLOCKED).
 643                                  */
 644                                 msg->lm_flags |= LOCKD_MSG_CANCEL;
 645                                 nfs_lockdmsg_dequeue(msgreq);
 646                                 msg->lm_xid = nfs_lockxid_get();
 647                                 nfs_lockdmsg_enqueue(msgreq);
 648                                 msgreq->lmr_saved_errno = msgreq->lmr_errno;
 649                                 msgreq->lmr_errno = 0;
 650                                 msgreq->lmr_answered = 0;
 651                                 timeo = 2;
 652                                 /* send cancel then resend request */
 653                                 continue;
 654                         }
 655
 656                         /*
 657                          * We timed out, so we will resend the request.
 658                          */
 659                         if (!(flags & R_RECOVER)) {
 660                                 timeo *= 2;
 661                         }
 662                         if (timeo > 30) {
 663                                 timeo = 30;
 664                         }
 665                         /* resend request */
 666                         continue;
 667                 }
 668
 669                 /* we got a reponse, so the server's lockd is OK */
 670                 nfs_up(NFSTONMP(np), thd, NFSSTA_LOCKTIMEO,
 671                     wentdown ? "lockd alive again" : NULL);
 672                 wentdown = 0;
 673
 674                 if (msgreq->lmr_answered && (msg->lm_flags & LOCKD_MSG_DENIED_GRACE)) {
 675                         /*
 676                          * The lock request was denied because the server lockd is
 677                          * still in its grace period.  So, we need to try the
 678                          * request again in a little bit.  Return the GRACE error so
 679                          * the higher levels can perform the retry.
 680                          */
 681                         msgreq->lmr_saved_errno = msgreq->lmr_errno = error = NFSERR_GRACE;
 682                 }
 683
 684                 if (msgreq->lmr_errno == EINPROGRESS) {
 685                         /* got NLM_BLOCKED response */
 686                         /* need to wait for NLM_GRANTED */
 687                         timeo = 30;
 688                         msgreq->lmr_answered = 0;
 689                         goto wait_for_granted;
 690                 }
 691
 692                 if ((msg->lm_flags & LOCKD_MSG_CANCEL) &&
 693                     (msgreq->lmr_saved_errno == EINPROGRESS)) {
 694                         /*
 695                          * We just got a successful reply to the
 696                          * cancel of the previous blocked lock request.
 697                          * Now, go ahead and return a DENIED error so the
 698                          * higher levels can resend the request.
 699                          */
 700                         msg->lm_flags &= ~LOCKD_MSG_CANCEL;
 701                         error = NFSERR_DENIED;
 702                         /* Will dequeue msgreq after the following break at the end of this routine */
 703                         break;
 704                 }
 705
 706                 /*
 707                  * If the blocked lock request was cancelled.
 708                  * Restore the error condition from when we
 709                  * originally bailed on the request.
 710                  */
 711                 if (msg->lm_flags & LOCKD_MSG_CANCEL) {
 712                         msg->lm_flags &= ~LOCKD_MSG_CANCEL;
 713                         error = msgreq->lmr_saved_errno;
 714                 } else {
 715                         error = msgreq->lmr_errno;
 716                 }
 717
 718                 nmp = NFSTONMP(np);
 719                 if ((error == ENOTSUP) && nmp && !(nmp->nm_state & NFSSTA_LOCKSWORK)) {
 720                         /*
 721                          * We have NO evidence that locks work and lockd
 722                          * returned ENOTSUP.  Let's take this as a hint
 723                          * that locks aren't supported and disable them
 724                          * for this mount.
 725                          */
 726                         nfs_lockdmsg_dequeue(msgreq);
 727                         lck_mtx_unlock(&nfs_lock_mutex);
 728                         lck_mtx_lock(&nmp->nm_lock);
 729                         if (nmp->nm_lockmode == NFS_LOCK_MODE_ENABLED) {
 730                                 nmp->nm_lockmode = NFS_LOCK_MODE_DISABLED;
 731                                 nfs_lockd_mount_unregister(nmp);
 732                         }
 733                         nmp->nm_state &= ~NFSSTA_LOCKTIMEO;
 734                         lck_mtx_unlock(&nmp->nm_lock);
 735                         printf("lockd returned ENOTSUP, disabling locks for nfs server: %s\n",
 736                             vfs_statfs(nmp->nm_mountp)->f_mntfromname);
 737                         return error;
 738                 }
 739                 if (!error) {
 740                         /* record that NFS file locking has worked on this mount */
 741                         if (nmp) {
 742                                 lck_mtx_lock(&nmp->nm_lock);
 743                                 if (!(nmp->nm_state & NFSSTA_LOCKSWORK)) {
 744                                         nmp->nm_state |= NFSSTA_LOCKSWORK;
 745                                 }
 746                                 lck_mtx_unlock(&nmp->nm_lock);
 747                         }
 748                 }
 749                 break;
 750         }
 751
 752         nfs_lockdmsg_dequeue(msgreq);
 753
 754         lck_mtx_unlock(&nfs_lock_mutex);
 755
 756         return error;
 757 }
 758
 759 /*
 760  * Send an NLM LOCK message to the server
 761  */
 762 int
 763 nfs3_setlock_rpc(
 764         nfsnode_t np,
 765         struct nfs_open_file *nofp,
 766         struct nfs_file_lock *nflp,
 767         int reclaim,
 768         int flags,
 769         thread_t thd,
 770         kauth_cred_t cred)
 771 {
 772         struct nfs_lock_owner *nlop = nflp->nfl_owner;
 773         struct nfsmount *nmp;
 774         int error;
 775         LOCKD_MSG_REQUEST msgreq;
 776         LOCKD_MSG *msg;
 777
 778         nmp = NFSTONMP(np);
 779         if (nfs_mount_gone(nmp)) {
 780                 return ENXIO;
 781         }
 782
 783         if (!nlop->nlo_open_owner) {
 784                 nfs_open_owner_ref(nofp->nof_owner);
 785                 nlop->nlo_open_owner = nofp->nof_owner;
 786         }
 787         if ((error = nfs_lock_owner_set_busy(nlop, thd))) {
 788                 return error;
 789         }
 790
 791         /* set up lock message request structure */
 792         bzero(&msgreq, sizeof(msgreq));
 793         msg = &msgreq.lmr_msg;
 794         msg->lm_version = LOCKD_MSG_VERSION;
 795         if ((nflp->nfl_flags & NFS_FILE_LOCK_WAIT) && !reclaim) {
 796                 msg->lm_flags |= LOCKD_MSG_BLOCK;
 797         }
 798         if (reclaim) {
 799                 msg->lm_flags |= LOCKD_MSG_RECLAIM;
 800         }
 801         msg->lm_fh_len = (nmp->nm_vers == NFS_VER2) ? NFSX_V2FH : np->n_fhsize;
 802         bcopy(np->n_fhp, msg->lm_fh, msg->lm_fh_len);
 803         cru2x(cred, &msg->lm_cred);
 804
 805         msg->lm_fl.l_whence = SEEK_SET;
 806         msg->lm_fl.l_start = nflp->nfl_start;
 807         msg->lm_fl.l_len = NFS_FLOCK_LENGTH(nflp->nfl_start, nflp->nfl_end);
 808         msg->lm_fl.l_type = nflp->nfl_type;
 809         msg->lm_fl.l_pid = nlop->nlo_pid;
 810
 811         error = nfs3_lockd_request(np, 0, &msgreq, flags, thd);
 812
 813         nfs_lock_owner_clear_busy(nlop);
 814         return error;
 815 }
 816
 817 /*
 818  * Send an NLM UNLOCK message to the server
 819  */
 820 int
 821 nfs3_unlock_rpc(
 822         nfsnode_t np,
 823         struct nfs_lock_owner *nlop,
 824         __unused int type,
 825         uint64_t start,
 826         uint64_t end,
 827         int flags,
 828         thread_t thd,
 829         kauth_cred_t cred)
 830 {
 831         struct nfsmount *nmp;
 832         LOCKD_MSG_REQUEST msgreq;
 833         LOCKD_MSG *msg;
 834
 835         nmp = NFSTONMP(np);
 836         if (!nmp) {
 837                 return ENXIO;
 838         }
 839
 840         /* set up lock message request structure */
 841         bzero(&msgreq, sizeof(msgreq));
 842         msg = &msgreq.lmr_msg;
 843         msg->lm_version = LOCKD_MSG_VERSION;
 844         msg->lm_fh_len = (nmp->nm_vers == NFS_VER2) ? NFSX_V2FH : np->n_fhsize;
 845         bcopy(np->n_fhp, msg->lm_fh, msg->lm_fh_len);
 846         cru2x(cred, &msg->lm_cred);
 847
 848         msg->lm_fl.l_whence = SEEK_SET;
 849         msg->lm_fl.l_start = start;
 850         msg->lm_fl.l_len = NFS_FLOCK_LENGTH(start, end);
 851         msg->lm_fl.l_type = F_UNLCK;
 852         msg->lm_fl.l_pid = nlop->nlo_pid;
 853
 854         return nfs3_lockd_request(np, F_UNLCK, &msgreq, flags, thd);
 855 }
 856
 857 /*
 858  * Send an NLM LOCK TEST message to the server
 859  */
 860 int
 861 nfs3_getlock_rpc(
 862         nfsnode_t np,
 863         struct nfs_lock_owner *nlop,
 864         struct flock *fl,
 865         uint64_t start,
 866         uint64_t end,
 867         vfs_context_t ctx)
 868 {
 869         struct nfsmount *nmp;
 870         int error;
 871         LOCKD_MSG_REQUEST msgreq;
 872         LOCKD_MSG *msg;
 873
 874         nmp = NFSTONMP(np);
 875         if (nfs_mount_gone(nmp)) {
 876                 return ENXIO;
 877         }
 878
 879         /* set up lock message request structure */
 880         bzero(&msgreq, sizeof(msgreq));
 881         msg = &msgreq.lmr_msg;
 882         msg->lm_version = LOCKD_MSG_VERSION;
 883         msg->lm_flags |= LOCKD_MSG_TEST;
 884         msg->lm_fh_len = (nmp->nm_vers == NFS_VER2) ? NFSX_V2FH : np->n_fhsize;
 885         bcopy(np->n_fhp, msg->lm_fh, msg->lm_fh_len);
 886         cru2x(vfs_context_ucred(ctx), &msg->lm_cred);
 887
 888         msg->lm_fl.l_whence = SEEK_SET;
 889         msg->lm_fl.l_start = start;
 890         msg->lm_fl.l_len = NFS_FLOCK_LENGTH(start, end);
 891         msg->lm_fl.l_type = fl->l_type;
 892         msg->lm_fl.l_pid = nlop->nlo_pid;
 893
 894         error = nfs3_lockd_request(np, 0, &msgreq, 0, vfs_context_thread(ctx));
 895
 896         if (!error && (msg->lm_flags & LOCKD_MSG_TEST) && !msgreq.lmr_errno) {
 897                 if (msg->lm_fl.l_type != F_UNLCK) {
 898                         fl->l_type = msg->lm_fl.l_type;
 899                         fl->l_pid = msg->lm_fl.l_pid;
 900                         fl->l_start = msg->lm_fl.l_start;
 901                         fl->l_len = msg->lm_fl.l_len;
 902                         fl->l_whence = SEEK_SET;
 903                 } else {
 904                         fl->l_type = F_UNLCK;
 905                 }
 906         }
 907
 908         return error;
 909 }
 910
 911 /*
 912  * nfslockdans --
 913  *      NFS advisory byte-level locks answer from the lock daemon.
 914  */
 915 int
 916 nfslockdans(proc_t p, struct lockd_ans *ansp)
 917 {
 918         LOCKD_MSG_REQUEST *msgreq;
 919         int error;
 920
 921         /* Let root make this call. */
 922         error = proc_suser(p);
 923         if (error) {
 924                 return error;
 925         }
 926
 927         /* the version should match, or we're out of sync */
 928         if (ansp->la_version != LOCKD_ANS_VERSION) {
 929                 return EINVAL;
 930         }
 931
 932         lck_mtx_lock(&nfs_lock_mutex);
 933
 934         /* try to find the lockd message by transaction id (cookie) */
 935         msgreq = nfs_lockdmsg_find_by_xid(ansp->la_xid);
 936         if (ansp->la_flags & LOCKD_ANS_GRANTED) {
 937                 /*
 938                  * We can't depend on the granted message having our cookie,
 939                  * so we check the answer against the lockd message found.
 940                  * If no message was found or it doesn't match the answer,
 941                  * we look for the lockd message by the answer's lock info.
 942                  */
 943                 if (!msgreq || nfs_lockdmsg_compare_to_answer(msgreq, ansp)) {
 944                         msgreq = nfs_lockdmsg_find_by_answer(ansp);
 945                 }
 946                 /*
 947                  * We need to make sure this request isn't being cancelled
 948                  * If it is, we don't want to accept the granted message.
 949                  */
 950                 if (msgreq && (msgreq->lmr_msg.lm_flags & LOCKD_MSG_CANCEL)) {
 951                         msgreq = NULL;
 952                 }
 953         }
 954         if (!msgreq) {
 955                 lck_mtx_unlock(&nfs_lock_mutex);
 956                 return EPIPE;
 957         }
 958
 959         msgreq->lmr_errno = ansp->la_errno;
 960         if ((msgreq->lmr_msg.lm_flags & LOCKD_MSG_TEST) && msgreq->lmr_errno == 0) {
 961                 if (ansp->la_flags & LOCKD_ANS_LOCK_INFO) {
 962                         if (ansp->la_flags & LOCKD_ANS_LOCK_EXCL) {
 963                                 msgreq->lmr_msg.lm_fl.l_type = F_WRLCK;
 964                         } else {
 965                                 msgreq->lmr_msg.lm_fl.l_type = F_RDLCK;
 966                         }
 967                         msgreq->lmr_msg.lm_fl.l_pid = ansp->la_pid;
 968                         msgreq->lmr_msg.lm_fl.l_start = ansp->la_start;
 969                         msgreq->lmr_msg.lm_fl.l_len = ansp->la_len;
 970                 } else {
 971                         msgreq->lmr_msg.lm_fl.l_type = F_UNLCK;
 972                 }
 973         }
 974         if (ansp->la_flags & LOCKD_ANS_DENIED_GRACE) {
 975                 msgreq->lmr_msg.lm_flags |= LOCKD_MSG_DENIED_GRACE;
 976         }
 977
 978         msgreq->lmr_answered = 1;
 979         lck_mtx_unlock(&nfs_lock_mutex);
 980         wakeup(msgreq);
 981
 982         return 0;
 983 }
 984
 985 /*
 986  * nfslockdnotify --
 987  *      NFS host restart notification from the lock daemon.
 988  *
 989  * Used to initiate reclaiming of held locks when a server we
 990  * have mounted reboots.
 991  */
 992 int
 993 nfslockdnotify(proc_t p, user_addr_t argp)
 994 {
 995         int error, i, headsize;
 996         struct lockd_notify ln;
 997         struct nfsmount *nmp;
 998         struct sockaddr *saddr;
 999
1000         /* Let root make this call. */
1001         error = proc_suser(p);
1002         if (error) {
1003                 return error;
1004         }
1005
1006         headsize = (char*)&ln.ln_addr[0] - (char*)&ln.ln_version;
1007         error = copyin(argp, &ln, headsize);
1008         if (error) {
1009                 return error;
1010         }
1011         if (ln.ln_version != LOCKD_NOTIFY_VERSION) {
1012                 return EINVAL;
1013         }
1014         if ((ln.ln_addrcount < 1) || (ln.ln_addrcount > 128)) {
1015                 return EINVAL;
1016         }
1017         argp += headsize;
1018         saddr = (struct sockaddr *)&ln.ln_addr[0];
1019
1020         lck_mtx_lock(&nfs_lock_mutex);
1021
1022         for (i = 0; i < ln.ln_addrcount; i++) {
1023                 error = copyin(argp, &ln.ln_addr[0], sizeof(ln.ln_addr[0]));
1024                 if (error) {
1025                         break;
1026                 }
1027                 argp += sizeof(ln.ln_addr[0]);
1028                 /* scan lockd mount list for match to this address */
1029                 TAILQ_FOREACH(nmp, &nfs_lockd_mount_list, nm_ldlink) {
1030                         /* check if address matches this mount's server address */
1031                         if (!nmp->nm_saddr || nfs_sockaddr_cmp(saddr, nmp->nm_saddr)) {
1032                                 continue;
1033                         }
1034                         /* We have a match!  Mark it as needing recovery. */
1035                         lck_mtx_lock(&nmp->nm_lock);
1036                         nfs_need_recover(nmp, 0);
1037                         lck_mtx_unlock(&nmp->nm_lock);
1038                 }
1039         }
1040
1041         lck_mtx_unlock(&nfs_lock_mutex);
1042
1043         return error;
1044 }
1045
1046 #endif /* CONFIG_NFS_CLIENT */