/*
 * Copyright (c) 2002-2016 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*-
 * Copyright (c) 1997 Berkeley Software Design, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Berkeley Software Design Inc's name may not be used to endorse or
 *    promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from BSDI nfs_lock.c,v 2.4 1998/12/14 23:49:56 jch Exp
 */

#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/fcntl.h>
#include <sys/kernel.h>                 /* for hz */
#include <sys/file_internal.h>
#include <sys/malloc.h>
#include <sys/lockf.h>                  /* Must come after sys/malloc.h */
#include <sys/kpi_mbuf.h>
#include <sys/mount_internal.h>
#include <sys/proc_internal.h>          /* for p_start */
#include <sys/kauth.h>
#include <sys/resourcevar.h>
#include <sys/socket.h>
#include <sys/unistd.h>
#include <sys/user.h>
#include <sys/vnode_internal.h>

#include <kern/thread.h>
#include <kern/host.h>

#include <machine/limits.h>

#include <net/if.h>

#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
#include <nfs/nfs_gss.h>
#include <nfs/nfsmount.h>
#include <nfs/nfsnode.h>
#include <nfs/nfs_lock.h>

#include <mach/host_priv.h>
#include <mach/mig_errors.h>
#include <mach/host_special_ports.h>
#include <lockd/lockd_mach.h>

extern void ipc_port_release_send(ipc_port_t);

/*
 * Pending lock request messages are kept in this queue,
 * which is sorted by transaction ID (xid).
 */
static uint64_t nfs_lockxid = 0;
static LOCKD_MSG_QUEUE nfs_pendlockq;

/* list of mounts that are (potentially) making lockd requests */
TAILQ_HEAD(nfs_lockd_mount_list, nfsmount) nfs_lockd_mount_list;

static lck_grp_t *nfs_lock_lck_grp;
static lck_mtx_t *nfs_lock_mutex;

void nfs_lockdmsg_enqueue(LOCKD_MSG_REQUEST *);
void nfs_lockdmsg_dequeue(LOCKD_MSG_REQUEST *);
int nfs_lockdmsg_compare_to_answer(LOCKD_MSG_REQUEST *, struct lockd_ans *);
LOCKD_MSG_REQUEST *nfs_lockdmsg_find_by_answer(struct lockd_ans *);
LOCKD_MSG_REQUEST *nfs_lockdmsg_find_by_xid(uint64_t);
uint64_t nfs_lockxid_get(void);
int nfs_lockd_send_request(LOCKD_MSG *, int);

/*
 * initialize global nfs lock state
 */
void
nfs_lockinit(void)
{
	TAILQ_INIT(&nfs_pendlockq);
	TAILQ_INIT(&nfs_lockd_mount_list);

	nfs_lock_lck_grp = lck_grp_alloc_init("nfs_lock", LCK_GRP_ATTR_NULL);
	nfs_lock_mutex = lck_mtx_alloc_init(nfs_lock_lck_grp, LCK_ATTR_NULL);
}

/*
 * Register a mount as (potentially) making lockd requests.
 */
void
nfs_lockd_mount_register(struct nfsmount *nmp)
{
	lck_mtx_lock(nfs_lock_mutex);
	TAILQ_INSERT_HEAD(&nfs_lockd_mount_list, nmp, nm_ldlink);
	nfs_lockd_mounts++;
	lck_mtx_unlock(nfs_lock_mutex);
}

/*
 * Unregister a mount as (potentially) making lockd requests.
 *
 * When the lockd mount count drops to zero, send a shutdown request to
 * lockd if we've sent any requests to it.
 */
void
nfs_lockd_mount_unregister(struct nfsmount *nmp)
{
	int send_shutdown;
	mach_port_t lockd_port = IPC_PORT_NULL;
	kern_return_t kr;

	lck_mtx_lock(nfs_lock_mutex);
	if (nmp->nm_ldlink.tqe_next == NFSNOLIST) {
		lck_mtx_unlock(nfs_lock_mutex);
		return;
	}

	TAILQ_REMOVE(&nfs_lockd_mount_list, nmp, nm_ldlink);
	nmp->nm_ldlink.tqe_next = NFSNOLIST;

	nfs_lockd_mounts--;

	/* send a shutdown request if there are no more lockd mounts */
	send_shutdown = ((nfs_lockd_mounts == 0) && nfs_lockd_request_sent);
	if (send_shutdown) {
		nfs_lockd_request_sent = 0;
	}

	lck_mtx_unlock(nfs_lock_mutex);

	if (!send_shutdown) {
		return;
	}

	/*
	 * Let lockd know that it is no longer needed for any NFS mounts
	 */
	kr = host_get_lockd_port(host_priv_self(), &lockd_port);
	if ((kr != KERN_SUCCESS) || !IPC_PORT_VALID(lockd_port)) {
		printf("nfs_lockd_mount_unregister: shutdown couldn't get port, kr %d, port %s\n",
		    kr, (lockd_port == IPC_PORT_NULL) ? "NULL" :
		    (lockd_port == IPC_PORT_DEAD) ? "DEAD" : "VALID");
		return;
	}

	kr = lockd_shutdown(lockd_port);
	if (kr != KERN_SUCCESS) {
		printf("nfs_lockd_mount_unregister: shutdown %d\n", kr);
	}

	ipc_port_release_send(lockd_port);
}

/*
 * insert a lock request message into the pending queue
 * (nfs_lock_mutex must be held)
 */
void
nfs_lockdmsg_enqueue(LOCKD_MSG_REQUEST *msgreq)
{
	LOCKD_MSG_REQUEST *mr;

	mr = TAILQ_LAST(&nfs_pendlockq, nfs_lock_msg_queue);
	if (!mr || (msgreq->lmr_msg.lm_xid > mr->lmr_msg.lm_xid)) {
		/* fast path: empty queue or new largest xid */
		TAILQ_INSERT_TAIL(&nfs_pendlockq, msgreq, lmr_next);
		return;
	}
	/* slow path: need to walk list to find insertion point */
	while (mr && (msgreq->lmr_msg.lm_xid < mr->lmr_msg.lm_xid)) {
		mr = TAILQ_PREV(mr, nfs_lock_msg_queue, lmr_next);
	}
	if (mr) {
		TAILQ_INSERT_AFTER(&nfs_pendlockq, mr, msgreq, lmr_next);
	} else {
		TAILQ_INSERT_HEAD(&nfs_pendlockq, msgreq, lmr_next);
	}
}

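/*
 * For illustration (not part of the original logic): enqueuing requests
 * with xids 5, 7, then 6 leaves the queue ordered 5, 6, 7.  Since xids
 * are handed out in increasing order, the common case is a new largest
 * xid that takes the fast path straight to the tail.
 */
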
/*
 * remove a lock request message from the pending queue
 * (nfs_lock_mutex must be held)
 */
void
nfs_lockdmsg_dequeue(LOCKD_MSG_REQUEST *msgreq)
{
	TAILQ_REMOVE(&nfs_pendlockq, msgreq, lmr_next);
}

/*
 * find a pending lock request message by xid
 *
 * We search from the head of the list assuming that the message we're
 * looking for is for an older request (because we have an answer to it).
 * This assumes that lock requests will be answered primarily in FIFO order.
 * However, this may not be the case if there are blocked requests. We may
 * want to move blocked requests to a separate queue (but that'll complicate
 * duplicate xid checking).
 *
 * (nfs_lock_mutex must be held)
 */
LOCKD_MSG_REQUEST *
nfs_lockdmsg_find_by_xid(uint64_t lockxid)
{
	LOCKD_MSG_REQUEST *mr;

	TAILQ_FOREACH(mr, &nfs_pendlockq, lmr_next) {
		if (mr->lmr_msg.lm_xid == lockxid) {
			return mr;
		}
		if (mr->lmr_msg.lm_xid > lockxid) {
			return NULL;
		}
	}
	return mr;
}

/*
 * Because we can't depend on nlm_granted messages containing the same
 * cookie we sent with the original lock request, we need code to test
 * if an nlm_granted answer matches the lock request. We also need code
 * that can find a lockd message based solely on the nlm_granted answer.
 */

/*
 * compare lockd message to answer
 *
 * returns 0 on equality and 1 if different
 */
int
nfs_lockdmsg_compare_to_answer(LOCKD_MSG_REQUEST *msgreq, struct lockd_ans *ansp)
{
	if (!(ansp->la_flags & LOCKD_ANS_LOCK_INFO)) {
		return 1;
	}
	if (msgreq->lmr_msg.lm_fl.l_pid != ansp->la_pid) {
		return 1;
	}
	if (msgreq->lmr_msg.lm_fl.l_start != ansp->la_start) {
		return 1;
	}
	if (msgreq->lmr_msg.lm_fl.l_len != ansp->la_len) {
		return 1;
	}
	if (msgreq->lmr_msg.lm_fh_len != ansp->la_fh_len) {
		return 1;
	}
	if (bcmp(msgreq->lmr_msg.lm_fh, ansp->la_fh, ansp->la_fh_len)) {
		return 1;
	}
	return 0;
}

/*
 * find a pending lock request message based on the lock info provided
 * in the lockd_ans/nlm_granted data. We need this because we can't
 * depend on nlm_granted messages containing the same cookie we sent
 * with the original lock request.
 *
 * We search from the head of the list assuming that the message we're
 * looking for is for an older request (because we have an answer to it).
 * This assumes that lock requests will be answered primarily in FIFO order.
 * However, this may not be the case if there are blocked requests. We may
 * want to move blocked requests to a separate queue (but that'll complicate
 * duplicate xid checking).
 *
 * (nfs_lock_mutex must be held)
 */
LOCKD_MSG_REQUEST *
nfs_lockdmsg_find_by_answer(struct lockd_ans *ansp)
{
	LOCKD_MSG_REQUEST *mr;

	if (!(ansp->la_flags & LOCKD_ANS_LOCK_INFO)) {
		return NULL;
	}
	TAILQ_FOREACH(mr, &nfs_pendlockq, lmr_next) {
		if (!nfs_lockdmsg_compare_to_answer(mr, ansp)) {
			break;
		}
	}
	return mr;
}

/*
 * return the next unique lock request transaction ID
 * (nfs_lock_mutex must be held)
 */
uint64_t
nfs_lockxid_get(void)
{
	LOCKD_MSG_REQUEST *mr;

	/* derive initial lock xid from system time */
	if (!nfs_lockxid) {
		/*
		 * Note: it's OK if this code inits nfs_lockxid to 0 (for example,
		 * due to a broken clock) because we immediately increment it
		 * and we guarantee to never use xid 0. So, nfs_lockxid should only
		 * ever be 0 the first time this function is called.
		 */
		struct timeval tv;
		microtime(&tv);
		nfs_lockxid = (uint64_t)tv.tv_sec << 12;
	}

	/* make sure we get a unique xid */
	do {
		/* Skip zero xid if it should ever happen. */
		if (++nfs_lockxid == 0) {
			nfs_lockxid++;
		}
		if (!(mr = TAILQ_LAST(&nfs_pendlockq, nfs_lock_msg_queue)) ||
		    (mr->lmr_msg.lm_xid < nfs_lockxid)) {
			/* fast path: empty queue or new largest xid */
			break;
		}
		/* check if xid is already in use */
	} while (nfs_lockdmsg_find_by_xid(nfs_lockxid));

	return nfs_lockxid;
}

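/*
 * Example of the seeding arithmetic above (illustrative only): with
 * tv_sec == 1000, the initial xid is 1000 << 12 == 4096000.  Because the
 * low 12 bits start at zero, more than 4096 xids would have to be issued
 * per second of clock time before the counter could collide with the
 * value a later seeding would produce.
 */
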
#define MACH_MAX_TRIES 3

int
nfs_lockd_send_request(LOCKD_MSG *msg, int interruptable)
{
	kern_return_t kr;
	int retries = 0;
	mach_port_t lockd_port = IPC_PORT_NULL;

	kr = host_get_lockd_port(host_priv_self(), &lockd_port);
	if (kr != KERN_SUCCESS || !IPC_PORT_VALID(lockd_port)) {
		return ENOTSUP;
	}

	do {
		/* In the kernel all mach messaging is interruptable */
		do {
			kr = lockd_request(
				lockd_port,
				msg->lm_version,
				msg->lm_flags,
				msg->lm_xid,
				msg->lm_fl.l_start,
				msg->lm_fl.l_len,
				msg->lm_fl.l_pid,
				msg->lm_fl.l_type,
				msg->lm_fl.l_whence,
				(uint32_t *)&msg->lm_addr,
				(uint32_t *)&msg->lm_cred,
				msg->lm_fh_len,
				msg->lm_fh);
			if (kr != KERN_SUCCESS) {
				printf("lockd_request received %d!\n", kr);
			}
		} while (!interruptable && kr == MACH_SEND_INTERRUPTED);
	} while (kr == MIG_SERVER_DIED && retries++ < MACH_MAX_TRIES);

	ipc_port_release_send(lockd_port);
	switch (kr) {
	case KERN_SUCCESS:
		return 0;
	case MACH_SEND_INTERRUPTED:
		return EINTR;
	default:
		/*
		 * Other MACH or MIG errors we will retry. Eventually
		 * we will call nfs_down and allow the user to disable
		 * locking.
		 */
		return EAGAIN;
	}
}

/*
 * NFS advisory byte-level locks (client)
 */
int
nfs3_lockd_request(
	nfsnode_t np,
	int type,
	LOCKD_MSG_REQUEST *msgreq,
	int flags,
	thread_t thd)
{
	LOCKD_MSG *msg = &msgreq->lmr_msg;
	int error, error2;
	int interruptable, slpflag;
	struct nfsmount *nmp;
	struct timeval now;
	int timeo, starttime, endtime, lastmsg, wentdown = 0;
	struct timespec ts;
	struct sockaddr *saddr;

	nmp = NFSTONMP(np);
	if (!nmp || !nmp->nm_saddr) {
		return ENXIO;
	}

	lck_mtx_lock(&nmp->nm_lock);
	saddr = nmp->nm_saddr;
	bcopy(saddr, &msg->lm_addr, min(sizeof msg->lm_addr, saddr->sa_len));
	if (nmp->nm_vers == NFS_VER3) {
		msg->lm_flags |= LOCKD_MSG_NFSV3;
	}

	if (nmp->nm_sotype != SOCK_DGRAM) {
		msg->lm_flags |= LOCKD_MSG_TCP;
	}

	microuptime(&now);
	starttime = now.tv_sec;
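	/*
	 * Seed lastmsg in the past so the first "lockd not responding"
	 * warning can fire after the initial delay rather than waiting
	 * for the full tprintf delay.
	 */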
	lastmsg = now.tv_sec - ((nmp->nm_tprintf_delay) - (nmp->nm_tprintf_initial_delay));
	interruptable = NMFLAG(nmp, INTR);
	lck_mtx_unlock(&nmp->nm_lock);

	lck_mtx_lock(nfs_lock_mutex);

	/* allocate unique xid */
	msg->lm_xid = nfs_lockxid_get();
	nfs_lockdmsg_enqueue(msgreq);

	timeo = 4;

	for (;;) {
		nfs_lockd_request_sent = 1;

		/* need to drop nfs_lock_mutex while calling nfs_lockd_send_request() */
		lck_mtx_unlock(nfs_lock_mutex);
		error = nfs_lockd_send_request(msg, interruptable);
		lck_mtx_lock(nfs_lock_mutex);
		if (error && error != EAGAIN) {
			break;
		}

		/*
		 * Always wait for an answer. Not waiting for unlocks could
		 * cause a lock to be left if the unlock request gets dropped.
		 */

		/*
		 * Retry if it takes too long to get a response.
		 *
		 * The timeout numbers were picked out of thin air... they start
		 * at 4 and double each timeout with a max of 30 seconds.
		 *
		 * In order to maintain responsiveness, we pass a small timeout
		 * to msleep and calculate the timeouts ourselves. This allows
		 * us to pick up on mount changes quicker.
		 */
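		/*
		 * For illustration: a plain request is resent after waits of
		 * 4, 8, 16, 30, 30, ... seconds.  R_RECOVER requests keep
		 * their current timeout, and a cancel resets it to 2 seconds.
		 */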
wait_for_granted:
		error = EWOULDBLOCK;
		slpflag = (interruptable && (type != F_UNLCK)) ? PCATCH : 0;
		ts.tv_sec = 2;
		ts.tv_nsec = 0;
		microuptime(&now);
		endtime = now.tv_sec + timeo;
		while (now.tv_sec < endtime) {
			error = error2 = 0;
			if (!msgreq->lmr_answered) {
				error = msleep(msgreq, nfs_lock_mutex, slpflag | PUSER, "lockd", &ts);
				slpflag = 0;
			}
			if (msgreq->lmr_answered) {
				/*
				 * Note: it's possible to have a lock granted at
				 * essentially the same time that we get interrupted.
				 * Since the lock may be granted, we can't return an
				 * error from this request or we might not unlock the
				 * lock that's been granted.
				 */
				nmp = NFSTONMP(np);
				if ((msgreq->lmr_errno == ENOTSUP) && nmp &&
				    (nmp->nm_state & NFSSTA_LOCKSWORK)) {
					/*
					 * We have evidence that locks work, yet lockd
					 * returned ENOTSUP. This is probably because
					 * it was unable to contact the server's lockd
					 * to send it the request.
					 *
					 * Because we know locks work, we'll consider
					 * this failure to be a timeout.
					 */
					error = EWOULDBLOCK;
				} else {
					error = 0;
				}
				break;
			}
			if (error != EWOULDBLOCK) {
				break;
			}
			/* check that we still have our mount... */
			/* ...and that we still support locks */
			/* ...and that there isn't a recovery pending */
			nmp = NFSTONMP(np);
			if ((error2 = nfs_sigintr(nmp, NULL, NULL, 0))) {
				error = error2;
				if (type == F_UNLCK) {
					printf("nfs3_lockd_request: aborting unlock request, error %d\n", error);
				}
				break;
			}
			lck_mtx_lock(&nmp->nm_lock);
			if (nmp->nm_lockmode == NFS_LOCK_MODE_DISABLED) {
				lck_mtx_unlock(&nmp->nm_lock);
				break;
			}
			if ((nmp->nm_state & NFSSTA_RECOVER) && !(flags & R_RECOVER)) {
				/* recovery pending... return an error that'll get this operation restarted */
				error = NFSERR_GRACE;
				lck_mtx_unlock(&nmp->nm_lock);
				break;
			}
			interruptable = NMFLAG(nmp, INTR);
			lck_mtx_unlock(&nmp->nm_lock);
			microuptime(&now);
		}
		if (error) {
			/* check that we still have our mount... */
			nmp = NFSTONMP(np);
			if ((error2 = nfs_sigintr(nmp, NULL, NULL, 0))) {
				error = error2;
				if (error2 != EINTR) {
					if (type == F_UNLCK) {
						printf("nfs3_lockd_request: aborting unlock request, error %d\n", error);
					}
					break;
				}
			}
			/* ...and that we still support locks */
			lck_mtx_lock(&nmp->nm_lock);
			if (nmp->nm_lockmode == NFS_LOCK_MODE_DISABLED) {
				if (error == EWOULDBLOCK) {
					error = ENOTSUP;
				}
				lck_mtx_unlock(&nmp->nm_lock);
				break;
			}
			/* ...and that there isn't a recovery pending */
			if ((error == EWOULDBLOCK) && (nmp->nm_state & NFSSTA_RECOVER) && !(flags & R_RECOVER)) {
				/* recovery pending... return to allow recovery to occur */
				error = NFSERR_DENIED;
				lck_mtx_unlock(&nmp->nm_lock);
				break;
			}
			interruptable = NMFLAG(nmp, INTR);
			if ((error != EWOULDBLOCK) ||
			    ((nmp->nm_state & NFSSTA_RECOVER) && !(flags & R_RECOVER)) ||
			    ((flags & R_RECOVER) && ((now.tv_sec - starttime) > 30))) {
				if ((error == EWOULDBLOCK) && (flags & R_RECOVER)) {
					/* give up if this is for recovery and taking too long */
					error = ETIMEDOUT;
				} else if ((nmp->nm_state & NFSSTA_RECOVER) && !(flags & R_RECOVER)) {
					/* recovery pending... return an error that'll get this operation restarted */
					error = NFSERR_GRACE;
				}
				lck_mtx_unlock(&nmp->nm_lock);
				/*
				 * We're going to bail on this request.
				 * If we were a blocked lock request, send a cancel.
				 */
				if ((msgreq->lmr_errno == EINPROGRESS) &&
				    !(msg->lm_flags & LOCKD_MSG_CANCEL)) {
					/* set this request up as a cancel */
					msg->lm_flags |= LOCKD_MSG_CANCEL;
					nfs_lockdmsg_dequeue(msgreq);
					msg->lm_xid = nfs_lockxid_get();
					nfs_lockdmsg_enqueue(msgreq);
					msgreq->lmr_saved_errno = error;
					msgreq->lmr_errno = 0;
					msgreq->lmr_answered = 0;
					/* reset timeout */
					timeo = 2;
					/* send cancel request */
					continue;
				}
				break;
			}

			/* warn if we're not getting any response */
			microuptime(&now);
			if ((msgreq->lmr_errno != EINPROGRESS) &&
			    !(msg->lm_flags & LOCKD_MSG_DENIED_GRACE) &&
			    (nmp->nm_tprintf_initial_delay != 0) &&
			    ((lastmsg + nmp->nm_tprintf_delay) < now.tv_sec)) {
				lck_mtx_unlock(&nmp->nm_lock);
				lastmsg = now.tv_sec;
				nfs_down(nmp, thd, 0, NFSSTA_LOCKTIMEO, "lockd not responding", 1);
				wentdown = 1;
			} else {
				lck_mtx_unlock(&nmp->nm_lock);
			}

			if (msgreq->lmr_errno == EINPROGRESS) {
				/*
				 * We've got a blocked lock request that we are
				 * going to retry. First, we'll want to try to
				 * send a cancel for the previous request.
				 *
				 * Clear errno so if we don't get a response
				 * to the resend we'll call nfs_down().
				 * Also reset timeout because we'll expect a
				 * quick response to the cancel/resend (even if
				 * it is NLM_BLOCKED).
				 */
				msg->lm_flags |= LOCKD_MSG_CANCEL;
				nfs_lockdmsg_dequeue(msgreq);
				msg->lm_xid = nfs_lockxid_get();
				nfs_lockdmsg_enqueue(msgreq);
				msgreq->lmr_saved_errno = msgreq->lmr_errno;
				msgreq->lmr_errno = 0;
				msgreq->lmr_answered = 0;
				timeo = 2;
				/* send cancel then resend request */
				continue;
			}

			/*
			 * We timed out, so we will resend the request.
			 */
			if (!(flags & R_RECOVER)) {
				timeo *= 2;
			}
			if (timeo > 30) {
				timeo = 30;
			}
			/* resend request */
			continue;
		}

		/* we got a response, so the server's lockd is OK */
		nfs_up(NFSTONMP(np), thd, NFSSTA_LOCKTIMEO,
		    wentdown ? "lockd alive again" : NULL);
		wentdown = 0;

		if (msgreq->lmr_answered && (msg->lm_flags & LOCKD_MSG_DENIED_GRACE)) {
			/*
			 * The lock request was denied because the server lockd is
			 * still in its grace period. So, we need to try the
			 * request again in a little bit. Return the GRACE error so
			 * the higher levels can perform the retry.
			 */
			msgreq->lmr_saved_errno = msgreq->lmr_errno = error = NFSERR_GRACE;
		}

		if (msgreq->lmr_errno == EINPROGRESS) {
			/* got NLM_BLOCKED response */
			/* need to wait for NLM_GRANTED */
			timeo = 30;
			msgreq->lmr_answered = 0;
			goto wait_for_granted;
		}

		if ((msg->lm_flags & LOCKD_MSG_CANCEL) &&
		    (msgreq->lmr_saved_errno == EINPROGRESS)) {
			/*
			 * We just got a successful reply to the
			 * cancel of the previous blocked lock request.
			 * Now, go ahead and return a DENIED error so the
			 * higher levels can resend the request.
			 */
			msg->lm_flags &= ~LOCKD_MSG_CANCEL;
			error = NFSERR_DENIED;
			/* Will dequeue msgreq after the following break at the end of this routine */
			break;
		}

		/*
		 * If the blocked lock request was cancelled,
		 * restore the error condition from when we
		 * originally bailed on the request.
		 */
		if (msg->lm_flags & LOCKD_MSG_CANCEL) {
			msg->lm_flags &= ~LOCKD_MSG_CANCEL;
			error = msgreq->lmr_saved_errno;
		} else {
			error = msgreq->lmr_errno;
		}

		nmp = NFSTONMP(np);
		if ((error == ENOTSUP) && nmp && !(nmp->nm_state & NFSSTA_LOCKSWORK)) {
			/*
			 * We have NO evidence that locks work and lockd
			 * returned ENOTSUP. Let's take this as a hint
			 * that locks aren't supported and disable them
			 * for this mount.
			 */
			nfs_lockdmsg_dequeue(msgreq);
			lck_mtx_unlock(nfs_lock_mutex);
			lck_mtx_lock(&nmp->nm_lock);
			if (nmp->nm_lockmode == NFS_LOCK_MODE_ENABLED) {
				nmp->nm_lockmode = NFS_LOCK_MODE_DISABLED;
				nfs_lockd_mount_unregister(nmp);
			}
			nmp->nm_state &= ~NFSSTA_LOCKTIMEO;
			lck_mtx_unlock(&nmp->nm_lock);
			printf("lockd returned ENOTSUP, disabling locks for nfs server: %s\n",
			    vfs_statfs(nmp->nm_mountp)->f_mntfromname);
			return error;
		}
		if (!error) {
			/* record that NFS file locking has worked on this mount */
			if (nmp) {
				lck_mtx_lock(&nmp->nm_lock);
				if (!(nmp->nm_state & NFSSTA_LOCKSWORK)) {
					nmp->nm_state |= NFSSTA_LOCKSWORK;
				}
				lck_mtx_unlock(&nmp->nm_lock);
			}
		}
		break;
	}

	nfs_lockdmsg_dequeue(msgreq);

	lck_mtx_unlock(nfs_lock_mutex);

	return error;
}

/*
 * Send an NLM LOCK message to the server
 */
int
nfs3_setlock_rpc(
	nfsnode_t np,
	struct nfs_open_file *nofp,
	struct nfs_file_lock *nflp,
	int reclaim,
	int flags,
	thread_t thd,
	kauth_cred_t cred)
{
	struct nfs_lock_owner *nlop = nflp->nfl_owner;
	struct nfsmount *nmp;
	int error;
	LOCKD_MSG_REQUEST msgreq;
	LOCKD_MSG *msg;

	nmp = NFSTONMP(np);
	if (nfs_mount_gone(nmp)) {
		return ENXIO;
	}

	if (!nlop->nlo_open_owner) {
		nfs_open_owner_ref(nofp->nof_owner);
		nlop->nlo_open_owner = nofp->nof_owner;
	}
	if ((error = nfs_lock_owner_set_busy(nlop, thd))) {
		return error;
	}

	/* set up lock message request structure */
	bzero(&msgreq, sizeof(msgreq));
	msg = &msgreq.lmr_msg;
	msg->lm_version = LOCKD_MSG_VERSION;
	if ((nflp->nfl_flags & NFS_FILE_LOCK_WAIT) && !reclaim) {
		msg->lm_flags |= LOCKD_MSG_BLOCK;
	}
	if (reclaim) {
		msg->lm_flags |= LOCKD_MSG_RECLAIM;
	}
	msg->lm_fh_len = (nmp->nm_vers == NFS_VER2) ? NFSX_V2FH : np->n_fhsize;
	bcopy(np->n_fhp, msg->lm_fh, msg->lm_fh_len);
	cru2x(cred, &msg->lm_cred);

	msg->lm_fl.l_whence = SEEK_SET;
	msg->lm_fl.l_start = nflp->nfl_start;
	msg->lm_fl.l_len = NFS_FLOCK_LENGTH(nflp->nfl_start, nflp->nfl_end);
	msg->lm_fl.l_type = nflp->nfl_type;
	msg->lm_fl.l_pid = nlop->nlo_pid;

	error = nfs3_lockd_request(np, 0, &msgreq, flags, thd);

	nfs_lock_owner_clear_busy(nlop);
	return error;
}

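/*
 * Example of the byte-range mapping above (illustrative; this assumes
 * NFS_FLOCK_LENGTH() maps an end of UINT64_MAX to a length of 0): a lock
 * on bytes 100-199 goes out as l_start 100, l_len 100, while a whole-file
 * lock (end == UINT64_MAX) goes out as l_len 0.
 */
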
/*
 * Send an NLM UNLOCK message to the server
 */
int
nfs3_unlock_rpc(
	nfsnode_t np,
	struct nfs_lock_owner *nlop,
	__unused int type,
	uint64_t start,
	uint64_t end,
	int flags,
	thread_t thd,
	kauth_cred_t cred)
{
	struct nfsmount *nmp;
	LOCKD_MSG_REQUEST msgreq;
	LOCKD_MSG *msg;

	nmp = NFSTONMP(np);
	if (!nmp) {
		return ENXIO;
	}

	/* set up lock message request structure */
	bzero(&msgreq, sizeof(msgreq));
	msg = &msgreq.lmr_msg;
	msg->lm_version = LOCKD_MSG_VERSION;
	msg->lm_fh_len = (nmp->nm_vers == NFS_VER2) ? NFSX_V2FH : np->n_fhsize;
	bcopy(np->n_fhp, msg->lm_fh, msg->lm_fh_len);
	cru2x(cred, &msg->lm_cred);

	msg->lm_fl.l_whence = SEEK_SET;
	msg->lm_fl.l_start = start;
	msg->lm_fl.l_len = NFS_FLOCK_LENGTH(start, end);
	msg->lm_fl.l_type = F_UNLCK;
	msg->lm_fl.l_pid = nlop->nlo_pid;

	return nfs3_lockd_request(np, F_UNLCK, &msgreq, flags, thd);
}

/*
 * Send an NLM LOCK TEST message to the server
 */
int
nfs3_getlock_rpc(
	nfsnode_t np,
	struct nfs_lock_owner *nlop,
	struct flock *fl,
	uint64_t start,
	uint64_t end,
	vfs_context_t ctx)
{
	struct nfsmount *nmp;
	int error;
	LOCKD_MSG_REQUEST msgreq;
	LOCKD_MSG *msg;

	nmp = NFSTONMP(np);
	if (nfs_mount_gone(nmp)) {
		return ENXIO;
	}

	/* set up lock message request structure */
	bzero(&msgreq, sizeof(msgreq));
	msg = &msgreq.lmr_msg;
	msg->lm_version = LOCKD_MSG_VERSION;
	msg->lm_flags |= LOCKD_MSG_TEST;
	msg->lm_fh_len = (nmp->nm_vers == NFS_VER2) ? NFSX_V2FH : np->n_fhsize;
	bcopy(np->n_fhp, msg->lm_fh, msg->lm_fh_len);
	cru2x(vfs_context_ucred(ctx), &msg->lm_cred);

	msg->lm_fl.l_whence = SEEK_SET;
	msg->lm_fl.l_start = start;
	msg->lm_fl.l_len = NFS_FLOCK_LENGTH(start, end);
	msg->lm_fl.l_type = fl->l_type;
	msg->lm_fl.l_pid = nlop->nlo_pid;

	error = nfs3_lockd_request(np, 0, &msgreq, 0, vfs_context_thread(ctx));

	if (!error && (msg->lm_flags & LOCKD_MSG_TEST) && !msgreq.lmr_errno) {
		if (msg->lm_fl.l_type != F_UNLCK) {
			fl->l_type = msg->lm_fl.l_type;
			fl->l_pid = msg->lm_fl.l_pid;
			fl->l_start = msg->lm_fl.l_start;
			fl->l_len = msg->lm_fl.l_len;
			fl->l_whence = SEEK_SET;
		} else {
			fl->l_type = F_UNLCK;
		}
	}

	return error;
}

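/*
 * For illustration: a caller testing for a write lock on the first 100
 * bytes would pass fl->l_type == F_WRLCK with start 0 and end 99; on a
 * successful return, fl->l_type == F_UNLCK means no conflicting lock was
 * found, otherwise fl describes the conflicting lock.
 */
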
/*
 * nfslockdans --
 *	NFS advisory byte-level locks answer from the lock daemon.
 */
int
nfslockdans(proc_t p, struct lockd_ans *ansp)
{
	LOCKD_MSG_REQUEST *msgreq;
	int error;

	/* Let root make this call. */
	error = proc_suser(p);
	if (error) {
		return error;
	}

	/* the version should match, or we're out of sync */
	if (ansp->la_version != LOCKD_ANS_VERSION) {
		return EINVAL;
	}

	lck_mtx_lock(nfs_lock_mutex);

	/* try to find the lockd message by transaction id (cookie) */
	msgreq = nfs_lockdmsg_find_by_xid(ansp->la_xid);
	if (ansp->la_flags & LOCKD_ANS_GRANTED) {
		/*
		 * We can't depend on the granted message having our cookie,
		 * so we check the answer against the lockd message found.
		 * If no message was found or it doesn't match the answer,
		 * we look for the lockd message by the answer's lock info.
		 */
		if (!msgreq || nfs_lockdmsg_compare_to_answer(msgreq, ansp)) {
			msgreq = nfs_lockdmsg_find_by_answer(ansp);
		}
		/*
		 * We need to make sure this request isn't being cancelled.
		 * If it is, we don't want to accept the granted message.
		 */
		if (msgreq && (msgreq->lmr_msg.lm_flags & LOCKD_MSG_CANCEL)) {
			msgreq = NULL;
		}
	}
	if (!msgreq) {
		lck_mtx_unlock(nfs_lock_mutex);
		return EPIPE;
	}

	msgreq->lmr_errno = ansp->la_errno;
	if ((msgreq->lmr_msg.lm_flags & LOCKD_MSG_TEST) && msgreq->lmr_errno == 0) {
		if (ansp->la_flags & LOCKD_ANS_LOCK_INFO) {
			if (ansp->la_flags & LOCKD_ANS_LOCK_EXCL) {
				msgreq->lmr_msg.lm_fl.l_type = F_WRLCK;
			} else {
				msgreq->lmr_msg.lm_fl.l_type = F_RDLCK;
			}
			msgreq->lmr_msg.lm_fl.l_pid = ansp->la_pid;
			msgreq->lmr_msg.lm_fl.l_start = ansp->la_start;
			msgreq->lmr_msg.lm_fl.l_len = ansp->la_len;
		} else {
			msgreq->lmr_msg.lm_fl.l_type = F_UNLCK;
		}
	}
	if (ansp->la_flags & LOCKD_ANS_DENIED_GRACE) {
		msgreq->lmr_msg.lm_flags |= LOCKD_MSG_DENIED_GRACE;
	}

	msgreq->lmr_answered = 1;
	lck_mtx_unlock(nfs_lock_mutex);
	wakeup(msgreq);

	return 0;
}

/*
 * nfslockdnotify --
 *	NFS host restart notification from the lock daemon.
 *
 * Used to initiate reclaiming of held locks when a server we
 * have mounted reboots.
 */
int
nfslockdnotify(proc_t p, user_addr_t argp)
{
	int error, i, headsize;
	struct lockd_notify ln;
	struct nfsmount *nmp;
	struct sockaddr *saddr;

	/* Let root make this call. */
	error = proc_suser(p);
	if (error) {
		return error;
	}

	headsize = (char*)&ln.ln_addr[0] - (char*)&ln.ln_version;
	error = copyin(argp, &ln, headsize);
	if (error) {
		return error;
	}
	if (ln.ln_version != LOCKD_NOTIFY_VERSION) {
		return EINVAL;
	}
	if ((ln.ln_addrcount < 1) || (ln.ln_addrcount > 128)) {
		return EINVAL;
	}
	argp += headsize;
	saddr = (struct sockaddr *)&ln.ln_addr[0];

	lck_mtx_lock(nfs_lock_mutex);

	for (i = 0; i < ln.ln_addrcount; i++) {
		error = copyin(argp, &ln.ln_addr[0], sizeof(ln.ln_addr[0]));
		if (error) {
			break;
		}
		argp += sizeof(ln.ln_addr[0]);
		/* scan lockd mount list for match to this address */
		TAILQ_FOREACH(nmp, &nfs_lockd_mount_list, nm_ldlink) {
			/* check if address matches this mount's server address */
			if (!nmp->nm_saddr || nfs_sockaddr_cmp(saddr, nmp->nm_saddr)) {
				continue;
			}
			/* We have a match! Mark it as needing recovery. */
			lck_mtx_lock(&nmp->nm_lock);
			nfs_need_recover(nmp, 0);
			lck_mtx_unlock(&nmp->nm_lock);
		}
	}

	lck_mtx_unlock(nfs_lock_mutex);

	return error;
}