/* bsd/nfs/nfs_lock.c — from apple/xnu (xnu-6153.121.1) */
1 /*
2 * Copyright (c) 2002-2016 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*-
29 * Copyright (c) 1997 Berkeley Software Design, Inc. All rights reserved.
30 *
31 * Redistribution and use in source and binary forms, with or without
32 * modification, are permitted provided that the following conditions
33 * are met:
34 * 1. Redistributions of source code must retain the above copyright
35 * notice, this list of conditions and the following disclaimer.
36 * 2. Redistributions in binary form must reproduce the above copyright
37 * notice, this list of conditions and the following disclaimer in the
38 * documentation and/or other materials provided with the distribution.
39 * 3. Berkeley Software Design Inc's name may not be used to endorse or
40 * promote products derived from this software without specific prior
41 * written permission.
42 *
43 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
44 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
45 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
46 * ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
47 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
48 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
49 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
50 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
51 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
52 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
53 * SUCH DAMAGE.
54 *
55 * from BSDI nfs_lock.c,v 2.4 1998/12/14 23:49:56 jch Exp
56 */
57
58 #include <nfs/nfs_conf.h>
59 #if CONFIG_NFS_CLIENT
60
61 #include <sys/cdefs.h>
62 #include <sys/param.h>
63 #include <sys/systm.h>
64 #include <sys/fcntl.h>
65 #include <sys/kernel.h> /* for hz */
66 #include <sys/file_internal.h>
67 #include <sys/malloc.h>
68 #include <sys/lockf.h> /* for hz */ /* Must come after sys/malloc.h */
69 #include <sys/kpi_mbuf.h>
70 #include <sys/mount_internal.h>
71 #include <sys/proc_internal.h> /* for p_start */
72 #include <sys/kauth.h>
73 #include <sys/resourcevar.h>
74 #include <sys/socket.h>
75 #include <sys/unistd.h>
76 #include <sys/user.h>
77 #include <sys/vnode_internal.h>
78
79 #include <kern/thread.h>
80 #include <kern/host.h>
81
82 #include <machine/limits.h>
83
84 #include <net/if.h>
85
86 #include <nfs/rpcv2.h>
87 #include <nfs/nfsproto.h>
88 #include <nfs/nfs.h>
89 #include <nfs/nfs_gss.h>
90 #include <nfs/nfsmount.h>
91 #include <nfs/nfsnode.h>
92 #include <nfs/nfs_lock.h>
93
94 #include <mach/host_priv.h>
95 #include <mach/mig_errors.h>
96 #include <mach/host_special_ports.h>
97 #include <lockd/lockd_mach.h>
98
99 extern void ipc_port_release_send(ipc_port_t);
100
101 /*
102 * pending lock request messages are kept in this queue which is
103 * kept sorted by transaction ID (xid).
104 */
105 static uint64_t nfs_lockxid = 0;
106 static LOCKD_MSG_QUEUE nfs_pendlockq;
107
108 /* list of mounts that are (potentially) making lockd requests */
109 TAILQ_HEAD(nfs_lockd_mount_list, nfsmount) nfs_lockd_mount_list;
110
111 static lck_grp_t *nfs_lock_lck_grp;
112 static lck_mtx_t *nfs_lock_mutex;
113
114 void nfs_lockdmsg_enqueue(LOCKD_MSG_REQUEST *);
115 void nfs_lockdmsg_dequeue(LOCKD_MSG_REQUEST *);
116 int nfs_lockdmsg_compare_to_answer(LOCKD_MSG_REQUEST *, struct lockd_ans *);
117 LOCKD_MSG_REQUEST *nfs_lockdmsg_find_by_answer(struct lockd_ans *);
118 LOCKD_MSG_REQUEST *nfs_lockdmsg_find_by_xid(uint64_t);
119 uint64_t nfs_lockxid_get(void);
120 int nfs_lockd_send_request(LOCKD_MSG *, int);
121
122 /*
123 * initialize global nfs lock state
124 */
125 void
126 nfs_lockinit(void)
127 {
128 TAILQ_INIT(&nfs_pendlockq);
129 TAILQ_INIT(&nfs_lockd_mount_list);
130
131 nfs_lock_lck_grp = lck_grp_alloc_init("nfs_lock", LCK_GRP_ATTR_NULL);
132 nfs_lock_mutex = lck_mtx_alloc_init(nfs_lock_lck_grp, LCK_ATTR_NULL);
133 }
134
135 /*
136 * Register a mount as (potentially) making lockd requests.
137 */
138 void
139 nfs_lockd_mount_register(struct nfsmount *nmp)
140 {
141 lck_mtx_lock(nfs_lock_mutex);
142 TAILQ_INSERT_HEAD(&nfs_lockd_mount_list, nmp, nm_ldlink);
143 nfs_lockd_mounts++;
144 lck_mtx_unlock(nfs_lock_mutex);
145 }
146
147 /*
148 * Unregister a mount as (potentially) making lockd requests.
149 *
150 * When the lockd mount count drops to zero, then send a shutdown request to
151 * lockd if we've sent any requests to it.
152 */
153 void
154 nfs_lockd_mount_unregister(struct nfsmount *nmp)
155 {
156 int send_shutdown;
157 mach_port_t lockd_port = IPC_PORT_NULL;
158 kern_return_t kr;
159
160 lck_mtx_lock(nfs_lock_mutex);
161 if (nmp->nm_ldlink.tqe_next == NFSNOLIST) {
162 lck_mtx_unlock(nfs_lock_mutex);
163 return;
164 }
165
166 TAILQ_REMOVE(&nfs_lockd_mount_list, nmp, nm_ldlink);
167 nmp->nm_ldlink.tqe_next = NFSNOLIST;
168
169 nfs_lockd_mounts--;
170
171 /* send a shutdown request if there are no more lockd mounts */
172 send_shutdown = ((nfs_lockd_mounts == 0) && nfs_lockd_request_sent);
173 if (send_shutdown) {
174 nfs_lockd_request_sent = 0;
175 }
176
177 lck_mtx_unlock(nfs_lock_mutex);
178
179 if (!send_shutdown) {
180 return;
181 }
182
183 /*
184 * Let lockd know that it is no longer needed for any NFS mounts
185 */
186 kr = host_get_lockd_port(host_priv_self(), &lockd_port);
187 if ((kr != KERN_SUCCESS) || !IPC_PORT_VALID(lockd_port)) {
188 printf("nfs_lockd_mount_change: shutdown couldn't get port, kr %d, port %s\n",
189 kr, (lockd_port == IPC_PORT_NULL) ? "NULL" :
190 (lockd_port == IPC_PORT_DEAD) ? "DEAD" : "VALID");
191 return;
192 }
193
194 kr = lockd_shutdown(lockd_port);
195 if (kr != KERN_SUCCESS) {
196 printf("nfs_lockd_mount_change: shutdown %d\n", kr);
197 }
198
199 ipc_port_release_send(lockd_port);
200 }
201
202 /*
203 * insert a lock request message into the pending queue
204 * (nfs_lock_mutex must be held)
205 */
206 void
207 nfs_lockdmsg_enqueue(LOCKD_MSG_REQUEST *msgreq)
208 {
209 LOCKD_MSG_REQUEST *mr;
210
211 mr = TAILQ_LAST(&nfs_pendlockq, nfs_lock_msg_queue);
212 if (!mr || (msgreq->lmr_msg.lm_xid > mr->lmr_msg.lm_xid)) {
213 /* fast path: empty queue or new largest xid */
214 TAILQ_INSERT_TAIL(&nfs_pendlockq, msgreq, lmr_next);
215 return;
216 }
217 /* slow path: need to walk list to find insertion point */
218 while (mr && (msgreq->lmr_msg.lm_xid > mr->lmr_msg.lm_xid)) {
219 mr = TAILQ_PREV(mr, nfs_lock_msg_queue, lmr_next);
220 }
221 if (mr) {
222 TAILQ_INSERT_AFTER(&nfs_pendlockq, mr, msgreq, lmr_next);
223 } else {
224 TAILQ_INSERT_HEAD(&nfs_pendlockq, msgreq, lmr_next);
225 }
226 }
227
228 /*
229 * remove a lock request message from the pending queue
230 * (nfs_lock_mutex must be held)
231 */
232 void
233 nfs_lockdmsg_dequeue(LOCKD_MSG_REQUEST *msgreq)
234 {
235 TAILQ_REMOVE(&nfs_pendlockq, msgreq, lmr_next);
236 }
237
238 /*
239 * find a pending lock request message by xid
240 *
241 * We search from the head of the list assuming that the message we're
242 * looking for is for an older request (because we have an answer to it).
243 * This assumes that lock request will be answered primarily in FIFO order.
244 * However, this may not be the case if there are blocked requests. We may
245 * want to move blocked requests to a separate queue (but that'll complicate
246 * duplicate xid checking).
247 *
248 * (nfs_lock_mutex must be held)
249 */
250 LOCKD_MSG_REQUEST *
251 nfs_lockdmsg_find_by_xid(uint64_t lockxid)
252 {
253 LOCKD_MSG_REQUEST *mr;
254
255 TAILQ_FOREACH(mr, &nfs_pendlockq, lmr_next) {
256 if (mr->lmr_msg.lm_xid == lockxid) {
257 return mr;
258 }
259 if (mr->lmr_msg.lm_xid > lockxid) {
260 return NULL;
261 }
262 }
263 return mr;
264 }
265
266 /*
267 * Because we can't depend on nlm_granted messages containing the same
268 * cookie we sent with the original lock request, we need code to test
269 * if an nlm_granted answer matches the lock request. We also need code
270 * that can find a lockd message based solely on the nlm_granted answer.
271 */
272
273 /*
274 * compare lockd message to answer
275 *
276 * returns 0 on equality and 1 if different
277 */
278 int
279 nfs_lockdmsg_compare_to_answer(LOCKD_MSG_REQUEST *msgreq, struct lockd_ans *ansp)
280 {
281 if (!(ansp->la_flags & LOCKD_ANS_LOCK_INFO)) {
282 return 1;
283 }
284 if (msgreq->lmr_msg.lm_fl.l_pid != ansp->la_pid) {
285 return 1;
286 }
287 if (msgreq->lmr_msg.lm_fl.l_start != ansp->la_start) {
288 return 1;
289 }
290 if (msgreq->lmr_msg.lm_fl.l_len != ansp->la_len) {
291 return 1;
292 }
293 if (msgreq->lmr_msg.lm_fh_len != ansp->la_fh_len) {
294 return 1;
295 }
296 if (bcmp(msgreq->lmr_msg.lm_fh, ansp->la_fh, ansp->la_fh_len)) {
297 return 1;
298 }
299 return 0;
300 }
301
302 /*
303 * find a pending lock request message based on the lock info provided
304 * in the lockd_ans/nlm_granted data. We need this because we can't
305 * depend on nlm_granted messages containing the same cookie we sent
306 * with the original lock request.
307 *
308 * We search from the head of the list assuming that the message we're
309 * looking for is for an older request (because we have an answer to it).
310 * This assumes that lock request will be answered primarily in FIFO order.
311 * However, this may not be the case if there are blocked requests. We may
312 * want to move blocked requests to a separate queue (but that'll complicate
313 * duplicate xid checking).
314 *
315 * (nfs_lock_mutex must be held)
316 */
317 LOCKD_MSG_REQUEST *
318 nfs_lockdmsg_find_by_answer(struct lockd_ans *ansp)
319 {
320 LOCKD_MSG_REQUEST *mr;
321
322 if (!(ansp->la_flags & LOCKD_ANS_LOCK_INFO)) {
323 return NULL;
324 }
325 TAILQ_FOREACH(mr, &nfs_pendlockq, lmr_next) {
326 if (!nfs_lockdmsg_compare_to_answer(mr, ansp)) {
327 break;
328 }
329 }
330 return mr;
331 }
332
333 /*
334 * return the next unique lock request transaction ID
335 * (nfs_lock_mutex must be held)
336 */
337 uint64_t
338 nfs_lockxid_get(void)
339 {
340 LOCKD_MSG_REQUEST *mr;
341
342 /* derive initial lock xid from system time */
343 if (!nfs_lockxid) {
344 /*
345 * Note: it's OK if this code inits nfs_lockxid to 0 (for example,
346 * due to a broken clock) because we immediately increment it
347 * and we guarantee to never use xid 0. So, nfs_lockxid should only
348 * ever be 0 the first time this function is called.
349 */
350 struct timeval tv;
351 microtime(&tv);
352 nfs_lockxid = (uint64_t)tv.tv_sec << 12;
353 }
354
355 /* make sure we get a unique xid */
356 do {
357 /* Skip zero xid if it should ever happen. */
358 if (++nfs_lockxid == 0) {
359 nfs_lockxid++;
360 }
361 if (!(mr = TAILQ_LAST(&nfs_pendlockq, nfs_lock_msg_queue)) ||
362 (mr->lmr_msg.lm_xid < nfs_lockxid)) {
363 /* fast path: empty queue or new largest xid */
364 break;
365 }
366 /* check if xid is already in use */
367 } while (nfs_lockdmsg_find_by_xid(nfs_lockxid));
368
369 return nfs_lockxid;
370 }
371
#define MACH_MAX_TRIES 3

/*
 * Hand a lock request off to lockd via its Mach host special port.
 *
 * The request is asynchronous: lockd replies later through nfslockdans().
 * Interrupted sends are retried unless "interruptable" is set, and a died
 * MIG server is retried up to MACH_MAX_TRIES times.
 *
 * Note that this function never returns 0 -- even a successfully sent
 * request returns EAGAIN, which the caller (nfs3_lockd_request) treats
 * the same as success and simply waits for the answer.  EINTR is
 * returned only when the send itself was interrupted.  ENOTSUP means
 * lockd's port couldn't be obtained at all.
 */
int
nfs_lockd_send_request(LOCKD_MSG *msg, int interruptable)
{
	kern_return_t kr;
	int retries = 0;
	mach_port_t lockd_port = IPC_PORT_NULL;

	/* look up lockd's host special port; fails if lockd isn't registered */
	kr = host_get_lockd_port(host_priv_self(), &lockd_port);
	if (kr != KERN_SUCCESS || !IPC_PORT_VALID(lockd_port)) {
		return ENOTSUP;
	}

	do {
		/* In the kernel all mach messaging is interruptable */
		do {
			kr = lockd_request(
				lockd_port,
				msg->lm_version,
				msg->lm_flags,
				msg->lm_xid,
				msg->lm_fl.l_start,
				msg->lm_fl.l_len,
				msg->lm_fl.l_pid,
				msg->lm_fl.l_type,
				msg->lm_fl.l_whence,
				(uint32_t *)&msg->lm_addr,
				(uint32_t *)&msg->lm_cred,
				msg->lm_fh_len,
				msg->lm_fh);
			if (kr != KERN_SUCCESS) {
				printf("lockd_request received %d!\n", kr);
			}
		} while (!interruptable && kr == MACH_SEND_INTERRUPTED);
	} while (kr == MIG_SERVER_DIED && retries++ < MACH_MAX_TRIES);

	/* drop the send right acquired by host_get_lockd_port() */
	ipc_port_release_send(lockd_port);
	switch (kr) {
	case MACH_SEND_INTERRUPTED:
		return EINTR;
	default:
		/*
		 * Other MACH or MIG errors we will retry. Eventually
		 * we will call nfs_down and allow the user to disable
		 * locking.
		 */
		return EAGAIN;
	}
}
422
423 /*
424 * NFS advisory byte-level locks (client)
425 */
426 int
427 nfs3_lockd_request(
428 nfsnode_t np,
429 int type,
430 LOCKD_MSG_REQUEST *msgreq,
431 int flags,
432 thread_t thd)
433 {
434 LOCKD_MSG *msg = &msgreq->lmr_msg;
435 int error, error2;
436 int interruptable, slpflag;
437 struct nfsmount *nmp;
438 struct timeval now;
439 int timeo, starttime, endtime, lastmsg, wentdown = 0;
440 struct timespec ts;
441 struct sockaddr *saddr;
442
443 nmp = NFSTONMP(np);
444 if (!nmp || !nmp->nm_saddr) {
445 return ENXIO;
446 }
447
448 lck_mtx_lock(&nmp->nm_lock);
449 saddr = nmp->nm_saddr;
450 bcopy(saddr, &msg->lm_addr, min(sizeof msg->lm_addr, saddr->sa_len));
451 if (nmp->nm_vers == NFS_VER3) {
452 msg->lm_flags |= LOCKD_MSG_NFSV3;
453 }
454
455 if (nmp->nm_sotype != SOCK_DGRAM) {
456 msg->lm_flags |= LOCKD_MSG_TCP;
457 }
458
459 microuptime(&now);
460 starttime = now.tv_sec;
461 lastmsg = now.tv_sec - ((nmp->nm_tprintf_delay) - (nmp->nm_tprintf_initial_delay));
462 interruptable = NMFLAG(nmp, INTR);
463 lck_mtx_unlock(&nmp->nm_lock);
464
465 lck_mtx_lock(nfs_lock_mutex);
466
467 /* allocate unique xid */
468 msg->lm_xid = nfs_lockxid_get();
469 nfs_lockdmsg_enqueue(msgreq);
470
471 timeo = 4;
472
473 for (;;) {
474 nfs_lockd_request_sent = 1;
475
476 /* need to drop nfs_lock_mutex while calling nfs_lockd_send_request() */
477 lck_mtx_unlock(nfs_lock_mutex);
478 error = nfs_lockd_send_request(msg, interruptable);
479 lck_mtx_lock(nfs_lock_mutex);
480 if (error && error != EAGAIN) {
481 break;
482 }
483
484 /*
485 * Always wait for an answer. Not waiting for unlocks could
486 * cause a lock to be left if the unlock request gets dropped.
487 */
488
489 /*
490 * Retry if it takes too long to get a response.
491 *
492 * The timeout numbers were picked out of thin air... they start
493 * at 4 and double each timeout with a max of 30 seconds.
494 *
495 * In order to maintain responsiveness, we pass a small timeout
496 * to msleep and calculate the timeouts ourselves. This allows
497 * us to pick up on mount changes quicker.
498 */
499 wait_for_granted:
500 error = EWOULDBLOCK;
501 slpflag = (interruptable && (type != F_UNLCK)) ? PCATCH : 0;
502 ts.tv_sec = 2;
503 ts.tv_nsec = 0;
504 microuptime(&now);
505 endtime = now.tv_sec + timeo;
506 while (now.tv_sec < endtime) {
507 error = error2 = 0;
508 if (!msgreq->lmr_answered) {
509 error = msleep(msgreq, nfs_lock_mutex, slpflag | PUSER, "lockd", &ts);
510 slpflag = 0;
511 }
512 if (msgreq->lmr_answered) {
513 /*
514 * Note: it's possible to have a lock granted at
515 * essentially the same time that we get interrupted.
516 * Since the lock may be granted, we can't return an
517 * error from this request or we might not unlock the
518 * lock that's been granted.
519 */
520 nmp = NFSTONMP(np);
521 if ((msgreq->lmr_errno == ENOTSUP) && nmp &&
522 (nmp->nm_state & NFSSTA_LOCKSWORK)) {
523 /*
524 * We have evidence that locks work, yet lockd
525 * returned ENOTSUP. This is probably because
526 * it was unable to contact the server's lockd
527 * to send it the request.
528 *
529 * Because we know locks work, we'll consider
530 * this failure to be a timeout.
531 */
532 error = EWOULDBLOCK;
533 } else {
534 error = 0;
535 }
536 break;
537 }
538 if (error != EWOULDBLOCK) {
539 break;
540 }
541 /* check that we still have our mount... */
542 /* ...and that we still support locks */
543 /* ...and that there isn't a recovery pending */
544 nmp = NFSTONMP(np);
545 if ((error2 = nfs_sigintr(nmp, NULL, NULL, 0))) {
546 error = error2;
547 if (type == F_UNLCK) {
548 printf("nfs3_lockd_request: aborting unlock request, error %d\n", error);
549 }
550 break;
551 }
552 lck_mtx_lock(&nmp->nm_lock);
553 if (nmp->nm_lockmode == NFS_LOCK_MODE_DISABLED) {
554 lck_mtx_unlock(&nmp->nm_lock);
555 break;
556 }
557 if ((nmp->nm_state & NFSSTA_RECOVER) && !(flags & R_RECOVER)) {
558 /* recovery pending... return an error that'll get this operation restarted */
559 error = NFSERR_GRACE;
560 lck_mtx_unlock(&nmp->nm_lock);
561 break;
562 }
563 interruptable = NMFLAG(nmp, INTR);
564 lck_mtx_unlock(&nmp->nm_lock);
565 microuptime(&now);
566 }
567 if (error) {
568 /* check that we still have our mount... */
569 nmp = NFSTONMP(np);
570 if ((error2 = nfs_sigintr(nmp, NULL, NULL, 0))) {
571 error = error2;
572 if (error2 != EINTR) {
573 if (type == F_UNLCK) {
574 printf("nfs3_lockd_request: aborting unlock request, error %d\n", error);
575 }
576 break;
577 }
578 }
579 /* ...and that we still support locks */
580 lck_mtx_lock(&nmp->nm_lock);
581 if (nmp->nm_lockmode == NFS_LOCK_MODE_DISABLED) {
582 if (error == EWOULDBLOCK) {
583 error = ENOTSUP;
584 }
585 lck_mtx_unlock(&nmp->nm_lock);
586 break;
587 }
588 /* ...and that there isn't a recovery pending */
589 if ((error == EWOULDBLOCK) && (nmp->nm_state & NFSSTA_RECOVER) && !(flags & R_RECOVER)) {
590 /* recovery pending... return to allow recovery to occur */
591 error = NFSERR_DENIED;
592 lck_mtx_unlock(&nmp->nm_lock);
593 break;
594 }
595 interruptable = NMFLAG(nmp, INTR);
596 if ((error != EWOULDBLOCK) ||
597 ((nmp->nm_state & NFSSTA_RECOVER) && !(flags & R_RECOVER)) ||
598 ((flags & R_RECOVER) && ((now.tv_sec - starttime) > 30))) {
599 if ((error == EWOULDBLOCK) && (flags & R_RECOVER)) {
600 /* give up if this is for recovery and taking too long */
601 error = ETIMEDOUT;
602 } else if ((nmp->nm_state & NFSSTA_RECOVER) && !(flags & R_RECOVER)) {
603 /* recovery pending... return an error that'll get this operation restarted */
604 error = NFSERR_GRACE;
605 }
606 lck_mtx_unlock(&nmp->nm_lock);
607 /*
608 * We're going to bail on this request.
609 * If we were a blocked lock request, send a cancel.
610 */
611 if ((msgreq->lmr_errno == EINPROGRESS) &&
612 !(msg->lm_flags & LOCKD_MSG_CANCEL)) {
613 /* set this request up as a cancel */
614 msg->lm_flags |= LOCKD_MSG_CANCEL;
615 nfs_lockdmsg_dequeue(msgreq);
616 msg->lm_xid = nfs_lockxid_get();
617 nfs_lockdmsg_enqueue(msgreq);
618 msgreq->lmr_saved_errno = error;
619 msgreq->lmr_errno = 0;
620 msgreq->lmr_answered = 0;
621 /* reset timeout */
622 timeo = 2;
623 /* send cancel request */
624 continue;
625 }
626 break;
627 }
628
629 /* warn if we're not getting any response */
630 microuptime(&now);
631 if ((msgreq->lmr_errno != EINPROGRESS) &&
632 !(msg->lm_flags & LOCKD_MSG_DENIED_GRACE) &&
633 (nmp->nm_tprintf_initial_delay != 0) &&
634 ((lastmsg + nmp->nm_tprintf_delay) < now.tv_sec)) {
635 lck_mtx_unlock(&nmp->nm_lock);
636 lastmsg = now.tv_sec;
637 nfs_down(nmp, thd, 0, NFSSTA_LOCKTIMEO, "lockd not responding", 1);
638 wentdown = 1;
639 } else {
640 lck_mtx_unlock(&nmp->nm_lock);
641 }
642
643 if (msgreq->lmr_errno == EINPROGRESS) {
644 /*
645 * We've got a blocked lock request that we are
646 * going to retry. First, we'll want to try to
647 * send a cancel for the previous request.
648 *
649 * Clear errno so if we don't get a response
650 * to the resend we'll call nfs_down().
651 * Also reset timeout because we'll expect a
652 * quick response to the cancel/resend (even if
653 * it is NLM_BLOCKED).
654 */
655 msg->lm_flags |= LOCKD_MSG_CANCEL;
656 nfs_lockdmsg_dequeue(msgreq);
657 msg->lm_xid = nfs_lockxid_get();
658 nfs_lockdmsg_enqueue(msgreq);
659 msgreq->lmr_saved_errno = msgreq->lmr_errno;
660 msgreq->lmr_errno = 0;
661 msgreq->lmr_answered = 0;
662 timeo = 2;
663 /* send cancel then resend request */
664 continue;
665 }
666
667 /*
668 * We timed out, so we will resend the request.
669 */
670 if (!(flags & R_RECOVER)) {
671 timeo *= 2;
672 }
673 if (timeo > 30) {
674 timeo = 30;
675 }
676 /* resend request */
677 continue;
678 }
679
680 /* we got a reponse, so the server's lockd is OK */
681 nfs_up(NFSTONMP(np), thd, NFSSTA_LOCKTIMEO,
682 wentdown ? "lockd alive again" : NULL);
683 wentdown = 0;
684
685 if (msgreq->lmr_answered && (msg->lm_flags & LOCKD_MSG_DENIED_GRACE)) {
686 /*
687 * The lock request was denied because the server lockd is
688 * still in its grace period. So, we need to try the
689 * request again in a little bit. Return the GRACE error so
690 * the higher levels can perform the retry.
691 */
692 msgreq->lmr_saved_errno = msgreq->lmr_errno = error = NFSERR_GRACE;
693 }
694
695 if (msgreq->lmr_errno == EINPROGRESS) {
696 /* got NLM_BLOCKED response */
697 /* need to wait for NLM_GRANTED */
698 timeo = 30;
699 msgreq->lmr_answered = 0;
700 goto wait_for_granted;
701 }
702
703 if ((msg->lm_flags & LOCKD_MSG_CANCEL) &&
704 (msgreq->lmr_saved_errno == EINPROGRESS)) {
705 /*
706 * We just got a successful reply to the
707 * cancel of the previous blocked lock request.
708 * Now, go ahead and return a DENIED error so the
709 * higher levels can resend the request.
710 */
711 msg->lm_flags &= ~LOCKD_MSG_CANCEL;
712 error = NFSERR_DENIED;
713 /* Will dequeue msgreq after the following break at the end of this routine */
714 break;
715 }
716
717 /*
718 * If the blocked lock request was cancelled.
719 * Restore the error condition from when we
720 * originally bailed on the request.
721 */
722 if (msg->lm_flags & LOCKD_MSG_CANCEL) {
723 msg->lm_flags &= ~LOCKD_MSG_CANCEL;
724 error = msgreq->lmr_saved_errno;
725 } else {
726 error = msgreq->lmr_errno;
727 }
728
729 nmp = NFSTONMP(np);
730 if ((error == ENOTSUP) && nmp && !(nmp->nm_state & NFSSTA_LOCKSWORK)) {
731 /*
732 * We have NO evidence that locks work and lockd
733 * returned ENOTSUP. Let's take this as a hint
734 * that locks aren't supported and disable them
735 * for this mount.
736 */
737 nfs_lockdmsg_dequeue(msgreq);
738 lck_mtx_unlock(nfs_lock_mutex);
739 lck_mtx_lock(&nmp->nm_lock);
740 if (nmp->nm_lockmode == NFS_LOCK_MODE_ENABLED) {
741 nmp->nm_lockmode = NFS_LOCK_MODE_DISABLED;
742 nfs_lockd_mount_unregister(nmp);
743 }
744 nmp->nm_state &= ~NFSSTA_LOCKTIMEO;
745 lck_mtx_unlock(&nmp->nm_lock);
746 printf("lockd returned ENOTSUP, disabling locks for nfs server: %s\n",
747 vfs_statfs(nmp->nm_mountp)->f_mntfromname);
748 return error;
749 }
750 if (!error) {
751 /* record that NFS file locking has worked on this mount */
752 if (nmp) {
753 lck_mtx_lock(&nmp->nm_lock);
754 if (!(nmp->nm_state & NFSSTA_LOCKSWORK)) {
755 nmp->nm_state |= NFSSTA_LOCKSWORK;
756 }
757 lck_mtx_unlock(&nmp->nm_lock);
758 }
759 }
760 break;
761 }
762
763 nfs_lockdmsg_dequeue(msgreq);
764
765 lck_mtx_unlock(nfs_lock_mutex);
766
767 return error;
768 }
769
770 /*
771 * Send an NLM LOCK message to the server
772 */
773 int
774 nfs3_setlock_rpc(
775 nfsnode_t np,
776 struct nfs_open_file *nofp,
777 struct nfs_file_lock *nflp,
778 int reclaim,
779 int flags,
780 thread_t thd,
781 kauth_cred_t cred)
782 {
783 struct nfs_lock_owner *nlop = nflp->nfl_owner;
784 struct nfsmount *nmp;
785 int error;
786 LOCKD_MSG_REQUEST msgreq;
787 LOCKD_MSG *msg;
788
789 nmp = NFSTONMP(np);
790 if (nfs_mount_gone(nmp)) {
791 return ENXIO;
792 }
793
794 if (!nlop->nlo_open_owner) {
795 nfs_open_owner_ref(nofp->nof_owner);
796 nlop->nlo_open_owner = nofp->nof_owner;
797 }
798 if ((error = nfs_lock_owner_set_busy(nlop, thd))) {
799 return error;
800 }
801
802 /* set up lock message request structure */
803 bzero(&msgreq, sizeof(msgreq));
804 msg = &msgreq.lmr_msg;
805 msg->lm_version = LOCKD_MSG_VERSION;
806 if ((nflp->nfl_flags & NFS_FILE_LOCK_WAIT) && !reclaim) {
807 msg->lm_flags |= LOCKD_MSG_BLOCK;
808 }
809 if (reclaim) {
810 msg->lm_flags |= LOCKD_MSG_RECLAIM;
811 }
812 msg->lm_fh_len = (nmp->nm_vers == NFS_VER2) ? NFSX_V2FH : np->n_fhsize;
813 bcopy(np->n_fhp, msg->lm_fh, msg->lm_fh_len);
814 cru2x(cred, &msg->lm_cred);
815
816 msg->lm_fl.l_whence = SEEK_SET;
817 msg->lm_fl.l_start = nflp->nfl_start;
818 msg->lm_fl.l_len = NFS_FLOCK_LENGTH(nflp->nfl_start, nflp->nfl_end);
819 msg->lm_fl.l_type = nflp->nfl_type;
820 msg->lm_fl.l_pid = nlop->nlo_pid;
821
822 error = nfs3_lockd_request(np, 0, &msgreq, flags, thd);
823
824 nfs_lock_owner_clear_busy(nlop);
825 return error;
826 }
827
828 /*
829 * Send an NLM UNLOCK message to the server
830 */
831 int
832 nfs3_unlock_rpc(
833 nfsnode_t np,
834 struct nfs_lock_owner *nlop,
835 __unused int type,
836 uint64_t start,
837 uint64_t end,
838 int flags,
839 thread_t thd,
840 kauth_cred_t cred)
841 {
842 struct nfsmount *nmp;
843 LOCKD_MSG_REQUEST msgreq;
844 LOCKD_MSG *msg;
845
846 nmp = NFSTONMP(np);
847 if (!nmp) {
848 return ENXIO;
849 }
850
851 /* set up lock message request structure */
852 bzero(&msgreq, sizeof(msgreq));
853 msg = &msgreq.lmr_msg;
854 msg->lm_version = LOCKD_MSG_VERSION;
855 msg->lm_fh_len = (nmp->nm_vers == NFS_VER2) ? NFSX_V2FH : np->n_fhsize;
856 bcopy(np->n_fhp, msg->lm_fh, msg->lm_fh_len);
857 cru2x(cred, &msg->lm_cred);
858
859 msg->lm_fl.l_whence = SEEK_SET;
860 msg->lm_fl.l_start = start;
861 msg->lm_fl.l_len = NFS_FLOCK_LENGTH(start, end);
862 msg->lm_fl.l_type = F_UNLCK;
863 msg->lm_fl.l_pid = nlop->nlo_pid;
864
865 return nfs3_lockd_request(np, F_UNLCK, &msgreq, flags, thd);
866 }
867
868 /*
869 * Send an NLM LOCK TEST message to the server
870 */
871 int
872 nfs3_getlock_rpc(
873 nfsnode_t np,
874 struct nfs_lock_owner *nlop,
875 struct flock *fl,
876 uint64_t start,
877 uint64_t end,
878 vfs_context_t ctx)
879 {
880 struct nfsmount *nmp;
881 int error;
882 LOCKD_MSG_REQUEST msgreq;
883 LOCKD_MSG *msg;
884
885 nmp = NFSTONMP(np);
886 if (nfs_mount_gone(nmp)) {
887 return ENXIO;
888 }
889
890 /* set up lock message request structure */
891 bzero(&msgreq, sizeof(msgreq));
892 msg = &msgreq.lmr_msg;
893 msg->lm_version = LOCKD_MSG_VERSION;
894 msg->lm_flags |= LOCKD_MSG_TEST;
895 msg->lm_fh_len = (nmp->nm_vers == NFS_VER2) ? NFSX_V2FH : np->n_fhsize;
896 bcopy(np->n_fhp, msg->lm_fh, msg->lm_fh_len);
897 cru2x(vfs_context_ucred(ctx), &msg->lm_cred);
898
899 msg->lm_fl.l_whence = SEEK_SET;
900 msg->lm_fl.l_start = start;
901 msg->lm_fl.l_len = NFS_FLOCK_LENGTH(start, end);
902 msg->lm_fl.l_type = fl->l_type;
903 msg->lm_fl.l_pid = nlop->nlo_pid;
904
905 error = nfs3_lockd_request(np, 0, &msgreq, 0, vfs_context_thread(ctx));
906
907 if (!error && (msg->lm_flags & LOCKD_MSG_TEST) && !msgreq.lmr_errno) {
908 if (msg->lm_fl.l_type != F_UNLCK) {
909 fl->l_type = msg->lm_fl.l_type;
910 fl->l_pid = msg->lm_fl.l_pid;
911 fl->l_start = msg->lm_fl.l_start;
912 fl->l_len = msg->lm_fl.l_len;
913 fl->l_whence = SEEK_SET;
914 } else {
915 fl->l_type = F_UNLCK;
916 }
917 }
918
919 return error;
920 }
921
922 /*
923 * nfslockdans --
924 * NFS advisory byte-level locks answer from the lock daemon.
925 */
926 int
927 nfslockdans(proc_t p, struct lockd_ans *ansp)
928 {
929 LOCKD_MSG_REQUEST *msgreq;
930 int error;
931
932 /* Let root make this call. */
933 error = proc_suser(p);
934 if (error) {
935 return error;
936 }
937
938 /* the version should match, or we're out of sync */
939 if (ansp->la_version != LOCKD_ANS_VERSION) {
940 return EINVAL;
941 }
942
943 lck_mtx_lock(nfs_lock_mutex);
944
945 /* try to find the lockd message by transaction id (cookie) */
946 msgreq = nfs_lockdmsg_find_by_xid(ansp->la_xid);
947 if (ansp->la_flags & LOCKD_ANS_GRANTED) {
948 /*
949 * We can't depend on the granted message having our cookie,
950 * so we check the answer against the lockd message found.
951 * If no message was found or it doesn't match the answer,
952 * we look for the lockd message by the answer's lock info.
953 */
954 if (!msgreq || nfs_lockdmsg_compare_to_answer(msgreq, ansp)) {
955 msgreq = nfs_lockdmsg_find_by_answer(ansp);
956 }
957 /*
958 * We need to make sure this request isn't being cancelled
959 * If it is, we don't want to accept the granted message.
960 */
961 if (msgreq && (msgreq->lmr_msg.lm_flags & LOCKD_MSG_CANCEL)) {
962 msgreq = NULL;
963 }
964 }
965 if (!msgreq) {
966 lck_mtx_unlock(nfs_lock_mutex);
967 return EPIPE;
968 }
969
970 msgreq->lmr_errno = ansp->la_errno;
971 if ((msgreq->lmr_msg.lm_flags & LOCKD_MSG_TEST) && msgreq->lmr_errno == 0) {
972 if (ansp->la_flags & LOCKD_ANS_LOCK_INFO) {
973 if (ansp->la_flags & LOCKD_ANS_LOCK_EXCL) {
974 msgreq->lmr_msg.lm_fl.l_type = F_WRLCK;
975 } else {
976 msgreq->lmr_msg.lm_fl.l_type = F_RDLCK;
977 }
978 msgreq->lmr_msg.lm_fl.l_pid = ansp->la_pid;
979 msgreq->lmr_msg.lm_fl.l_start = ansp->la_start;
980 msgreq->lmr_msg.lm_fl.l_len = ansp->la_len;
981 } else {
982 msgreq->lmr_msg.lm_fl.l_type = F_UNLCK;
983 }
984 }
985 if (ansp->la_flags & LOCKD_ANS_DENIED_GRACE) {
986 msgreq->lmr_msg.lm_flags |= LOCKD_MSG_DENIED_GRACE;
987 }
988
989 msgreq->lmr_answered = 1;
990 lck_mtx_unlock(nfs_lock_mutex);
991 wakeup(msgreq);
992
993 return 0;
994 }
995
996 /*
997 * nfslockdnotify --
998 * NFS host restart notification from the lock daemon.
999 *
1000 * Used to initiate reclaiming of held locks when a server we
1001 * have mounted reboots.
1002 */
1003 int
1004 nfslockdnotify(proc_t p, user_addr_t argp)
1005 {
1006 int error, i, headsize;
1007 struct lockd_notify ln;
1008 struct nfsmount *nmp;
1009 struct sockaddr *saddr;
1010
1011 /* Let root make this call. */
1012 error = proc_suser(p);
1013 if (error) {
1014 return error;
1015 }
1016
1017 headsize = (char*)&ln.ln_addr[0] - (char*)&ln.ln_version;
1018 error = copyin(argp, &ln, headsize);
1019 if (error) {
1020 return error;
1021 }
1022 if (ln.ln_version != LOCKD_NOTIFY_VERSION) {
1023 return EINVAL;
1024 }
1025 if ((ln.ln_addrcount < 1) || (ln.ln_addrcount > 128)) {
1026 return EINVAL;
1027 }
1028 argp += headsize;
1029 saddr = (struct sockaddr *)&ln.ln_addr[0];
1030
1031 lck_mtx_lock(nfs_lock_mutex);
1032
1033 for (i = 0; i < ln.ln_addrcount; i++) {
1034 error = copyin(argp, &ln.ln_addr[0], sizeof(ln.ln_addr[0]));
1035 if (error) {
1036 break;
1037 }
1038 argp += sizeof(ln.ln_addr[0]);
1039 /* scan lockd mount list for match to this address */
1040 TAILQ_FOREACH(nmp, &nfs_lockd_mount_list, nm_ldlink) {
1041 /* check if address matches this mount's server address */
1042 if (!nmp->nm_saddr || nfs_sockaddr_cmp(saddr, nmp->nm_saddr)) {
1043 continue;
1044 }
1045 /* We have a match! Mark it as needing recovery. */
1046 lck_mtx_lock(&nmp->nm_lock);
1047 nfs_need_recover(nmp, 0);
1048 lck_mtx_unlock(&nmp->nm_lock);
1049 }
1050 }
1051
1052 lck_mtx_unlock(nfs_lock_mutex);
1053
1054 return error;
1055 }
1056
1057 #endif /* CONFIG_NFS_CLIENT */