/*
 * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California. All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)nfs_vnops.c	8.16 (Berkeley) 5/27/95
 * FreeBSD-Id: nfs_vnops.c,v 1.72 1997/11/07 09:20:48 phk Exp $
 */


/*
 * vnode op calls for Sun NFS version 2 and 3
 */
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/mount_internal.h>
#include <sys/malloc.h>
#include <sys/kpi_mbuf.h>
#include <sys/conf.h>
#include <sys/vnode_internal.h>
#include <sys/dirent.h>
#include <sys/fcntl.h>
#include <sys/lockf.h>
#include <sys/ubc_internal.h>
#include <sys/attr.h>
#include <sys/signalvar.h>
#include <sys/uio_internal.h>
#include <sys/xattr.h>

#include <vfs/vfs_support.h>

#include <sys/vm.h>

#include <sys/time.h>
#include <kern/clock.h>
#include <libkern/OSAtomic.h>

#include <miscfs/fifofs/fifo.h>
#include <miscfs/specfs/specdev.h>

#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
#include <nfs/nfsnode.h>
#include <nfs/nfs_gss.h>
#include <nfs/nfsmount.h>
#include <nfs/nfs_lock.h>
#include <nfs/xdr_subs.h>
#include <nfs/nfsm_subs.h>

#include <net/if.h>
#include <netinet/in.h>
#include <netinet/in_var.h>

#include <vm/vm_kern.h>
#include <vm/vm_pageout.h>

#include <kern/task.h>
#include <kern/sched_prim.h>

#define NFS_VNOP_DBG(...) NFS_DBG(NFS_FAC_VNOP, 7, ## __VA_ARGS__)
#define DEFAULT_READLINK_NOCACHE 0

/*
 * NFS vnode ops
 */
int nfs_vnop_lookup(struct vnop_lookup_args *);
int nfsspec_vnop_read(struct vnop_read_args *);
int nfsspec_vnop_write(struct vnop_write_args *);
int nfsspec_vnop_close(struct vnop_close_args *);
#if FIFO
int nfsfifo_vnop_read(struct vnop_read_args *);
int nfsfifo_vnop_write(struct vnop_write_args *);
int nfsfifo_vnop_close(struct vnop_close_args *);
#endif
int nfs_vnop_ioctl(struct vnop_ioctl_args *);
int nfs_vnop_select(struct vnop_select_args *);
int nfs_vnop_setattr(struct vnop_setattr_args *);
int nfs_vnop_fsync(struct vnop_fsync_args *);
int nfs_vnop_rename(struct vnop_rename_args *);
int nfs_vnop_readdir(struct vnop_readdir_args *);
int nfs_vnop_readlink(struct vnop_readlink_args *);
int nfs_vnop_pathconf(struct vnop_pathconf_args *);
int nfs_vnop_pagein(struct vnop_pagein_args *);
int nfs_vnop_pageout(struct vnop_pageout_args *);
int nfs_vnop_blktooff(struct vnop_blktooff_args *);
int nfs_vnop_offtoblk(struct vnop_offtoblk_args *);
int nfs_vnop_blockmap(struct vnop_blockmap_args *);
int nfs_vnop_monitor(struct vnop_monitor_args *);

int nfs3_vnop_create(struct vnop_create_args *);
int nfs3_vnop_mknod(struct vnop_mknod_args *);
int nfs3_vnop_getattr(struct vnop_getattr_args *);
int nfs3_vnop_link(struct vnop_link_args *);
int nfs3_vnop_mkdir(struct vnop_mkdir_args *);
int nfs3_vnop_rmdir(struct vnop_rmdir_args *);
int nfs3_vnop_symlink(struct vnop_symlink_args *);


vnop_t **nfsv2_vnodeop_p;
static const struct vnodeopv_entry_desc nfsv2_vnodeop_entries[] = {
	{ .opve_op = &vnop_default_desc, .opve_impl = (vnop_t *)vn_default_error },
	{ .opve_op = &vnop_lookup_desc, .opve_impl = (vnop_t *)nfs_vnop_lookup },       /* lookup */
	{ .opve_op = &vnop_create_desc, .opve_impl = (vnop_t *)nfs3_vnop_create },      /* create */
	{ .opve_op = &vnop_mknod_desc, .opve_impl = (vnop_t *)nfs3_vnop_mknod },        /* mknod */
	{ .opve_op = &vnop_open_desc, .opve_impl = (vnop_t *)nfs_vnop_open },           /* open */
	{ .opve_op = &vnop_close_desc, .opve_impl = (vnop_t *)nfs_vnop_close },         /* close */
	{ .opve_op = &vnop_access_desc, .opve_impl = (vnop_t *)nfs_vnop_access },       /* access */
	{ .opve_op = &vnop_getattr_desc, .opve_impl = (vnop_t *)nfs3_vnop_getattr },    /* getattr */
	{ .opve_op = &vnop_setattr_desc, .opve_impl = (vnop_t *)nfs_vnop_setattr },     /* setattr */
	{ .opve_op = &vnop_read_desc, .opve_impl = (vnop_t *)nfs_vnop_read },           /* read */
	{ .opve_op = &vnop_write_desc, .opve_impl = (vnop_t *)nfs_vnop_write },         /* write */
	{ .opve_op = &vnop_ioctl_desc, .opve_impl = (vnop_t *)nfs_vnop_ioctl },         /* ioctl */
	{ .opve_op = &vnop_select_desc, .opve_impl = (vnop_t *)nfs_vnop_select },       /* select */
	{ .opve_op = &vnop_revoke_desc, .opve_impl = (vnop_t *)nfs_vnop_revoke },       /* revoke */
	{ .opve_op = &vnop_mmap_desc, .opve_impl = (vnop_t *)nfs_vnop_mmap },           /* mmap */
	{ .opve_op = &vnop_mnomap_desc, .opve_impl = (vnop_t *)nfs_vnop_mnomap },       /* mnomap */
	{ .opve_op = &vnop_fsync_desc, .opve_impl = (vnop_t *)nfs_vnop_fsync },         /* fsync */
	{ .opve_op = &vnop_remove_desc, .opve_impl = (vnop_t *)nfs_vnop_remove },       /* remove */
	{ .opve_op = &vnop_link_desc, .opve_impl = (vnop_t *)nfs3_vnop_link },          /* link */
	{ .opve_op = &vnop_rename_desc, .opve_impl = (vnop_t *)nfs_vnop_rename },       /* rename */
	{ .opve_op = &vnop_mkdir_desc, .opve_impl = (vnop_t *)nfs3_vnop_mkdir },        /* mkdir */
	{ .opve_op = &vnop_rmdir_desc, .opve_impl = (vnop_t *)nfs3_vnop_rmdir },        /* rmdir */
	{ .opve_op = &vnop_symlink_desc, .opve_impl = (vnop_t *)nfs3_vnop_symlink },    /* symlink */
	{ .opve_op = &vnop_readdir_desc, .opve_impl = (vnop_t *)nfs_vnop_readdir },     /* readdir */
	{ .opve_op = &vnop_readlink_desc, .opve_impl = (vnop_t *)nfs_vnop_readlink },   /* readlink */
	{ .opve_op = &vnop_inactive_desc, .opve_impl = (vnop_t *)nfs_vnop_inactive },   /* inactive */
	{ .opve_op = &vnop_reclaim_desc, .opve_impl = (vnop_t *)nfs_vnop_reclaim },     /* reclaim */
	{ .opve_op = &vnop_strategy_desc, .opve_impl = (vnop_t *)err_strategy },        /* strategy */
	{ .opve_op = &vnop_pathconf_desc, .opve_impl = (vnop_t *)nfs_vnop_pathconf },   /* pathconf */
	{ .opve_op = &vnop_advlock_desc, .opve_impl = (vnop_t *)nfs_vnop_advlock },     /* advlock */
	{ .opve_op = &vnop_bwrite_desc, .opve_impl = (vnop_t *)err_bwrite },            /* bwrite */
	{ .opve_op = &vnop_pagein_desc, .opve_impl = (vnop_t *)nfs_vnop_pagein },       /* Pagein */
	{ .opve_op = &vnop_pageout_desc, .opve_impl = (vnop_t *)nfs_vnop_pageout },     /* Pageout */
	{ .opve_op = &vnop_copyfile_desc, .opve_impl = (vnop_t *)err_copyfile },        /* Copyfile */
	{ .opve_op = &vnop_blktooff_desc, .opve_impl = (vnop_t *)nfs_vnop_blktooff },   /* blktooff */
	{ .opve_op = &vnop_offtoblk_desc, .opve_impl = (vnop_t *)nfs_vnop_offtoblk },   /* offtoblk */
	{ .opve_op = &vnop_blockmap_desc, .opve_impl = (vnop_t *)nfs_vnop_blockmap },   /* blockmap */
	{ .opve_op = &vnop_monitor_desc, .opve_impl = (vnop_t *)nfs_vnop_monitor },     /* monitor */
	{ .opve_op = NULL, .opve_impl = NULL }
};
const struct vnodeopv_desc nfsv2_vnodeop_opv_desc =
{ &nfsv2_vnodeop_p, nfsv2_vnodeop_entries };
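
/*
 * Note (added commentary, not in the original source): each vnodeopv_desc
 * pairs an entry table like the one above with a pointer (e.g.
 * nfsv2_vnodeop_p) that the VFS fills in when the descriptor is registered
 * during filesystem initialization.  Operations missing from a table fall
 * through to the vnop_default_desc entry -- vn_default_error here -- so
 * unsupported operations fail cleanly instead of dispatching through an
 * empty slot.
 */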


#if CONFIG_NFS4
vnop_t **nfsv4_vnodeop_p;
static const struct vnodeopv_entry_desc nfsv4_vnodeop_entries[] = {
	{ &vnop_default_desc, (vnop_t *)vn_default_error },
	{ &vnop_lookup_desc, (vnop_t *)nfs_vnop_lookup },       /* lookup */
	{ &vnop_create_desc, (vnop_t *)nfs4_vnop_create },      /* create */
	{ &vnop_mknod_desc, (vnop_t *)nfs4_vnop_mknod },        /* mknod */
	{ &vnop_open_desc, (vnop_t *)nfs_vnop_open },           /* open */
	{ &vnop_close_desc, (vnop_t *)nfs_vnop_close },         /* close */
	{ &vnop_access_desc, (vnop_t *)nfs_vnop_access },       /* access */
	{ &vnop_getattr_desc, (vnop_t *)nfs4_vnop_getattr },    /* getattr */
	{ &vnop_setattr_desc, (vnop_t *)nfs_vnop_setattr },     /* setattr */
	{ &vnop_read_desc, (vnop_t *)nfs_vnop_read },           /* read */
	{ &vnop_write_desc, (vnop_t *)nfs_vnop_write },         /* write */
	{ &vnop_ioctl_desc, (vnop_t *)nfs_vnop_ioctl },         /* ioctl */
	{ &vnop_select_desc, (vnop_t *)nfs_vnop_select },       /* select */
	{ &vnop_revoke_desc, (vnop_t *)nfs_vnop_revoke },       /* revoke */
	{ &vnop_mmap_desc, (vnop_t *)nfs_vnop_mmap },           /* mmap */
	{ &vnop_mnomap_desc, (vnop_t *)nfs_vnop_mnomap },       /* mnomap */
	{ &vnop_fsync_desc, (vnop_t *)nfs_vnop_fsync },         /* fsync */
	{ &vnop_remove_desc, (vnop_t *)nfs_vnop_remove },       /* remove */
	{ &vnop_link_desc, (vnop_t *)nfs4_vnop_link },          /* link */
	{ &vnop_rename_desc, (vnop_t *)nfs_vnop_rename },       /* rename */
	{ &vnop_mkdir_desc, (vnop_t *)nfs4_vnop_mkdir },        /* mkdir */
	{ &vnop_rmdir_desc, (vnop_t *)nfs4_vnop_rmdir },        /* rmdir */
	{ &vnop_symlink_desc, (vnop_t *)nfs4_vnop_symlink },    /* symlink */
	{ &vnop_readdir_desc, (vnop_t *)nfs_vnop_readdir },     /* readdir */
	{ &vnop_readlink_desc, (vnop_t *)nfs_vnop_readlink },   /* readlink */
	{ &vnop_inactive_desc, (vnop_t *)nfs_vnop_inactive },   /* inactive */
	{ &vnop_reclaim_desc, (vnop_t *)nfs_vnop_reclaim },     /* reclaim */
	{ &vnop_strategy_desc, (vnop_t *)err_strategy },        /* strategy */
	{ &vnop_pathconf_desc, (vnop_t *)nfs_vnop_pathconf },   /* pathconf */
	{ &vnop_advlock_desc, (vnop_t *)nfs_vnop_advlock },     /* advlock */
	{ &vnop_bwrite_desc, (vnop_t *)err_bwrite },            /* bwrite */
	{ &vnop_pagein_desc, (vnop_t *)nfs_vnop_pagein },       /* Pagein */
	{ &vnop_pageout_desc, (vnop_t *)nfs_vnop_pageout },     /* Pageout */
	{ &vnop_copyfile_desc, (vnop_t *)err_copyfile },        /* Copyfile */
	{ &vnop_blktooff_desc, (vnop_t *)nfs_vnop_blktooff },   /* blktooff */
	{ &vnop_offtoblk_desc, (vnop_t *)nfs_vnop_offtoblk },   /* offtoblk */
	{ &vnop_blockmap_desc, (vnop_t *)nfs_vnop_blockmap },   /* blockmap */
	{ &vnop_getxattr_desc, (vnop_t *)nfs4_vnop_getxattr },  /* getxattr */
	{ &vnop_setxattr_desc, (vnop_t *)nfs4_vnop_setxattr },  /* setxattr */
	{ &vnop_removexattr_desc, (vnop_t *)nfs4_vnop_removexattr },/* removexattr */
	{ &vnop_listxattr_desc, (vnop_t *)nfs4_vnop_listxattr },/* listxattr */
#if NAMEDSTREAMS
	{ &vnop_getnamedstream_desc, (vnop_t *)nfs4_vnop_getnamedstream },      /* getnamedstream */
	{ &vnop_makenamedstream_desc, (vnop_t *)nfs4_vnop_makenamedstream },    /* makenamedstream */
	{ &vnop_removenamedstream_desc, (vnop_t *)nfs4_vnop_removenamedstream },/* removenamedstream */
#endif
	{ &vnop_monitor_desc, (vnop_t *)nfs_vnop_monitor },     /* monitor */
	{ NULL, NULL }
};
const struct vnodeopv_desc nfsv4_vnodeop_opv_desc =
{ &nfsv4_vnodeop_p, nfsv4_vnodeop_entries };
#endif

/*
 * Special device vnode ops
 */
vnop_t **spec_nfsv2nodeop_p;
static const struct vnodeopv_entry_desc spec_nfsv2nodeop_entries[] = {
	{ &vnop_default_desc, (vnop_t *)vn_default_error },
	{ &vnop_lookup_desc, (vnop_t *)spec_lookup },           /* lookup */
	{ &vnop_create_desc, (vnop_t *)spec_create },           /* create */
	{ &vnop_mknod_desc, (vnop_t *)spec_mknod },             /* mknod */
	{ &vnop_open_desc, (vnop_t *)spec_open },               /* open */
	{ &vnop_close_desc, (vnop_t *)nfsspec_vnop_close },     /* close */
	{ &vnop_getattr_desc, (vnop_t *)nfs3_vnop_getattr },    /* getattr */
	{ &vnop_setattr_desc, (vnop_t *)nfs_vnop_setattr },     /* setattr */
	{ &vnop_read_desc, (vnop_t *)nfsspec_vnop_read },       /* read */
	{ &vnop_write_desc, (vnop_t *)nfsspec_vnop_write },     /* write */
	{ &vnop_ioctl_desc, (vnop_t *)spec_ioctl },             /* ioctl */
	{ &vnop_select_desc, (vnop_t *)spec_select },           /* select */
	{ &vnop_revoke_desc, (vnop_t *)spec_revoke },           /* revoke */
	{ &vnop_mmap_desc, (vnop_t *)spec_mmap },               /* mmap */
	{ &vnop_fsync_desc, (vnop_t *)nfs_vnop_fsync },         /* fsync */
	{ &vnop_remove_desc, (vnop_t *)spec_remove },           /* remove */
	{ &vnop_link_desc, (vnop_t *)spec_link },               /* link */
	{ &vnop_rename_desc, (vnop_t *)spec_rename },           /* rename */
	{ &vnop_mkdir_desc, (vnop_t *)spec_mkdir },             /* mkdir */
	{ &vnop_rmdir_desc, (vnop_t *)spec_rmdir },             /* rmdir */
	{ &vnop_symlink_desc, (vnop_t *)spec_symlink },         /* symlink */
	{ &vnop_readdir_desc, (vnop_t *)spec_readdir },         /* readdir */
	{ &vnop_readlink_desc, (vnop_t *)spec_readlink },       /* readlink */
	{ &vnop_inactive_desc, (vnop_t *)nfs_vnop_inactive },   /* inactive */
	{ &vnop_reclaim_desc, (vnop_t *)nfs_vnop_reclaim },     /* reclaim */
	{ &vnop_strategy_desc, (vnop_t *)spec_strategy },       /* strategy */
	{ &vnop_pathconf_desc, (vnop_t *)spec_pathconf },       /* pathconf */
	{ &vnop_advlock_desc, (vnop_t *)spec_advlock },         /* advlock */
	{ &vnop_bwrite_desc, (vnop_t *)vn_bwrite },             /* bwrite */
	{ &vnop_pagein_desc, (vnop_t *)nfs_vnop_pagein },       /* Pagein */
	{ &vnop_pageout_desc, (vnop_t *)nfs_vnop_pageout },     /* Pageout */
	{ &vnop_blktooff_desc, (vnop_t *)nfs_vnop_blktooff },   /* blktooff */
	{ &vnop_offtoblk_desc, (vnop_t *)nfs_vnop_offtoblk },   /* offtoblk */
	{ &vnop_blockmap_desc, (vnop_t *)nfs_vnop_blockmap },   /* blockmap */
	{ &vnop_monitor_desc, (vnop_t *)nfs_vnop_monitor },     /* monitor */
	{ NULL, NULL }
};
const struct vnodeopv_desc spec_nfsv2nodeop_opv_desc =
{ &spec_nfsv2nodeop_p, spec_nfsv2nodeop_entries };
#if CONFIG_NFS4
vnop_t **spec_nfsv4nodeop_p;
static const struct vnodeopv_entry_desc spec_nfsv4nodeop_entries[] = {
	{ &vnop_default_desc, (vnop_t *)vn_default_error },
	{ &vnop_lookup_desc, (vnop_t *)spec_lookup },           /* lookup */
	{ &vnop_create_desc, (vnop_t *)spec_create },           /* create */
	{ &vnop_mknod_desc, (vnop_t *)spec_mknod },             /* mknod */
	{ &vnop_open_desc, (vnop_t *)spec_open },               /* open */
	{ &vnop_close_desc, (vnop_t *)nfsspec_vnop_close },     /* close */
	{ &vnop_getattr_desc, (vnop_t *)nfs4_vnop_getattr },    /* getattr */
	{ &vnop_setattr_desc, (vnop_t *)nfs_vnop_setattr },     /* setattr */
	{ &vnop_read_desc, (vnop_t *)nfsspec_vnop_read },       /* read */
	{ &vnop_write_desc, (vnop_t *)nfsspec_vnop_write },     /* write */
	{ &vnop_ioctl_desc, (vnop_t *)spec_ioctl },             /* ioctl */
	{ &vnop_select_desc, (vnop_t *)spec_select },           /* select */
	{ &vnop_revoke_desc, (vnop_t *)spec_revoke },           /* revoke */
	{ &vnop_mmap_desc, (vnop_t *)spec_mmap },               /* mmap */
	{ &vnop_fsync_desc, (vnop_t *)nfs_vnop_fsync },         /* fsync */
	{ &vnop_remove_desc, (vnop_t *)spec_remove },           /* remove */
	{ &vnop_link_desc, (vnop_t *)spec_link },               /* link */
	{ &vnop_rename_desc, (vnop_t *)spec_rename },           /* rename */
	{ &vnop_mkdir_desc, (vnop_t *)spec_mkdir },             /* mkdir */
	{ &vnop_rmdir_desc, (vnop_t *)spec_rmdir },             /* rmdir */
	{ &vnop_symlink_desc, (vnop_t *)spec_symlink },         /* symlink */
	{ &vnop_readdir_desc, (vnop_t *)spec_readdir },         /* readdir */
	{ &vnop_readlink_desc, (vnop_t *)spec_readlink },       /* readlink */
	{ &vnop_inactive_desc, (vnop_t *)nfs_vnop_inactive },   /* inactive */
	{ &vnop_reclaim_desc, (vnop_t *)nfs_vnop_reclaim },     /* reclaim */
	{ &vnop_strategy_desc, (vnop_t *)spec_strategy },       /* strategy */
	{ &vnop_pathconf_desc, (vnop_t *)spec_pathconf },       /* pathconf */
	{ &vnop_advlock_desc, (vnop_t *)spec_advlock },         /* advlock */
	{ &vnop_bwrite_desc, (vnop_t *)vn_bwrite },             /* bwrite */
	{ &vnop_pagein_desc, (vnop_t *)nfs_vnop_pagein },       /* Pagein */
	{ &vnop_pageout_desc, (vnop_t *)nfs_vnop_pageout },     /* Pageout */
	{ &vnop_blktooff_desc, (vnop_t *)nfs_vnop_blktooff },   /* blktooff */
	{ &vnop_offtoblk_desc, (vnop_t *)nfs_vnop_offtoblk },   /* offtoblk */
	{ &vnop_blockmap_desc, (vnop_t *)nfs_vnop_blockmap },   /* blockmap */
	{ &vnop_getxattr_desc, (vnop_t *)nfs4_vnop_getxattr },  /* getxattr */
	{ &vnop_setxattr_desc, (vnop_t *)nfs4_vnop_setxattr },  /* setxattr */
	{ &vnop_removexattr_desc, (vnop_t *)nfs4_vnop_removexattr },/* removexattr */
	{ &vnop_listxattr_desc, (vnop_t *)nfs4_vnop_listxattr },/* listxattr */
#if NAMEDSTREAMS
	{ &vnop_getnamedstream_desc, (vnop_t *)nfs4_vnop_getnamedstream },      /* getnamedstream */
	{ &vnop_makenamedstream_desc, (vnop_t *)nfs4_vnop_makenamedstream },    /* makenamedstream */
	{ &vnop_removenamedstream_desc, (vnop_t *)nfs4_vnop_removenamedstream },/* removenamedstream */
#endif
	{ &vnop_monitor_desc, (vnop_t *)nfs_vnop_monitor },     /* monitor */
	{ NULL, NULL }
};
const struct vnodeopv_desc spec_nfsv4nodeop_opv_desc =
{ &spec_nfsv4nodeop_p, spec_nfsv4nodeop_entries };
#endif /* CONFIG_NFS4 */

#if FIFO
vnop_t **fifo_nfsv2nodeop_p;
static const struct vnodeopv_entry_desc fifo_nfsv2nodeop_entries[] = {
	{ &vnop_default_desc, (vnop_t *)vn_default_error },
	{ &vnop_lookup_desc, (vnop_t *)fifo_lookup },           /* lookup */
	{ &vnop_create_desc, (vnop_t *)fifo_create },           /* create */
	{ &vnop_mknod_desc, (vnop_t *)fifo_mknod },             /* mknod */
	{ &vnop_open_desc, (vnop_t *)fifo_open },               /* open */
	{ &vnop_close_desc, (vnop_t *)nfsfifo_vnop_close },     /* close */
	{ &vnop_getattr_desc, (vnop_t *)nfs3_vnop_getattr },    /* getattr */
	{ &vnop_setattr_desc, (vnop_t *)nfs_vnop_setattr },     /* setattr */
	{ &vnop_read_desc, (vnop_t *)nfsfifo_vnop_read },       /* read */
	{ &vnop_write_desc, (vnop_t *)nfsfifo_vnop_write },     /* write */
	{ &vnop_ioctl_desc, (vnop_t *)fifo_ioctl },             /* ioctl */
	{ &vnop_select_desc, (vnop_t *)fifo_select },           /* select */
	{ &vnop_revoke_desc, (vnop_t *)fifo_revoke },           /* revoke */
	{ &vnop_mmap_desc, (vnop_t *)fifo_mmap },               /* mmap */
	{ &vnop_fsync_desc, (vnop_t *)nfs_vnop_fsync },         /* fsync */
	{ &vnop_remove_desc, (vnop_t *)fifo_remove },           /* remove */
	{ &vnop_link_desc, (vnop_t *)fifo_link },               /* link */
	{ &vnop_rename_desc, (vnop_t *)fifo_rename },           /* rename */
	{ &vnop_mkdir_desc, (vnop_t *)fifo_mkdir },             /* mkdir */
	{ &vnop_rmdir_desc, (vnop_t *)fifo_rmdir },             /* rmdir */
	{ &vnop_symlink_desc, (vnop_t *)fifo_symlink },         /* symlink */
	{ &vnop_readdir_desc, (vnop_t *)fifo_readdir },         /* readdir */
	{ &vnop_readlink_desc, (vnop_t *)fifo_readlink },       /* readlink */
	{ &vnop_inactive_desc, (vnop_t *)nfs_vnop_inactive },   /* inactive */
	{ &vnop_reclaim_desc, (vnop_t *)nfs_vnop_reclaim },     /* reclaim */
	{ &vnop_strategy_desc, (vnop_t *)fifo_strategy },       /* strategy */
	{ &vnop_pathconf_desc, (vnop_t *)fifo_pathconf },       /* pathconf */
	{ &vnop_advlock_desc, (vnop_t *)fifo_advlock },         /* advlock */
	{ &vnop_bwrite_desc, (vnop_t *)vn_bwrite },             /* bwrite */
	{ &vnop_pagein_desc, (vnop_t *)nfs_vnop_pagein },       /* Pagein */
	{ &vnop_pageout_desc, (vnop_t *)nfs_vnop_pageout },     /* Pageout */
	{ &vnop_blktooff_desc, (vnop_t *)nfs_vnop_blktooff },   /* blktooff */
	{ &vnop_offtoblk_desc, (vnop_t *)nfs_vnop_offtoblk },   /* offtoblk */
	{ &vnop_blockmap_desc, (vnop_t *)nfs_vnop_blockmap },   /* blockmap */
	{ &vnop_monitor_desc, (vnop_t *)nfs_vnop_monitor },     /* monitor */
	{ NULL, NULL }
};
const struct vnodeopv_desc fifo_nfsv2nodeop_opv_desc =
{ &fifo_nfsv2nodeop_p, fifo_nfsv2nodeop_entries };
#endif

#if CONFIG_NFS4
#if FIFO
vnop_t **fifo_nfsv4nodeop_p;
static const struct vnodeopv_entry_desc fifo_nfsv4nodeop_entries[] = {
	{ &vnop_default_desc, (vnop_t *)vn_default_error },
	{ &vnop_lookup_desc, (vnop_t *)fifo_lookup },           /* lookup */
	{ &vnop_create_desc, (vnop_t *)fifo_create },           /* create */
	{ &vnop_mknod_desc, (vnop_t *)fifo_mknod },             /* mknod */
	{ &vnop_open_desc, (vnop_t *)fifo_open },               /* open */
	{ &vnop_close_desc, (vnop_t *)nfsfifo_vnop_close },     /* close */
	{ &vnop_getattr_desc, (vnop_t *)nfs4_vnop_getattr },    /* getattr */
	{ &vnop_setattr_desc, (vnop_t *)nfs_vnop_setattr },     /* setattr */
	{ &vnop_read_desc, (vnop_t *)nfsfifo_vnop_read },       /* read */
	{ &vnop_write_desc, (vnop_t *)nfsfifo_vnop_write },     /* write */
	{ &vnop_ioctl_desc, (vnop_t *)fifo_ioctl },             /* ioctl */
	{ &vnop_select_desc, (vnop_t *)fifo_select },           /* select */
	{ &vnop_revoke_desc, (vnop_t *)fifo_revoke },           /* revoke */
	{ &vnop_mmap_desc, (vnop_t *)fifo_mmap },               /* mmap */
	{ &vnop_fsync_desc, (vnop_t *)nfs_vnop_fsync },         /* fsync */
	{ &vnop_remove_desc, (vnop_t *)fifo_remove },           /* remove */
	{ &vnop_link_desc, (vnop_t *)fifo_link },               /* link */
	{ &vnop_rename_desc, (vnop_t *)fifo_rename },           /* rename */
	{ &vnop_mkdir_desc, (vnop_t *)fifo_mkdir },             /* mkdir */
	{ &vnop_rmdir_desc, (vnop_t *)fifo_rmdir },             /* rmdir */
	{ &vnop_symlink_desc, (vnop_t *)fifo_symlink },         /* symlink */
	{ &vnop_readdir_desc, (vnop_t *)fifo_readdir },         /* readdir */
	{ &vnop_readlink_desc, (vnop_t *)fifo_readlink },       /* readlink */
	{ &vnop_inactive_desc, (vnop_t *)nfs_vnop_inactive },   /* inactive */
	{ &vnop_reclaim_desc, (vnop_t *)nfs_vnop_reclaim },     /* reclaim */
	{ &vnop_strategy_desc, (vnop_t *)fifo_strategy },       /* strategy */
	{ &vnop_pathconf_desc, (vnop_t *)fifo_pathconf },       /* pathconf */
	{ &vnop_advlock_desc, (vnop_t *)fifo_advlock },         /* advlock */
	{ &vnop_bwrite_desc, (vnop_t *)vn_bwrite },             /* bwrite */
	{ &vnop_pagein_desc, (vnop_t *)nfs_vnop_pagein },       /* Pagein */
	{ &vnop_pageout_desc, (vnop_t *)nfs_vnop_pageout },     /* Pageout */
	{ &vnop_blktooff_desc, (vnop_t *)nfs_vnop_blktooff },   /* blktooff */
	{ &vnop_offtoblk_desc, (vnop_t *)nfs_vnop_offtoblk },   /* offtoblk */
	{ &vnop_blockmap_desc, (vnop_t *)nfs_vnop_blockmap },   /* blockmap */
	{ &vnop_getxattr_desc, (vnop_t *)nfs4_vnop_getxattr },  /* getxattr */
	{ &vnop_setxattr_desc, (vnop_t *)nfs4_vnop_setxattr },  /* setxattr */
	{ &vnop_removexattr_desc, (vnop_t *)nfs4_vnop_removexattr },/* removexattr */
	{ &vnop_listxattr_desc, (vnop_t *)nfs4_vnop_listxattr },/* listxattr */
#if NAMEDSTREAMS
	{ &vnop_getnamedstream_desc, (vnop_t *)nfs4_vnop_getnamedstream },      /* getnamedstream */
	{ &vnop_makenamedstream_desc, (vnop_t *)nfs4_vnop_makenamedstream },    /* makenamedstream */
	{ &vnop_removenamedstream_desc, (vnop_t *)nfs4_vnop_removenamedstream },/* removenamedstream */
#endif
	{ &vnop_monitor_desc, (vnop_t *)nfs_vnop_monitor },     /* monitor */
	{ NULL, NULL }
};
const struct vnodeopv_desc fifo_nfsv4nodeop_opv_desc =
{ &fifo_nfsv4nodeop_p, fifo_nfsv4nodeop_entries };
#endif /* FIFO */
#endif /* CONFIG_NFS4 */

int nfs_sillyrename(nfsnode_t, nfsnode_t, struct componentname *, vfs_context_t);
int nfs_getattr_internal(nfsnode_t, struct nfs_vattr *, vfs_context_t, int);
int nfs_refresh_fh(nfsnode_t, vfs_context_t);


/*
 * Find the slot in the access cache for this UID.
 * If adding and no existing slot is found, reuse slots in FIFO order.
 * The index of the next slot to use is kept in the last entry of the n_access array.
 */
int
nfs_node_access_slot(nfsnode_t np, uid_t uid, int add)
{
	int slot;

	for (slot = 0; slot < NFS_ACCESS_CACHE_SIZE; slot++) {
		if (np->n_accessuid[slot] == uid) {
			break;
		}
	}
	if (slot == NFS_ACCESS_CACHE_SIZE) {
		if (!add) {
			return -1;
		}
		slot = np->n_access[NFS_ACCESS_CACHE_SIZE];
		np->n_access[NFS_ACCESS_CACHE_SIZE] = (slot + 1) % NFS_ACCESS_CACHE_SIZE;
	}
	return slot;
}
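
/*
 * Illustrative sketch (added commentary, not original source): say
 * NFS_ACCESS_CACHE_SIZE were 3.  The cache then acts as a small ring
 * buffer keyed by uid, with n_access[3] holding the next slot to
 * recycle, so adds for unseen uids walk the slots in FIFO order:
 *
 *	slot = nfs_node_access_slot(np, 501, 1); // miss: returns 0, next-reuse index -> 1
 *	slot = nfs_node_access_slot(np, 502, 1); // miss: returns 1, next-reuse index -> 2
 *	slot = nfs_node_access_slot(np, 501, 0); // hit at slot 0, index unchanged
 *
 * Note the hits above assume the caller filled in n_accessuid[slot]
 * (along with n_accessstamp[slot] and n_access[slot]) after each add,
 * as nfs3_access_rpc() does below.
 */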

int
nfs3_access_rpc(nfsnode_t np, u_int32_t *access, int rpcflags, vfs_context_t ctx)
{
	int error = 0, lockerror = ENOENT, status, slot;
	uint32_t access_result = 0;
	u_int64_t xid;
	struct nfsm_chain nmreq, nmrep;
	struct nfsmount *nmp;
	struct timeval now;
	uid_t uid;

	nfsm_chain_null(&nmreq);
	nfsm_chain_null(&nmrep);

	nfsm_chain_build_alloc_init(error, &nmreq, NFSX_FH(NFS_VER3) + NFSX_UNSIGNED);
	nfsm_chain_add_fh(error, &nmreq, NFS_VER3, np->n_fhp, np->n_fhsize);
	nfsm_chain_add_32(error, &nmreq, *access);
	nfsm_chain_build_done(error, &nmreq);
	nfsmout_if(error);
	error = nfs_request2(np, NULL, &nmreq, NFSPROC_ACCESS,
	    vfs_context_thread(ctx), vfs_context_ucred(ctx),
	    NULL, rpcflags, &nmrep, &xid, &status);
	if ((lockerror = nfs_node_lock(np))) {
		error = lockerror;
	}
	nfsm_chain_postop_attr_update(error, &nmrep, np, &xid);
	if (!error) {
		error = status;
	}
	nfsm_chain_get_32(error, &nmrep, access_result);
	nfsmout_if(error);

	/* XXXab do we really need mount here, also why are we doing access cache management here? */
	nmp = NFSTONMP(np);
	if (nfs_mount_gone(nmp)) {
		error = ENXIO;
	}
	nfsmout_if(error);

#if CONFIG_NFS_GSS
	if (auth_is_kerberized(np->n_auth) || auth_is_kerberized(nmp->nm_auth)) {
		uid = nfs_cred_getasid2uid(vfs_context_ucred(ctx));
	} else {
		uid = kauth_cred_getuid(vfs_context_ucred(ctx));
	}
#else
	uid = kauth_cred_getuid(vfs_context_ucred(ctx));
#endif /* CONFIG_NFS_GSS */
	slot = nfs_node_access_slot(np, uid, 1);
	np->n_accessuid[slot] = uid;
	microuptime(&now);
	np->n_accessstamp[slot] = now.tv_sec;
	np->n_access[slot] = access_result;

	/*
	 * If we asked for DELETE but didn't get it, the server
	 * may simply not support returning that bit (possible
	 * on UNIX systems). So, we'll assume that it is OK,
	 * and just let any subsequent delete action fail if it
	 * really isn't deletable.
	 */
	if ((*access & NFS_ACCESS_DELETE) &&
	    !(np->n_access[slot] & NFS_ACCESS_DELETE)) {
		np->n_access[slot] |= NFS_ACCESS_DELETE;
	}
	/* ".zfs" subdirectories may erroneously give a denied answer for add/remove */
	if (nfs_access_dotzfs && (np->n_flag & NISDOTZFSCHILD)) {
		np->n_access[slot] |= (NFS_ACCESS_MODIFY | NFS_ACCESS_EXTEND | NFS_ACCESS_DELETE);
	}
	/* pass back the access returned with this request */
	*access = np->n_access[slot];
nfsmout:
	if (!lockerror) {
		nfs_node_unlock(np);
	}
	nfsm_chain_cleanup(&nmreq);
	nfsm_chain_cleanup(&nmrep);
	return error;
}


/*
 * NFS access vnode op.
 * For NFS version 2, just return ok. File accesses may fail later.
 * For NFS version 3+, use the access RPC to check accessibility. If file
 * permissions are changed on the server, accesses might still fail later.
 */
int
nfs_vnop_access(
	struct vnop_access_args /* {
	                         *  struct vnodeop_desc *a_desc;
	                         *  vnode_t a_vp;
	                         *  int a_action;
	                         *  vfs_context_t a_context;
	                         *  } */*ap)
{
	vfs_context_t ctx = ap->a_context;
	vnode_t vp = ap->a_vp;
	int error = 0, slot, dorpc, rpcflags = 0;
	u_int32_t access, waccess;
	nfsnode_t np = VTONFS(vp);
	struct nfsmount *nmp;
	int nfsvers;
	struct timeval now;
	uid_t uid;

	nmp = VTONMP(vp);
	if (nfs_mount_gone(nmp)) {
		return ENXIO;
	}
	nfsvers = nmp->nm_vers;


	if (nfsvers == NFS_VER2 || NMFLAG(nmp, NOOPAQUE_AUTH)) {
		if ((ap->a_action & KAUTH_VNODE_WRITE_RIGHTS) &&
		    vfs_isrdonly(vnode_mount(vp))) {
			return EROFS;
		}
		return 0;
	}

	/*
	 * For NFS v3, do an access rpc, otherwise you are stuck emulating
	 * ufs_access() locally using the vattr. This may not be correct,
	 * since the server may apply other access criteria such as
	 * client uid-->server uid mapping that we do not know about, but
	 * this is better than just returning anything that is lying about
	 * in the cache.
	 */

	/*
	 * Convert KAUTH primitives to NFS access rights.
	 */
	access = 0;
	if (vnode_isdir(vp)) {
		/* directory */
		if (ap->a_action &
		    (KAUTH_VNODE_LIST_DIRECTORY |
		    KAUTH_VNODE_READ_EXTATTRIBUTES)) {
			access |= NFS_ACCESS_READ;
		}
		if (ap->a_action & KAUTH_VNODE_SEARCH) {
			access |= NFS_ACCESS_LOOKUP;
		}
		if (ap->a_action &
		    (KAUTH_VNODE_ADD_FILE |
		    KAUTH_VNODE_ADD_SUBDIRECTORY)) {
			access |= NFS_ACCESS_MODIFY | NFS_ACCESS_EXTEND;
		}
		if (ap->a_action & KAUTH_VNODE_DELETE_CHILD) {
			access |= NFS_ACCESS_MODIFY;
		}
	} else {
		/* file */
		if (ap->a_action &
		    (KAUTH_VNODE_READ_DATA |
		    KAUTH_VNODE_READ_EXTATTRIBUTES)) {
			access |= NFS_ACCESS_READ;
		}
		if (ap->a_action & KAUTH_VNODE_WRITE_DATA) {
			access |= NFS_ACCESS_MODIFY | NFS_ACCESS_EXTEND;
		}
		if (ap->a_action & KAUTH_VNODE_APPEND_DATA) {
			access |= NFS_ACCESS_EXTEND;
		}
		if (ap->a_action & KAUTH_VNODE_EXECUTE) {
			access |= NFS_ACCESS_EXECUTE;
		}
	}
	/* common */
	if (ap->a_action & KAUTH_VNODE_DELETE) {
		access |= NFS_ACCESS_DELETE;
	}
	if (ap->a_action &
	    (KAUTH_VNODE_WRITE_ATTRIBUTES |
	    KAUTH_VNODE_WRITE_EXTATTRIBUTES |
	    KAUTH_VNODE_WRITE_SECURITY)) {
		access |= NFS_ACCESS_MODIFY;
	}
	/* XXX this is pretty dubious */
	if (ap->a_action & KAUTH_VNODE_CHANGE_OWNER) {
		access |= NFS_ACCESS_MODIFY;
	}
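
	/*
	 * Summary of the KAUTH -> NFS_ACCESS mapping above (a reading aid
	 * derived from the code itself, not original commentary):
	 *
	 *	LIST_DIRECTORY / READ_DATA / READ_EXTATTRIBUTES -> READ
	 *	SEARCH (dir)                                    -> LOOKUP
	 *	ADD_FILE / ADD_SUBDIRECTORY (dir),
	 *	    WRITE_DATA (file)                           -> MODIFY | EXTEND
	 *	DELETE_CHILD (dir), WRITE_ATTRIBUTES,
	 *	    WRITE_EXTATTRIBUTES, WRITE_SECURITY,
	 *	    CHANGE_OWNER                                -> MODIFY
	 *	APPEND_DATA (file)                              -> EXTEND
	 *	EXECUTE (file)                                  -> EXECUTE
	 *	DELETE                                          -> DELETE
	 */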

	/* if caching, always ask for every right */
	if (nfs_access_cache_timeout > 0) {
		waccess = NFS_ACCESS_READ | NFS_ACCESS_MODIFY |
		    NFS_ACCESS_EXTEND | NFS_ACCESS_EXECUTE |
		    NFS_ACCESS_DELETE | NFS_ACCESS_LOOKUP;
	} else {
		waccess = access;
	}

	if ((error = nfs_node_lock(np))) {
		return error;
	}

	/*
	 * Does our cached result allow us to give a definite yes to
	 * this request?
	 */
#if CONFIG_NFS_GSS
	if (auth_is_kerberized(np->n_auth) || auth_is_kerberized(nmp->nm_auth)) {
		uid = nfs_cred_getasid2uid(vfs_context_ucred(ctx));
	} else {
		uid = kauth_cred_getuid(vfs_context_ucred(ctx));
	}
#else
	uid = kauth_cred_getuid(vfs_context_ucred(ctx));
#endif /* CONFIG_NFS_GSS */
	slot = nfs_node_access_slot(np, uid, 0);
	dorpc = 1;
	if (access == 0) {
		/* not asking for any rights understood by NFS, so don't bother doing an RPC */
		/* OSAddAtomic(1, &nfsstats.accesscache_hits); */
		dorpc = 0;
		waccess = 0;
	} else if (NACCESSVALID(np, slot)) {
		microuptime(&now);
		if (((now.tv_sec < (np->n_accessstamp[slot] + nfs_access_cache_timeout)) &&
		    ((np->n_access[slot] & access) == access)) || nfs_use_cache(nmp)) {
			/* OSAddAtomic(1, &nfsstats.accesscache_hits); */
			dorpc = 0;
			waccess = np->n_access[slot];
		}
	}
	nfs_node_unlock(np);
	if (dorpc) {
		/* Either a no, or a don't know. Go to the wire. */
		/* OSAddAtomic(1, &nfsstats.accesscache_misses); */

		/*
		 * Allow an access call to timeout if we have it cached
		 * so we won't hang if the server isn't responding.
		 */
		if (NACCESSVALID(np, slot)) {
			rpcflags |= R_SOFT;
		}

		error = nmp->nm_funcs->nf_access_rpc(np, &waccess, rpcflags, ctx);

		/*
		 * If the server didn't respond return the cached access.
		 */
		if ((error == ETIMEDOUT) && (rpcflags & R_SOFT)) {
			error = 0;
			waccess = np->n_access[slot];
		}
	}
	if (!error && ((waccess & access) != access)) {
		error = EACCES;
	}

	return error;
}


/*
 * NFS open vnode op
 *
 * Perform various update/invalidation checks and then add the
 * open to the node. Regular files will have an open file structure
 * on the node and, for NFSv4, perform an OPEN request on the server.
 */
int
nfs_vnop_open(
	struct vnop_open_args /* {
	                       *  struct vnodeop_desc *a_desc;
	                       *  vnode_t a_vp;
	                       *  int a_mode;
	                       *  vfs_context_t a_context;
	                       *  } */*ap)
{
	vfs_context_t ctx = ap->a_context;
	vnode_t vp = ap->a_vp;
	nfsnode_t np = VTONFS(vp);
	struct nfsmount *nmp;
	int error, accessMode, denyMode, opened = 0;
	struct nfs_open_owner *noop = NULL;
	struct nfs_open_file *nofp = NULL;
	enum vtype vtype;

	if (!(ap->a_mode & (FREAD | FWRITE))) {
		return EINVAL;
	}

	nmp = VTONMP(vp);
	if (nfs_mount_gone(nmp)) {
		return ENXIO;
	}
	if (np->n_flag & NREVOKE) {
		return EIO;
	}

	vtype = vnode_vtype(vp);
	if ((vtype != VREG) && (vtype != VDIR) && (vtype != VLNK)) {
		return EACCES;
	}

	/* First, check if we need to update/invalidate */
	if (ISSET(np->n_flag, NUPDATESIZE)) {
		nfs_data_update_size(np, 0);
	}
	if ((error = nfs_node_lock(np))) {
		return error;
	}
	if (np->n_flag & NNEEDINVALIDATE) {
		np->n_flag &= ~NNEEDINVALIDATE;
		if (vtype == VDIR) {
			nfs_invaldir(np);
		}
		nfs_node_unlock(np);
		nfs_vinvalbuf(vp, V_SAVE | V_IGNORE_WRITEERR, ctx, 1);
		if ((error = nfs_node_lock(np))) {
			return error;
		}
	}
	if (vtype == VREG) {
		np->n_lastrahead = -1;
	}
	if (np->n_flag & NMODIFIED) {
		if (vtype == VDIR) {
			nfs_invaldir(np);
		}
		nfs_node_unlock(np);
		if ((error = nfs_vinvalbuf(vp, V_SAVE | V_IGNORE_WRITEERR, ctx, 1))) {
			return error;
		}
	} else {
		nfs_node_unlock(np);
	}

	/* nfs_getattr() will check changed and purge caches */
	if ((error = nfs_getattr(np, NULL, ctx, NGA_UNCACHED))) {
		return error;
	}

	if (vtype != VREG) {
		/* Just mark that it was opened */
		lck_mtx_lock(&np->n_openlock);
		np->n_openrefcnt++;
		lck_mtx_unlock(&np->n_openlock);
		return 0;
	}

	/* mode contains some combination of: FREAD, FWRITE, O_SHLOCK, O_EXLOCK */
	accessMode = 0;
	if (ap->a_mode & FREAD) {
		accessMode |= NFS_OPEN_SHARE_ACCESS_READ;
	}
	if (ap->a_mode & FWRITE) {
		accessMode |= NFS_OPEN_SHARE_ACCESS_WRITE;
	}
	if (ap->a_mode & O_EXLOCK) {
		denyMode = NFS_OPEN_SHARE_DENY_BOTH;
	} else if (ap->a_mode & O_SHLOCK) {
		denyMode = NFS_OPEN_SHARE_DENY_WRITE;
	} else {
		denyMode = NFS_OPEN_SHARE_DENY_NONE;
	}
	// XXX don't do deny modes just yet (and never do it for !v4)
	denyMode = NFS_OPEN_SHARE_DENY_NONE;

	noop = nfs_open_owner_find(nmp, vfs_context_ucred(ctx), 1);
	if (!noop) {
		return ENOMEM;
	}

restart:
	error = nfs_mount_state_in_use_start(nmp, vfs_context_thread(ctx));
	if (error) {
		nfs_open_owner_rele(noop);
		return error;
	}
	if (np->n_flag & NREVOKE) {
		error = EIO;
		nfs_mount_state_in_use_end(nmp, 0);
		nfs_open_owner_rele(noop);
		return error;
	}

	error = nfs_open_file_find(np, noop, &nofp, accessMode, denyMode, 1);
	if (!error && (nofp->nof_flags & NFS_OPEN_FILE_LOST)) {
		NP(np, "nfs_vnop_open: LOST %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
		error = EIO;
	}
#if CONFIG_NFS4
	if (!error && (nofp->nof_flags & NFS_OPEN_FILE_REOPEN)) {
		nfs_mount_state_in_use_end(nmp, 0);
		error = nfs4_reopen(nofp, vfs_context_thread(ctx));
		nofp = NULL;
		if (!error) {
			goto restart;
		}
	}
#endif
	if (!error) {
		error = nfs_open_file_set_busy(nofp, vfs_context_thread(ctx));
	}
	if (error) {
		nofp = NULL;
		goto out;
	}

	if (nmp->nm_vers < NFS_VER4) {
		/*
		 * NFS v2/v3 opens are always allowed - so just add it.
		 */
		nfs_open_file_add_open(nofp, accessMode, denyMode, 0);
		goto out;
	}

	/*
	 * If we just created the file and the modes match, then we simply use
	 * the open performed in the create. Otherwise, send the request.
	 */
	if ((nofp->nof_flags & NFS_OPEN_FILE_CREATE) &&
	    (nofp->nof_creator == current_thread()) &&
	    (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH) &&
	    (denyMode == NFS_OPEN_SHARE_DENY_NONE)) {
		nofp->nof_flags &= ~NFS_OPEN_FILE_CREATE;
		nofp->nof_creator = NULL;
	} else {
#if CONFIG_NFS4
		if (!opened) {
			error = nfs4_open(np, nofp, accessMode, denyMode, ctx);
		}
#endif
		if ((error == EACCES) && (nofp->nof_flags & NFS_OPEN_FILE_CREATE) &&
		    (nofp->nof_creator == current_thread())) {
			/*
			 * Ugh. This can happen if we just created the file with read-only
			 * perms and we're trying to open it for real with different modes
			 * (e.g. write-only or with a deny mode) and the server decides to
			 * not allow the second open because of the read-only perms.
			 * The best we can do is to just use the create's open.
			 * We may have access we don't need or we may not have a requested
			 * deny mode. We may log complaints later, but we'll try to avoid it.
			 */
			if (denyMode != NFS_OPEN_SHARE_DENY_NONE) {
				NP(np, "nfs_vnop_open: deny mode foregone on create, %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
			}
			nofp->nof_creator = NULL;
			error = 0;
		}
		if (error) {
			goto out;
		}
		opened = 1;
		/*
		 * If we had just created the file, we already had it open.
		 * If the actual open mode is less than what we grabbed at
		 * create time, then we'll downgrade the open here.
		 */
		if ((nofp->nof_flags & NFS_OPEN_FILE_CREATE) &&
		    (nofp->nof_creator == current_thread())) {
			error = nfs_close(np, nofp, NFS_OPEN_SHARE_ACCESS_BOTH, NFS_OPEN_SHARE_DENY_NONE, ctx);
			if (error) {
				NP(np, "nfs_vnop_open: create close error %d, %d", error, kauth_cred_getuid(nofp->nof_owner->noo_cred));
			}
			if (!nfs_mount_state_error_should_restart(error)) {
				error = 0;
				nofp->nof_flags &= ~NFS_OPEN_FILE_CREATE;
			}
		}
	}

out:
	if (nofp) {
		nfs_open_file_clear_busy(nofp);
	}
	if (nfs_mount_state_in_use_end(nmp, error)) {
		nofp = NULL;
		goto restart;
	}
	if (error) {
		NP(np, "nfs_vnop_open: error %d, %d", error, kauth_cred_getuid(noop->noo_cred));
	}
	if (noop) {
		nfs_open_owner_rele(noop);
	}
	if (!error && vtype == VREG && (ap->a_mode & FWRITE)) {
		lck_mtx_lock(&nmp->nm_lock);
		nmp->nm_state &= ~NFSSTA_SQUISHY;
		nmp->nm_curdeadtimeout = nmp->nm_deadtimeout;
		if (nmp->nm_curdeadtimeout <= 0) {
			nmp->nm_deadto_start = 0;
		}
		nmp->nm_writers++;
		lck_mtx_unlock(&nmp->nm_lock);
	}

	return error;
}

static uint32_t
nfs_no_of_open_file_writers(nfsnode_t np)
{
	uint32_t writers = 0;
	struct nfs_open_file *nofp;

	TAILQ_FOREACH(nofp, &np->n_opens, nof_link) {
		writers += nofp->nof_w + nofp->nof_rw + nofp->nof_w_dw + nofp->nof_rw_dw +
		    nofp->nof_w_drw + nofp->nof_rw_drw + nofp->nof_d_w_dw +
		    nofp->nof_d_rw_dw + nofp->nof_d_w_drw + nofp->nof_d_rw_drw +
		    nofp->nof_d_w + nofp->nof_d_rw;
	}

	return writers;
}
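
/*
 * Note on the counter naming used above (a reading aid, not original
 * commentary): each nfs_open_file keeps one count per access/deny
 * combination.  The base letters give the access mode (r, w, rw), a
 * "_dw"/"_drw" suffix marks opens with deny-write/deny-both share
 * modes, and a "d_" prefix marks opens held under a delegation.  Any
 * open that includes write access, delegated or not and regardless of
 * deny mode, counts as a writer here.
 */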

/*
 * NFS close vnode op
 *
 * What an NFS client should do upon close after writing is a debatable issue.
 * Most NFS clients push delayed writes to the server upon close, basically for
 * two reasons:
 * 1 - So that any write errors may be reported back to the client process
 *     doing the close system call. By far the two most likely errors are
 *     NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure.
 * 2 - To put a worst case upper bound on cache inconsistency between
 *     multiple clients for the file.
 * There is also a consistency problem for Version 2 of the protocol w.r.t.
 * not being able to tell if other clients are writing a file concurrently,
 * since there is no way of knowing if the changed modify time in the reply
 * is only due to the write for this client.
 * (NFS Version 3 provides weak cache consistency data in the reply that
 * should be sufficient to detect and handle this case.)
 *
 * The current code does the following:
 * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers
 * for NFS Version 3 - flush dirty buffers to the server but don't invalidate them.
 * for NFS Version 4 - basically the same as NFSv3
 */
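/*
 * A minimal sketch of the version-dependent flush policy described
 * above (illustrative only; the real logic lives in nfs_vnop_close()
 * below, which also checks NMODIFIED and FWRITE first):
 *
 *	if (nfsvers != NFS_VER2)
 *		error = nfs_flush(np, MNT_WAIT, thd, 0);   // flush, keep buffers
 *	else
 *		error = nfs_vinvalbuf(vp, V_SAVE, ctx, 1); // flush and invalidate
 */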
int
nfs_vnop_close(
	struct vnop_close_args /* {
	                        *  struct vnodeop_desc *a_desc;
	                        *  vnode_t a_vp;
	                        *  int a_fflag;
	                        *  vfs_context_t a_context;
	                        *  } */*ap)
{
	vfs_context_t ctx = ap->a_context;
	vnode_t vp = ap->a_vp;
	nfsnode_t np = VTONFS(vp);
	struct nfsmount *nmp;
	int error = 0, error1, nfsvers;
	int fflag = ap->a_fflag;
	enum vtype vtype;
	int accessMode, denyMode;
	struct nfs_open_owner *noop = NULL;
	struct nfs_open_file *nofp = NULL;

	nmp = VTONMP(vp);
	if (!nmp) {
		return ENXIO;
	}
	nfsvers = nmp->nm_vers;
	vtype = vnode_vtype(vp);

	/* First, check if we need to update/flush/invalidate */
	if (ISSET(np->n_flag, NUPDATESIZE)) {
		nfs_data_update_size(np, 0);
	}
	nfs_node_lock_force(np);
	if (np->n_flag & NNEEDINVALIDATE) {
		np->n_flag &= ~NNEEDINVALIDATE;
		nfs_node_unlock(np);
		nfs_vinvalbuf(vp, V_SAVE | V_IGNORE_WRITEERR, ctx, 1);
		nfs_node_lock_force(np);
	}
	if ((vtype == VREG) && (np->n_flag & NMODIFIED) && (fflag & FWRITE)) {
		/* we're closing an open for write and the file is modified, so flush it */
		nfs_node_unlock(np);
		if (nfsvers != NFS_VER2) {
			error = nfs_flush(np, MNT_WAIT, vfs_context_thread(ctx), 0);
		} else {
			error = nfs_vinvalbuf(vp, V_SAVE, ctx, 1);
		}
		nfs_node_lock_force(np);
		NATTRINVALIDATE(np);
	}
	if (np->n_flag & NWRITEERR) {
		np->n_flag &= ~NWRITEERR;
		error = np->n_error;
	}
	nfs_node_unlock(np);

	if (vtype != VREG) {
		/* Just mark that it was closed */
		lck_mtx_lock(&np->n_openlock);
		if (np->n_openrefcnt == 0) {
			if (fflag & (FREAD | FWRITE)) {
				NP(np, "nfs_vnop_close: open reference underrun");
				error = EINVAL;
			}
		} else if (fflag & (FREAD | FWRITE)) {
			np->n_openrefcnt--;
		} else {
			/* No FREAD/FWRITE set - probably the final close */
			np->n_openrefcnt = 0;
		}
		lck_mtx_unlock(&np->n_openlock);
		return error;
	}
	error1 = error;

	/* fflag should contain some combination of: FREAD, FWRITE, FHASLOCK */
	accessMode = 0;
	if (fflag & FREAD) {
		accessMode |= NFS_OPEN_SHARE_ACCESS_READ;
	}
	if (fflag & FWRITE) {
		accessMode |= NFS_OPEN_SHARE_ACCESS_WRITE;
	}
	// XXX It would be nice if we still had the O_EXLOCK/O_SHLOCK flags that were on the open
	// if (fflag & O_EXLOCK)
	//      denyMode = NFS_OPEN_SHARE_DENY_BOTH;
	// else if (fflag & O_SHLOCK)
	//      denyMode = NFS_OPEN_SHARE_DENY_WRITE;
	// else
	//      denyMode = NFS_OPEN_SHARE_DENY_NONE;
#if 0 // Not yet
	if (fflag & FHASLOCK) {
		/* XXX assume FHASLOCK is for the deny mode and not flock */
		/* FHASLOCK flock will be unlocked in the close path, but the flag is not cleared. */
		if (nofp->nof_deny & NFS_OPEN_SHARE_DENY_READ) {
			denyMode = NFS_OPEN_SHARE_DENY_BOTH;
		} else if (nofp->nof_deny & NFS_OPEN_SHARE_DENY_WRITE) {
			denyMode = NFS_OPEN_SHARE_DENY_WRITE;
		} else {
			denyMode = NFS_OPEN_SHARE_DENY_NONE;
		}
	} else {
		denyMode = NFS_OPEN_SHARE_DENY_NONE;
	}
#else
	// XXX don't do deny modes just yet (and never do it for !v4)
	denyMode = NFS_OPEN_SHARE_DENY_NONE;
#endif

	if (!accessMode) {
		/*
		 * No mode given to close?
		 * Guess this is the final close.
		 * We should unlock all locks and close all opens.
		 */
		uint32_t writers;
		mount_t mp = vnode_mount(vp);
		int force = (!mp || vfs_isforce(mp));

		writers = nfs_no_of_open_file_writers(np);
		nfs_release_open_state_for_node(np, force);
		if (writers) {
			lck_mtx_lock(&nmp->nm_lock);
			if (writers > nmp->nm_writers) {
				NP(np, "nfs_vnop_close: number of write opens for mount underrun. Node has %d"
				    " opens for write. Mount has total of %d opens for write\n",
				    writers, nmp->nm_writers);
				nmp->nm_writers = 0;
			} else {
				nmp->nm_writers -= writers;
			}
			lck_mtx_unlock(&nmp->nm_lock);
		}

		return error;
	} else if (fflag & FWRITE) {
		lck_mtx_lock(&nmp->nm_lock);
		if (nmp->nm_writers == 0) {
			NP(np, "nfs_vnop_close: removing open writer from mount, but mount has no files open for writing");
		} else {
			nmp->nm_writers--;
		}
		lck_mtx_unlock(&nmp->nm_lock);
	}


	noop = nfs_open_owner_find(nmp, vfs_context_ucred(ctx), 0);
	if (!noop) {
		// printf("nfs_vnop_close: can't get open owner!\n");
		return EIO;
	}

restart:
	error = nfs_mount_state_in_use_start(nmp, NULL);
	if (error) {
		nfs_open_owner_rele(noop);
		return error;
	}

	error = nfs_open_file_find(np, noop, &nofp, 0, 0, 0);
#if CONFIG_NFS4
	if (!error && (nofp->nof_flags & NFS_OPEN_FILE_REOPEN)) {
		nfs_mount_state_in_use_end(nmp, 0);
		error = nfs4_reopen(nofp, NULL);
		nofp = NULL;
		if (!error) {
			goto restart;
		}
	}
#endif
	if (error) {
		NP(np, "nfs_vnop_close: no open file for owner, error %d, %d", error, kauth_cred_getuid(noop->noo_cred));
		error = EBADF;
		goto out;
	}
	error = nfs_open_file_set_busy(nofp, NULL);
	if (error) {
		nofp = NULL;
		goto out;
	}

	error = nfs_close(np, nofp, accessMode, denyMode, ctx);
	if (error) {
		NP(np, "nfs_vnop_close: close error %d, %d", error, kauth_cred_getuid(noop->noo_cred));
	}

out:
	if (nofp) {
		nfs_open_file_clear_busy(nofp);
	}
	if (nfs_mount_state_in_use_end(nmp, error)) {
		nofp = NULL;
		goto restart;
	}
	if (!error) {
		error = error1;
	}
	if (error) {
		NP(np, "nfs_vnop_close: error %d, %d", error, kauth_cred_getuid(noop->noo_cred));
	}
	if (noop) {
		nfs_open_owner_rele(noop);
	}
	return error;
}

/*
 * nfs_close(): common function that does all the heavy lifting of file closure
 *
 * Takes an open file structure and a set of access/deny modes and figures out how
 * to update the open file structure (and the state on the server) appropriately.
 */
int
nfs_close(
	nfsnode_t np,
	struct nfs_open_file *nofp,
	uint32_t accessMode,
	uint32_t denyMode,
	__unused vfs_context_t ctx)
{
#if CONFIG_NFS4
	struct nfs_lock_owner *nlop;
#endif
	int error = 0, changed = 0, delegated = 0, closed = 0, downgrade = 0;
	uint32_t newAccessMode, newDenyMode;

	/* warn if modes don't match current state */
	if (((accessMode & nofp->nof_access) != accessMode) || ((denyMode & nofp->nof_deny) != denyMode)) {
		NP(np, "nfs_close: mode mismatch %d %d, current %d %d, %d",
		    accessMode, denyMode, nofp->nof_access, nofp->nof_deny,
		    kauth_cred_getuid(nofp->nof_owner->noo_cred));
	}

	/*
	 * If we're closing a write-only open, we may not have a write-only count
	 * if we also grabbed read access. So, check the read-write count.
	 */
	if (denyMode == NFS_OPEN_SHARE_DENY_NONE) {
		if ((accessMode == NFS_OPEN_SHARE_ACCESS_WRITE) &&
		    (nofp->nof_w == 0) && (nofp->nof_d_w == 0) &&
		    (nofp->nof_rw || nofp->nof_d_rw)) {
			accessMode = NFS_OPEN_SHARE_ACCESS_BOTH;
		}
	} else if (denyMode == NFS_OPEN_SHARE_DENY_WRITE) {
		if ((accessMode == NFS_OPEN_SHARE_ACCESS_WRITE) &&
		    (nofp->nof_w_dw == 0) && (nofp->nof_d_w_dw == 0) &&
		    (nofp->nof_rw_dw || nofp->nof_d_rw_dw)) {
			accessMode = NFS_OPEN_SHARE_ACCESS_BOTH;
		}
	} else { /* NFS_OPEN_SHARE_DENY_BOTH */
		if ((accessMode == NFS_OPEN_SHARE_ACCESS_WRITE) &&
		    (nofp->nof_w_drw == 0) && (nofp->nof_d_w_drw == 0) &&
		    (nofp->nof_rw_drw || nofp->nof_d_rw_drw)) {
			accessMode = NFS_OPEN_SHARE_ACCESS_BOTH;
		}
	}

	nfs_open_file_remove_open_find(nofp, accessMode, denyMode, &newAccessMode, &newDenyMode, &delegated);
	if ((newAccessMode != nofp->nof_access) || (newDenyMode != nofp->nof_deny)) {
		changed = 1;
	} else {
		changed = 0;
	}

	if (NFSTONMP(np)->nm_vers < NFS_VER4) {
		/* NFS v2/v3 closes simply need to remove the open. */
		goto v3close;
	}
#if CONFIG_NFS4
	if ((newAccessMode == 0) || (nofp->nof_opencnt == 1)) {
		/*
		 * No more access after this close, so clean up and close it.
		 * Don't send a close RPC if we're closing a delegated open.
		 */
		nfs_wait_bufs(np);
		closed = 1;
		if (!delegated && !(nofp->nof_flags & NFS_OPEN_FILE_LOST)) {
			error = nfs4_close_rpc(np, nofp, vfs_context_thread(ctx), vfs_context_ucred(ctx), 0);
		}
		if (error == NFSERR_LOCKS_HELD) {
			/*
			 * Hmm... the server says we have locks we need to release first
			 * Find the lock owner and try to unlock everything.
			 */
			nlop = nfs_lock_owner_find(np, vfs_context_proc(ctx), 0);
			if (nlop) {
				nfs4_unlock_rpc(np, nlop, F_WRLCK, 0, UINT64_MAX,
				    0, vfs_context_thread(ctx), vfs_context_ucred(ctx));
				nfs_lock_owner_rele(nlop);
			}
			error = nfs4_close_rpc(np, nofp, vfs_context_thread(ctx), vfs_context_ucred(ctx), 0);
		}
	} else if (changed) {
		/*
		 * File is still open but with less access, so downgrade the open.
		 * Don't send a downgrade RPC if we're closing a delegated open.
		 */
		if (!delegated && !(nofp->nof_flags & NFS_OPEN_FILE_LOST)) {
			downgrade = 1;
			/*
			 * If we have delegated opens, we should probably claim them before sending
			 * the downgrade because the server may not know the open we are downgrading to.
			 */
			if (nofp->nof_d_rw_drw || nofp->nof_d_w_drw || nofp->nof_d_r_drw ||
			    nofp->nof_d_rw_dw || nofp->nof_d_w_dw || nofp->nof_d_r_dw ||
			    nofp->nof_d_rw || nofp->nof_d_w || nofp->nof_d_r) {
				nfs4_claim_delegated_state_for_open_file(nofp, 0);
			}
			/* need to remove the open before sending the downgrade */
			nfs_open_file_remove_open(nofp, accessMode, denyMode);
			error = nfs4_open_downgrade_rpc(np, nofp, ctx);
			if (error) { /* Hmm.. that didn't work. Add the open back in. */
				nfs_open_file_add_open(nofp, accessMode, denyMode, delegated);
			}
		}
	}
#endif
v3close:
	if (error) {
		NP(np, "nfs_close: error %d, %d", error, kauth_cred_getuid(nofp->nof_owner->noo_cred));
		return error;
	}

	if (!downgrade) {
		nfs_open_file_remove_open(nofp, accessMode, denyMode);
	}

	if (closed) {
		lck_mtx_lock(&nofp->nof_lock);
		if (nofp->nof_r || nofp->nof_d_r || nofp->nof_w || nofp->nof_d_w || nofp->nof_d_rw ||
		    (nofp->nof_rw && !((nofp->nof_flags & NFS_OPEN_FILE_CREATE) && !nofp->nof_creator && (nofp->nof_rw == 1))) ||
		    nofp->nof_r_dw || nofp->nof_d_r_dw || nofp->nof_w_dw || nofp->nof_d_w_dw ||
		    nofp->nof_rw_dw || nofp->nof_d_rw_dw || nofp->nof_r_drw || nofp->nof_d_r_drw ||
		    nofp->nof_w_drw || nofp->nof_d_w_drw || nofp->nof_rw_drw || nofp->nof_d_rw_drw) {
			NP(np, "nfs_close: unexpected count: %u.%u %u.%u %u.%u dw %u.%u %u.%u %u.%u drw %u.%u %u.%u %u.%u flags 0x%x, %d",
			    nofp->nof_r, nofp->nof_d_r, nofp->nof_w, nofp->nof_d_w,
			    nofp->nof_rw, nofp->nof_d_rw, nofp->nof_r_dw, nofp->nof_d_r_dw,
			    nofp->nof_w_dw, nofp->nof_d_w_dw, nofp->nof_rw_dw, nofp->nof_d_rw_dw,
			    nofp->nof_r_drw, nofp->nof_d_r_drw, nofp->nof_w_drw, nofp->nof_d_w_drw,
			    nofp->nof_rw_drw, nofp->nof_d_rw_drw, nofp->nof_flags,
			    kauth_cred_getuid(nofp->nof_owner->noo_cred));
		}
		/* clear out all open info, just to be safe */
		nofp->nof_access = nofp->nof_deny = 0;
		nofp->nof_mmap_access = nofp->nof_mmap_deny = 0;
		nofp->nof_r = nofp->nof_d_r = 0;
		nofp->nof_w = nofp->nof_d_w = 0;
		nofp->nof_rw = nofp->nof_d_rw = 0;
		nofp->nof_r_dw = nofp->nof_d_r_dw = 0;
		nofp->nof_w_dw = nofp->nof_d_w_dw = 0;
		nofp->nof_rw_dw = nofp->nof_d_rw_dw = 0;
		nofp->nof_r_drw = nofp->nof_d_r_drw = 0;
		nofp->nof_w_drw = nofp->nof_d_w_drw = 0;
		nofp->nof_rw_drw = nofp->nof_d_rw_drw = 0;
		nofp->nof_flags &= ~NFS_OPEN_FILE_CREATE;
		lck_mtx_unlock(&nofp->nof_lock);
		/* XXX we may potentially want to clean up idle/unused open file structures */
	}
	if (nofp->nof_flags & NFS_OPEN_FILE_LOST) {
		error = EIO;
		NP(np, "nfs_close: LOST%s, %d", !nofp->nof_opencnt ? " (last)" : "",
		    kauth_cred_getuid(nofp->nof_owner->noo_cred));
	}

	return error;
}


int
nfs3_getattr_rpc(
	nfsnode_t np,
	mount_t mp,
	u_char *fhp,
	size_t fhsize,
	int flags,
	vfs_context_t ctx,
	struct nfs_vattr *nvap,
	u_int64_t *xidp)
{
	struct nfsmount *nmp = mp ? VFSTONFS(mp) : NFSTONMP(np);
	int error = 0, status, nfsvers, rpcflags = 0;
	struct nfsm_chain nmreq, nmrep;

	if (nfs_mount_gone(nmp)) {
		return ENXIO;
	}
	nfsvers = nmp->nm_vers;

	if (flags & NGA_MONITOR) { /* vnode monitor requests should be soft */
		rpcflags = R_RECOVER;
	}

	if (flags & NGA_SOFT) { /* Return ETIMEDOUT if server not responding */
		rpcflags |= R_SOFT;
	}

	nfsm_chain_null(&nmreq);
	nfsm_chain_null(&nmrep);

	nfsm_chain_build_alloc_init(error, &nmreq, NFSX_FH(nfsvers));
	if (nfsvers != NFS_VER2) {
		nfsm_chain_add_32(error, &nmreq, fhsize);
	}
	nfsm_chain_add_opaque(error, &nmreq, fhp, fhsize);
	nfsm_chain_build_done(error, &nmreq);
	nfsmout_if(error);
	error = nfs_request2(np, mp, &nmreq, NFSPROC_GETATTR,
	    vfs_context_thread(ctx), vfs_context_ucred(ctx),
	    NULL, rpcflags, &nmrep, xidp, &status);
	if (!error) {
		error = status;
	}
	nfsmout_if(error);
	error = nfs_parsefattr(nmp, &nmrep, nfsvers, nvap);
nfsmout:
	nfsm_chain_cleanup(&nmreq);
	nfsm_chain_cleanup(&nmrep);
	return error;
}

/*
 * nfs_refresh_fh will attempt to update the file handle for the node.
 *
 * It only does this for symbolic links and regular files that are not currently opened.
 *
 * On success returns 0 and the node's file handle is updated, or ESTALE on failure.
 */
1450 int
1451 nfs_refresh_fh(nfsnode_t np, vfs_context_t ctx)
1452 {
1453 vnode_t dvp, vp = NFSTOV(np);
1454 nfsnode_t dnp;
1455 const char *v_name = vnode_getname(vp);
1456 char *name;
1457 int namelen, fhsize, refreshed;
1458 int error, wanted = 0;
1459 uint8_t *fhp;
1460 struct timespec ts = {.tv_sec = 2, .tv_nsec = 0};
1461
1462 NFS_VNOP_DBG("vnode is %d\n", vnode_vtype(vp));
1463
1464 dvp = vnode_parent(vp);
1465 if ((vnode_vtype(vp) != VREG && vnode_vtype(vp) != VLNK) ||
1466 v_name == NULL || *v_name == '\0' || dvp == NULL) {
1467 if (v_name != NULL) {
1468 vnode_putname(v_name);
1469 }
1470 return ESTALE;
1471 }
1472 dnp = VTONFS(dvp);
1473
1474 namelen = strlen(v_name);
1475 MALLOC(name, char *, namelen + 1, M_TEMP, M_WAITOK);
1476 if (name == NULL) {
1477 vnode_putname(v_name);
1478 return ESTALE;
1479 }
1480 bcopy(v_name, name, namelen + 1);
1481 NFS_VNOP_DBG("Trying to refresh %s : %s\n", v_name, name);
1482 vnode_putname(v_name);
1483
1484 /* Allocate the maximum size file handle */
1485 MALLOC(fhp, uint8_t *, NFS4_FHSIZE, M_TEMP, M_WAITOK);
1486 if (fhp == NULL) {
1487 FREE(name, M_TEMP);
1488 return ESTALE;
1489 }
1490
1491 if ((error = nfs_node_lock(np))) {
1492 FREE(name, M_TEMP);
1493 FREE(fhp, M_TEMP);
1494 return ESTALE;
1495 }
1496
1497 fhsize = np->n_fhsize;
1498 bcopy(np->n_fhp, fhp, fhsize);
1499 while (ISSET(np->n_flag, NREFRESH)) {
1500 SET(np->n_flag, NREFRESHWANT);
1501 NFS_VNOP_DBG("Waiting for refresh of %s\n", name);
1502 msleep(np, &np->n_lock, PZERO - 1, "nfsrefreshwant", &ts);
1503 if ((error = nfs_sigintr(NFSTONMP(np), NULL, vfs_context_thread(ctx), 0))) {
1504 break;
1505 }
1506 }
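/* If the FH no longer matches the copy we saved above, another thread refreshed it while we slept. */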
1507 refreshed = error ? 0 : !NFS_CMPFH(np, fhp, fhsize);
1508 SET(np->n_flag, NREFRESH);
1509 nfs_node_unlock(np);
1510
1511 NFS_VNOP_DBG("error = %d, refreshed = %d\n", error, refreshed);
1512 if (error || refreshed) {
1513 goto nfsmout;
1514 }
1515
1516 /* Check that there are no open references for this file */
1517 lck_mtx_lock(&np->n_openlock);
1518 if (np->n_openrefcnt || !TAILQ_EMPTY(&np->n_opens) || !TAILQ_EMPTY(&np->n_lock_owners)) {
1519 int cnt = 0;
1520 struct nfs_open_file *ofp;
1521
1522 TAILQ_FOREACH(ofp, &np->n_opens, nof_link) {
1523 cnt += ofp->nof_opencnt;
1524 }
1525 if (cnt) {
1526 lck_mtx_unlock(&np->n_openlock);
1527 NFS_VNOP_DBG("Can not refresh file handle for %s with open state\n", name);
1528 NFS_VNOP_DBG("\topenrefcnt = %d, opens = %d lock_owners = %d\n",
1529 np->n_openrefcnt, cnt, !TAILQ_EMPTY(&np->n_lock_owners));
1530 error = ESTALE;
1531 goto nfsmout;
1532 }
1533 }
1534 lck_mtx_unlock(&np->n_openlock);
1535 /*
1536 * Since the FH is currently stale, we should not be able to
1537 * establish any open state until the FH is refreshed.
1538 */
1539
1540 error = nfs_node_lock(np);
1541 nfsmout_if(error);
1542 /*
1543 * Symlinks should never need invalidations and are holding
1544 * the one and only nfsbuf in an uncached acquired state
1545 * trying to do a readlink. So we will hang if we invalidate
1546 * in that case. Only in the VREG case do we need to
1547 * invalidate.
1548 */
1549 if (vnode_vtype(vp) == VREG) {
1550 np->n_flag &= ~NNEEDINVALIDATE;
1551 nfs_node_unlock(np);
1552 error = nfs_vinvalbuf(vp, V_IGNORE_WRITEERR, ctx, 1);
1553 if (error) {
1554 NFS_VNOP_DBG("nfs_vinvalbuf returned %d\n", error);
1555 }
1556 nfsmout_if(error);
1557 } else {
1558 nfs_node_unlock(np);
1559 }
1560
1561 NFS_VNOP_DBG("Looking up %s\n", name);
1562 error = nfs_lookitup(dnp, name, namelen, ctx, &np);
1563 if (error) {
1564 NFS_VNOP_DBG("nfs_lookitup returned %d\n", error);
1565 }
1566
1567 nfsmout:
1568 nfs_node_lock_force(np);
1569 wanted = ISSET(np->n_flag, NREFRESHWANT);
1570 CLR(np->n_flag, NREFRESH | NREFRESHWANT);
1571 nfs_node_unlock(np);
1572 if (wanted) {
1573 wakeup(np);
1574 }
1575
1576 if (error == 0) {
1577 NFS_VNOP_DBG("%s refreshed file handle\n", name);
1578 }
1579
1580 FREE(name, M_TEMP);
1581 FREE(fhp, M_TEMP);
1582
1583 return error ? ESTALE : 0;
1584 }
1585
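/*
 * Get attributes for the node, retrying after an attempt to
 * refresh the file handle whenever the server returns ESTALE.
 */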
1586 int
1587 nfs_getattr(nfsnode_t np, struct nfs_vattr *nvap, vfs_context_t ctx, int flags)
1588 {
1589 int error;
1590
1591 retry:
1592 error = nfs_getattr_internal(np, nvap, ctx, flags);
1593 if (error == ESTALE) {
1594 error = nfs_refresh_fh(np, ctx);
1595 if (!error) {
1596 goto retry;
1597 }
1598 }
1599 return error;
1600 }
1601
1602 int
1603 nfs_getattr_internal(nfsnode_t np, struct nfs_vattr *nvap, vfs_context_t ctx, int flags)
1604 {
1605 struct nfsmount *nmp;
1606 int error = 0, nfsvers, inprogset = 0, wanted = 0, avoidfloods;
1607 struct nfs_vattr nvattr;
1608 struct timespec ts = { .tv_sec = 2, .tv_nsec = 0 };
1609 u_int64_t xid;
1610
1611 FSDBG_TOP(513, np->n_size, np, np->n_vattr.nva_size, np->n_flag);
1612
1613 nmp = NFSTONMP(np);
1614
1615 if (nfs_mount_gone(nmp)) {
1616 return ENXIO;
1617 }
1618 nfsvers = nmp->nm_vers;
1619
1620 if (!nvap) {
1621 nvap = &nvattr;
1622 }
1623 NVATTR_INIT(nvap);
1624
1625 /* Update local times for special files. */
1626 if (np->n_flag & (NACC | NUPD)) {
1627 nfs_node_lock_force(np);
1628 np->n_flag |= NCHG;
1629 nfs_node_unlock(np);
1630 }
1631 /* Update size, if necessary */
1632 if (ISSET(np->n_flag, NUPDATESIZE)) {
1633 nfs_data_update_size(np, 0);
1634 }
1635
1636 error = nfs_node_lock(np);
1637 nfsmout_if(error);
1638 if (!(flags & (NGA_UNCACHED | NGA_MONITOR)) || ((nfsvers >= NFS_VER4) && (np->n_openflags & N_DELEG_MASK))) {
1639 /*
1640 * Use the cache or wait for any getattr in progress if:
1641 * - it's a cached request, or
1642 * - we have a delegation, or
1643 * - the server isn't responding
1644 */
1645 while (1) {
1646 error = nfs_getattrcache(np, nvap, flags);
1647 if (!error || (error != ENOENT)) {
1648 nfs_node_unlock(np);
1649 goto nfsmout;
1650 }
1651 error = 0;
1652 if (!ISSET(np->n_flag, NGETATTRINPROG)) {
1653 break;
1654 }
1655 if (flags & NGA_MONITOR) {
1656 /* no need to wait if a request is pending */
1657 error = EINPROGRESS;
1658 nfs_node_unlock(np);
1659 goto nfsmout;
1660 }
1661 SET(np->n_flag, NGETATTRWANT);
1662 msleep(np, &np->n_lock, PZERO - 1, "nfsgetattrwant", &ts);
1663 if ((error = nfs_sigintr(NFSTONMP(np), NULL, vfs_context_thread(ctx), 0))) {
1664 nfs_node_unlock(np);
1665 goto nfsmout;
1666 }
1667 }
1668 SET(np->n_flag, NGETATTRINPROG);
1669 inprogset = 1;
1670 } else if (!ISSET(np->n_flag, NGETATTRINPROG)) {
1671 SET(np->n_flag, NGETATTRINPROG);
1672 inprogset = 1;
1673 } else if (flags & NGA_MONITOR) {
1674 /* no need to make a request if one is pending */
1675 error = EINPROGRESS;
1676 }
1677 nfs_node_unlock(np);
1678
1679 nmp = NFSTONMP(np);
1680 if (nfs_mount_gone(nmp)) {
1681 error = ENXIO;
1682 }
1683 if (error) {
1684 goto nfsmout;
1685 }
1686
1687 /*
1688 * If we have valid cached attributes and this is some
1689 * softened-up style of mount, return the cached attributes
1690 * when the server doesn't respond.
1691 */
1692 if (NATTRVALID(np) && nfs_use_cache(nmp)) {
1693 flags |= NGA_SOFT;
1694 }
1695
1696 /*
1697 * We might want to try to get both the attributes and access info by
1698 * making an ACCESS call and seeing if it returns updated attributes.
1699 * But don't bother if we aren't caching access info or if the
1700 * attributes returned wouldn't be cached.
1701 */
1702 if (!(flags & NGA_ACL) && (nfsvers != NFS_VER2) && nfs_access_for_getattr && (nfs_access_cache_timeout > 0)) {
1703 if (nfs_attrcachetimeout(np) > 0) {
1704 /* OSAddAtomic(1, &nfsstats.accesscache_misses); */
1705 u_int32_t access = NFS_ACCESS_ALL;
1706 int rpcflags = 0;
1707
1708 /* Return cached attrs if server doesn't respond */
1709 if (flags & NGA_SOFT) {
1710 rpcflags |= R_SOFT;
1711 }
1712
1713 error = nmp->nm_funcs->nf_access_rpc(np, &access, rpcflags, ctx);
1714
1715 if (error == ETIMEDOUT) {
1716 goto returncached;
1717 }
1718
1719 if (error) {
1720 goto nfsmout;
1721 }
1722 nfs_node_lock_force(np);
1723 error = nfs_getattrcache(np, nvap, flags);
1724 nfs_node_unlock(np);
1725 if (!error || (error != ENOENT)) {
1726 goto nfsmout;
1727 }
1728 /* Well, that didn't work... just do a getattr... */
1729 error = 0;
1730 }
1731 }
1732
1733 avoidfloods = 0;
1734
1735 tryagain:
1736 error = nmp->nm_funcs->nf_getattr_rpc(np, NULL, np->n_fhp, np->n_fhsize, flags, ctx, nvap, &xid);
1737 if (!error) {
1738 nfs_node_lock_force(np);
1739 error = nfs_loadattrcache(np, nvap, &xid, 0);
1740 nfs_node_unlock(np);
1741 }
1742
1743 /*
1744 * If the server didn't respond, return cached attributes.
1745 */
1746 returncached:
1747 if ((flags & NGA_SOFT) && (error == ETIMEDOUT)) {
1748 nfs_node_lock_force(np);
1749 error = nfs_getattrcache(np, nvap, flags);
1750 if (!error || (error != ENOENT)) {
1751 nfs_node_unlock(np);
1752 goto nfsmout;
1753 }
1754 nfs_node_unlock(np);
1755 }
1756 nfsmout_if(error);
1757
1758 if (!xid) { /* out-of-order rpc - attributes were dropped */
1759 FSDBG(513, -1, np, np->n_xid >> 32, np->n_xid);
1760 if (avoidfloods++ < 20) {
1761 goto tryagain;
1762 }
1763 /* avoidfloods > 1 is bizarre; at 20, pull the plug */
1764 /* just return the last attributes we got */
1765 }
1766 nfsmout:
1767 nfs_node_lock_force(np);
1768 if (inprogset) {
1769 wanted = ISSET(np->n_flag, NGETATTRWANT);
1770 CLR(np->n_flag, (NGETATTRINPROG | NGETATTRWANT));
1771 }
1772 if (!error) {
1773 /* check if the node changed on us */
1774 vnode_t vp = NFSTOV(np);
1775 enum vtype vtype = vnode_vtype(vp);
1776 if ((vtype == VDIR) && NFS_CHANGED_NC(nfsvers, np, nvap)) {
1777 FSDBG(513, -1, np, 0, np);
1778 np->n_flag &= ~NNEGNCENTRIES;
1779 cache_purge(vp);
1780 np->n_ncgen++;
1781 NFS_CHANGED_UPDATE_NC(nfsvers, np, nvap);
1782 NFS_VNOP_DBG("Purge directory 0x%llx\n",
1783 (uint64_t)VM_KERNEL_ADDRPERM(vp));
1784 }
1785 if (NFS_CHANGED(nfsvers, np, nvap)) {
1786 FSDBG(513, -1, np, -1, np);
1787 if (vtype == VDIR) {
1788 NFS_VNOP_DBG("Invalidate directory 0x%llx\n",
1789 (uint64_t)VM_KERNEL_ADDRPERM(vp));
1790 nfs_invaldir(np);
1791 }
1792 nfs_node_unlock(np);
1793 if (wanted) {
1794 wakeup(np);
1795 }
1796 error = nfs_vinvalbuf(vp, V_SAVE, ctx, 1);
1797 FSDBG(513, -1, np, -2, error);
1798 if (!error) {
1799 nfs_node_lock_force(np);
1800 NFS_CHANGED_UPDATE(nfsvers, np, nvap);
1801 nfs_node_unlock(np);
1802 }
1803 } else {
1804 nfs_node_unlock(np);
1805 if (wanted) {
1806 wakeup(np);
1807 }
1808 }
1809 } else {
1810 nfs_node_unlock(np);
1811 if (wanted) {
1812 wakeup(np);
1813 }
1814 }
1815
1816 if (nvap == &nvattr) {
1817 NVATTR_CLEANUP(nvap);
1818 } else if (!(flags & NGA_ACL)) {
1819 /* make sure we don't return an ACL if it wasn't asked for */
1820 NFS_BITMAP_CLR(nvap->nva_bitmap, NFS_FATTR_ACL);
1821 if (nvap->nva_acl) {
1822 kauth_acl_free(nvap->nva_acl);
1823 nvap->nva_acl = NULL;
1824 }
1825 }
1826 FSDBG_BOT(513, np->n_size, error, np->n_vattr.nva_size, np->n_flag);
1827 return error;
1828 }
1829
1830
1831 /*
1832 * NFS getattr call from vfs.
1833 */
1834
1835 /*
1836 * The attributes we support over the wire.
1837 * We also get the fsid, but the vfs layer gets it out of the mount
1838 * structure after calling us, so there's no need to return it,
1839 * and Finder expects to call getattrlist just looking for the FSID
1840 * without hanging on a non-responsive server.
1841 */
1842 #define NFS3_SUPPORTED_VATTRS \
1843 (VNODE_ATTR_va_rdev | \
1844 VNODE_ATTR_va_nlink | \
1845 VNODE_ATTR_va_data_size | \
1846 VNODE_ATTR_va_data_alloc | \
1847 VNODE_ATTR_va_uid | \
1848 VNODE_ATTR_va_gid | \
1849 VNODE_ATTR_va_mode | \
1850 VNODE_ATTR_va_modify_time | \
1851 VNODE_ATTR_va_change_time | \
1852 VNODE_ATTR_va_access_time | \
1853 VNODE_ATTR_va_fileid | \
1854 VNODE_ATTR_va_type)
1855
1856
1857 int
1858 nfs3_vnop_getattr(
1859 struct vnop_getattr_args /* {
1860 * struct vnodeop_desc *a_desc;
1861 * vnode_t a_vp;
1862 * struct vnode_attr *a_vap;
1863 * vfs_context_t a_context;
1864 * } */*ap)
1865 {
1866 int error;
1867 struct nfs_vattr nva;
1868 struct vnode_attr *vap = ap->a_vap;
1869 struct nfsmount *nmp;
1870 dev_t rdev;
1871
1872 nmp = VTONMP(ap->a_vp);
1873
1874 /*
1875 * Let's not go over the wire if we don't support any of the attributes.
1876 * Just fall through at the VFS layer and let it cons up what it needs.
1877 */
1878 /* Return the io size no matter what, since we don't go over the wire for this */
1879 VATTR_RETURN(vap, va_iosize, nfs_iosize);
1880
1881 if ((vap->va_active & NFS3_SUPPORTED_VATTRS) == 0) {
1882 return 0;
1883 }
1884
1885 if (VATTR_IS_ACTIVE(ap->a_vap, va_name)) {
1886 NFS_VNOP_DBG("Getting attrs for 0x%llx, vname is %s\n",
1887 (uint64_t)VM_KERNEL_ADDRPERM(ap->a_vp),
1888 ap->a_vp->v_name ? ap->a_vp->v_name : "empty");
1889 }
1890 error = nfs_getattr(VTONFS(ap->a_vp), &nva, ap->a_context, NGA_CACHED);
1891 if (error) {
1892 return error;
1893 }
1894
1895 /* copy nva to *a_vap */
1896 VATTR_RETURN(vap, va_type, nva.nva_type);
1897 VATTR_RETURN(vap, va_mode, nva.nva_mode);
1898 rdev = makedev(nva.nva_rawdev.specdata1, nva.nva_rawdev.specdata2);
1899 VATTR_RETURN(vap, va_rdev, rdev);
1900 VATTR_RETURN(vap, va_uid, nva.nva_uid);
1901 VATTR_RETURN(vap, va_gid, nva.nva_gid);
1902 VATTR_RETURN(vap, va_nlink, nva.nva_nlink);
1903 VATTR_RETURN(vap, va_fileid, nva.nva_fileid);
1904 VATTR_RETURN(vap, va_data_size, nva.nva_size);
1905 VATTR_RETURN(vap, va_data_alloc, nva.nva_bytes);
1906 vap->va_access_time.tv_sec = nva.nva_timesec[NFSTIME_ACCESS];
1907 vap->va_access_time.tv_nsec = nva.nva_timensec[NFSTIME_ACCESS];
1908 VATTR_SET_SUPPORTED(vap, va_access_time);
1909 vap->va_modify_time.tv_sec = nva.nva_timesec[NFSTIME_MODIFY];
1910 vap->va_modify_time.tv_nsec = nva.nva_timensec[NFSTIME_MODIFY];
1911 VATTR_SET_SUPPORTED(vap, va_modify_time);
1912 vap->va_change_time.tv_sec = nva.nva_timesec[NFSTIME_CHANGE];
1913 vap->va_change_time.tv_nsec = nva.nva_timensec[NFSTIME_CHANGE];
1914 VATTR_SET_SUPPORTED(vap, va_change_time);
1915
1916
1917 // VATTR_RETURN(vap, va_encoding, 0xffff /* kTextEncodingUnknown */);
1918 return error;
1919 }
1920
1921 /*
1922 * NFS setattr call.
1923 */
1924 int
1925 nfs_vnop_setattr(
1926 struct vnop_setattr_args /* {
1927 * struct vnodeop_desc *a_desc;
1928 * vnode_t a_vp;
1929 * struct vnode_attr *a_vap;
1930 * vfs_context_t a_context;
1931 * } */*ap)
1932 {
1933 vfs_context_t ctx = ap->a_context;
1934 vnode_t vp = ap->a_vp;
1935 nfsnode_t np = VTONFS(vp);
1936 struct nfsmount *nmp;
1937 struct vnode_attr *vap = ap->a_vap;
1938 int error = 0;
1939 int biosize, nfsvers, namedattrs;
1940 u_quad_t origsize, vapsize;
1941 struct nfs_dulookup dul;
1942 nfsnode_t dnp = NULL;
1943 int dul_in_progress = 0;
1944 vnode_t dvp = NULL;
1945 const char *vname = NULL;
1946 #if CONFIG_NFS4
1947 struct nfs_open_owner *noop = NULL;
1948 struct nfs_open_file *nofp = NULL;
1949 #endif
1950 nmp = VTONMP(vp);
1951 if (nfs_mount_gone(nmp)) {
1952 return ENXIO;
1953 }
1954 nfsvers = nmp->nm_vers;
1955 namedattrs = (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR);
1956 biosize = nmp->nm_biosize;
1957
1958 /* Disallow write attempts if the filesystem is mounted read-only. */
1959 if (vnode_vfsisrdonly(vp)) {
1960 return EROFS;
1961 }
1962
1963 origsize = np->n_size;
1964 if (VATTR_IS_ACTIVE(vap, va_data_size)) {
1965 switch (vnode_vtype(vp)) {
1966 case VDIR:
1967 return EISDIR;
1968 case VCHR:
1969 case VBLK:
1970 case VSOCK:
1971 case VFIFO:
1972 if (!VATTR_IS_ACTIVE(vap, va_modify_time) &&
1973 !VATTR_IS_ACTIVE(vap, va_access_time) &&
1974 !VATTR_IS_ACTIVE(vap, va_mode) &&
1975 !VATTR_IS_ACTIVE(vap, va_uid) &&
1976 !VATTR_IS_ACTIVE(vap, va_gid)) {
1977 return 0;
1978 }
1979 VATTR_CLEAR_ACTIVE(vap, va_data_size);
1980 break;
1981 default:
1982 /*
1983 * Disallow write attempts if the filesystem is
1984 * mounted read-only.
1985 */
1986 if (vnode_vfsisrdonly(vp)) {
1987 return EROFS;
1988 }
1989 FSDBG_TOP(512, np->n_size, vap->va_data_size,
1990 np->n_vattr.nva_size, np->n_flag);
1991 /* clear NNEEDINVALIDATE, if set */
1992 if ((error = nfs_node_lock(np))) {
1993 return error;
1994 }
1995 if (np->n_flag & NNEEDINVALIDATE) {
1996 np->n_flag &= ~NNEEDINVALIDATE;
1997 }
1998 nfs_node_unlock(np);
1999 /* flush everything */
2000 error = nfs_vinvalbuf(vp, (vap->va_data_size ? V_SAVE : 0), ctx, 1);
2001 if (error) {
2002 NP(np, "nfs_setattr: nfs_vinvalbuf %d", error);
2003 FSDBG_BOT(512, np->n_size, vap->va_data_size, np->n_vattr.nva_size, -1);
2004 return error;
2005 }
2006 #if CONFIG_NFS4
2007 if (nfsvers >= NFS_VER4) {
2008 /* setting file size requires having the file open for write access */
2009 if (np->n_flag & NREVOKE) {
2010 return EIO;
2011 }
2012 noop = nfs_open_owner_find(nmp, vfs_context_ucred(ctx), 1);
2013 if (!noop) {
2014 return ENOMEM;
2015 }
2016 restart:
2017 error = nfs_mount_state_in_use_start(nmp, vfs_context_thread(ctx));
2018 if (error) {
2019 return error;
2020 }
2021 if (np->n_flag & NREVOKE) {
2022 nfs_mount_state_in_use_end(nmp, 0);
2023 return EIO;
2024 }
2025 error = nfs_open_file_find(np, noop, &nofp, 0, 0, 1);
2026 if (!error && (nofp->nof_flags & NFS_OPEN_FILE_LOST)) {
2027 error = EIO;
2028 }
2029 if (!error && (nofp->nof_flags & NFS_OPEN_FILE_REOPEN)) {
2030 nfs_mount_state_in_use_end(nmp, 0);
2031 error = nfs4_reopen(nofp, vfs_context_thread(ctx));
2032 nofp = NULL;
2033 if (!error) {
2034 goto restart;
2035 }
2036 }
2037 if (!error) {
2038 error = nfs_open_file_set_busy(nofp, vfs_context_thread(ctx));
2039 }
2040 if (error) {
2041 nfs_open_owner_rele(noop);
2042 return error;
2043 }
2044 if (!(nofp->nof_access & NFS_OPEN_SHARE_ACCESS_WRITE)) {
2045 /* we don't have the file open for write access, so open it */
2046 error = nfs4_open(np, nofp, NFS_OPEN_SHARE_ACCESS_WRITE, NFS_OPEN_SHARE_DENY_NONE, ctx);
2047 if (!error) {
2048 nofp->nof_flags |= NFS_OPEN_FILE_SETATTR;
2049 }
2050 if (nfs_mount_state_error_should_restart(error)) {
2051 nfs_open_file_clear_busy(nofp);
2052 nofp = NULL;
2053 if (nfs_mount_state_in_use_end(nmp, error)) {
2054 goto restart;
2055 }
2056 }
2057 }
2058 }
2059 #endif
2060 nfs_data_lock(np, NFS_DATA_LOCK_EXCLUSIVE);
2061 if (np->n_size > vap->va_data_size) { /* shrinking? */
2062 daddr64_t obn, bn;
2063 int neweofoff, mustwrite;
2064 struct nfsbuf *bp;
2065
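/*
 * Walk the cached buffers from the old EOF block down to the new
 * EOF block.  Buffers wholly past the new EOF are invalidated; the
 * buffer containing the new EOF has its dirty range clipped, and any
 * remaining dirty data is written out before it is discarded.
 */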
2066 obn = (np->n_size - 1) / biosize;
2067 bn = vap->va_data_size / biosize;
2068 for (; obn >= bn; obn--) {
2069 if (!nfs_buf_is_incore(np, obn)) {
2070 continue;
2071 }
2072 error = nfs_buf_get(np, obn, biosize, NULL, NBLK_READ, &bp);
2073 if (error) {
2074 continue;
2075 }
2076 if (obn != bn) {
2077 FSDBG(512, bp, bp->nb_flags, 0, obn);
2078 SET(bp->nb_flags, NB_INVAL);
2079 nfs_buf_release(bp, 1);
2080 continue;
2081 }
2082 mustwrite = 0;
2083 neweofoff = vap->va_data_size - NBOFF(bp);
2084 /* check for any dirty data before the new EOF */
2085 if ((bp->nb_dirtyend > 0) && (bp->nb_dirtyoff < neweofoff)) {
2086 /* clip dirty range to EOF */
2087 if (bp->nb_dirtyend > neweofoff) {
2088 bp->nb_dirtyend = neweofoff;
2089 if (bp->nb_dirtyoff >= bp->nb_dirtyend) {
2090 bp->nb_dirtyoff = bp->nb_dirtyend = 0;
2091 }
2092 }
2093 if ((bp->nb_dirtyend > 0) && (bp->nb_dirtyoff < neweofoff)) {
2094 mustwrite++;
2095 }
2096 }
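/* drop dirty-page bits for pages lying entirely past the new EOF */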
2097 bp->nb_dirty &= (1 << round_page_32(neweofoff) / PAGE_SIZE) - 1;
2098 if (bp->nb_dirty) {
2099 mustwrite++;
2100 }
2101 if (!mustwrite) {
2102 FSDBG(512, bp, bp->nb_flags, 0, obn);
2103 SET(bp->nb_flags, NB_INVAL);
2104 nfs_buf_release(bp, 1);
2105 continue;
2106 }
2107 /* gotta write out dirty data before invalidating */
2108 /* (NB_STABLE indicates that data writes should be FILESYNC) */
2109 /* (NB_NOCACHE indicates buffer should be discarded) */
2110 CLR(bp->nb_flags, (NB_DONE | NB_ERROR | NB_INVAL | NB_ASYNC | NB_READ));
2111 SET(bp->nb_flags, NB_STABLE | NB_NOCACHE);
2112 if (!IS_VALID_CRED(bp->nb_wcred)) {
2113 kauth_cred_t cred = vfs_context_ucred(ctx);
2114 kauth_cred_ref(cred);
2115 bp->nb_wcred = cred;
2116 }
2117 error = nfs_buf_write(bp);
2118 // Note: bp has been released
2119 if (error) {
2120 FSDBG(512, bp, 0xd00dee, 0xbad, error);
2121 nfs_node_lock_force(np);
2122 np->n_error = error;
2123 np->n_flag |= NWRITEERR;
2124 /*
2125 * There was a write error and we need to
2126 * invalidate attrs and flush buffers in
2127 * order to sync up with the server.
2128 * (if this write was extending the file,
2129 * we may no longer know the correct size)
2130 */
2131 NATTRINVALIDATE(np);
2132 nfs_node_unlock(np);
2133 nfs_data_unlock(np);
2134 nfs_vinvalbuf(vp, V_SAVE | V_IGNORE_WRITEERR, ctx, 1);
2135 nfs_data_lock(np, NFS_DATA_LOCK_EXCLUSIVE);
2136 error = 0;
2137 }
2138 }
2139 }
2140 if (vap->va_data_size != np->n_size) {
2141 ubc_setsize(vp, (off_t)vap->va_data_size); /* XXX error? */
2142 }
2143 origsize = np->n_size;
2144 np->n_size = np->n_vattr.nva_size = vap->va_data_size;
2145 nfs_node_lock_force(np);
2146 CLR(np->n_flag, NUPDATESIZE);
2147 nfs_node_unlock(np);
2148 FSDBG(512, np, np->n_size, np->n_vattr.nva_size, 0xf00d0001);
2149 }
2150 } else if (VATTR_IS_ACTIVE(vap, va_modify_time) ||
2151 VATTR_IS_ACTIVE(vap, va_access_time) ||
2152 (vap->va_vaflags & VA_UTIMES_NULL)) {
2153 if ((error = nfs_node_lock(np))) {
2154 return error;
2155 }
2156 if ((np->n_flag & NMODIFIED) && (vnode_vtype(vp) == VREG)) {
2157 nfs_node_unlock(np);
2158 error = nfs_vinvalbuf(vp, V_SAVE, ctx, 1);
2159 if (error == EINTR) {
2160 return error;
2161 }
2162 } else {
2163 nfs_node_unlock(np);
2164 }
2165 }
2166 if ((VATTR_IS_ACTIVE(vap, va_mode) || VATTR_IS_ACTIVE(vap, va_uid) || VATTR_IS_ACTIVE(vap, va_gid) ||
2167 VATTR_IS_ACTIVE(vap, va_acl) || VATTR_IS_ACTIVE(vap, va_uuuid) || VATTR_IS_ACTIVE(vap, va_guuid)) &&
2168 !(error = nfs_node_lock(np))) {
2169 NACCESSINVALIDATE(np);
2170 nfs_node_unlock(np);
2171 if (!namedattrs) {
2172 dvp = vnode_getparent(vp);
2173 vname = vnode_getname(vp);
2174 dnp = (dvp && vname) ? VTONFS(dvp) : NULL;
2175 if (dnp) {
2176 if (nfs_node_set_busy(dnp, vfs_context_thread(ctx))) {
2177 vnode_put(dvp);
2178 vnode_putname(vname);
2179 } else {
2180 nfs_dulookup_init(&dul, dnp, vname, strlen(vname), ctx);
2181 nfs_dulookup_start(&dul, dnp, ctx);
2182 dul_in_progress = 1;
2183 }
2184 } else {
2185 if (dvp) {
2186 vnode_put(dvp);
2187 }
2188 if (vname) {
2189 vnode_putname(vname);
2190 }
2191 }
2192 }
2193 }
2194
2195 if (!error) {
2196 error = nmp->nm_funcs->nf_setattr_rpc(np, vap, ctx);
2197 }
2198
2199 if (dul_in_progress) {
2200 nfs_dulookup_finish(&dul, dnp, ctx);
2201 nfs_node_clear_busy(dnp);
2202 vnode_put(dvp);
2203 vnode_putname(vname);
2204 }
2205
2206 FSDBG_BOT(512, np->n_size, vap->va_data_size, np->n_vattr.nva_size, error);
2207 if (VATTR_IS_ACTIVE(vap, va_data_size)) {
2208 if (error && (origsize != np->n_size) &&
2209 ((nfsvers < NFS_VER4) || !nfs_mount_state_error_should_restart(error))) {
2210 /* make every effort to resync file size w/ server... */
2211 /* (don't bother if we'll be restarting the operation) */
2212 int err; /* preserve "error" for return */
2213 np->n_size = np->n_vattr.nva_size = origsize;
2214 nfs_node_lock_force(np);
2215 CLR(np->n_flag, NUPDATESIZE);
2216 nfs_node_unlock(np);
2217 FSDBG(512, np, np->n_size, np->n_vattr.nva_size, 0xf00d0002);
2218 ubc_setsize(vp, (off_t)np->n_size); /* XXX check error */
2219 vapsize = vap->va_data_size;
2220 vap->va_data_size = origsize;
2221 err = nmp->nm_funcs->nf_setattr_rpc(np, vap, ctx);
2222 if (err) {
2223 NP(np, "nfs_vnop_setattr: nfs%d_setattr_rpc %d %d", nfsvers, error, err);
2224 }
2225 vap->va_data_size = vapsize;
2226 }
2227 nfs_node_lock_force(np);
2228 /*
2229 * The size was just set. If the size is already marked for update, don't
2230 * trust the newsize (it may have been set while the setattr was in progress).
2231 * Clear the update flag and make sure we fetch new attributes so we are sure
2232 * we have the latest size.
2233 */
2234 if (ISSET(np->n_flag, NUPDATESIZE)) {
2235 CLR(np->n_flag, NUPDATESIZE);
2236 NATTRINVALIDATE(np);
2237 nfs_node_unlock(np);
2238 nfs_getattr(np, NULL, ctx, NGA_UNCACHED);
2239 } else {
2240 nfs_node_unlock(np);
2241 }
2242 nfs_data_unlock(np);
2243 #if CONFIG_NFS4
2244 if (nfsvers >= NFS_VER4) {
2245 if (nofp) {
2246 /* don't close our setattr open if we'll be restarting... */
2247 if (!nfs_mount_state_error_should_restart(error) &&
2248 (nofp->nof_flags & NFS_OPEN_FILE_SETATTR)) {
2249 int err = nfs_close(np, nofp, NFS_OPEN_SHARE_ACCESS_WRITE, NFS_OPEN_SHARE_DENY_NONE, ctx);
2250 if (err) {
2251 NP(np, "nfs_vnop_setattr: close error: %d", err);
2252 }
2253 nofp->nof_flags &= ~NFS_OPEN_FILE_SETATTR;
2254 }
2255 nfs_open_file_clear_busy(nofp);
2256 nofp = NULL;
2257 }
2258 if (nfs_mount_state_in_use_end(nmp, error)) {
2259 goto restart;
2260 }
2261 nfs_open_owner_rele(noop);
2262 }
2263 #endif
2264 }
2265 return error;
2266 }
2267
2268 /*
2269 * Do an NFS setattr RPC.
2270 */
2271 int
2272 nfs3_setattr_rpc(
2273 nfsnode_t np,
2274 struct vnode_attr *vap,
2275 vfs_context_t ctx)
2276 {
2277 struct nfsmount *nmp = NFSTONMP(np);
2278 int error = 0, lockerror = ENOENT, status, wccpostattr = 0, nfsvers;
2279 u_int64_t xid, nextxid;
2280 struct nfsm_chain nmreq, nmrep;
2281
2282 if (nfs_mount_gone(nmp)) {
2283 return ENXIO;
2284 }
2285 nfsvers = nmp->nm_vers;
2286
2287 VATTR_SET_SUPPORTED(vap, va_mode);
2288 VATTR_SET_SUPPORTED(vap, va_uid);
2289 VATTR_SET_SUPPORTED(vap, va_gid);
2290 VATTR_SET_SUPPORTED(vap, va_data_size);
2291 VATTR_SET_SUPPORTED(vap, va_access_time);
2292 VATTR_SET_SUPPORTED(vap, va_modify_time);
2293
2294
2295 if (VATTR_IS_ACTIVE(vap, va_flags)
2296 ) {
2297 if (vap->va_flags) { /* we don't support setting flags */
2298 if (vap->va_active & ~VNODE_ATTR_va_flags) {
2299 return EINVAL; /* return EINVAL if other attributes also set */
2300 } else {
2301 return ENOTSUP; /* return ENOTSUP for chflags(2) */
2302 }
2303 }
2304 /* no flags set, so we'll just ignore it */
2305 if (!(vap->va_active & ~VNODE_ATTR_va_flags)) {
2306 return 0; /* no (other) attributes to set, so nothing to do */
2307 }
2308 }
2309
2310 nfsm_chain_null(&nmreq);
2311 nfsm_chain_null(&nmrep);
2312
2313 nfsm_chain_build_alloc_init(error, &nmreq,
2314 NFSX_FH(nfsvers) + NFSX_SATTR(nfsvers));
2315 nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
2316 if (nfsvers == NFS_VER3) {
2317 if (VATTR_IS_ACTIVE(vap, va_mode)) {
2318 nfsm_chain_add_32(error, &nmreq, TRUE);
2319 nfsm_chain_add_32(error, &nmreq, vap->va_mode);
2320 } else {
2321 nfsm_chain_add_32(error, &nmreq, FALSE);
2322 }
2323 if (VATTR_IS_ACTIVE(vap, va_uid)) {
2324 nfsm_chain_add_32(error, &nmreq, TRUE);
2325 nfsm_chain_add_32(error, &nmreq, vap->va_uid);
2326 } else {
2327 nfsm_chain_add_32(error, &nmreq, FALSE);
2328 }
2329 if (VATTR_IS_ACTIVE(vap, va_gid)) {
2330 nfsm_chain_add_32(error, &nmreq, TRUE);
2331 nfsm_chain_add_32(error, &nmreq, vap->va_gid);
2332 } else {
2333 nfsm_chain_add_32(error, &nmreq, FALSE);
2334 }
2335 if (VATTR_IS_ACTIVE(vap, va_data_size)) {
2336 nfsm_chain_add_32(error, &nmreq, TRUE);
2337 nfsm_chain_add_64(error, &nmreq, vap->va_data_size);
2338 } else {
2339 nfsm_chain_add_32(error, &nmreq, FALSE);
2340 }
2341 if (vap->va_vaflags & VA_UTIMES_NULL) {
2342 nfsm_chain_add_32(error, &nmreq, NFS_TIME_SET_TO_SERVER);
2343 nfsm_chain_add_32(error, &nmreq, NFS_TIME_SET_TO_SERVER);
2344 } else {
2345 if (VATTR_IS_ACTIVE(vap, va_access_time)) {
2346 nfsm_chain_add_32(error, &nmreq, NFS_TIME_SET_TO_CLIENT);
2347 nfsm_chain_add_32(error, &nmreq, vap->va_access_time.tv_sec);
2348 nfsm_chain_add_32(error, &nmreq, vap->va_access_time.tv_nsec);
2349 } else {
2350 nfsm_chain_add_32(error, &nmreq, NFS_TIME_DONT_CHANGE);
2351 }
2352 if (VATTR_IS_ACTIVE(vap, va_modify_time)) {
2353 nfsm_chain_add_32(error, &nmreq, NFS_TIME_SET_TO_CLIENT);
2354 nfsm_chain_add_32(error, &nmreq, vap->va_modify_time.tv_sec);
2355 nfsm_chain_add_32(error, &nmreq, vap->va_modify_time.tv_nsec);
2356 } else {
2357 nfsm_chain_add_32(error, &nmreq, NFS_TIME_DONT_CHANGE);
2358 }
2359 }
2360 nfsm_chain_add_32(error, &nmreq, FALSE);
2361 } else {
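/* V2 SETATTR: attribute fields set to -1 are left unchanged by the server */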
2362 nfsm_chain_add_32(error, &nmreq, VATTR_IS_ACTIVE(vap, va_mode) ?
2363 vtonfsv2_mode(vnode_vtype(NFSTOV(np)), vap->va_mode) : -1);
2364 nfsm_chain_add_32(error, &nmreq, VATTR_IS_ACTIVE(vap, va_uid) ?
2365 vap->va_uid : (uint32_t)-1);
2366 nfsm_chain_add_32(error, &nmreq, VATTR_IS_ACTIVE(vap, va_gid) ?
2367 vap->va_gid : (uint32_t)-1);
2368 nfsm_chain_add_32(error, &nmreq, VATTR_IS_ACTIVE(vap, va_data_size) ?
2369 vap->va_data_size : (uint32_t)-1);
2370 if (VATTR_IS_ACTIVE(vap, va_access_time)) {
2371 nfsm_chain_add_32(error, &nmreq, vap->va_access_time.tv_sec);
2372 nfsm_chain_add_32(error, &nmreq, (vap->va_access_time.tv_nsec != -1) ?
2373 ((uint32_t)vap->va_access_time.tv_nsec / 1000) : 0xffffffff);
2374 } else {
2375 nfsm_chain_add_32(error, &nmreq, -1);
2376 nfsm_chain_add_32(error, &nmreq, -1);
2377 }
2378 if (VATTR_IS_ACTIVE(vap, va_modify_time)) {
2379 nfsm_chain_add_32(error, &nmreq, vap->va_modify_time.tv_sec);
2380 nfsm_chain_add_32(error, &nmreq, (vap->va_modify_time.tv_nsec != -1) ?
2381 ((uint32_t)vap->va_modify_time.tv_nsec / 1000) : 0xffffffff);
2382 } else {
2383 nfsm_chain_add_32(error, &nmreq, -1);
2384 nfsm_chain_add_32(error, &nmreq, -1);
2385 }
2386 }
2387 nfsm_chain_build_done(error, &nmreq);
2388 nfsmout_if(error);
2389 error = nfs_request(np, NULL, &nmreq, NFSPROC_SETATTR, ctx, NULL, &nmrep, &xid, &status);
2390 if ((lockerror = nfs_node_lock(np))) {
2391 error = lockerror;
2392 }
2393 if (nfsvers == NFS_VER3) {
2394 struct timespec premtime = { .tv_sec = 0, .tv_nsec = 0 };
2395 nfsm_chain_get_wcc_data(error, &nmrep, np, &premtime, &wccpostattr, &xid);
2396 nfsmout_if(error);
2397 /* if file hadn't changed, update cached mtime */
2398 if (nfstimespeccmp(&np->n_mtime, &premtime, ==)) {
2399 NFS_CHANGED_UPDATE(nfsvers, np, &np->n_vattr);
2400 }
2401 /* if directory hadn't changed, update namecache mtime */
2402 if ((vnode_vtype(NFSTOV(np)) == VDIR) &&
2403 nfstimespeccmp(&np->n_ncmtime, &premtime, ==)) {
2404 NFS_CHANGED_UPDATE_NC(nfsvers, np, &np->n_vattr);
2405 }
2406 if (!wccpostattr) {
2407 NATTRINVALIDATE(np);
2408 }
2409 error = status;
2410 } else {
2411 if (!error) {
2412 error = status;
2413 }
2414 nfsm_chain_loadattr(error, &nmrep, np, nfsvers, &xid);
2415 }
2416 /*
2417 * We just changed the attributes and we want to make sure that we
2418 * see the latest attributes. Get the next XID. If it's not the
2419 * next XID after the SETATTR XID, then it's possible that another
2420 * RPC was in flight at the same time and it might put stale attributes
2421 * in the cache. In that case, we invalidate the attributes and set
2422 * the attribute cache XID to guarantee that newer attributes will
2423 * get loaded next.
2424 */
2425 nextxid = 0;
2426 nfs_get_xid(&nextxid);
2427 if (nextxid != (xid + 1)) {
2428 np->n_xid = nextxid;
2429 NATTRINVALIDATE(np);
2430 }
2431 nfsmout:
2432 if (!lockerror) {
2433 nfs_node_unlock(np);
2434 }
2435 nfsm_chain_cleanup(&nmreq);
2436 nfsm_chain_cleanup(&nmrep);
2437 return error;
2438 }
2439
2440 /*
2441 * NFS lookup call, one step at a time...
2442 * First look in the cache.
2443 * If not found, unlock the directory nfsnode and do the RPC.
2444 */
2445 int
2446 nfs_vnop_lookup(
2447 struct vnop_lookup_args /* {
2448 * struct vnodeop_desc *a_desc;
2449 * vnode_t a_dvp;
2450 * vnode_t *a_vpp;
2451 * struct componentname *a_cnp;
2452 * vfs_context_t a_context;
2453 * } */*ap)
2454 {
2455 vfs_context_t ctx = ap->a_context;
2456 struct componentname *cnp = ap->a_cnp;
2457 vnode_t dvp = ap->a_dvp;
2458 vnode_t *vpp = ap->a_vpp;
2459 int flags = cnp->cn_flags;
2460 vnode_t newvp;
2461 nfsnode_t dnp, np;
2462 struct nfsmount *nmp;
2463 mount_t mp;
2464 int nfsvers, error, busyerror = ENOENT, isdot, isdotdot, negnamecache;
2465 u_int64_t xid;
2466 struct nfs_vattr nvattr;
2467 int ngflags;
2468 struct vnop_access_args naa;
2469 fhandle_t fh;
2470 struct nfsreq rq, *req = &rq;
2471
2472 *vpp = NULLVP;
2473
2474 dnp = VTONFS(dvp);
2475 NVATTR_INIT(&nvattr);
2476
2477 mp = vnode_mount(dvp);
2478 nmp = VFSTONFS(mp);
2479 if (nfs_mount_gone(nmp)) {
2480 error = ENXIO;
2481 goto error_return;
2482 }
2483 nfsvers = nmp->nm_vers;
2484 negnamecache = !NMFLAG(nmp, NONEGNAMECACHE);
2485
2486 if ((error = busyerror = nfs_node_set_busy(dnp, vfs_context_thread(ctx)))) {
2487 goto error_return;
2488 }
2489 /* nfs_getattr() will check changed and purge caches */
2490 if ((error = nfs_getattr(dnp, NULL, ctx, NGA_CACHED))) {
2491 goto error_return;
2492 }
2493
2494 error = cache_lookup(dvp, vpp, cnp);
2495 switch (error) {
2496 case ENOENT:
2497 /* negative cache entry */
2498 goto error_return;
2499 case 0:
2500 /* cache miss */
2501 if ((nfsvers > NFS_VER2) && NMFLAG(nmp, RDIRPLUS)) {
2502 /* if rdirplus, try dir buf cache lookup */
2503 error = nfs_dir_buf_cache_lookup(dnp, &np, cnp, ctx, 0);
2504 if (!error && np) {
2505 /* dir buf cache hit */
2506 *vpp = NFSTOV(np);
2507 error = -1;
2508 }
2509 }
2510 if (error != -1) { /* cache miss */
2511 break;
2512 }
2513 /* FALLTHROUGH */
2514 case -1:
2515 /* cache hit, not really an error */
2516 OSAddAtomic64(1, &nfsstats.lookupcache_hits);
2517
2518 nfs_node_clear_busy(dnp);
2519 busyerror = ENOENT;
2520
2521 /* check for directory access */
2522 naa.a_desc = &vnop_access_desc;
2523 naa.a_vp = dvp;
2524 naa.a_action = KAUTH_VNODE_SEARCH;
2525 naa.a_context = ctx;
2526
2527 /* compute actual success/failure based on accessibility */
2528 error = nfs_vnop_access(&naa);
2529 /* FALLTHROUGH */
2530 default:
2531 /* unexpected error from cache_lookup */
2532 goto error_return;
2533 }
2534
2535 /* skip the lookup if we know who we are: "." or ".." */
2536 isdot = isdotdot = 0;
2537 if (cnp->cn_nameptr[0] == '.') {
2538 if (cnp->cn_namelen == 1) {
2539 isdot = 1;
2540 }
2541 if ((cnp->cn_namelen == 2) && (cnp->cn_nameptr[1] == '.')) {
2542 isdotdot = 1;
2543 }
2544 }
2545 if (isdotdot || isdot) {
2546 fh.fh_len = 0;
2547 goto found;
2548 }
2549 #if CONFIG_NFS4
2550 if ((nfsvers >= NFS_VER4) && (dnp->n_vattr.nva_flags & NFS_FFLAG_TRIGGER)) {
2551 /* we should never be looking things up in a trigger directory, return nothing */
2552 error = ENOENT;
2553 goto error_return;
2554 }
2555 #endif
2556
2557 /* do we know this name is too long? */
2558 nmp = VTONMP(dvp);
2559 if (nfs_mount_gone(nmp)) {
2560 error = ENXIO;
2561 goto error_return;
2562 }
2563 if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_MAXNAME) &&
2564 (cnp->cn_namelen > (int)nmp->nm_fsattr.nfsa_maxname)) {
2565 error = ENAMETOOLONG;
2566 goto error_return;
2567 }
2568
2569 error = 0;
2570 newvp = NULLVP;
2571
2572 OSAddAtomic64(1, &nfsstats.lookupcache_misses);
2573
2574 error = nmp->nm_funcs->nf_lookup_rpc_async(dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx, &req);
2575 nfsmout_if(error);
2576 error = nmp->nm_funcs->nf_lookup_rpc_async_finish(dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx, req, &xid, &fh, &nvattr);
2577 nfsmout_if(error);
2578
2579 /* is the file handle the same as this directory's file handle? */
2580 isdot = NFS_CMPFH(dnp, fh.fh_data, fh.fh_len);
2581
2582 found:
2583 if (flags & ISLASTCN) {
2584 switch (cnp->cn_nameiop) {
2585 case DELETE:
2586 cnp->cn_flags &= ~MAKEENTRY;
2587 break;
2588 case RENAME:
2589 cnp->cn_flags &= ~MAKEENTRY;
2590 if (isdot) {
2591 error = EISDIR;
2592 goto error_return;
2593 }
2594 break;
2595 }
2596 }
2597
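/* ".." returns the parent vnode; "." returns this directory itself; otherwise get/create an nfsnode for the file handle we looked up */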
2598 if (isdotdot) {
2599 newvp = vnode_getparent(dvp);
2600 if (!newvp) {
2601 error = ENOENT;
2602 goto error_return;
2603 }
2604 } else if (isdot) {
2605 error = vnode_get(dvp);
2606 if (error) {
2607 goto error_return;
2608 }
2609 newvp = dvp;
2610 nfs_node_lock_force(dnp);
2611 if (fh.fh_len && (dnp->n_xid <= xid)) {
2612 nfs_loadattrcache(dnp, &nvattr, &xid, 0);
2613 }
2614 nfs_node_unlock(dnp);
2615 } else {
2616 ngflags = (cnp->cn_flags & MAKEENTRY) ? NG_MAKEENTRY : 0;
2617 error = nfs_nget(mp, dnp, cnp, fh.fh_data, fh.fh_len, &nvattr, &xid, rq.r_auth, ngflags, &np);
2618 if (error) {
2619 goto error_return;
2620 }
2621 newvp = NFSTOV(np);
2622 nfs_node_unlock(np);
2623 }
2624 *vpp = newvp;
2625
2626 nfsmout:
2627 if (error) {
2628 if (((cnp->cn_nameiop == CREATE) || (cnp->cn_nameiop == RENAME)) &&
2629 (flags & ISLASTCN) && (error == ENOENT)) {
2630 if (vnode_mount(dvp) && vnode_vfsisrdonly(dvp)) {
2631 error = EROFS;
2632 } else {
2633 error = EJUSTRETURN;
2634 }
2635 }
2636 }
2637 if ((error == ENOENT) && (cnp->cn_flags & MAKEENTRY) &&
2638 (cnp->cn_nameiop != CREATE) && negnamecache) {
2639 /* add a negative entry in the name cache */
2640 nfs_node_lock_force(dnp);
2641 cache_enter(dvp, NULL, cnp);
2642 dnp->n_flag |= NNEGNCENTRIES;
2643 nfs_node_unlock(dnp);
2644 }
2645 error_return:
2646 NVATTR_CLEANUP(&nvattr);
2647 if (!busyerror) {
2648 nfs_node_clear_busy(dnp);
2649 }
2650 if (error && *vpp) {
2651 vnode_put(*vpp);
2652 *vpp = NULLVP;
2653 }
2654 return error;
2655 }
2656
2657 int nfs_readlink_nocache = DEFAULT_READLINK_NOCACHE;
2658
2659 /*
2660 * NFS readlink call
2661 */
2662 int
2663 nfs_vnop_readlink(
2664 struct vnop_readlink_args /* {
2665 * struct vnodeop_desc *a_desc;
2666 * vnode_t a_vp;
2667 * struct uio *a_uio;
2668 * vfs_context_t a_context;
2669 * } */*ap)
2670 {
2671 vfs_context_t ctx = ap->a_context;
2672 nfsnode_t np = VTONFS(ap->a_vp);
2673 struct nfsmount *nmp;
2674 int error = 0, nfsvers;
2675 uint32_t buflen;
2676 uio_t uio = ap->a_uio;
2677 struct nfsbuf *bp = NULL;
2678 struct timespec ts;
2679 int timeo;
2680
2681 if (vnode_vtype(ap->a_vp) != VLNK) {
2682 return EPERM;
2683 }
2684
2685 if (uio_resid(uio) == 0) {
2686 return 0;
2687 }
2688 if (uio_offset(uio) < 0) {
2689 return EINVAL;
2690 }
2691
2692 nmp = VTONMP(ap->a_vp);
2693 if (nfs_mount_gone(nmp)) {
2694 return ENXIO;
2695 }
2696 nfsvers = nmp->nm_vers;
2697
2698
2699 /* nfs_getattr() will check changed and purge caches */
2700 if ((error = nfs_getattr(np, NULL, ctx, nfs_readlink_nocache ? NGA_UNCACHED : NGA_CACHED))) {
2701 FSDBG(531, np, 0xd1e0001, 0, error);
2702 return error;
2703 }
2704
2705 if (nfs_readlink_nocache) {
2706 timeo = nfs_attrcachetimeout(np);
2707 nanouptime(&ts);
2708 }
2709
2710 retry:
2711 OSAddAtomic64(1, &nfsstats.biocache_readlinks);
2712 error = nfs_buf_get(np, 0, NFS_MAXPATHLEN, vfs_context_thread(ctx), NBLK_META, &bp);
2713 if (error) {
2714 FSDBG(531, np, 0xd1e0002, 0, error);
2715 return error;
2716 }
2717
2718 if (nfs_readlink_nocache) {
2719 NFS_VNOP_DBG("timeo = %d ts.tv_sec = %ld need refresh = %d cached = %d\n", timeo, ts.tv_sec,
2720 (np->n_rltim.tv_sec + timeo) < ts.tv_sec || nfs_readlink_nocache > 1,
2721 ISSET(bp->nb_flags, NB_CACHE) == NB_CACHE);
2722 /* n_rltim is synchronized by the associated nfs buf */
2723 if (ISSET(bp->nb_flags, NB_CACHE) && ((nfs_readlink_nocache > 1) || ((np->n_rltim.tv_sec + timeo) < ts.tv_sec))) {
2724 SET(bp->nb_flags, NB_INVAL);
2725 nfs_buf_release(bp, 0);
2726 goto retry;
2727 }
2728 }
2729 if (!ISSET(bp->nb_flags, NB_CACHE)) {
2730 readagain:
2731 OSAddAtomic64(1, &nfsstats.readlink_bios);
2732 buflen = bp->nb_bufsize;
2733 error = nmp->nm_funcs->nf_readlink_rpc(np, bp->nb_data, &buflen, ctx);
2734 if (error) {
2735 if (error == ESTALE) {
2736 NFS_VNOP_DBG("Stale FH from readlink rpc\n");
2737 error = nfs_refresh_fh(np, ctx);
2738 if (error == 0) {
2739 goto readagain;
2740 }
2741 }
2742 SET(bp->nb_flags, NB_ERROR);
2743 bp->nb_error = error;
2744 NFS_VNOP_DBG("readlink failed %d\n", error);
2745 } else {
2746 bp->nb_validoff = 0;
2747 bp->nb_validend = buflen;
2748 np->n_rltim = ts;
2749 NFS_VNOP_DBG("readlink of %.*s\n", bp->nb_validend, (char *)bp->nb_data);
2750 }
2751 } else {
2752 NFS_VNOP_DBG("got cached link of %.*s\n", bp->nb_validend, (char *)bp->nb_data);
2753 }
2754
2755 if (!error && (bp->nb_validend > 0)) {
2756 error = uiomove(bp->nb_data, bp->nb_validend, uio);
2757 }
2758 FSDBG(531, np, bp->nb_validend, 0, error);
2759 nfs_buf_release(bp, 1);
2760 return error;
2761 }
2762
2763 /*
2764 * Do a readlink RPC.
2765 */
2766 int
2767 nfs3_readlink_rpc(nfsnode_t np, char *buf, uint32_t *buflenp, vfs_context_t ctx)
2768 {
2769 struct nfsmount *nmp;
2770 int error = 0, lockerror = ENOENT, nfsvers, status;
2771 uint32_t len;
2772 u_int64_t xid;
2773 struct nfsm_chain nmreq, nmrep;
2774
2775 nmp = NFSTONMP(np);
2776 if (nfs_mount_gone(nmp)) {
2777 return ENXIO;
2778 }
2779 nfsvers = nmp->nm_vers;
2780 nfsm_chain_null(&nmreq);
2781 nfsm_chain_null(&nmrep);
2782
2783 nfsm_chain_build_alloc_init(error, &nmreq, NFSX_FH(nfsvers));
2784 nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
2785 nfsm_chain_build_done(error, &nmreq);
2786 nfsmout_if(error);
2787 error = nfs_request(np, NULL, &nmreq, NFSPROC_READLINK, ctx, NULL, &nmrep, &xid, &status);
2788 if ((lockerror = nfs_node_lock(np))) {
2789 error = lockerror;
2790 }
2791 if (nfsvers == NFS_VER3) {
2792 nfsm_chain_postop_attr_update(error, &nmrep, np, &xid);
2793 }
2794 if (!error) {
2795 error = status;
2796 }
2797 nfsm_chain_get_32(error, &nmrep, len);
2798 nfsmout_if(error);
2799 if ((nfsvers == NFS_VER2) && (len > *buflenp)) {
2800 error = EBADRPC;
2801 goto nfsmout;
2802 }
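/* Clamp the link length so the result fits in the caller's buffer. */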
2803 if (len >= *buflenp) {
2804 if (np->n_size && (np->n_size < *buflenp)) {
2805 len = np->n_size;
2806 } else {
2807 len = *buflenp - 1;
2808 }
2809 }
2810 nfsm_chain_get_opaque(error, &nmrep, len, buf);
2811 if (!error) {
2812 *buflenp = len;
2813 }
2814 nfsmout:
2815 if (!lockerror) {
2816 nfs_node_unlock(np);
2817 }
2818 nfsm_chain_cleanup(&nmreq);
2819 nfsm_chain_cleanup(&nmrep);
2820 return error;
2821 }
2822
2823 /*
2824 * NFS read RPC call
2825 * Ditto above
2826 */
2827 int
2828 nfs_read_rpc(nfsnode_t np, uio_t uio, vfs_context_t ctx)
2829 {
2830 struct nfsmount *nmp;
2831 int error = 0, nfsvers, eof = 0;
2832 size_t nmrsize, len, retlen;
2833 user_ssize_t tsiz;
2834 off_t txoffset;
2835 struct nfsreq rq, *req = &rq;
2836 #if CONFIG_NFS4
2837 uint32_t stategenid = 0, restart = 0;
2838 #endif
2839 FSDBG_TOP(536, np, uio_offset(uio), uio_resid(uio), 0);
2840 nmp = NFSTONMP(np);
2841 if (nfs_mount_gone(nmp)) {
2842 return ENXIO;
2843 }
2844 nfsvers = nmp->nm_vers;
2845 nmrsize = nmp->nm_rsize;
2846
2847 txoffset = uio_offset(uio);
2848 tsiz = uio_resid(uio);
2849 if ((nfsvers == NFS_VER2) && ((uint64_t)(txoffset + tsiz) > 0xffffffffULL)) {
2850 FSDBG_BOT(536, np, uio_offset(uio), uio_resid(uio), EFBIG);
2851 return EFBIG;
2852 }
2853
2854 while (tsiz > 0) {
2855 len = retlen = (tsiz > (user_ssize_t)nmrsize) ? nmrsize : (size_t)tsiz;
2856 FSDBG(536, np, txoffset, len, 0);
2857 if (np->n_flag & NREVOKE) {
2858 error = EIO;
2859 break;
2860 }
2861 #if CONFIG_NFS4
2862 if (nmp->nm_vers >= NFS_VER4) {
2863 stategenid = nmp->nm_stategenid;
2864 }
2865 #endif
2866 error = nmp->nm_funcs->nf_read_rpc_async(np, txoffset, len,
2867 vfs_context_thread(ctx), vfs_context_ucred(ctx), NULL, &req);
2868 if (!error) {
2869 error = nmp->nm_funcs->nf_read_rpc_async_finish(np, req, uio, &retlen, &eof);
2870 }
2871 #if CONFIG_NFS4
2872 if ((nmp->nm_vers >= NFS_VER4) && nfs_mount_state_error_should_restart(error) &&
2873 (++restart <= nfs_mount_state_max_restarts(nmp))) { /* guard against no progress */
2874 lck_mtx_lock(&nmp->nm_lock);
2875 if ((error != NFSERR_GRACE) && (stategenid == nmp->nm_stategenid)) {
2876 NP(np, "nfs_read_rpc: error %d, initiating recovery", error);
2877 nfs_need_recover(nmp, error);
2878 }
2879 lck_mtx_unlock(&nmp->nm_lock);
2880 if (np->n_flag & NREVOKE) {
2881 error = EIO;
2882 } else {
2883 if (error == NFSERR_GRACE) {
2884 tsleep(&nmp->nm_state, (PZERO - 1), "nfsgrace", 2 * hz);
2885 }
2886 if (!(error = nfs_mount_state_wait_for_recovery(nmp))) {
2887 continue;
2888 }
2889 }
2890 }
2891 #endif
2892 if (error) {
2893 break;
2894 }
2895 txoffset += retlen;
2896 tsiz -= retlen;
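/* V3/V4 signal EOF explicitly (or via a zero-length read); for V2, a short read implies EOF */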
2897 if (nfsvers != NFS_VER2) {
2898 if (eof || (retlen == 0)) {
2899 tsiz = 0;
2900 }
2901 } else if (retlen < len) {
2902 tsiz = 0;
2903 }
2904 }
2905
2906 FSDBG_BOT(536, np, eof, uio_resid(uio), error);
2907 return error;
2908 }
2909
2910 int
2911 nfs3_read_rpc_async(
2912 nfsnode_t np,
2913 off_t offset,
2914 size_t len,
2915 thread_t thd,
2916 kauth_cred_t cred,
2917 struct nfsreq_cbinfo *cb,
2918 struct nfsreq **reqp)
2919 {
2920 struct nfsmount *nmp;
2921 int error = 0, nfsvers;
2922 struct nfsm_chain nmreq;
2923
2924 nmp = NFSTONMP(np);
2925 if (nfs_mount_gone(nmp)) {
2926 return ENXIO;
2927 }
2928 nfsvers = nmp->nm_vers;
2929
2930 nfsm_chain_null(&nmreq);
2931 nfsm_chain_build_alloc_init(error, &nmreq, NFSX_FH(nfsvers) + 3 * NFSX_UNSIGNED);
2932 nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
2933 if (nfsvers == NFS_VER3) {
2934 nfsm_chain_add_64(error, &nmreq, offset);
2935 nfsm_chain_add_32(error, &nmreq, len);
2936 } else {
2937 nfsm_chain_add_32(error, &nmreq, offset);
2938 nfsm_chain_add_32(error, &nmreq, len);
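/* V2 READ args carry a third, unused "totalcount" field */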
2939 nfsm_chain_add_32(error, &nmreq, 0);
2940 }
2941 nfsm_chain_build_done(error, &nmreq);
2942 nfsmout_if(error);
2943 error = nfs_request_async(np, NULL, &nmreq, NFSPROC_READ, thd, cred, NULL, 0, cb, reqp);
2944 nfsmout:
2945 nfsm_chain_cleanup(&nmreq);
2946 return error;
2947 }
2948
2949 int
2950 nfs3_read_rpc_async_finish(
2951 nfsnode_t np,
2952 struct nfsreq *req,
2953 uio_t uio,
2954 size_t *lenp,
2955 int *eofp)
2956 {
2957 int error = 0, lockerror, nfsvers, status, eof = 0;
2958 size_t retlen = 0;
2959 uint64_t xid;
2960 struct nfsmount *nmp;
2961 struct nfsm_chain nmrep;
2962
2963 nmp = NFSTONMP(np);
2964 if (nfs_mount_gone(nmp)) {
2965 nfs_request_async_cancel(req);
2966 return ENXIO;
2967 }
2968 nfsvers = nmp->nm_vers;
2969
2970 nfsm_chain_null(&nmrep);
2971
2972 error = nfs_request_async_finish(req, &nmrep, &xid, &status);
2973 if (error == EINPROGRESS) { /* async request restarted */
2974 return error;
2975 }
2976
2977 if ((lockerror = nfs_node_lock(np))) {
2978 error = lockerror;
2979 }
2980 if (nfsvers == NFS_VER3) {
2981 nfsm_chain_postop_attr_update(error, &nmrep, np, &xid);
2982 }
2983 if (!error) {
2984 error = status;
2985 }
2986 if (nfsvers == NFS_VER3) {
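/* V3 READ reply: skip the count word; the opaque data length read below gives the bytes returned */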
2987 nfsm_chain_adv(error, &nmrep, NFSX_UNSIGNED);
2988 nfsm_chain_get_32(error, &nmrep, eof);
2989 } else {
2990 nfsm_chain_loadattr(error, &nmrep, np, nfsvers, &xid);
2991 }
2992 if (!lockerror) {
2993 nfs_node_unlock(np);
2994 }
2995 nfsm_chain_get_32(error, &nmrep, retlen);
2996 if ((nfsvers == NFS_VER2) && (retlen > *lenp)) {
2997 error = EBADRPC;
2998 }
2999 nfsmout_if(error);
3000 error = nfsm_chain_get_uio(&nmrep, MIN(retlen, *lenp), uio);
3001 if (eofp) {
3002 if (nfsvers == NFS_VER3) {
3003 if (!eof && !retlen) {
3004 eof = 1;
3005 }
3006 } else if (retlen < *lenp) {
3007 eof = 1;
3008 }
3009 *eofp = eof;
3010 }
3011 *lenp = MIN(retlen, *lenp);
3012 nfsmout:
3013 nfsm_chain_cleanup(&nmrep);
3014 return error;
3015 }
3016
3017 /*
3018 * NFS write call
3019 */
3020 int
3021 nfs_vnop_write(
3022 struct vnop_write_args /* {
3023 * struct vnodeop_desc *a_desc;
3024 * vnode_t a_vp;
3025 * struct uio *a_uio;
3026 * int a_ioflag;
3027 * vfs_context_t a_context;
3028 * } */*ap)
3029 {
3030 vfs_context_t ctx = ap->a_context;
3031 uio_t uio = ap->a_uio;
3032 vnode_t vp = ap->a_vp;
3033 nfsnode_t np = VTONFS(vp);
3034 int ioflag = ap->a_ioflag;
3035 struct nfsbuf *bp;
3036 struct nfsmount *nmp = VTONMP(vp);
3037 daddr64_t lbn;
3038 int biosize;
3039 int n, on, error = 0;
3040 off_t boff, start, end;
3041 uio_t auio;
3042 char auio_buf[UIO_SIZEOF(1)];
3043 thread_t thd;
3044 kauth_cred_t cred;
3045
3046 FSDBG_TOP(515, np, uio_offset(uio), uio_resid(uio), ioflag);
3047
3048 if (vnode_vtype(vp) != VREG) {
3049 FSDBG_BOT(515, np, uio_offset(uio), uio_resid(uio), EIO);
3050 return EIO;
3051 }
3052
3053 thd = vfs_context_thread(ctx);
3054 cred = vfs_context_ucred(ctx);
3055
3056 nfs_data_lock(np, NFS_DATA_LOCK_SHARED);
3057
3058 if ((error = nfs_node_lock(np))) {
3059 nfs_data_unlock(np);
3060 FSDBG_BOT(515, np, uio_offset(uio), uio_resid(uio), error);
3061 return error;
3062 }
3063 np->n_wrbusy++;
3064
3065 if (np->n_flag & NWRITEERR) {
3066 error = np->n_error;
3067 np->n_flag &= ~NWRITEERR;
3068 }
3069 if (np->n_flag & NNEEDINVALIDATE) {
3070 np->n_flag &= ~NNEEDINVALIDATE;
3071 nfs_node_unlock(np);
3072 nfs_data_unlock(np);
3073 nfs_vinvalbuf(vp, V_SAVE | V_IGNORE_WRITEERR, ctx, 1);
3074 nfs_data_lock(np, NFS_DATA_LOCK_SHARED);
3075 } else {
3076 nfs_node_unlock(np);
3077 }
3078 if (error) {
3079 goto out;
3080 }
3081
3082 biosize = nmp->nm_biosize;
3083
3084 if (ioflag & (IO_APPEND | IO_SYNC)) {
3085 nfs_node_lock_force(np);
3086 if (np->n_flag & NMODIFIED) {
3087 NATTRINVALIDATE(np);
3088 nfs_node_unlock(np);
3089 nfs_data_unlock(np);
3090 error = nfs_vinvalbuf(vp, V_SAVE, ctx, 1);
3091 nfs_data_lock(np, NFS_DATA_LOCK_SHARED);
3092 if (error) {
3093 FSDBG(515, np, uio_offset(uio), 0x10bad01, error);
3094 goto out;
3095 }
3096 } else {
3097 nfs_node_unlock(np);
3098 }
3099 if (ioflag & IO_APPEND) {
3100 nfs_data_unlock(np);
3101 /* nfs_getattr() will check changed and purge caches */
3102 error = nfs_getattr(np, NULL, ctx, NGA_UNCACHED);
3103 /* we'll be extending the file, so take the data lock exclusive */
3104 nfs_data_lock(np, NFS_DATA_LOCK_EXCLUSIVE);
3105 if (error) {
3106 FSDBG(515, np, uio_offset(uio), 0x10bad02, error);
3107 goto out;
3108 }
3109 uio_setoffset(uio, np->n_size);
3110 }
3111 }
3112 if (uio_offset(uio) < 0) {
3113 error = EINVAL;
3114 FSDBG_BOT(515, np, uio_offset(uio), 0xbad0ff, error);
3115 goto out;
3116 }
3117 if (uio_resid(uio) == 0) {
3118 goto out;
3119 }
3120
3121 if (((uio_offset(uio) + uio_resid(uio)) > (off_t)np->n_size) && !(ioflag & IO_APPEND)) {
3122 /*
3123 * It looks like we'll be extending the file, so take the data lock exclusive.
3124 */
3125 nfs_data_unlock(np);
3126 nfs_data_lock(np, NFS_DATA_LOCK_EXCLUSIVE);
3127
3128 /*
3129 * Also, if the write begins after the previous EOF buffer, make sure to zero
3130 * and validate the new bytes in that buffer.
3131 */
3132 struct nfsbuf *eofbp = NULL;
3133 daddr64_t eofbn = np->n_size / biosize;
3134 int eofoff = np->n_size % biosize;
3135 lbn = uio_offset(uio) / biosize;
3136
3137 if (eofoff && (eofbn < lbn)) {
3138 if ((error = nfs_buf_get(np, eofbn, biosize, thd, NBLK_WRITE | NBLK_ONLYVALID, &eofbp))) {
3139 goto out;
3140 }
3141 np->n_size += (biosize - eofoff);
3142 nfs_node_lock_force(np);
3143 CLR(np->n_flag, NUPDATESIZE);
3144 np->n_flag |= NMODIFIED;
3145 nfs_node_unlock(np);
3146 FSDBG(516, np, np->n_size, np->n_vattr.nva_size, 0xf00d0001);
3147 ubc_setsize(vp, (off_t)np->n_size); /* XXX errors */
3148 if (eofbp) {
3149 /*
3150 * For the old last page, don't zero bytes if there
3151 * are invalid bytes in that page (i.e. the page isn't
3152 * currently valid).
3153 * For pages after the old last page, zero them and
3154 * mark them as valid.
3155 */
3156 char *d;
3157 int i;
3158 if (ioflag & IO_NOCACHE) {
3159 SET(eofbp->nb_flags, NB_NOCACHE);
3160 }
3161 NFS_BUF_MAP(eofbp);
3162 FSDBG(516, eofbp, eofoff, biosize - eofoff, 0xe0fff01e);
3163 d = eofbp->nb_data;
3164 i = eofoff / PAGE_SIZE;
3165 while (eofoff < biosize) {
3166 int poff = eofoff & PAGE_MASK;
3167 if (!poff || NBPGVALID(eofbp, i)) {
3168 bzero(d + eofoff, PAGE_SIZE - poff);
3169 NBPGVALID_SET(eofbp, i);
3170 }
3171 eofoff += PAGE_SIZE - poff;
3172 i++;
3173 }
3174 nfs_buf_release(eofbp, 1);
3175 }
3176 }
3177 }
3178
3179 do {
3180 OSAddAtomic64(1, &nfsstats.biocache_writes);
3181 lbn = uio_offset(uio) / biosize;
3182 on = uio_offset(uio) % biosize;
3183 n = biosize - on;
3184 if (uio_resid(uio) < n) {
3185 n = uio_resid(uio);
3186 }
3187 again:
3188 /*
3189 * Get a cache block for writing. The range to be written is
3190 * (off..off+n) within the block. We ensure that the block
3191 * either has no dirty region or that the given range is
3192 * contiguous with the existing dirty region.
3193 */
3194 error = nfs_buf_get(np, lbn, biosize, thd, NBLK_WRITE, &bp);
3195 if (error) {
3196 goto out;
3197 }
3198 /* map the block because we know we're going to write to it */
3199 NFS_BUF_MAP(bp);
3200
3201 if (ioflag & IO_NOCACHE) {
3202 SET(bp->nb_flags, NB_NOCACHE);
3203 }
3204
3205 if (!IS_VALID_CRED(bp->nb_wcred)) {
3206 kauth_cred_ref(cred);
3207 bp->nb_wcred = cred;
3208 }
3209
3210 /*
3211 * If there's already a dirty range AND dirty pages in this block we
3212 * need to send a commit AND write the dirty pages before continuing.
3213 *
3214 * If there's already a dirty range OR dirty pages in this block
3215 * and the new write range is not contiguous with the existing range,
3216 * then force the buffer to be written out now.
3217 * (We used to just extend the dirty range to cover the valid,
3218 * but unwritten, data in between also. But writing ranges
3219 * of data that weren't actually written by an application
3220 * risks overwriting some other client's data with stale data
3221 * that's just masquerading as new written data.)
3222 */
3223 if (bp->nb_dirtyend > 0) {
3224 if (on > bp->nb_dirtyend || (on + n) < bp->nb_dirtyoff || bp->nb_dirty) {
3225 FSDBG(515, np, uio_offset(uio), bp, 0xd15c001);
3226 /* write/commit buffer "synchronously" */
3227 /* (NB_STABLE indicates that data writes should be FILESYNC) */
3228 CLR(bp->nb_flags, (NB_DONE | NB_ERROR | NB_INVAL));
3229 SET(bp->nb_flags, (NB_ASYNC | NB_STABLE));
3230 error = nfs_buf_write(bp);
3231 if (error) {
3232 goto out;
3233 }
3234 goto again;
3235 }
3236 } else if (bp->nb_dirty) {
3237 int firstpg, lastpg;
3238 u_int32_t pagemask;
3239 /* calculate write range pagemask */
3240 firstpg = on / PAGE_SIZE;
3241 lastpg = (on + n - 1) / PAGE_SIZE;
3242 pagemask = ((1 << (lastpg + 1)) - 1) & ~((1 << firstpg) - 1);
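/* e.g. firstpg 1, lastpg 2: ((1 << 3) - 1) & ~((1 << 1) - 1) = 0b111 & ~0b001 = 0b110 */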
3243 /* check if there are dirty pages outside the write range */
3244 if (bp->nb_dirty & ~pagemask) {
3245 FSDBG(515, np, uio_offset(uio), bp, 0xd15c002);
3246 /* write/commit buffer "synchronously" */
3247 /* (NB_STABLE indicates that data writes should be FILESYNC) */
3248 CLR(bp->nb_flags, (NB_DONE | NB_ERROR | NB_INVAL));
3249 SET(bp->nb_flags, (NB_ASYNC | NB_STABLE));
3250 error = nfs_buf_write(bp);
3251 if (error) {
3252 goto out;
3253 }
3254 goto again;
3255 }
3256 /* if the first or last pages are already dirty */
3257 /* make sure that the dirty range encompasses those pages */
3258 if (NBPGDIRTY(bp, firstpg) || NBPGDIRTY(bp, lastpg)) {
3259 FSDBG(515, np, uio_offset(uio), bp, 0xd15c003);
3260 bp->nb_dirtyoff = min(on, firstpg * PAGE_SIZE);
3261 if (NBPGDIRTY(bp, lastpg)) {
3262 bp->nb_dirtyend = (lastpg + 1) * PAGE_SIZE;
3263 /* clip to EOF */
3264 if (NBOFF(bp) + bp->nb_dirtyend > (off_t)np->n_size) {
3265 bp->nb_dirtyend = np->n_size - NBOFF(bp);
3266 if (bp->nb_dirtyoff >= bp->nb_dirtyend) {
3267 bp->nb_dirtyoff = bp->nb_dirtyend = 0;
3268 }
3269 }
3270 } else {
3271 bp->nb_dirtyend = on + n;
3272 }
3273 }
3274 }
3275
3276 /*
3277 * Are we extending the size of the file with this write?
3278 * If so, update file size now that we have the block.
3279 * If there was a partial buf at the old eof, validate
3280 * and zero the new bytes.
3281 */
3282 if ((uio_offset(uio) + n) > (off_t)np->n_size) {
3283 daddr64_t eofbn = np->n_size / biosize;
3284 int neweofoff = (uio_offset(uio) + n) % biosize;
3285
3286 FSDBG(515, 0xb1ffa000, uio_offset(uio) + n, eofoff, neweofoff);
3287
3288 /* if we're extending within the same last block */
3289 /* and the block is flagged as being cached... */
3290 if ((lbn == eofbn) && ISSET(bp->nb_flags, NB_CACHE)) {
3291 /* ...check that all pages in buffer are valid */
3292 int endpg = ((neweofoff ? neweofoff : biosize) - 1) / PAGE_SIZE;
3293 u_int32_t pagemask;
3294 /* pagemask only has to extend to last page being written to */
3295 pagemask = (1 << (endpg + 1)) - 1;
3296 FSDBG(515, 0xb1ffa001, bp->nb_valid, pagemask, 0);
3297 if ((bp->nb_valid & pagemask) != pagemask) {
3298 /* zerofill any hole */
3299 if (on > bp->nb_validend) {
3300 int i;
3301 for (i = bp->nb_validend / PAGE_SIZE; i <= (on - 1) / PAGE_SIZE; i++) {
3302 NBPGVALID_SET(bp, i);
3303 }
3304 NFS_BUF_MAP(bp);
3305 FSDBG(516, bp, bp->nb_validend, on - bp->nb_validend, 0xf01e);
3306 bzero((char *)bp->nb_data + bp->nb_validend,
3307 on - bp->nb_validend);
3308 }
3309 /* zerofill any trailing data in the last page */
3310 if (neweofoff) {
3311 NFS_BUF_MAP(bp);
3312 FSDBG(516, bp, neweofoff, PAGE_SIZE - (neweofoff & PAGE_MASK), 0xe0f);
3313 bzero((char *)bp->nb_data + neweofoff,
3314 PAGE_SIZE - (neweofoff & PAGE_MASK));
3315 }
3316 }
3317 }
3318 np->n_size = uio_offset(uio) + n;
3319 nfs_node_lock_force(np);
3320 CLR(np->n_flag, NUPDATESIZE);
3321 np->n_flag |= NMODIFIED;
3322 nfs_node_unlock(np);
3323 FSDBG(516, np, np->n_size, np->n_vattr.nva_size, 0xf00d0001);
3324 ubc_setsize(vp, (off_t)np->n_size); /* XXX errors */
3325 }
3326 /*
3327 * If dirtyend exceeds file size, chop it down. This should
3328 * not occur unless there is a race.
3329 */
3330 if (NBOFF(bp) + bp->nb_dirtyend > (off_t)np->n_size) {
3331 bp->nb_dirtyend = np->n_size - NBOFF(bp);
3332 if (bp->nb_dirtyoff >= bp->nb_dirtyend) {
3333 bp->nb_dirtyoff = bp->nb_dirtyend = 0;
3334 }
3335 }
3336 /*
3337 * UBC doesn't handle partial pages, so we need to make sure
3338 * that any pages left in the page cache are completely valid.
3339 *
3340 * Writes that are smaller than a block are delayed if they
3341 * don't extend to the end of the block.
3342 *
3343 * If the block isn't (completely) cached, we may need to read
3344 * in some parts of pages that aren't covered by the write.
3345 * If the write offset (on) isn't page aligned, we'll need to
3346 * read the start of the first page being written to. Likewise,
3347 * if the offset of the end of the write (on+n) isn't page aligned,
3348 * we'll need to read the end of the last page being written to.
3349 *
3350 * Notes:
3351 * We don't want to read anything we're just going to write over.
3352 * We don't want to read anything we're just going to drop when the
3353 * I/O is complete (i.e. don't do reads for NOCACHE requests).
3354 * We don't want to issue multiple I/Os if we don't have to
3355 * (because they're synchronous rpcs).
3356 * We don't want to read anything we already have modified in the
3357 * page cache.
3358 */
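/*
 * Illustrative walkthrough (annotation, not from the original source),
 * assuming PAGE_SIZE is 4096: for a write at on = 5000 with n = 2000,
 *	firstpg = 1, firstpgoff = 904	(5000 = 4096 + 904)
 *	lastpg = 1, lastpgoff = 2904	(on + n = 7000 = 4096 + 2904)
 * If page 1 isn't valid yet, start/end become 4096/8192 and a single
 * read RPC fills that span; the middle bytes (5000..6999) are simply
 * overwritten by the uiomove() further below.
 */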
3359 if (!ISSET(bp->nb_flags, NB_CACHE) && (n < biosize)) {
3360 int firstpg, lastpg, dirtypg;
3361 int firstpgoff, lastpgoff;
3362 start = end = -1;
3363 firstpg = on / PAGE_SIZE;
3364 firstpgoff = on & PAGE_MASK;
3365 lastpg = (on + n - 1) / PAGE_SIZE;
3366 lastpgoff = (on + n) & PAGE_MASK;
3367 if (firstpgoff && !NBPGVALID(bp, firstpg)) {
3368 /* need to read start of first page */
3369 start = firstpg * PAGE_SIZE;
3370 end = start + firstpgoff;
3371 }
3372 if (lastpgoff && !NBPGVALID(bp, lastpg)) {
3373 /* need to read end of last page */
3374 if (start < 0) {
3375 start = (lastpg * PAGE_SIZE) + lastpgoff;
3376 }
3377 end = (lastpg + 1) * PAGE_SIZE;
3378 }
3379 if (ISSET(bp->nb_flags, NB_NOCACHE)) {
3380 /*
3381 * For nocache writes, if there is any partial page at the
3382 * start or end of the write range, then we do the write
3383 * synchronously to make sure that we can drop the data
3384 * from the cache as soon as the WRITE finishes. Normally,
3385 * we would do an unstable write and not drop the data until
3386 * it was committed. But doing that here would risk allowing
3387 * invalid data to be read from the cache between the WRITE
3388 * and the COMMIT.
3389 * (NB_STABLE indicates that data writes should be FILESYNC)
3390 */
3391 if (end > start) {
3392 SET(bp->nb_flags, NB_STABLE);
3393 }
3394 goto skipread;
3395 }
3396 if (end > start) {
3397 /* need to read the data in range: start...end-1 */
3398
3399 /* first, check for dirty pages in between */
3400 /* if there are, we'll have to do two reads because */
3401 /* we don't want to overwrite the dirty pages. */
3402 for (dirtypg = start / PAGE_SIZE; dirtypg <= (end - 1) / PAGE_SIZE; dirtypg++) {
3403 if (NBPGDIRTY(bp, dirtypg)) {
3404 break;
3405 }
3406 }
3407
3408 /* if start is at beginning of page, try */
3409 /* to get any preceding pages as well. */
3410 if (!(start & PAGE_MASK)) {
3411 /* stop at next dirty/valid page or start of block */
3412 for (; start > 0; start -= PAGE_SIZE) {
3413 if (NBPGVALID(bp, ((start - 1) / PAGE_SIZE))) {
3414 break;
3415 }
3416 }
3417 }
3418
3419 NFS_BUF_MAP(bp);
3420 /* setup uio for read(s) */
3421 boff = NBOFF(bp);
3422 auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ,
3423 &auio_buf, sizeof(auio_buf));
3424
3425 if (dirtypg <= (end - 1) / PAGE_SIZE) {
3426 /* there's a dirty page in the way, so just do two reads */
3427 /* we'll read the preceding data here */
3428 uio_reset(auio, boff + start, UIO_SYSSPACE, UIO_READ);
3429 uio_addiov(auio, CAST_USER_ADDR_T(bp->nb_data + start), on - start);
3430 error = nfs_read_rpc(np, auio, ctx);
3431 if (error) {
3432 /* couldn't read the data, so treat buffer as synchronous NOCACHE */
3433 SET(bp->nb_flags, (NB_NOCACHE | NB_STABLE));
3434 goto skipread;
3435 }
3436 if (uio_resid(auio) > 0) {
3437 FSDBG(516, bp, (caddr_t)uio_curriovbase(auio) - bp->nb_data, uio_resid(auio), 0xd00dee01);
3438 bzero(CAST_DOWN(caddr_t, uio_curriovbase(auio)), uio_resid(auio));
3439 }
3440 if (!error) {
3441 /* update validoff/validend if necessary */
3442 if ((bp->nb_validoff < 0) || (bp->nb_validoff > start)) {
3443 bp->nb_validoff = start;
3444 }
3445 if ((bp->nb_validend < 0) || (bp->nb_validend < on)) {
3446 bp->nb_validend = on;
3447 }
3448 if ((off_t)np->n_size > boff + bp->nb_validend) {
3449 bp->nb_validend = min(np->n_size - (boff + start), biosize);
3450 }
3451 /* validate any pages before the write offset */
3452 for (; start < on / PAGE_SIZE; start += PAGE_SIZE) {
3453 NBPGVALID_SET(bp, start / PAGE_SIZE);
3454 }
3455 }
3456 /* adjust start to read any trailing data */
3457 start = on + n;
3458 }
3459
3460 /* if end is at end of page, try to */
3461 /* get any following pages as well. */
3462 if (!(end & PAGE_MASK)) {
3463 /* stop at next valid page or end of block */
3464 for (; end < biosize; end += PAGE_SIZE) {
3465 if (NBPGVALID(bp, end / PAGE_SIZE)) {
3466 break;
3467 }
3468 }
3469 }
3470
3471 if (((boff + start) >= (off_t)np->n_size) ||
3472 ((start >= on) && ((boff + on + n) >= (off_t)np->n_size))) {
3473 /*
3474 * Either this entire read is beyond the current EOF
3475 * or the range that we won't be modifying (on+n...end)
3476 * is all beyond the current EOF.
3477 * No need to make a trip across the network to
3478 * read nothing. So, just zero the buffer instead.
3479 */
3480 FSDBG(516, bp, start, end - start, 0xd00dee00);
3481 bzero(bp->nb_data + start, end - start);
3482 error = 0;
3483 } else {
3484 /* now we'll read the (rest of the) data */
3485 uio_reset(auio, boff + start, UIO_SYSSPACE, UIO_READ);
3486 uio_addiov(auio, CAST_USER_ADDR_T(bp->nb_data + start), end - start);
3487 error = nfs_read_rpc(np, auio, ctx);
3488 if (error) {
3489 /* couldn't read the data, so treat buffer as synchronous NOCACHE */
3490 SET(bp->nb_flags, (NB_NOCACHE | NB_STABLE));
3491 goto skipread;
3492 }
3493 if (uio_resid(auio) > 0) {
3494 FSDBG(516, bp, (caddr_t)uio_curriovbase(auio) - bp->nb_data, uio_resid(auio), 0xd00dee02);
3495 bzero(CAST_DOWN(caddr_t, uio_curriovbase(auio)), uio_resid(auio));
3496 }
3497 }
3498 if (!error) {
3499 /* update validoff/validend if necessary */
3500 if ((bp->nb_validoff < 0) || (bp->nb_validoff > start)) {
3501 bp->nb_validoff = start;
3502 }
3503 if ((bp->nb_validend < 0) || (bp->nb_validend < end)) {
3504 bp->nb_validend = end;
3505 }
3506 if ((off_t)np->n_size > boff + bp->nb_validend) {
3507 bp->nb_validend = min(np->n_size - (boff + start), biosize);
3508 }
3509 /* validate any pages before the write offset's page */
3510 for (; start < (off_t)trunc_page_32(on); start += PAGE_SIZE) {
3511 NBPGVALID_SET(bp, start / PAGE_SIZE);
3512 }
3513 /* validate any pages after the range of pages being written to */
3514 for (; (end - 1) > (off_t)round_page_32(on + n - 1); end -= PAGE_SIZE) {
3515 NBPGVALID_SET(bp, (end - 1) / PAGE_SIZE);
3516 }
3517 }
3518 /* Note: pages being written to will be validated when written */
3519 }
3520 }
3521 skipread:
3522
3523 if (ISSET(bp->nb_flags, NB_ERROR)) {
3524 error = bp->nb_error;
3525 nfs_buf_release(bp, 1);
3526 goto out;
3527 }
3528
3529 nfs_node_lock_force(np);
3530 np->n_flag |= NMODIFIED;
3531 nfs_node_unlock(np);
3532
3533 NFS_BUF_MAP(bp);
3534 error = uiomove((char *)bp->nb_data + on, n, uio);
3535 if (error) {
3536 SET(bp->nb_flags, NB_ERROR);
3537 nfs_buf_release(bp, 1);
3538 goto out;
3539 }
3540
3541 /* validate any pages written to */
3542 start = on & ~PAGE_MASK;
3543 for (; start < on + n; start += PAGE_SIZE) {
3544 NBPGVALID_SET(bp, start / PAGE_SIZE);
3545 /*
3546 * This may seem a little weird, but we don't actually set the
3547 * dirty bits for writes. This is because we keep the dirty range
3548 * in the nb_dirtyoff/nb_dirtyend fields. Also, particularly for
3549 * delayed writes, when we give the pages back to the VM we don't
3550 * want to keep them marked dirty, because when we later write the
3551 * buffer we won't be able to tell which pages were written dirty
3552 * and which pages were mmapped and dirtied.
3553 */
3554 }
3555 if (bp->nb_dirtyend > 0) {
3556 bp->nb_dirtyoff = min(on, bp->nb_dirtyoff);
3557 bp->nb_dirtyend = max((on + n), bp->nb_dirtyend);
3558 } else {
3559 bp->nb_dirtyoff = on;
3560 bp->nb_dirtyend = on + n;
3561 }
3562 if (bp->nb_validend <= 0 || bp->nb_validend < bp->nb_dirtyoff ||
3563 bp->nb_validoff > bp->nb_dirtyend) {
3564 bp->nb_validoff = bp->nb_dirtyoff;
3565 bp->nb_validend = bp->nb_dirtyend;
3566 } else {
3567 bp->nb_validoff = min(bp->nb_validoff, bp->nb_dirtyoff);
3568 bp->nb_validend = max(bp->nb_validend, bp->nb_dirtyend);
3569 }
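/*
 * Annotation (not from the original source): example of the range
 * merging above. If a previous delayed write left nb_dirtyoff = 0,
 * nb_dirtyend = 512 and this write has on = 1024, n = 512, the dirty
 * range widens to min(1024, 0)..max(1536, 512) = 0..1536. The gap
 * bytes (512..1023) will be written out with the rest of the span,
 * which is exactly why the valid range is stretched to at least cover
 * the dirty range: a flush must never push bytes that were never made
 * valid.
 */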
3570 if (!ISSET(bp->nb_flags, NB_CACHE)) {
3571 nfs_buf_normalize_valid_range(np, bp);
3572 }
3573
3574 /*
3575 * Since this block is being modified, it must be written
3576 * again and not just committed.
3577 */
3578 if (ISSET(bp->nb_flags, NB_NEEDCOMMIT)) {
3579 nfs_node_lock_force(np);
3580 if (ISSET(bp->nb_flags, NB_NEEDCOMMIT)) {
3581 np->n_needcommitcnt--;
3582 CHECK_NEEDCOMMITCNT(np);
3583 }
3584 CLR(bp->nb_flags, NB_NEEDCOMMIT);
3585 nfs_node_unlock(np);
3586 }
3587
3588 if (ioflag & IO_SYNC) {
3589 error = nfs_buf_write(bp);
3590 if (error) {
3591 goto out;
3592 }
3593 } else if (((n + on) == biosize) || (ioflag & IO_APPEND) ||
3594 (ioflag & IO_NOCACHE) || ISSET(bp->nb_flags, NB_NOCACHE)) {
3595 SET(bp->nb_flags, NB_ASYNC);
3596 error = nfs_buf_write(bp);
3597 if (error) {
3598 goto out;
3599 }
3600 } else {
3601 /* If the block wasn't already delayed: charge for the write */
3602 if (!ISSET(bp->nb_flags, NB_DELWRI)) {
3603 proc_t p = vfs_context_proc(ctx);
3604 if (p && p->p_stats) {
3605 OSIncrementAtomicLong(&p->p_stats->p_ru.ru_oublock);
3606 }
3607 }
3608 nfs_buf_write_delayed(bp);
3609 }
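/*
 * Annotation (not from the original source): the chain above picks one
 * of three strategies. IO_SYNC waits for the WRITE to finish; a fully
 * written block, an append, or a nocache write is issued immediately
 * but asynchronously; anything else (a partial, cacheable block) is
 * marked delayed-write (NB_DELWRI) so a later flush can coalesce
 * adjacent small writes into fewer RPCs.
 */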
3610
3611
3612 if (np->n_needcommitcnt >= NFS_A_LOT_OF_NEEDCOMMITS) {
3613 nfs_flushcommits(np, 1);
3614 }
3615 } while (uio_resid(uio) > 0 && n > 0);
3616
3617 out:
3618 nfs_node_lock_force(np);
3619 np->n_wrbusy--;
3620 nfs_node_unlock(np);
3621 nfs_data_unlock(np);
3622 FSDBG_BOT(515, np, uio_offset(uio), uio_resid(uio), error);
3623 return error;
3624 }
3625
3626
3627 /*
3628 * NFS write call
3629 */
3630 int
3631 nfs_write_rpc(
3632 nfsnode_t np,
3633 uio_t uio,
3634 vfs_context_t ctx,
3635 int *iomodep,
3636 uint64_t *wverfp)
3637 {
3638 return nfs_write_rpc2(np, uio, vfs_context_thread(ctx), vfs_context_ucred(ctx), iomodep, wverfp);
3639 }
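/*
 * Hypothetical caller sketch (annotation, not from the original source;
 * shown only to illustrate the in/out parameters):
 *
 *	int iomode = NFS_WRITE_UNSTABLE;	// weakest acceptable level
 *	uint64_t wverf = 0;
 *	error = nfs_write_rpc(np, uio, ctx, &iomode, &wverf);
 *	// on return, iomode holds the weakest stability level the server
 *	// actually granted; if still UNSTABLE, the caller must COMMIT
 *	// later and compare wverf against subsequent verifiers.
 */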
3640
3641 int
3642 nfs_write_rpc2(
3643 nfsnode_t np,
3644 uio_t uio,
3645 thread_t thd,
3646 kauth_cred_t cred,
3647 int *iomodep,
3648 uint64_t *wverfp)
3649 {
3650 struct nfsmount *nmp;
3651 int error = 0, nfsvers;
3652 int wverfset, commit, committed;
3653 uint64_t wverf = 0, wverf2;
3654 size_t nmwsize, totalsize, tsiz, len, rlen;
3655 struct nfsreq rq, *req = &rq;
3656 #if CONFIG_NFS4
3657 uint32_t stategenid = 0, restart = 0;
3658 #endif
3659 uint32_t vrestart = 0;
3660 uio_t uio_save = NULL;
3661
3662 #if DIAGNOSTIC
3663 /* XXX limitation based on need to back up uio on short write */
3664 if (uio_iovcnt(uio) != 1) {
3665 panic("nfs_write_rpc2: iovcnt > 1");
3666 }
3667 #endif
3668 FSDBG_TOP(537, np, uio_offset(uio), uio_resid(uio), *iomodep);
3669 nmp = NFSTONMP(np);
3670 if (nfs_mount_gone(nmp)) {
3671 return ENXIO;
3672 }
3673 nfsvers = nmp->nm_vers;
3674 nmwsize = nmp->nm_wsize;
3675
3676 wverfset = 0;
3677 committed = NFS_WRITE_FILESYNC;
3678
3679 totalsize = tsiz = uio_resid(uio);
3680 if ((nfsvers == NFS_VER2) && ((uint64_t)(uio_offset(uio) + tsiz) > 0xffffffffULL)) {
3681 FSDBG_BOT(537, np, uio_offset(uio), uio_resid(uio), EFBIG);
3682 return EFBIG;
3683 }
3684
3685 uio_save = uio_duplicate(uio);
3686 if (uio_save == NULL) {
3687 return EIO;
3688 }
3689
3690 while (tsiz > 0) {
3691 len = (tsiz > nmwsize) ? nmwsize : tsiz;
3692 FSDBG(537, np, uio_offset(uio), len, 0);
3693 if (np->n_flag & NREVOKE) {
3694 error = EIO;
3695 break;
3696 }
3697 #if CONFIG_NFS4
3698 if (nmp->nm_vers >= NFS_VER4) {
3699 stategenid = nmp->nm_stategenid;
3700 }
3701 #endif
3702 error = nmp->nm_funcs->nf_write_rpc_async(np, uio, len, thd, cred, *iomodep, NULL, &req);
3703 if (!error) {
3704 error = nmp->nm_funcs->nf_write_rpc_async_finish(np, req, &commit, &rlen, &wverf2);
3705 }
3706 nmp = NFSTONMP(np);
3707 if (nfs_mount_gone(nmp)) {
3708 error = ENXIO;
3709 }
3710 #if CONFIG_NFS4
3711 if (nmp && (nmp->nm_vers >= NFS_VER4) && nfs_mount_state_error_should_restart(error) &&
3712 (++restart <= nfs_mount_state_max_restarts(nmp))) { /* guard against no progress */
3713 lck_mtx_lock(&nmp->nm_lock);
3714 if ((error != NFSERR_GRACE) && (stategenid == nmp->nm_stategenid)) {
3715 NP(np, "nfs_write_rpc: error %d, initiating recovery", error);
3716 nfs_need_recover(nmp, error);
3717 }
3718 lck_mtx_unlock(&nmp->nm_lock);
3719 if (np->n_flag & NREVOKE) {
3720 error = EIO;
3721 } else {
3722 if (error == NFSERR_GRACE) {
3723 tsleep(&nmp->nm_state, (PZERO - 1), "nfsgrace", 2 * hz);
3724 }
3725 if (!(error = nfs_mount_state_wait_for_recovery(nmp))) {
3726 continue;
3727 }
3728 }
3729 }
3730 #endif
3731 if (error) {
3732 break;
3733 }
3734 if (nfsvers == NFS_VER2) {
3735 tsiz -= len;
3736 continue;
3737 }
3738
3739 /* check for a short write */
3740 if (rlen < len) {
3741 /* Reset the uio to reflect the actual transfer */
3742 *uio = *uio_save;
3743 uio_update(uio, totalsize - (tsiz - rlen));
3744 len = rlen;
3745 }
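/*
 * Annotation (not from the original source): short-write example for
 * the rewind above. If len = 32768 but the server only accepted
 * rlen = 16384, the uio is restored from uio_save and advanced by
 * totalsize - (tsiz - rlen), i.e. to just past the bytes the server
 * actually took, and len is trimmed so the accounting below only
 * subtracts what was really written.
 */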
3746
3747 /* return lowest commit level returned */
3748 if (commit < committed) {
3749 committed = commit;
3750 }
3751
3752 tsiz -= len;
3753
3754 /* check write verifier */
3755 if (!wverfset) {
3756 wverf = wverf2;
3757 wverfset = 1;
3758 } else if (wverf != wverf2) {
3759 /* verifier changed, so we need to restart all the writes */
3760 if (++vrestart > 100) {
3761 /* give up after too many restarts */
3762 error = EIO;
3763 break;
3764 }
3765 *uio = *uio_save; // Reset the uio back to the start
3766 committed = NFS_WRITE_FILESYNC;
3767 wverfset = 0;
3768 tsiz = totalsize;
3769 }
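/*
 * Annotation (not from the original source): the verifier check above
 * protects UNSTABLE writes against a server reboot. If the server
 * restarts mid-transfer, the verifier in the next reply changes, so
 * the entire range is re-sent from the saved uio; after 100 such
 * restarts we give up with EIO rather than loop forever.
 */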
3770 }
3771 if (uio_save) {
3772 uio_free(uio_save);
3773 }
3774 if (wverfset && wverfp) {
3775 *wverfp = wverf;
3776 }
3777 *iomodep = committed;
3778 if (error) {
3779 uio_setresid(uio, tsiz);
3780 }
3781 FSDBG_BOT(537, np, committed, uio_resid(uio), error);
3782 return error;
3783 }
3784
3785 int
3786 nfs3_write_rpc_async(
3787 nfsnode_t np,
3788 uio_t uio,
3789 size_t len,
3790 thread_t thd,
3791 kauth_cred_t cred,
3792 int iomode,
3793 struct nfsreq_cbinfo *cb,
3794 struct nfsreq **reqp)
3795 {
3796 struct nfsmount *nmp;
3797 mount_t mp;
3798 int error = 0, nfsvers;
3799 struct nfsm_chain nmreq;
3800
3801 nmp = NFSTONMP(np);
3802 if (nfs_mount_gone(nmp)) {
3803 return ENXIO;
3804 }
3805 nfsvers = nmp->nm_vers;
3806
3807 /* for async mounts, don't bother sending sync write requests */
3808 if ((iomode != NFS_WRITE_UNSTABLE) && nfs_allow_async &&
3809 ((mp = NFSTOMP(np))) && (vfs_flags(mp) & MNT_ASYNC)) {
3810 iomode = NFS_WRITE_UNSTABLE;
3811 }
3812
3813 nfsm_chain_null(&nmreq);
3814 nfsm_chain_build_alloc_init(error, &nmreq,
3815 NFSX_FH(nfsvers) + 5 * NFSX_UNSIGNED + nfsm_rndup(len));
3816 nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
3817 if (nfsvers == NFS_VER3) {
3818 nfsm_chain_add_64(error, &nmreq, uio_offset(uio));
3819 nfsm_chain_add_32(error, &nmreq, len);
3820 nfsm_chain_add_32(error, &nmreq, iomode);
3821 } else {
3822 nfsm_chain_add_32(error, &nmreq, 0);
3823 nfsm_chain_add_32(error, &nmreq, uio_offset(uio));
3824 nfsm_chain_add_32(error, &nmreq, 0);
3825 }
3826 nfsm_chain_add_32(error, &nmreq, len);
3827 nfsmout_if(error);
3828 error = nfsm_chain_add_uio(&nmreq, uio, len);
3829 nfsm_chain_build_done(error, &nmreq);
3830 nfsmout_if(error);
3831 error = nfs_request_async(np, NULL, &nmreq, NFSPROC_WRITE, thd, cred, NULL, 0, cb, reqp);
3832 nfsmout:
3833 nfsm_chain_cleanup(&nmreq);
3834 return error;
3835 }
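/*
 * Annotation (not from the original source): the v3 branch above emits
 * the WRITE3args layout: file handle, 64-bit offset, count, stable_how
 * (UNSTABLE/DATA_SYNC/FILE_SYNC), then the opaque data length and the
 * data itself. The v2 branch sends the legacy beginoffset, offset,
 * totalcount fields instead, where beginoffset and totalcount are
 * ignored by servers and are sent as zero here.
 */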
3836
3837 int
3838 nfs3_write_rpc_async_finish(
3839 nfsnode_t np,
3840 struct nfsreq *req,
3841 int *iomodep,
3842 size_t *rlenp,
3843 uint64_t *wverfp)
3844 {
3845 struct nfsmount *nmp;
3846 int error = 0, lockerror = ENOENT, nfsvers, status;
3847 int updatemtime = 0, wccpostattr = 0, rlen, committed = NFS_WRITE_FILESYNC;
3848 u_int64_t xid, wverf;
3849 mount_t mp;
3850 struct nfsm_chain nmrep;
3851
3852 nmp = NFSTONMP(np);
3853 if (nfs_mount_gone(nmp)) {
3854 nfs_request_async_cancel(req);
3855 return ENXIO;
3856 }
3857 nfsvers = nmp->nm_vers;
3858
3859 nfsm_chain_null(&nmrep);
3860
3861 error = nfs_request_async_finish(req, &nmrep, &xid, &status);
3862 if (error == EINPROGRESS) { /* async request restarted */
3863 return error;
3864 }
3865 nmp = NFSTONMP(np);
3866 if (nfs_mount_gone(nmp)) {
3867 error = ENXIO;
3868 }
3869 if (!error && (lockerror = nfs_node_lock(np))) {
3870 error = lockerror;
3871 }
3872 if (nfsvers == NFS_VER3) {
3873 struct timespec premtime = { .tv_sec = 0, .tv_nsec = 0 };
3874 nfsm_chain_get_wcc_data(error, &nmrep, np, &premtime, &wccpostattr, &xid);
3875 if (nfstimespeccmp(&np->n_mtime, &premtime, ==)) {
3876 updatemtime = 1;
3877 }
3878 if (!error) {
3879 error = status;
3880 }
3881 nfsm_chain_get_32(error, &nmrep, rlen);
3882 nfsmout_if(error);
3883 *rlenp = rlen;
3884 if (rlen <= 0) {
3885 error = NFSERR_IO;
3886 }
3887 nfsm_chain_get_32(error, &nmrep, committed);
3888 nfsm_chain_get_64(error, &nmrep, wverf);
3889 nfsmout_if(error);
3890 if (wverfp) {
3891 *wverfp = wverf;
3892 }
3893 lck_mtx_lock(&nmp->nm_lock);
3894 if (!(nmp->nm_state & NFSSTA_HASWRITEVERF)) {
3895 nmp->nm_verf = wverf;
3896 nmp->nm_state |= NFSSTA_HASWRITEVERF;
3897 } else if (nmp->nm_verf != wverf) {
3898 nmp->nm_verf = wverf;
3899 }
3900 lck_mtx_unlock(&nmp->nm_lock);
3901 } else {
3902 if (!error) {
3903 error = status;
3904 }
3905 nfsm_chain_loadattr(error, &nmrep, np, nfsvers, &xid);
3906 nfsmout_if(error);
3907 }
3908 if (updatemtime) {
3909 NFS_CHANGED_UPDATE(nfsvers, np, &np->n_vattr);
3910 }
3911 nfsmout:
3912 if (!lockerror) {
3913 nfs_node_unlock(np);
3914 }
3915 nfsm_chain_cleanup(&nmrep);
3916 if ((committed != NFS_WRITE_FILESYNC) && nfs_allow_async &&
3917 ((mp = NFSTOMP(np))) && (vfs_flags(mp) & MNT_ASYNC)) {
3918 committed = NFS_WRITE_FILESYNC;
3919 }
3920 *iomodep = committed;
3921 return error;
3922 }
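/*
 * Annotation (not from the original source): committed reports the
 * stability the server granted, with FILESYNC(2) > DATASYNC(1) >
 * UNSTABLE(0); nfs_write_rpc2() keeps the minimum across a multi-RPC
 * write. The MNT_ASYNC override above deliberately reports FILESYNC
 * so async mounts skip the COMMIT step, trading safety across a
 * server crash for fewer round trips.
 */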
3923
3924 /*
3925 * NFS mknod vnode op
3926 *
3927 * For NFS v2 this is a kludge. Use a create RPC but with the IFMT bits of the
3928 * mode set to specify the file type and the size field for rdev.
3929 */
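/*
 * Annotation (not from the original source): e.g. a character device
 * with major 14, minor 3 would, under v2, be created with S_IFCHR in
 * the sattr mode and makedev(14, 3) in the size field; only v3 has a
 * real MKNOD procedure that carries the major/minor pair explicitly
 * (see the NFS_VER3 branch below).
 */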
3930 int
3931 nfs3_vnop_mknod(
3932 struct vnop_mknod_args /* {
3933 * struct vnodeop_desc *a_desc;
3934 * vnode_t a_dvp;
3935 * vnode_t *a_vpp;
3936 * struct componentname *a_cnp;
3937 * struct vnode_attr *a_vap;
3938 * vfs_context_t a_context;
3939 * } */*ap)
3940 {
3941 vnode_t dvp = ap->a_dvp;
3942 vnode_t *vpp = ap->a_vpp;
3943 struct componentname *cnp = ap->a_cnp;
3944 struct vnode_attr *vap = ap->a_vap;
3945 vfs_context_t ctx = ap->a_context;
3946 vnode_t newvp = NULL;
3947 nfsnode_t np = NULL;
3948 struct nfsmount *nmp;
3949 nfsnode_t dnp = VTONFS(dvp);
3950 struct nfs_vattr nvattr;
3951 fhandle_t fh;
3952 int error = 0, lockerror = ENOENT, busyerror = ENOENT, status, wccpostattr = 0;
3953 struct timespec premtime = { .tv_sec = 0, .tv_nsec = 0 };
3954 u_int32_t rdev;
3955 u_int64_t xid = 0, dxid;
3956 int nfsvers, gotuid, gotgid;
3957 struct nfsm_chain nmreq, nmrep;
3958 struct nfsreq rq, *req = &rq;
3959
3960 nmp = VTONMP(dvp);
3961 if (nfs_mount_gone(nmp)) {
3962 return ENXIO;
3963 }
3964 nfsvers = nmp->nm_vers;
3965
3966 if (!VATTR_IS_ACTIVE(vap, va_type)) {
3967 return EINVAL;
3968 }
3969 if (vap->va_type == VCHR || vap->va_type == VBLK) {
3970 if (!VATTR_IS_ACTIVE(vap, va_rdev)) {
3971 return EINVAL;
3972 }
3973 rdev = vap->va_rdev;
3974 } else if (vap->va_type == VFIFO || vap->va_type == VSOCK) {
3975 rdev = 0xffffffff;
3976 } else {
3977 return ENOTSUP;
3978 }
3979 if ((nfsvers == NFS_VER2) && (cnp->cn_namelen > NFS_MAXNAMLEN)) {
3980 return ENAMETOOLONG;
3981 }
3982
3983 nfs_avoid_needless_id_setting_on_create(dnp, vap, ctx);
3984
3985 VATTR_SET_SUPPORTED(vap, va_mode);
3986 VATTR_SET_SUPPORTED(vap, va_uid);
3987 VATTR_SET_SUPPORTED(vap, va_gid);
3988 VATTR_SET_SUPPORTED(vap, va_data_size);
3989 VATTR_SET_SUPPORTED(vap, va_access_time);
3990 VATTR_SET_SUPPORTED(vap, va_modify_time);
3991 gotuid = VATTR_IS_ACTIVE(vap, va_uid);
3992 gotgid = VATTR_IS_ACTIVE(vap, va_gid);
3993
3994 nfsm_chain_null(&nmreq);
3995 nfsm_chain_null(&nmrep);
3996
3997 nfsm_chain_build_alloc_init(error, &nmreq,
3998 NFSX_FH(nfsvers) + 4 * NFSX_UNSIGNED +
3999 nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(nfsvers));
4000 nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
4001 nfsm_chain_add_name(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen, nmp);
4002 if (nfsvers == NFS_VER3) {
4003 nfsm_chain_add_32(error, &nmreq, vtonfs_type(vap->va_type, nfsvers));
4004 nfsm_chain_add_v3sattr(nmp, error, &nmreq, vap);
4005 if (vap->va_type == VCHR || vap->va_type == VBLK) {
4006 nfsm_chain_add_32(error, &nmreq, major(vap->va_rdev));
4007 nfsm_chain_add_32(error, &nmreq, minor(vap->va_rdev));
4008 }
4009 } else {
4010 nfsm_chain_add_v2sattr(error, &nmreq, vap, rdev);
4011 }
4012 nfsm_chain_build_done(error, &nmreq);
4013 if (!error) {
4014 error = busyerror = nfs_node_set_busy(dnp, vfs_context_thread(ctx));
4015 }
4016 nfsmout_if(error);
4017
4018 error = nfs_request_async(dnp, NULL, &nmreq, NFSPROC_MKNOD,
4019 vfs_context_thread(ctx), vfs_context_ucred(ctx), NULL, 0, NULL, &req);
4020 if (!error) {
4021 error = nfs_request_async_finish(req, &nmrep, &xid, &status);
4022 }
4023
4024 if ((lockerror = nfs_node_lock(dnp))) {
4025 error = lockerror;
4026 }
4027 /* XXX no EEXIST kludge here? */
4028 dxid = xid;
4029 if (!error && !status) {
4030 if (dnp->n_flag & NNEGNCENTRIES) {
4031 dnp->n_flag &= ~NNEGNCENTRIES;
4032 cache_purge_negatives(dvp);
4033 }
4034 error = nfsm_chain_get_fh_attr(nmp, &nmrep, dnp, ctx, nfsvers, &xid, &fh, &nvattr);
4035 }
4036 if (nfsvers == NFS_VER3) {
4037 nfsm_chain_get_wcc_data(error, &nmrep, dnp, &premtime, &wccpostattr, &dxid);
4038 }
4039 if (!error) {
4040 error = status;
4041 }
4042 nfsmout:
4043 nfsm_chain_cleanup(&nmreq);
4044 nfsm_chain_cleanup(&nmrep);
4045
4046 if (!lockerror) {
4047 dnp->n_flag |= NMODIFIED;
4048 /* if directory hadn't changed, update namecache mtime */
4049 if (nfstimespeccmp(&dnp->n_ncmtime, &premtime, ==)) {
4050 NFS_CHANGED_UPDATE_NC(nfsvers, dnp, &dnp->n_vattr);
4051 }
4052 nfs_node_unlock(dnp);
4053 /* nfs_getattr() will check changed and purge caches */
4054 nfs_getattr(dnp, NULL, ctx, wccpostattr ? NGA_CACHED : NGA_UNCACHED);
4055 }
4056
4057 if (!error && fh.fh_len) {
4058 error = nfs_nget(NFSTOMP(dnp), dnp, cnp, fh.fh_data, fh.fh_len, &nvattr, &xid, rq.r_auth, NG_MAKEENTRY, &np);
4059 }
4060 if (!error && !np) {
4061 error = nfs_lookitup(dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx, &np);
4062 }
4063 if (!error && np) {
4064 newvp = NFSTOV(np);
4065 }
4066 if (!busyerror) {
4067 nfs_node_clear_busy(dnp);
4068 }
4069
4070 if (!error && (gotuid || gotgid) &&
4071 (!newvp || nfs_getattrcache(np, &nvattr, 0) ||
4072 (gotuid && (nvattr.nva_uid != vap->va_uid)) ||
4073 (gotgid && (nvattr.nva_gid != vap->va_gid)))) {
4074 /* clear ID bits if server didn't use them (or we can't tell) */
4075 VATTR_CLEAR_SUPPORTED(vap, va_uid);
4076 VATTR_CLEAR_SUPPORTED(vap, va_gid);
4077 }
4078 if (error) {
4079 if (newvp) {
4080 nfs_node_unlock(np);
4081 vnode_put(newvp);
4082 }
4083 } else {
4084 *vpp = newvp;
4085 nfs_node_unlock(np);
4086 }
4087 return error;
4088 }
4089
4090 static uint32_t create_verf;
4091 /*
4092 * NFS file create call
4093 */
4094 int
4095 nfs3_vnop_create(
4096 struct vnop_create_args /* {
4097 * struct vnodeop_desc *a_desc;
4098 * vnode_t a_dvp;
4099 * vnode_t *a_vpp;
4100 * struct componentname *a_cnp;
4101 * struct vnode_attr *a_vap;
4102 * vfs_context_t a_context;
4103 * } */*ap)
4104 {
4105 vfs_context_t ctx = ap->a_context;
4106 vnode_t dvp = ap->a_dvp;
4107 struct vnode_attr *vap = ap->a_vap;
4108 struct componentname *cnp = ap->a_cnp;
4109 struct nfs_vattr nvattr;
4110 fhandle_t fh;
4111 nfsnode_t np = NULL;
4112 struct nfsmount *nmp;
4113 nfsnode_t dnp = VTONFS(dvp);
4114 vnode_t newvp = NULL;
4115 int error = 0, lockerror = ENOENT, busyerror = ENOENT, status, wccpostattr = 0, fmode = 0;
4116 struct timespec premtime = { .tv_sec = 0, .tv_nsec = 0 };
4117 int nfsvers, gotuid, gotgid;
4118 u_int64_t xid, dxid;
4119 uint32_t val;
4120 struct nfsm_chain nmreq, nmrep;
4121 struct nfsreq rq, *req = &rq;
4122 struct nfs_dulookup dul;
4123 int dul_in_progress = 0;
4124 int namedattrs;
4125
4126 nmp = VTONMP(dvp);
4127 if (nfs_mount_gone(nmp)) {
4128 return ENXIO;
4129 }
4130 nfsvers = nmp->nm_vers;
4131 namedattrs = (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR);
4132
4133 if ((nfsvers == NFS_VER2) && (cnp->cn_namelen > NFS_MAXNAMLEN)) {
4134 return ENAMETOOLONG;
4135 }
4136
4137 nfs_avoid_needless_id_setting_on_create(dnp, vap, ctx);
4138
4139 VATTR_SET_SUPPORTED(vap, va_mode);
4140 VATTR_SET_SUPPORTED(vap, va_uid);
4141 VATTR_SET_SUPPORTED(vap, va_gid);
4142 VATTR_SET_SUPPORTED(vap, va_data_size);
4143 VATTR_SET_SUPPORTED(vap, va_access_time);
4144 VATTR_SET_SUPPORTED(vap, va_modify_time);
4145 gotuid = VATTR_IS_ACTIVE(vap, va_uid);
4146 gotgid = VATTR_IS_ACTIVE(vap, va_gid);
4147
4148 if (vap->va_vaflags & VA_EXCLUSIVE) {
4150 fmode |= O_EXCL;
4151 if (!VATTR_IS_ACTIVE(vap, va_access_time) || !VATTR_IS_ACTIVE(vap, va_modify_time)) {
4152 vap->va_vaflags |= VA_UTIMES_NULL;
4153 }
4154 }
4155
4156 again:
4157 error = busyerror = nfs_node_set_busy(dnp, vfs_context_thread(ctx));
4158 if (!namedattrs) {
4159 nfs_dulookup_init(&dul, dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx);
4160 }
4161
4162 nfsm_chain_null(&nmreq);
4163 nfsm_chain_null(&nmrep);
4164
4165 nfsm_chain_build_alloc_init(error, &nmreq,
4166 NFSX_FH(nfsvers) + 2 * NFSX_UNSIGNED +
4167 nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(nfsvers));
4168 nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
4169 nfsm_chain_add_name(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen, nmp);
4170 if (nfsvers == NFS_VER3) {
4171 if (fmode & O_EXCL) {
4172 nfsm_chain_add_32(error, &nmreq, NFS_CREATE_EXCLUSIVE);
4173 lck_rw_lock_shared(in_ifaddr_rwlock);
4174 if (!TAILQ_EMPTY(&in_ifaddrhead)) {
4175 val = IA_SIN(in_ifaddrhead.tqh_first)->sin_addr.s_addr;
4176 } else {
4177 val = create_verf;
4178 }
4179 lck_rw_done(in_ifaddr_rwlock);
4180 nfsm_chain_add_32(error, &nmreq, val);
4181 ++create_verf;
4182 nfsm_chain_add_32(error, &nmreq, create_verf);
4183 } else {
4184 nfsm_chain_add_32(error, &nmreq, NFS_CREATE_UNCHECKED);
4185 nfsm_chain_add_v3sattr(nmp, error, &nmreq, vap);
4186 }
4187 } else {
4188 nfsm_chain_add_v2sattr(error, &nmreq, vap, 0);
4189 }
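/*
 * Annotation (not from the original source): for O_EXCL the two words
 * added above form the 8-byte exclusive-create verifier: the primary
 * IP address (or the bare counter if no address is configured) plus
 * an incrementing counter. The server records the verifier with the
 * new file, so a retransmitted CREATE carrying the same verifier
 * succeeds instead of failing with EEXIST.
 */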
4190 nfsm_chain_build_done(error, &nmreq);
4191 nfsmout_if(error);
4192
4193 error = nfs_request_async(dnp, NULL, &nmreq, NFSPROC_CREATE,
4194 vfs_context_thread(ctx), vfs_context_ucred(ctx), NULL, 0, NULL, &req);
4195 if (!error) {
4196 if (!namedattrs) {
4197 nfs_dulookup_start(&dul, dnp, ctx);
4198 dul_in_progress = 1;
4199 }
4200 error = nfs_request_async_finish(req, &nmrep, &xid, &status);
4201 }
4202
4203 if ((lockerror = nfs_node_lock(dnp))) {
4204 error = lockerror;
4205 }
4206 dxid = xid;
4207 if (!error && !status) {
4208 if (dnp->n_flag & NNEGNCENTRIES) {
4209 dnp->n_flag &= ~NNEGNCENTRIES;
4210 cache_purge_negatives(dvp);
4211 }
4212 error = nfsm_chain_get_fh_attr(nmp, &nmrep, dnp, ctx, nfsvers, &xid, &fh, &nvattr);
4213 }
4214 if (nfsvers == NFS_VER3) {
4215 nfsm_chain_get_wcc_data(error, &nmrep, dnp, &premtime, &wccpostattr, &dxid);
4216 }
4217 if (!error) {
4218 error = status;
4219 }
4220 nfsmout:
4221 nfsm_chain_cleanup(&nmreq);
4222 nfsm_chain_cleanup(&nmrep);
4223
4224 if (!lockerror) {
4225 dnp->n_flag |= NMODIFIED;
4226 /* if directory hadn't changed, update namecache mtime */
4227 if (nfstimespeccmp(&dnp->n_ncmtime, &premtime, ==)) {
4228 NFS_CHANGED_UPDATE_NC(nfsvers, dnp, &dnp->n_vattr);
4229 }
4230 nfs_node_unlock(dnp);
4231 /* nfs_getattr() will check changed and purge caches */
4232 nfs_getattr(dnp, NULL, ctx, wccpostattr ? NGA_CACHED : NGA_UNCACHED);
4233 }
4234
4235 if (!error && fh.fh_len) {
4236 error = nfs_nget(NFSTOMP(dnp), dnp, cnp, fh.fh_data, fh.fh_len, &nvattr, &xid, rq.r_auth, NG_MAKEENTRY, &np);
4237 }
4238 if (!error && !np) {
4239 error = nfs_lookitup(dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx, &np);
4240 }
4241 if (!error && np) {
4242 newvp = NFSTOV(np);
4243 }
4244
4245 if (dul_in_progress) {
4246 nfs_dulookup_finish(&dul, dnp, ctx);
4247 }
4248 if (!busyerror) {
4249 nfs_node_clear_busy(dnp);
4250 }
4251
4252 if (error) {
4253 if ((nfsvers == NFS_VER3) && (fmode & O_EXCL) && (error == NFSERR_NOTSUPP)) {
4254 fmode &= ~O_EXCL;
4255 goto again;
4256 }
4257 if (newvp) {
4258 nfs_node_unlock(np);
4259 vnode_put(newvp);
4260 }
4261 } else if ((nfsvers == NFS_VER3) && (fmode & O_EXCL)) {
4262 nfs_node_unlock(np);
4263 error = nfs3_setattr_rpc(np, vap, ctx);
4264 if (error && (gotuid || gotgid)) {
4265 /* it's possible the server didn't like our attempt to set IDs. */
4266 /* so, let's try it again without those */
4267 VATTR_CLEAR_ACTIVE(vap, va_uid);
4268 VATTR_CLEAR_ACTIVE(vap, va_gid);
4269 error = nfs3_setattr_rpc(np, vap, ctx);
4270 }
4271 if (error) {
4272 vnode_put(newvp);
4273 } else {
4274 nfs_node_lock_force(np);
4275 }
4276 }
4277 if (!error) {
4278 *ap->a_vpp = newvp;
4279 }
4280 if (!error && (gotuid || gotgid) &&
4281 (!newvp || nfs_getattrcache(np, &nvattr, 0) ||
4282 (gotuid && (nvattr.nva_uid != vap->va_uid)) ||
4283 (gotgid && (nvattr.nva_gid != vap->va_gid)))) {
4284 /* clear ID bits if server didn't use them (or we can't tell) */
4285 VATTR_CLEAR_SUPPORTED(vap, va_uid);
4286 VATTR_CLEAR_SUPPORTED(vap, va_gid);
4287 }
4288 if (!error) {
4289 nfs_node_unlock(np);
4290 }
4291 return error;
4292 }
4293
4294 /*
4295 * NFS file remove call
4296 * To try and make NFS semantics closer to UFS semantics, a file that has
4297 * other processes using the vnode is renamed instead of removed and then
4298 * removed later on the last close.
4299 * - If vnode_isinuse()
4300 * If a rename is not already in the works
4301 * call nfs_sillyrename() to set it up
4302 * else
4303 * do the remove RPC
4304 */
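/*
 * Annotation (not from the original source): a sillyrename renames the
 * busy file to a hidden ".nfs"-style name in the same directory and
 * records it in np->n_sillyrename; nfs_vnop_inactive() later uses
 * nfs_removeit() (below) to delete the placeholder once the last
 * user of the vnode has gone away.
 */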
4305 int
4306 nfs_vnop_remove(
4307 struct vnop_remove_args /* {
4308 * struct vnodeop_desc *a_desc;
4309 * vnode_t a_dvp;
4310 * vnode_t a_vp;
4311 * struct componentname *a_cnp;
4312 * int a_flags;
4313 * vfs_context_t a_context;
4314 * } */*ap)
4315 {
4316 vfs_context_t ctx = ap->a_context;
4317 vnode_t vp = ap->a_vp;
4318 vnode_t dvp = ap->a_dvp;
4319 struct componentname *cnp = ap->a_cnp;
4320 nfsnode_t dnp = VTONFS(dvp);
4321 nfsnode_t np = VTONFS(vp);
4322 int error = 0, nfsvers, namedattrs, inuse, gotattr = 0, flushed = 0, setsize = 0;
4323 struct nfs_vattr nvattr;
4324 struct nfsmount *nmp;
4325 struct nfs_dulookup dul;
4326
4327 /* XXX prevent removing a sillyrenamed file? */
4328
4329 nmp = NFSTONMP(dnp);
4330 if (nfs_mount_gone(nmp)) {
4331 return ENXIO;
4332 }
4333 nfsvers = nmp->nm_vers;
4334 namedattrs = (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR);
4335
4336 again_relock:
4337 error = nfs_node_set_busy2(dnp, np, vfs_context_thread(ctx));
4338 if (error) {
4339 return error;
4340 }
4341
4342 /* lock the node while we remove the file */
4343 lck_mtx_lock(nfs_node_hash_mutex);
4344 while (np->n_hflag & NHLOCKED) {
4345 np->n_hflag |= NHLOCKWANT;
4346 msleep(np, nfs_node_hash_mutex, PINOD, "nfs_remove", NULL);
4347 }
4348 np->n_hflag |= NHLOCKED;
4349 lck_mtx_unlock(nfs_node_hash_mutex);
4350
4351 if (!namedattrs) {
4352 nfs_dulookup_init(&dul, dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx);
4353 }
4354 again:
4355 inuse = vnode_isinuse(vp, 0);
4356 if ((ap->a_flags & VNODE_REMOVE_NODELETEBUSY) && inuse) {
4357 /* Caller requested Carbon delete semantics, but file is busy */
4358 error = EBUSY;
4359 goto out;
4360 }
4361 if (inuse && !gotattr) {
4362 if (nfs_getattr(np, &nvattr, ctx, NGA_CACHED)) {
4363 nvattr.nva_nlink = 1;
4364 }
4365 gotattr = 1;
4366 goto again;
4367 }
4368 if (!inuse || (np->n_sillyrename && (nvattr.nva_nlink > 1))) {
4369 if (!inuse && !flushed) { /* flush all the buffers first */
4370 /* unlock the node */
4371 lck_mtx_lock(nfs_node_hash_mutex);
4372 np->n_hflag &= ~NHLOCKED;
4373 if (np->n_hflag & NHLOCKWANT) {
4374 np->n_hflag &= ~NHLOCKWANT;
4375 wakeup(np);
4376 }
4377 lck_mtx_unlock(nfs_node_hash_mutex);
4378 nfs_node_clear_busy2(dnp, np);
4379 error = nfs_vinvalbuf(vp, V_SAVE, ctx, 1);
4380 FSDBG(260, np, np->n_size, np->n_vattr.nva_size, 0xf00d0011);
4381 flushed = 1;
4382 if (error == EINTR) {
4383 nfs_node_lock_force(np);
4384 NATTRINVALIDATE(np);
4385 nfs_node_unlock(np);
4386 return error;
4387 }
4388 if (!namedattrs) {
4389 nfs_dulookup_finish(&dul, dnp, ctx);
4390 }
4391 goto again_relock;
4392 }
4393 #if CONFIG_NFS4
4394 if ((nmp->nm_vers >= NFS_VER4) && (np->n_openflags & N_DELEG_MASK)) {
4395 nfs4_delegation_return(np, 0, vfs_context_thread(ctx), vfs_context_ucred(ctx));
4396 }
4397 #endif
4398 /*
4399 * Purge the name cache so that the chance of a lookup for
4400 * the name succeeding while the remove is in progress is
4401 * minimized.
4402 */
4403 nfs_name_cache_purge(dnp, np, cnp, ctx);
4404
4405 if (!namedattrs) {
4406 nfs_dulookup_start(&dul, dnp, ctx);
4407 }
4408
4409 /* Do the rpc */
4410 error = nmp->nm_funcs->nf_remove_rpc(dnp, cnp->cn_nameptr, cnp->cn_namelen,
4411 vfs_context_thread(ctx), vfs_context_ucred(ctx));
4412
4413 /*
4414 * Kludge City: If the first reply to the remove rpc is lost,
4415 * the reply to the retransmitted request will be ENOENT
4416 * since the file was in fact removed.
4417 * Therefore, we cheat and return success.
4418 */
4419 if (error == ENOENT) {
4420 error = 0;
4421 }
4422
4423 if (!error && !inuse && !np->n_sillyrename) {
4424 /*
4425 * removal succeeded, it's not in use, and not silly renamed so
4426 * remove nfsnode from hash now so we can't accidentally find it
4427 * again if another object gets created with the same filehandle
4428 * before this vnode gets reclaimed
4429 */
4430 lck_mtx_lock(nfs_node_hash_mutex);
4431 if (np->n_hflag & NHHASHED) {
4432 LIST_REMOVE(np, n_hash);
4433 np->n_hflag &= ~NHHASHED;
4434 FSDBG(266, 0, np, np->n_flag, 0xb1eb1e);
4435 }
4436 lck_mtx_unlock(nfs_node_hash_mutex);
4437 /* clear flags now: won't get nfs_vnop_inactive for recycled vnode */
4438 /* clear all flags other than these */
4439 nfs_node_lock_force(np);
4440 np->n_flag &= (NMODIFIED);
4441 NATTRINVALIDATE(np);
4442 nfs_node_unlock(np);
4443 vnode_recycle(vp);
4444 setsize = 1;
4445 } else {
4446 nfs_node_lock_force(np);
4447 NATTRINVALIDATE(np);
4448 nfs_node_unlock(np);
4449 }
4450 } else if (!np->n_sillyrename) {
4451 if (!namedattrs) {
4452 nfs_dulookup_start(&dul, dnp, ctx);
4453 }
4454 error = nfs_sillyrename(dnp, np, cnp, ctx);
4455 nfs_node_lock_force(np);
4456 NATTRINVALIDATE(np);
4457 nfs_node_unlock(np);
4458 } else {
4459 nfs_node_lock_force(np);
4460 NATTRINVALIDATE(np);
4461 nfs_node_unlock(np);
4462 if (!namedattrs) {
4463 nfs_dulookup_start(&dul, dnp, ctx);
4464 }
4465 }
4466
4467 /* nfs_getattr() will check changed and purge caches */
4468 nfs_getattr(dnp, NULL, ctx, NGA_CACHED);
4469 if (!namedattrs) {
4470 nfs_dulookup_finish(&dul, dnp, ctx);
4471 }
4472 out:
4473 /* unlock the node */
4474 lck_mtx_lock(nfs_node_hash_mutex);
4475 np->n_hflag &= ~NHLOCKED;
4476 if (np->n_hflag & NHLOCKWANT) {
4477 np->n_hflag &= ~NHLOCKWANT;
4478 wakeup(np);
4479 }
4480 lck_mtx_unlock(nfs_node_hash_mutex);
4481 nfs_node_clear_busy2(dnp, np);
4482 if (setsize) {
4483 ubc_setsize(vp, 0);
4484 }
4485 return error;
4486 }
4487
4488 /*
4489 * NFS silly-renamed file removal function called from nfs_vnop_inactive
4490 */
4491 int
4492 nfs_removeit(struct nfs_sillyrename *nsp)
4493 {
4494 struct nfsmount *nmp = NFSTONMP(nsp->nsr_dnp);
4495 if (nfs_mount_gone(nmp)) {
4496 return ENXIO;
4497 }
4498 return nmp->nm_funcs->nf_remove_rpc(nsp->nsr_dnp, nsp->nsr_name, nsp->nsr_namlen, NULL, nsp->nsr_cred);
4499 }
4500
4501 /*
4502 * NFS remove rpc, called from nfs_remove() and nfs_removeit().
4503 */
4504 int
4505 nfs3_remove_rpc(
4506 nfsnode_t dnp,
4507 char *name,
4508 int namelen,
4509 thread_t thd,
4510 kauth_cred_t cred)
4511 {
4512 int error = 0, lockerror = ENOENT, status, wccpostattr = 0;
4513 struct timespec premtime = { .tv_sec = 0, .tv_nsec = 0 };
4514 struct nfsmount *nmp;
4515 int nfsvers;
4516 u_int64_t xid;
4517 struct nfsm_chain nmreq, nmrep;
4518
4519 nmp = NFSTONMP(dnp);
4520 if (nfs_mount_gone(nmp)) {
4521 return ENXIO;
4522 }
4523 nfsvers = nmp->nm_vers;
4524 if ((nfsvers == NFS_VER2) && (namelen > NFS_MAXNAMLEN)) {
4525 return ENAMETOOLONG;
4526 }
4527
4528 nfsm_chain_null(&nmreq);
4529 nfsm_chain_null(&nmrep);
4530
4531 nfsm_chain_build_alloc_init(error, &nmreq,
4532 NFSX_FH(nfsvers) + NFSX_UNSIGNED + nfsm_rndup(namelen));
4533 nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
4534 nfsm_chain_add_name(error, &nmreq, name, namelen, nmp);
4535 nfsm_chain_build_done(error, &nmreq);
4536 nfsmout_if(error);
4537
4538 error = nfs_request2(dnp, NULL, &nmreq, NFSPROC_REMOVE, thd, cred, NULL, 0, &nmrep, &xid, &status);
4539
4540 if ((lockerror = nfs_node_lock(dnp))) {
4541 error = lockerror;
4542 }
4543 if (nfsvers == NFS_VER3) {
4544 nfsm_chain_get_wcc_data(error, &nmrep, dnp, &premtime, &wccpostattr, &xid);
4545 }
4546 nfsmout_if(error);
4547 dnp->n_flag |= NMODIFIED;
4548 /* if directory hadn't changed, update namecache mtime */
4549 if (nfstimespeccmp(&dnp->n_ncmtime, &premtime, ==)) {
4550 NFS_CHANGED_UPDATE_NC(nfsvers, dnp, &dnp->n_vattr);
4551 }
4552 if (!wccpostattr) {
4553 NATTRINVALIDATE(dnp);
4554 }
4555 if (!error) {
4556 error = status;
4557 }
4558 nfsmout:
4559 if (!lockerror) {
4560 nfs_node_unlock(dnp);
4561 }
4562 nfsm_chain_cleanup(&nmreq);
4563 nfsm_chain_cleanup(&nmrep);
4564 return error;
4565 }
4566
4567 /*
4568 * NFS file rename call
4569 */
4570 int
4571 nfs_vnop_rename(
4572 struct vnop_rename_args /* {
4573 * struct vnodeop_desc *a_desc;
4574 * vnode_t a_fdvp;
4575 * vnode_t a_fvp;
4576 * struct componentname *a_fcnp;
4577 * vnode_t a_tdvp;
4578 * vnode_t a_tvp;
4579 * struct componentname *a_tcnp;
4580 * vfs_context_t a_context;
4581 * } */*ap)
4582 {
4583 vfs_context_t ctx = ap->a_context;
4584 vnode_t fdvp = ap->a_fdvp;
4585 vnode_t fvp = ap->a_fvp;
4586 vnode_t tdvp = ap->a_tdvp;
4587 vnode_t tvp = ap->a_tvp;
4588 nfsnode_t fdnp, fnp, tdnp, tnp;
4589 struct componentname *tcnp = ap->a_tcnp;
4590 struct componentname *fcnp = ap->a_fcnp;
4591 int error, nfsvers, inuse = 0, tvprecycle = 0, locked = 0;
4592 mount_t fmp, tdmp, tmp;
4593 struct nfs_vattr nvattr;
4594 struct nfsmount *nmp;
4595
4596 fdnp = VTONFS(fdvp);
4597 fnp = VTONFS(fvp);
4598 tdnp = VTONFS(tdvp);
4599 tnp = tvp ? VTONFS(tvp) : NULL;
4600
4601 nmp = NFSTONMP(fdnp);
4602 if (nfs_mount_gone(nmp)) {
4603 return ENXIO;
4604 }
4605 nfsvers = nmp->nm_vers;
4606
4607 error = nfs_node_set_busy4(fdnp, fnp, tdnp, tnp, vfs_context_thread(ctx));
4608 if (error) {
4609 return error;
4610 }
4611
4612 if (tvp && (tvp != fvp)) {
4613 /* lock the node while we rename over the existing file */
4614 lck_mtx_lock(nfs_node_hash_mutex);
4615 while (tnp->n_hflag & NHLOCKED) {
4616 tnp->n_hflag |= NHLOCKWANT;
4617 msleep(tnp, nfs_node_hash_mutex, PINOD, "nfs_rename", NULL);
4618 }
4619 tnp->n_hflag |= NHLOCKED;
4620 lck_mtx_unlock(nfs_node_hash_mutex);
4621 locked = 1;
4622 }
4623
4624 /* Check for cross-device rename */
4625 fmp = vnode_mount(fvp);
4626 tmp = tvp ? vnode_mount(tvp) : NULL;
4627 tdmp = vnode_mount(tdvp);
4628 if ((fmp != tdmp) || (tvp && (fmp != tmp))) {
4629 error = EXDEV;
4630 goto out;
4631 }
4632
4633 /* XXX prevent renaming from/over a sillyrenamed file? */
4634
4635 /*
4636 * If the tvp exists and is in use, sillyrename it before doing the
4637 * rename of the new file over it.
4638 * XXX Can't sillyrename a directory.
4639 * Don't sillyrename if source and target are same vnode (hard
4640 * links or case-variants)
4641 */
4642 if (tvp && (tvp != fvp)) {
4643 inuse = vnode_isinuse(tvp, 0);
4644 }
4645 if (inuse && !tnp->n_sillyrename && (vnode_vtype(tvp) != VDIR)) {
4646 error = nfs_sillyrename(tdnp, tnp, tcnp, ctx);
4647 if (error) {
4648 /* sillyrename failed. Instead of pressing on, return error */
4649 goto out; /* should not be ENOENT. */
4650 } else {
4651 /* sillyrename succeeded. */
4652 tvp = NULL;
4653 }
4654 }
4655 #if CONFIG_NFS4
4656 else if (tvp && (nmp->nm_vers >= NFS_VER4) && (tnp->n_openflags & N_DELEG_MASK)) {
4657 nfs4_delegation_return(tnp, 0, vfs_context_thread(ctx), vfs_context_ucred(ctx));
4658 }
4659 #endif
4660 error = nmp->nm_funcs->nf_rename_rpc(fdnp, fcnp->cn_nameptr, fcnp->cn_namelen,
4661 tdnp, tcnp->cn_nameptr, tcnp->cn_namelen, ctx);
4662
4663 /*
4664 * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry.
4665 */
4666 if (error == ENOENT) {
4667 error = 0;
4668 }
4669
4670 if (tvp && (tvp != fvp) && !tnp->n_sillyrename) {
4671 nfs_node_lock_force(tnp);
4672 tvprecycle = (!error && !vnode_isinuse(tvp, 0) &&
4673 (nfs_getattrcache(tnp, &nvattr, 0) || (nvattr.nva_nlink == 1)));
4674 nfs_node_unlock(tnp);
4675 lck_mtx_lock(nfs_node_hash_mutex);
4676 if (tvprecycle && (tnp->n_hflag & NHHASHED)) {
4677 /*
4678 * remove nfsnode from hash now so we can't accidentally find it
4679 * again if another object gets created with the same filehandle
4680 * before this vnode gets reclaimed
4681 */
4682 LIST_REMOVE(tnp, n_hash);
4683 tnp->n_hflag &= ~NHHASHED;
4684 FSDBG(266, 0, tnp, tnp->n_flag, 0xb1eb1e);
4685 }
4686 lck_mtx_unlock(nfs_node_hash_mutex);
4687 }
4688
4689 /* purge the old name cache entries and enter the new one */
4690 nfs_name_cache_purge(fdnp, fnp, fcnp, ctx);
4691 if (tvp) {
4692 nfs_name_cache_purge(tdnp, tnp, tcnp, ctx);
4693 if (tvprecycle) {
4694 /* clear flags now: won't get nfs_vnop_inactive for recycled vnode */
4695 /* clear all flags other than these */
4696 nfs_node_lock_force(tnp);
4697 tnp->n_flag &= (NMODIFIED);
4698 nfs_node_unlock(tnp);
4699 vnode_recycle(tvp);
4700 }
4701 }
4702 if (!error) {
4703 nfs_node_lock_force(tdnp);
4704 if (tdnp->n_flag & NNEGNCENTRIES) {
4705 tdnp->n_flag &= ~NNEGNCENTRIES;
4706 cache_purge_negatives(tdvp);
4707 }
4708 nfs_node_unlock(tdnp);
4709 nfs_node_lock_force(fnp);
4710 cache_enter(tdvp, fvp, tcnp);
4711 if (tdvp != fdvp) { /* update parent pointer */
4712 if (fnp->n_parent && !vnode_get(fnp->n_parent)) {
4713 /* remove ref from old parent */
4714 vnode_rele(fnp->n_parent);
4715 vnode_put(fnp->n_parent);
4716 }
4717 fnp->n_parent = tdvp;
4718 if (tdvp && !vnode_get(tdvp)) {
4719 /* add ref to new parent */
4720 vnode_ref(tdvp);
4721 vnode_put(tdvp);
4722 } else {
4723 fnp->n_parent = NULL;
4724 }
4725 }
4726 nfs_node_unlock(fnp);
4727 }
4728 out:
4729 /* nfs_getattr() will check changed and purge caches */
4730 nfs_getattr(fdnp, NULL, ctx, NGA_CACHED);
4731 nfs_getattr(tdnp, NULL, ctx, NGA_CACHED);
4732 if (locked) {
4733 /* unlock node */
4734 lck_mtx_lock(nfs_node_hash_mutex);
4735 tnp->n_hflag &= ~NHLOCKED;
4736 if (tnp->n_hflag & NHLOCKWANT) {
4737 tnp->n_hflag &= ~NHLOCKWANT;
4738 wakeup(tnp);
4739 }
4740 lck_mtx_unlock(nfs_node_hash_mutex);
4741 }
4742 nfs_node_clear_busy4(fdnp, fnp, tdnp, tnp);
4743 return error;
4744 }
4745
4746 /*
4747 * Do an NFS rename rpc. Called from nfs_vnop_rename() and nfs_sillyrename().
4748 */
4749 int
4750 nfs3_rename_rpc(
4751 nfsnode_t fdnp,
4752 char *fnameptr,
4753 int fnamelen,
4754 nfsnode_t tdnp,
4755 char *tnameptr,
4756 int tnamelen,
4757 vfs_context_t ctx)
4758 {
4759 int error = 0, lockerror = ENOENT, status, fwccpostattr = 0, twccpostattr = 0;
4760 struct timespec fpremtime = { .tv_sec = 0, .tv_nsec = 0 }, tpremtime = { .tv_sec = 0, .tv_nsec = 0 };
4761 struct nfsmount *nmp;
4762 int nfsvers;
4763 u_int64_t xid, txid;
4764 struct nfsm_chain nmreq, nmrep;
4765
4766 nmp = NFSTONMP(fdnp);
4767 if (nfs_mount_gone(nmp)) {
4768 return ENXIO;
4769 }
4770 nfsvers = nmp->nm_vers;
4771 if ((nfsvers == NFS_VER2) &&
4772 ((fnamelen > NFS_MAXNAMLEN) || (tnamelen > NFS_MAXNAMLEN))) {
4773 return ENAMETOOLONG;
4774 }
4775
4776 nfsm_chain_null(&nmreq);
4777 nfsm_chain_null(&nmrep);
4778
4779 nfsm_chain_build_alloc_init(error, &nmreq,
4780 (NFSX_FH(nfsvers) + NFSX_UNSIGNED) * 2 +
4781 nfsm_rndup(fnamelen) + nfsm_rndup(tnamelen));
4782 nfsm_chain_add_fh(error, &nmreq, nfsvers, fdnp->n_fhp, fdnp->n_fhsize);
4783 nfsm_chain_add_name(error, &nmreq, fnameptr, fnamelen, nmp);
4784 nfsm_chain_add_fh(error, &nmreq, nfsvers, tdnp->n_fhp, tdnp->n_fhsize);
4785 nfsm_chain_add_name(error, &nmreq, tnameptr, tnamelen, nmp);
4786 nfsm_chain_build_done(error, &nmreq);
4787 nfsmout_if(error);
4788
4789 error = nfs_request(fdnp, NULL, &nmreq, NFSPROC_RENAME, ctx, NULL, &nmrep, &xid, &status);
4790
4791 if ((lockerror = nfs_node_lock2(fdnp, tdnp))) {
4792 error = lockerror;
4793 }
4794 if (nfsvers == NFS_VER3) {
4795 txid = xid;
4796 nfsm_chain_get_wcc_data(error, &nmrep, fdnp, &fpremtime, &fwccpostattr, &xid);
4797 nfsm_chain_get_wcc_data(error, &nmrep, tdnp, &tpremtime, &twccpostattr, &txid);
4798 }
4799 if (!error) {
4800 error = status;
4801 }
4802 nfsmout:
4803 nfsm_chain_cleanup(&nmreq);
4804 nfsm_chain_cleanup(&nmrep);
4805 if (!lockerror) {
4806 fdnp->n_flag |= NMODIFIED;
4807 /* if directory hadn't changed, update namecache mtime */
4808 if (nfstimespeccmp(&fdnp->n_ncmtime, &fpremtime, ==)) {
4809 NFS_CHANGED_UPDATE_NC(nfsvers, fdnp, &fdnp->n_vattr);
4810 }
4811 if (!fwccpostattr) {
4812 NATTRINVALIDATE(fdnp);
4813 }
4814 tdnp->n_flag |= NMODIFIED;
4815 /* if directory hadn't changed, update namecache mtime */
4816 if (nfstimespeccmp(&tdnp->n_ncmtime, &tpremtime, ==)) {
4817 NFS_CHANGED_UPDATE_NC(nfsvers, tdnp, &tdnp->n_vattr);
4818 }
4819 if (!twccpostattr) {
4820 NATTRINVALIDATE(tdnp);
4821 }
4822 nfs_node_unlock2(fdnp, tdnp);
4823 }
4824 return error;
4825 }
4826
4827 /*
4828 * NFS hard link create call
4829 */
4830 int
4831 nfs3_vnop_link(
4832 struct vnop_link_args /* {
4833 * struct vnodeop_desc *a_desc;
4834 * vnode_t a_vp;
4835 * vnode_t a_tdvp;
4836 * struct componentname *a_cnp;
4837 * vfs_context_t a_context;
4838 * } */*ap)
4839 {
4840 vfs_context_t ctx = ap->a_context;
4841 vnode_t vp = ap->a_vp;
4842 vnode_t tdvp = ap->a_tdvp;
4843 struct componentname *cnp = ap->a_cnp;
4844 int error = 0, lockerror = ENOENT, status, wccpostattr = 0, attrflag = 0;
4845 struct timespec premtime = { .tv_sec = 0, .tv_nsec = 0 };
4846 struct nfsmount *nmp;
4847 nfsnode_t np = VTONFS(vp);
4848 nfsnode_t tdnp = VTONFS(tdvp);
4849 int nfsvers;
4850 u_int64_t xid, txid;
4851 struct nfsm_chain nmreq, nmrep;
4852
4853 if (vnode_mount(vp) != vnode_mount(tdvp)) {
4854 return EXDEV;
4855 }
4856
4857 nmp = VTONMP(vp);
4858 if (nfs_mount_gone(nmp)) {
4859 return ENXIO;
4860 }
4861 nfsvers = nmp->nm_vers;
4862 if ((nfsvers == NFS_VER2) && (cnp->cn_namelen > NFS_MAXNAMLEN)) {
4863 return ENAMETOOLONG;
4864 }
4865
4866 /*
4867 * Push all writes to the server, so that the attribute cache
4868 * doesn't get "out of sync" with the server.
4869 * XXX There should be a better way!
4870 */
4871 nfs_flush(np, MNT_WAIT, vfs_context_thread(ctx), V_IGNORE_WRITEERR);
4872
4873 error = nfs_node_set_busy2(tdnp, np, vfs_context_thread(ctx));
4874 if (error) {
4875 return error;
4876 }
4877
4878 nfsm_chain_null(&nmreq);
4879 nfsm_chain_null(&nmrep);
4880
4881 nfsm_chain_build_alloc_init(error, &nmreq,
4882 NFSX_FH(nfsvers) * 2 + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen));
4883 nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
4884 nfsm_chain_add_fh(error, &nmreq, nfsvers, tdnp->n_fhp, tdnp->n_fhsize);
4885 nfsm_chain_add_name(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen, nmp);
4886 nfsm_chain_build_done(error, &nmreq);
4887 nfsmout_if(error);
4888 error = nfs_request(np, NULL, &nmreq, NFSPROC_LINK, ctx, NULL, &nmrep, &xid, &status);
4889
4890 if ((lockerror = nfs_node_lock2(tdnp, np))) {
4891 error = lockerror;
4892 goto nfsmout;
4893 }
4894 if (nfsvers == NFS_VER3) {
4895 txid = xid;
4896 nfsm_chain_postop_attr_update_flag(error, &nmrep, np, attrflag, &xid);
4897 nfsm_chain_get_wcc_data(error, &nmrep, tdnp, &premtime, &wccpostattr, &txid);
4898 }
4899 if (!error) {
4900 error = status;
4901 }
4902 nfsmout:
4903 nfsm_chain_cleanup(&nmreq);
4904 nfsm_chain_cleanup(&nmrep);
4905 if (!lockerror) {
4906 if (!attrflag) {
4907 NATTRINVALIDATE(np);
4908 }
4909 tdnp->n_flag |= NMODIFIED;
4910 /* if directory hadn't changed, update namecache mtime */
4911 if (nfstimespeccmp(&tdnp->n_ncmtime, &premtime, ==)) {
4912 NFS_CHANGED_UPDATE_NC(nfsvers, tdnp, &tdnp->n_vattr);
4913 }
4914 if (!wccpostattr) {
4915 NATTRINVALIDATE(tdnp);
4916 }
4917 if (!error && (tdnp->n_flag & NNEGNCENTRIES)) {
4918 tdnp->n_flag &= ~NNEGNCENTRIES;
4919 cache_purge_negatives(tdvp);
4920 }
4921 nfs_node_unlock2(tdnp, np);
4922 }
4923 nfs_node_clear_busy2(tdnp, np);
4924 /*
4925 * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
4926 */
4927 if (error == EEXIST) {
4928 error = 0;
4929 }
4930 return error;
4931 }
4932
4933 /*
4934 * NFS symbolic link create call
4935 */
4936 int
4937 nfs3_vnop_symlink(
4938 struct vnop_symlink_args /* {
4939 * struct vnodeop_desc *a_desc;
4940 * vnode_t a_dvp;
4941 * vnode_t *a_vpp;
4942 * struct componentname *a_cnp;
4943 * struct vnode_attr *a_vap;
4944 * char *a_target;
4945 * vfs_context_t a_context;
4946 * } */*ap)
4947 {
4948 vfs_context_t ctx = ap->a_context;
4949 vnode_t dvp = ap->a_dvp;
4950 struct vnode_attr *vap = ap->a_vap;
4951 struct componentname *cnp = ap->a_cnp;
4952 struct nfs_vattr nvattr;
4953 fhandle_t fh;
4954 int slen, error = 0, lockerror = ENOENT, busyerror = ENOENT, status, wccpostattr = 0;
4955 struct timespec premtime = { .tv_sec = 0, .tv_nsec = 0 };
4956 vnode_t newvp = NULL;
4957 int nfsvers, gotuid, gotgid;
4958 u_int64_t xid = 0, dxid;
4959 nfsnode_t np = NULL;
4960 nfsnode_t dnp = VTONFS(dvp);
4961 struct nfsmount *nmp;
4962 struct nfsm_chain nmreq, nmrep;
4963 struct nfsreq rq, *req = &rq;
4964 struct nfs_dulookup dul;
4965 int namedattrs;
4966 int dul_in_progress = 0;
4967
4968 nmp = VTONMP(dvp);
4969 if (nfs_mount_gone(nmp)) {
4970 return ENXIO;
4971 }
4972 nfsvers = nmp->nm_vers;
4973 namedattrs = (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR);
4974
4975 slen = strlen(ap->a_target);
4976 if ((nfsvers == NFS_VER2) &&
4977 ((cnp->cn_namelen > NFS_MAXNAMLEN) || (slen > NFS_MAXPATHLEN))) {
4978 return ENAMETOOLONG;
4979 }
4980
4981 nfs_avoid_needless_id_setting_on_create(dnp, vap, ctx);
4982
4983 VATTR_SET_SUPPORTED(vap, va_mode);
4984 VATTR_SET_SUPPORTED(vap, va_uid);
4985 VATTR_SET_SUPPORTED(vap, va_gid);
4986 VATTR_SET_SUPPORTED(vap, va_data_size);
4987 VATTR_SET_SUPPORTED(vap, va_access_time);
4988 VATTR_SET_SUPPORTED(vap, va_modify_time);
4989 gotuid = VATTR_IS_ACTIVE(vap, va_uid);
4990 gotgid = VATTR_IS_ACTIVE(vap, va_gid);
4991
4992 error = busyerror = nfs_node_set_busy(dnp, vfs_context_thread(ctx));
4993 if (!namedattrs) {
4994 nfs_dulookup_init(&dul, dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx);
4995 }
4996
4997 nfsm_chain_null(&nmreq);
4998 nfsm_chain_null(&nmrep);
4999
5000 nfsm_chain_build_alloc_init(error, &nmreq,
5001 NFSX_FH(nfsvers) + 2 * NFSX_UNSIGNED +
5002 nfsm_rndup(cnp->cn_namelen) + nfsm_rndup(slen) + NFSX_SATTR(nfsvers));
5003 nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
5004 nfsm_chain_add_name(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen, nmp);
5005 if (nfsvers == NFS_VER3) {
5006 nfsm_chain_add_v3sattr(nmp, error, &nmreq, vap);
5007 }
5008 nfsm_chain_add_name(error, &nmreq, ap->a_target, slen, nmp);
5009 if (nfsvers == NFS_VER2) {
5010 nfsm_chain_add_v2sattr(error, &nmreq, vap, -1);
5011 }
5012 nfsm_chain_build_done(error, &nmreq);
5013 nfsmout_if(error);
5014
5015 error = nfs_request_async(dnp, NULL, &nmreq, NFSPROC_SYMLINK,
5016 vfs_context_thread(ctx), vfs_context_ucred(ctx), NULL, 0, NULL, &req);
5017 if (!error) {
5018 if (!namedattrs) {
5019 nfs_dulookup_start(&dul, dnp, ctx);
5020 dul_in_progress = 1;
5021 }
5022 error = nfs_request_async_finish(req, &nmrep, &xid, &status);
5023 }
5024
5025 if ((lockerror = nfs_node_lock(dnp))) {
5026 error = lockerror;
5027 }
5028 dxid = xid;
5029 if (!error && !status) {
5030 if (dnp->n_flag & NNEGNCENTRIES) {
5031 dnp->n_flag &= ~NNEGNCENTRIES;
5032 cache_purge_negatives(dvp);
5033 }
5034 if (nfsvers == NFS_VER3) {
5035 error = nfsm_chain_get_fh_attr(nmp, &nmrep, dnp, ctx, nfsvers, &xid, &fh, &nvattr);
5036 } else {
5037 fh.fh_len = 0;
5038 }
5039 }
5040 if (nfsvers == NFS_VER3) {
5041 nfsm_chain_get_wcc_data(error, &nmrep, dnp, &premtime, &wccpostattr, &dxid);
5042 }
5043 if (!error) {
5044 error = status;
5045 }
5046 nfsmout:
5047 nfsm_chain_cleanup(&nmreq);
5048 nfsm_chain_cleanup(&nmrep);
5049
5050 if (!lockerror) {
5051 dnp->n_flag |= NMODIFIED;
5052 /* if directory hadn't changed, update namecache mtime */
5053 if (nfstimespeccmp(&dnp->n_ncmtime, &premtime, ==)) {
5054 NFS_CHANGED_UPDATE_NC(nfsvers, dnp, &dnp->n_vattr);
5055 }
5056 nfs_node_unlock(dnp);
5057 /* nfs_getattr() will check changed and purge caches */
5058 nfs_getattr(dnp, NULL, ctx, wccpostattr ? NGA_CACHED : NGA_UNCACHED);
5059 }
5060
5061 if (!error && fh.fh_len) {
5062 error = nfs_nget(NFSTOMP(dnp), dnp, cnp, fh.fh_data, fh.fh_len, &nvattr, &xid, rq.r_auth, NG_MAKEENTRY, &np);
5063 }
5064 if (!error && np) {
5065 newvp = NFSTOV(np);
5066 }
5067
5068 if (dul_in_progress) {
5069 nfs_dulookup_finish(&dul, dnp, ctx);
5070 }
5071
5072 /*
5073 * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry
5074 * if we can succeed in looking up the symlink.
5075 */
5076 if ((error == EEXIST) || (!error && !newvp)) {
5077 if (newvp) {
5078 nfs_node_unlock(np);
5079 vnode_put(newvp);
5080 newvp = NULL;
5081 }
5082 error = nfs_lookitup(dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx, &np);
5083 if (!error) {
5084 newvp = NFSTOV(np);
5085 if (vnode_vtype(newvp) != VLNK) {
5086 error = EEXIST;
5087 }
5088 }
5089 }
5090 if (!busyerror) {
5091 nfs_node_clear_busy(dnp);
5092 }
5093 if (!error && (gotuid || gotgid) &&
5094 (!newvp || nfs_getattrcache(np, &nvattr, 0) ||
5095 (gotuid && (nvattr.nva_uid != vap->va_uid)) ||
5096 (gotgid && (nvattr.nva_gid != vap->va_gid)))) {
5097 /* clear ID bits if server didn't use them (or we can't tell) */
5098 VATTR_CLEAR_SUPPORTED(vap, va_uid);
5099 VATTR_CLEAR_SUPPORTED(vap, va_gid);
5100 }
5101 if (error) {
5102 if (newvp) {
5103 nfs_node_unlock(np);
5104 vnode_put(newvp);
5105 }
5106 } else {
5107 nfs_node_unlock(np);
5108 *ap->a_vpp = newvp;
5109 }
5110 return error;
5111 }
5112
5113 /*
5114 * NFS make dir call
5115 */
5116 int
5117 nfs3_vnop_mkdir(
5118 struct vnop_mkdir_args /* {
5119 * struct vnodeop_desc *a_desc;
5120 * vnode_t a_dvp;
5121 * vnode_t *a_vpp;
5122 * struct componentname *a_cnp;
5123 * struct vnode_attr *a_vap;
5124 * vfs_context_t a_context;
5125 * } */*ap)
5126 {
5127 vfs_context_t ctx = ap->a_context;
5128 vnode_t dvp = ap->a_dvp;
5129 struct vnode_attr *vap = ap->a_vap;
5130 struct componentname *cnp = ap->a_cnp;
5131 struct nfs_vattr nvattr;
5132 nfsnode_t np = NULL;
5133 struct nfsmount *nmp;
5134 nfsnode_t dnp = VTONFS(dvp);
5135 vnode_t newvp = NULL;
5136 int error = 0, lockerror = ENOENT, busyerror = ENOENT, status, wccpostattr = 0;
5137 struct timespec premtime = { .tv_sec = 0, .tv_nsec = 0 };
5138 int nfsvers, gotuid, gotgid;
5139 u_int64_t xid = 0, dxid;
5140 fhandle_t fh;
5141 struct nfsm_chain nmreq, nmrep;
5142 struct nfsreq rq, *req = &rq;
5143 struct nfs_dulookup dul;
5144 int namedattrs;
5145 int dul_in_progress = 0;
5146
5147 nmp = VTONMP(dvp);
5148 if (nfs_mount_gone(nmp)) {
5149 return ENXIO;
5150 }
5151 nfsvers = nmp->nm_vers;
5152 namedattrs = (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR);
5153
5154 if ((nfsvers == NFS_VER2) && (cnp->cn_namelen > NFS_MAXNAMLEN)) {
5155 return ENAMETOOLONG;
5156 }
5157
5158 nfs_avoid_needless_id_setting_on_create(dnp, vap, ctx);
5159
5160 VATTR_SET_SUPPORTED(vap, va_mode);
5161 VATTR_SET_SUPPORTED(vap, va_uid);
5162 VATTR_SET_SUPPORTED(vap, va_gid);
5163 VATTR_SET_SUPPORTED(vap, va_data_size);
5164 VATTR_SET_SUPPORTED(vap, va_access_time);
5165 VATTR_SET_SUPPORTED(vap, va_modify_time);
5166 gotuid = VATTR_IS_ACTIVE(vap, va_uid);
5167 gotgid = VATTR_IS_ACTIVE(vap, va_gid);
5168
5169 error = busyerror = nfs_node_set_busy(dnp, vfs_context_thread(ctx));
5170 if (!namedattrs) {
5171 nfs_dulookup_init(&dul, dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx);
5172 }
5173
5174 nfsm_chain_null(&nmreq);
5175 nfsm_chain_null(&nmrep);
5176
5177 nfsm_chain_build_alloc_init(error, &nmreq,
5178 NFSX_FH(nfsvers) + NFSX_UNSIGNED +
5179 nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(nfsvers));
5180 nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
5181 nfsm_chain_add_name(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen, nmp);
5182 if (nfsvers == NFS_VER3) {
5183 nfsm_chain_add_v3sattr(nmp, error, &nmreq, vap);
5184 } else {
5185 nfsm_chain_add_v2sattr(error, &nmreq, vap, -1);
5186 }
5187 nfsm_chain_build_done(error, &nmreq);
5188 nfsmout_if(error);
5189
5190 error = nfs_request_async(dnp, NULL, &nmreq, NFSPROC_MKDIR,
5191 vfs_context_thread(ctx), vfs_context_ucred(ctx), NULL, 0, NULL, &req);
5192 if (!error) {
5193 if (!namedattrs) {
5194 nfs_dulookup_start(&dul, dnp, ctx);
5195 dul_in_progress = 1;
5196 }
5197 error = nfs_request_async_finish(req, &nmrep, &xid, &status);
5198 }
5199
5200 if ((lockerror = nfs_node_lock(dnp))) {
5201 error = lockerror;
5202 }
5203 dxid = xid;
5204 if (!error && !status) {
5205 if (dnp->n_flag & NNEGNCENTRIES) {
5206 dnp->n_flag &= ~NNEGNCENTRIES;
5207 cache_purge_negatives(dvp);
5208 }
5209 error = nfsm_chain_get_fh_attr(nmp, &nmrep, dnp, ctx, nfsvers, &xid, &fh, &nvattr);
5210 }
5211 if (nfsvers == NFS_VER3) {
5212 nfsm_chain_get_wcc_data(error, &nmrep, dnp, &premtime, &wccpostattr, &dxid);
5213 }
5214 if (!error) {
5215 error = status;
5216 }
5217 nfsmout:
5218 nfsm_chain_cleanup(&nmreq);
5219 nfsm_chain_cleanup(&nmrep);
5220
5221 if (!lockerror) {
5222 dnp->n_flag |= NMODIFIED;
5223 /* if directory hadn't changed, update namecache mtime */
5224 if (nfstimespeccmp(&dnp->n_ncmtime, &premtime, ==)) {
5225 NFS_CHANGED_UPDATE_NC(nfsvers, dnp, &dnp->n_vattr);
5226 }
5227 nfs_node_unlock(dnp);
5228 /* nfs_getattr() will check changed and purge caches */
5229 nfs_getattr(dnp, NULL, ctx, wccpostattr ? NGA_CACHED : NGA_UNCACHED);
5230 }
5231
5232 if (!error && fh.fh_len) {
5233 error = nfs_nget(NFSTOMP(dnp), dnp, cnp, fh.fh_data, fh.fh_len, &nvattr, &xid, rq.r_auth, NG_MAKEENTRY, &np);
5234 }
5235 if (!error && np) {
5236 newvp = NFSTOV(np);
5237 }
5238
5239 if (dul_in_progress) {
5240 nfs_dulookup_finish(&dul, dnp, ctx);
5241 }
5242
5243 /*
5244 * Kludge: map EEXIST => 0, on the assumption that the reply is for a
5245 * retried request, provided we can successfully look up the directory.
5246 */
5247 if ((error == EEXIST) || (!error && !newvp)) {
5248 if (newvp) {
5249 nfs_node_unlock(np);
5250 vnode_put(newvp);
5251 newvp = NULL;
5252 }
5253 error = nfs_lookitup(dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx, &np);
5254 if (!error) {
5255 newvp = NFSTOV(np);
5256 if (vnode_vtype(newvp) != VDIR) {
5257 error = EEXIST;
5258 }
5259 }
5260 }
5261 if (!busyerror) {
5262 nfs_node_clear_busy(dnp);
5263 }
5264 if (!error && (gotuid || gotgid) &&
5265 (!newvp || nfs_getattrcache(np, &nvattr, 0) ||
5266 (gotuid && (nvattr.nva_uid != vap->va_uid)) ||
5267 (gotgid && (nvattr.nva_gid != vap->va_gid)))) {
5268 /* clear ID bits if server didn't use them (or we can't tell) */
5269 VATTR_CLEAR_SUPPORTED(vap, va_uid);
5270 VATTR_CLEAR_SUPPORTED(vap, va_gid);
5271 }
5272 if (error) {
5273 if (newvp) {
5274 nfs_node_unlock(np);
5275 vnode_put(newvp);
5276 }
5277 } else {
5278 nfs_node_unlock(np);
5279 *ap->a_vpp = newvp;
5280 }
5281 return error;
5282 }
5283
5284 /*
5285 * NFS remove directory call
5286 */
5287 int
5288 nfs3_vnop_rmdir(
5289 struct vnop_rmdir_args /* {
5290 * struct vnodeop_desc *a_desc;
5291 * vnode_t a_dvp;
5292 * vnode_t a_vp;
5293 * struct componentname *a_cnp;
5294 * vfs_context_t a_context;
5295 * } */*ap)
5296 {
5297 vfs_context_t ctx = ap->a_context;
5298 vnode_t vp = ap->a_vp;
5299 vnode_t dvp = ap->a_dvp;
5300 struct componentname *cnp = ap->a_cnp;
5301 int error = 0, lockerror = ENOENT, status, wccpostattr = 0;
5302 struct timespec premtime = { .tv_sec = 0, .tv_nsec = 0 };
5303 struct nfsmount *nmp;
5304 nfsnode_t np = VTONFS(vp);
5305 nfsnode_t dnp = VTONFS(dvp);
5306 int nfsvers;
5307 u_int64_t xid;
5308 struct nfsm_chain nmreq, nmrep;
5309 struct nfsreq rq, *req = &rq;
5310 struct nfs_dulookup dul;
5311 int namedattrs;
5312 int dul_in_progress = 0;
5313
5314 nmp = VTONMP(vp);
5315 if (nfs_mount_gone(nmp)) {
5316 return ENXIO;
5317 }
5318 nfsvers = nmp->nm_vers;
5319 namedattrs = (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR);
5320
5321 if ((nfsvers == NFS_VER2) && (cnp->cn_namelen > NFS_MAXNAMLEN)) {
5322 return ENAMETOOLONG;
5323 }
5324
5325 if ((error = nfs_node_set_busy2(dnp, np, vfs_context_thread(ctx)))) {
5326 return error;
5327 }
5328
5329 if (!namedattrs) {
5330 nfs_dulookup_init(&dul, dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx);
5331 }
5332
5333 nfsm_chain_null(&nmreq);
5334 nfsm_chain_null(&nmrep);
5335
5336 nfsm_chain_build_alloc_init(error, &nmreq,
5337 NFSX_FH(nfsvers) + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen));
5338 nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
5339 nfsm_chain_add_name(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen, nmp);
5340 nfsm_chain_build_done(error, &nmreq);
5341 nfsmout_if(error);
5342
5343 error = nfs_request_async(dnp, NULL, &nmreq, NFSPROC_RMDIR,
5344 vfs_context_thread(ctx), vfs_context_ucred(ctx), NULL, 0, NULL, &req);
5345 if (!error) {
5346 if (!namedattrs) {
5347 nfs_dulookup_start(&dul, dnp, ctx);
5348 dul_in_progress = 1;
5349 }
5350 error = nfs_request_async_finish(req, &nmrep, &xid, &status);
5351 }
5352
5353 if ((lockerror = nfs_node_lock(dnp))) {
5354 error = lockerror;
5355 }
5356 if (nfsvers == NFS_VER3) {
5357 nfsm_chain_get_wcc_data(error, &nmrep, dnp, &premtime, &wccpostattr, &xid);
5358 }
5359 if (!error) {
5360 error = status;
5361 }
5362 nfsmout:
5363 nfsm_chain_cleanup(&nmreq);
5364 nfsm_chain_cleanup(&nmrep);
5365
5366 if (!lockerror) {
5367 dnp->n_flag |= NMODIFIED;
5368 /* if directory hadn't changed, update namecache mtime */
5369 if (nfstimespeccmp(&dnp->n_ncmtime, &premtime, ==)) {
5370 NFS_CHANGED_UPDATE_NC(nfsvers, dnp, &dnp->n_vattr);
5371 }
5372 nfs_node_unlock(dnp);
5373 nfs_name_cache_purge(dnp, np, cnp, ctx);
5374 /* nfs_getattr() will check changed and purge caches */
5375 nfs_getattr(dnp, NULL, ctx, wccpostattr ? NGA_CACHED : NGA_UNCACHED);
5376 }
5377 if (dul_in_progress) {
5378 nfs_dulookup_finish(&dul, dnp, ctx);
5379 }
5380 nfs_node_clear_busy2(dnp, np);
5381
5382 /*
5383 * Kludge: map ENOENT => 0, on the assumption that the reply is for a retried request.
5384 */
5385 if (error == ENOENT) {
5386 error = 0;
5387 }
5388 if (!error) {
5389 /*
5390 * remove nfsnode from hash now so we can't accidentally find it
5391 * again if another object gets created with the same filehandle
5392 * before this vnode gets reclaimed
5393 */
5394 lck_mtx_lock(nfs_node_hash_mutex);
5395 if (np->n_hflag & NHHASHED) {
5396 LIST_REMOVE(np, n_hash);
5397 np->n_hflag &= ~NHHASHED;
5398 FSDBG(266, 0, np, np->n_flag, 0xb1eb1e);
5399 }
5400 lck_mtx_unlock(nfs_node_hash_mutex);
5401 }
5402 return error;
5403 }
5404
5405 /*
5406 * NFS readdir call
5407 *
5408 * The incoming "offset" is a directory cookie indicating the point in the
5409 * directory from which entries should be read. A zero cookie means start at
5410 * the beginning of the directory. Any other cookie will be a cookie
5411 * returned from the server.
5412 *
5413 * Using that cookie, determine which buffer (and where in that buffer)
5414 * to start returning entries from. Buffer logical block numbers are
5415 * the cookies they start at. If a buffer is found that is not full,
5416 * call into the bio/RPC code to fill it. The RPC code will probably
5417 * fill several buffers (dropping the first, requiring a re-get).
5418 *
5419 * When done copying entries to the buffer, set the offset to the current
5420 * entry's cookie and enter that cookie in the cookie cache.
5421 *
5422 * Note: because the getdirentries(2) API returns a long-typed offset,
5423 * the incoming offset is a potentially truncated cookie (ptc).
5424 * The cookie matching code is aware of this and will fall back to
5425 * matching only 32 bits of the cookie.
5426 */
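/*
 * A minimal userland sketch of the truncated-cookie fallback described
 * above (not part of the build). COOKIE_SAME32 is a hypothetical
 * stand-in for the real NFS_DIR_COOKIE_SAME32 macro, assuming the
 * fallback compares only the low 32 bits of each cookie.
 */
#if 0
#include <stdint.h>
#include <stdio.h>

#define COOKIE_SAME32(a, b)	(((a) & 0xffffffffULL) == ((b) & 0xffffffffULL))

int
main(void)
{
	uint64_t server_cookie = 0x123456789abcdef0ULL;
	/* getdirentries(2) hands the cookie back through a long; an ILP32
	 * process silently truncates it to the low 32 bits */
	uint32_t truncated = (uint32_t)server_cookie;

	printf("exact match:  %d\n", server_cookie == (uint64_t)truncated);    /* 0 */
	printf("32-bit match: %d\n", COOKIE_SAME32(server_cookie, truncated)); /* 1 */
	return 0;
}
#endif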
5427 int
5428 nfs_vnop_readdir(
5429 struct vnop_readdir_args /* {
5430 * struct vnodeop_desc *a_desc;
5431 * vnode_t a_vp;
5432 * struct uio *a_uio;
5433 * int a_flags;
5434 * int *a_eofflag;
5435 * int *a_numdirent;
5436 * vfs_context_t a_context;
5437 * } */*ap)
5438 {
5439 vfs_context_t ctx = ap->a_context;
5440 vnode_t dvp = ap->a_vp;
5441 nfsnode_t dnp = VTONFS(dvp);
5442 struct nfsmount *nmp;
5443 uio_t uio = ap->a_uio;
5444 int error, nfsvers, extended, numdirent, bigcookies, ptc, done;
5445 uint16_t i, iptc, rlen, nlen;
5446 uint64_t cookie, nextcookie, lbn = 0;
5447 struct nfsbuf *bp = NULL;
5448 struct nfs_dir_buf_header *ndbhp;
5449 struct direntry *dp, *dpptc;
5450 struct dirent dent;
5451 char *cp = NULL;
5452 thread_t thd;
5453
5454 nmp = VTONMP(dvp);
5455 if (nfs_mount_gone(nmp)) {
5456 return ENXIO;
5457 }
5458 nfsvers = nmp->nm_vers;
5459 bigcookies = (nmp->nm_state & NFSSTA_BIGCOOKIES);
5460 extended = (ap->a_flags & VNODE_READDIR_EXTENDED);
5461
5462 if (vnode_vtype(dvp) != VDIR) {
5463 return EPERM;
5464 }
5465
5466 if (ap->a_eofflag) {
5467 *ap->a_eofflag = 0;
5468 }
5469
5470 if (uio_resid(uio) == 0) {
5471 return 0;
5472 }
5473 #if CONFIG_NFS4
5474 if ((nfsvers >= NFS_VER4) && (dnp->n_vattr.nva_flags & NFS_FFLAG_TRIGGER)) {
5475 /* trigger directories should never be read, return nothing */
5476 return 0;
5477 }
5478 #endif
5479 thd = vfs_context_thread(ctx);
5480 numdirent = done = 0;
5481 nextcookie = uio_offset(uio);
5482 ptc = bigcookies && NFS_DIR_COOKIE_POTENTIALLY_TRUNCATED(nextcookie);
5483
5484 if ((error = nfs_node_lock(dnp))) {
5485 goto out;
5486 }
5487
5488 if (dnp->n_flag & NNEEDINVALIDATE) {
5489 dnp->n_flag &= ~NNEEDINVALIDATE;
5490 nfs_invaldir(dnp);
5491 nfs_node_unlock(dnp);
5492 error = nfs_vinvalbuf(dvp, 0, ctx, 1);
5493 if (!error) {
5494 error = nfs_node_lock(dnp);
5495 }
5496 if (error) {
5497 goto out;
5498 }
5499 }
5500
5501 /*
5502 * check for need to invalidate when (re)starting at beginning
5503 */
5504 if (!nextcookie) {
5505 if (dnp->n_flag & NMODIFIED) {
5506 nfs_invaldir(dnp);
5507 nfs_node_unlock(dnp);
5508 if ((error = nfs_vinvalbuf(dvp, 0, ctx, 1))) {
5509 goto out;
5510 }
5511 } else {
5512 nfs_node_unlock(dnp);
5513 }
5514 /* nfs_getattr() will check changed and purge caches */
5515 if ((error = nfs_getattr(dnp, NULL, ctx, NGA_UNCACHED))) {
5516 goto out;
5517 }
5518 } else {
5519 nfs_node_unlock(dnp);
5520 }
5521
5522 error = nfs_dir_cookie_to_lbn(dnp, nextcookie, &ptc, &lbn);
5523 if (error) {
5524 if (error < 0) { /* just hit EOF cookie */
5525 done = 1;
5526 error = 0;
5527 }
5528 if (ap->a_eofflag) {
5529 *ap->a_eofflag = 1;
5530 }
5531 }
5532
5533 while (!error && !done) {
5534 OSAddAtomic64(1, &nfsstats.biocache_readdirs);
5535 cookie = nextcookie;
5536 getbuffer:
5537 error = nfs_buf_get(dnp, lbn, NFS_DIRBLKSIZ, thd, NBLK_READ, &bp);
5538 if (error) {
5539 goto out;
5540 }
5541 ndbhp = (struct nfs_dir_buf_header*)bp->nb_data;
5542 if (!ISSET(bp->nb_flags, NB_CACHE) || !ISSET(ndbhp->ndbh_flags, NDB_FULL)) {
5543 if (!ISSET(bp->nb_flags, NB_CACHE)) { /* initialize the buffer */
5544 ndbhp->ndbh_flags = 0;
5545 ndbhp->ndbh_count = 0;
5546 ndbhp->ndbh_entry_end = sizeof(*ndbhp);
5547 ndbhp->ndbh_ncgen = dnp->n_ncgen;
5548 }
5549 error = nfs_buf_readdir(bp, ctx);
5550 if (error == NFSERR_DIRBUFDROPPED) {
5551 goto getbuffer;
5552 }
5553 if (error) {
5554 nfs_buf_release(bp, 1);
5555 }
5556 if (error && (error != ENXIO) && (error != ETIMEDOUT) && (error != EINTR) && (error != ERESTART)) {
5557 if (!nfs_node_lock(dnp)) {
5558 nfs_invaldir(dnp);
5559 nfs_node_unlock(dnp);
5560 }
5561 nfs_vinvalbuf(dvp, 0, ctx, 1);
5562 if (error == NFSERR_BAD_COOKIE) {
5563 error = ENOENT;
5564 }
5565 }
5566 if (error) {
5567 goto out;
5568 }
5569 }
5570
5571 /* find next entry to return */
5572 dp = NFS_DIR_BUF_FIRST_DIRENTRY(bp);
5573 i = 0;
5574 if ((lbn != cookie) && !(ptc && NFS_DIR_COOKIE_SAME32(lbn, cookie))) {
5575 dpptc = NULL;
5576 iptc = 0;
5577 for (; (i < ndbhp->ndbh_count) && (cookie != dp->d_seekoff); i++) {
5578 if (ptc && !dpptc && NFS_DIR_COOKIE_SAME32(cookie, dp->d_seekoff)) {
5579 iptc = i;
5580 dpptc = dp;
5581 }
5582 nextcookie = dp->d_seekoff;
5583 dp = NFS_DIRENTRY_NEXT(dp);
5584 }
5585 if ((i == ndbhp->ndbh_count) && dpptc) {
5586 i = iptc;
5587 dp = dpptc;
5588 }
5589 if (i < ndbhp->ndbh_count) {
5590 nextcookie = dp->d_seekoff;
5591 dp = NFS_DIRENTRY_NEXT(dp);
5592 i++;
5593 }
5594 }
5595 ptc = 0; /* only have to deal with ptc on first cookie */
5596
5597 /* return as many entries as we can */
5598 for (; i < ndbhp->ndbh_count; i++) {
5599 if (extended) {
5600 rlen = dp->d_reclen;
5601 cp = (char*)dp;
5602 } else {
5603 if (!cp) {
5604 cp = (char*)&dent;
5605 bzero(cp, sizeof(dent));
5606 }
5607 if (dp->d_namlen > (sizeof(dent.d_name) - 1)) {
5608 nlen = sizeof(dent.d_name) - 1;
5609 } else {
5610 nlen = dp->d_namlen;
5611 }
5612 rlen = NFS_DIRENT_LEN(nlen);
5613 dent.d_reclen = rlen;
5614 dent.d_ino = dp->d_ino;
5615 dent.d_type = dp->d_type;
5616 dent.d_namlen = nlen;
5617 strlcpy(dent.d_name, dp->d_name, nlen + 1);
5618 }
5619 /* check that the record fits */
5620 if (rlen > uio_resid(uio)) {
5621 done = 1;
5622 break;
5623 }
5624 if ((error = uiomove(cp, rlen, uio))) {
5625 break;
5626 }
5627 numdirent++;
5628 nextcookie = dp->d_seekoff;
5629 dp = NFS_DIRENTRY_NEXT(dp);
5630 }
5631
5632 if (i == ndbhp->ndbh_count) {
5633 /* hit end of buffer, move to next buffer */
5634 lbn = nextcookie;
5635 /* if we also hit EOF, we're done */
5636 if (ISSET(ndbhp->ndbh_flags, NDB_EOF)) {
5637 done = 1;
5638 if (ap->a_eofflag) {
5639 *ap->a_eofflag = 1;
5640 }
5641 }
5642 }
5643 if (!error) {
5644 uio_setoffset(uio, nextcookie);
5645 }
5646 if (!error && !done && (nextcookie == cookie)) {
5647 printf("nfs readdir cookie didn't change 0x%llx, %d/%d\n", cookie, i, ndbhp->ndbh_count);
5648 error = EIO;
5649 }
5650 nfs_buf_release(bp, 1);
5651 }
5652
5653 if (!error) {
5654 nfs_dir_cookie_cache(dnp, nextcookie, lbn);
5655 }
5656
5657 if (ap->a_numdirent) {
5658 *ap->a_numdirent = numdirent;
5659 }
5660 out:
5661 return error;
5662 }
5663
5664
5665 /*
5666 * Invalidate cached directory information, except for the actual directory
5667 * blocks (which are invalidated separately).
5668 */
5669 void
5670 nfs_invaldir(nfsnode_t dnp)
5671 {
5672 if (vnode_vtype(NFSTOV(dnp)) != VDIR) {
5673 return;
5674 }
5675 dnp->n_eofcookie = 0;
5676 dnp->n_cookieverf = 0;
5677 if (!dnp->n_cookiecache) {
5678 return;
5679 }
5680 dnp->n_cookiecache->free = 0;
5681 dnp->n_cookiecache->mru = -1;
5682 memset(dnp->n_cookiecache->next, -1, NFSNUMCOOKIES);
5683 }
5684
5685 /*
5686 * calculate how much space is available for additional directory entries.
5687 */
5688 uint32_t
5689 nfs_dir_buf_freespace(struct nfsbuf *bp, int rdirplus)
5690 {
5691 struct nfs_dir_buf_header *ndbhp = (struct nfs_dir_buf_header*)bp->nb_data;
5692 uint32_t space;
5693
5694 if (!ndbhp) {
5695 return 0;
5696 }
5697 space = bp->nb_bufsize - ndbhp->ndbh_entry_end;
5698 if (rdirplus) {
5699 space -= ndbhp->ndbh_count * sizeof(struct nfs_vattr);
5700 }
5701 return space;
5702 }
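/*
 * A descriptive sketch of the accounting above (not part of the build):
 * directory entries grow forward from the buffer header, while for
 * readdirplus one struct nfs_vattr per entry is reserved working back
 * from the buffer's tail (this assumes NFS_DIR_BUF_NVATTR addresses
 * attributes from the tail); free space is what remains in between.
 *
 *   +--------+-------------------- nb_bufsize --------------------+
 *   | header | direntries ... ->     free     <- ... nfs_vattrs   |
 *   +--------+---^-------------------------------------------^----+
 *         ndbh_entry_end                 ndbh_count * sizeof(nfs_vattr)
 */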
5703
5704 /*
5705 * add/update a cookie->lbn entry in the directory cookie cache
5706 */
5707 void
5708 nfs_dir_cookie_cache(nfsnode_t dnp, uint64_t cookie, uint64_t lbn)
5709 {
5710 struct nfsdmap *ndcc;
5711 int8_t i, prev;
5712
5713 if (!cookie) {
5714 return;
5715 }
5716
5717 if (nfs_node_lock(dnp)) {
5718 return;
5719 }
5720
5721 if (cookie == dnp->n_eofcookie) { /* EOF cookie */
5722 nfs_node_unlock(dnp);
5723 return;
5724 }
5725
5726 ndcc = dnp->n_cookiecache;
5727 if (!ndcc) {
5728 /* allocate the cookie cache structure */
5729 MALLOC_ZONE(dnp->n_cookiecache, struct nfsdmap *,
5730 sizeof(struct nfsdmap), M_NFSDIROFF, M_WAITOK);
5731 if (!dnp->n_cookiecache) {
5732 nfs_node_unlock(dnp);
5733 return;
5734 }
5735 ndcc = dnp->n_cookiecache;
5736 ndcc->free = 0;
5737 ndcc->mru = -1;
5738 memset(ndcc->next, -1, NFSNUMCOOKIES);
5739 }
5740
5741 /*
5742 * Search the list for this cookie.
5743 * Keep track of previous and last entries.
5744 */
5745 prev = -1;
5746 i = ndcc->mru;
5747 while ((i != -1) && (cookie != ndcc->cookies[i].key)) {
5748 if (ndcc->next[i] == -1) { /* stop on last entry so we can reuse */
5749 break;
5750 }
5751 prev = i;
5752 i = ndcc->next[i];
5753 }
5754 if ((i != -1) && (cookie == ndcc->cookies[i].key)) {
5755 /* found it, remove from list */
5756 if (prev != -1) {
5757 ndcc->next[prev] = ndcc->next[i];
5758 } else {
5759 ndcc->mru = ndcc->next[i];
5760 }
5761 } else {
5762 /* not found, use next free entry or reuse last entry */
5763 if (ndcc->free != NFSNUMCOOKIES) {
5764 i = ndcc->free++;
5765 } else {
5766 ndcc->next[prev] = -1;
5767 }
5768 ndcc->cookies[i].key = cookie;
5769 ndcc->cookies[i].lbn = lbn;
5770 }
5771 /* insert cookie at head of MRU list */
5772 ndcc->next[i] = ndcc->mru;
5773 ndcc->mru = i;
5774 nfs_node_unlock(dnp);
5775 }
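/*
 * A compact userland sketch of the array-backed MRU list managed above
 * (not part of the build). The struct below is a hypothetical
 * reduction of struct nfsdmap: next[] holds array indices with -1 as
 * the terminator, mru is the head, and free counts slots handed out.
 */
#if 0
#include <stdint.h>

#define NUMCOOKIES	14	/* assumed small fixed capacity */

struct dmap {
	uint64_t keys[NUMCOOKIES];
	int8_t next[NUMCOOKIES];
	int8_t mru;	/* index of most-recently-used entry, -1 if empty */
	int8_t free;	/* number of slots allocated so far */
};

/* look up key, moving it to the front; reuse the tail slot when full */
static void
dmap_touch(struct dmap *m, uint64_t key)
{
	int8_t i = m->mru, prev = -1;

	while ((i != -1) && (m->keys[i] != key)) {
		if (m->next[i] == -1) {	/* stop on the last entry so it can be reused */
			break;
		}
		prev = i;
		i = m->next[i];
	}
	if ((i != -1) && (m->keys[i] == key)) {
		/* found: unlink it from its current position */
		if (prev != -1) {
			m->next[prev] = m->next[i];
		} else {
			m->mru = m->next[i];
		}
	} else if (m->free != NUMCOOKIES) {
		i = m->free++;		/* grab a fresh slot */
	} else {
		m->next[prev] = -1;	/* full: cut the last entry loose and reuse it */
	}
	m->keys[i] = key;
	m->next[i] = m->mru;	/* push onto the head of the MRU list */
	m->mru = i;
}
#endif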
5776
5777 /*
5778 * Try to map the given directory cookie to a directory buffer (return lbn).
5779 * If we have a possibly truncated cookie (ptc), check for 32-bit matches too.
5780 */
5781 int
5782 nfs_dir_cookie_to_lbn(nfsnode_t dnp, uint64_t cookie, int *ptc, uint64_t *lbnp)
5783 {
5784 struct nfsdmap *ndcc = dnp->n_cookiecache;
5785 int8_t eofptc, found;
5786 int i, iptc;
5787 struct nfsmount *nmp;
5788 struct nfsbuf *bp, *lastbp;
5789 struct nfsbuflists blist;
5790 struct direntry *dp, *dpptc;
5791 struct nfs_dir_buf_header *ndbhp;
5792
5793 if (!cookie) { /* initial cookie */
5794 *lbnp = 0;
5795 *ptc = 0;
5796 return 0;
5797 }
5798
5799 if (nfs_node_lock(dnp)) {
5800 return ENOENT;
5801 }
5802
5803 if (cookie == dnp->n_eofcookie) { /* EOF cookie */
5804 nfs_node_unlock(dnp);
5805 OSAddAtomic64(1, &nfsstats.direofcache_hits);
5806 *ptc = 0;
5807 return -1;
5808 }
5809 /* note if cookie is a 32-bit match with the EOF cookie */
5810 eofptc = *ptc ? NFS_DIR_COOKIE_SAME32(cookie, dnp->n_eofcookie) : 0;
5811 iptc = -1;
5812
5813 /* search the list for the cookie */
5814 for (i = ndcc ? ndcc->mru : -1; i >= 0; i = ndcc->next[i]) {
5815 if (ndcc->cookies[i].key == cookie) {
5816 /* found a match for this cookie */
5817 *lbnp = ndcc->cookies[i].lbn;
5818 nfs_node_unlock(dnp);
5819 OSAddAtomic64(1, &nfsstats.direofcache_hits);
5820 *ptc = 0;
5821 return 0;
5822 }
5823 /* check for 32-bit match */
5824 if (*ptc && (iptc == -1) && NFS_DIR_COOKIE_SAME32(ndcc->cookies[i].key, cookie)) {
5825 iptc = i;
5826 }
5827 }
5828 /* exact match not found */
5829 if (eofptc) {
5830 /* but 32-bit match hit the EOF cookie */
5831 nfs_node_unlock(dnp);
5832 OSAddAtomic64(1, &nfsstats.direofcache_hits);
5833 return -1;
5834 }
5835 if (iptc >= 0) {
5836 /* but 32-bit match got a hit */
5837 *lbnp = ndcc->cookies[iptc].lbn;
5838 nfs_node_unlock(dnp);
5839 OSAddAtomic64(1, &nfsstats.direofcache_hits);
5840 return 0;
5841 }
5842 nfs_node_unlock(dnp);
5843
5844 /*
5845 * No match found in the cookie cache... hmm...
5846 * Let's search the directory's buffers for the cookie.
5847 */
5848 nmp = NFSTONMP(dnp);
5849 if (nfs_mount_gone(nmp)) {
5850 return ENXIO;
5851 }
5852 dpptc = NULL;
5853 found = 0;
5854
5855 lck_mtx_lock(nfs_buf_mutex);
5856 /*
5857 * Scan the list of buffers, keeping them in order.
5858 * Note that itercomplete inserts each of the remaining buffers
5859 * into the head of list (thus reversing the elements). So, we
5860 * make sure to iterate through all buffers, inserting them after
5861 * each other, to keep them in order.
5862 * Also note: the LIST_INSERT_AFTER(lastbp) is only safe because
5863 * we don't drop nfs_buf_mutex.
5864 */
5865 if (!nfs_buf_iterprepare(dnp, &blist, NBI_CLEAN)) {
5866 lastbp = NULL;
5867 while ((bp = LIST_FIRST(&blist))) {
5868 LIST_REMOVE(bp, nb_vnbufs);
5869 if (!lastbp) {
5870 LIST_INSERT_HEAD(&dnp->n_cleanblkhd, bp, nb_vnbufs);
5871 } else {
5872 LIST_INSERT_AFTER(lastbp, bp, nb_vnbufs);
5873 }
5874 lastbp = bp;
5875 if (found) {
5876 continue;
5877 }
5878 nfs_buf_refget(bp);
5879 if (nfs_buf_acquire(bp, NBAC_NOWAIT, 0, 0)) {
5880 /* just skip this buffer */
5881 nfs_buf_refrele(bp);
5882 continue;
5883 }
5884 nfs_buf_refrele(bp);
5885
5886 /* scan the buffer for the cookie */
5887 ndbhp = (struct nfs_dir_buf_header*)bp->nb_data;
5888 dp = NFS_DIR_BUF_FIRST_DIRENTRY(bp);
5889 dpptc = NULL;
5890 for (i = 0; (i < ndbhp->ndbh_count) && (cookie != dp->d_seekoff); i++) {
5891 if (*ptc && !dpptc && NFS_DIR_COOKIE_SAME32(cookie, dp->d_seekoff)) {
5892 dpptc = dp;
5893 iptc = i;
5894 }
5895 dp = NFS_DIRENTRY_NEXT(dp);
5896 }
5897 if ((i == ndbhp->ndbh_count) && dpptc) {
5898 /* found only a PTC match */
5899 dp = dpptc;
5900 i = iptc;
5901 } else if (i < ndbhp->ndbh_count) {
5902 *ptc = 0;
5903 }
5904 if (i < (ndbhp->ndbh_count - 1)) {
5905 /* next entry is *in* this buffer: return this block */
5906 *lbnp = bp->nb_lblkno;
5907 found = 1;
5908 } else if (i == (ndbhp->ndbh_count - 1)) {
5909 /* next entry refers to *next* buffer: return next block */
5910 *lbnp = dp->d_seekoff;
5911 found = 1;
5912 }
5913 nfs_buf_drop(bp);
5914 }
5915 nfs_buf_itercomplete(dnp, &blist, NBI_CLEAN);
5916 }
5917 lck_mtx_unlock(nfs_buf_mutex);
5918 if (found) {
5919 OSAddAtomic64(1, &nfsstats.direofcache_hits);
5920 return 0;
5921 }
5922
5923 /* still not found... oh well, just start a new block */
5924 *lbnp = cookie;
5925 OSAddAtomic64(1, &nfsstats.direofcache_misses);
5926 return 0;
5927 }
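/*
 * A userland sketch (not part of the build) of the order-preserving
 * move used by the buffer-list scans above and below: popping each
 * element off a source list and re-inserting it after the previously
 * moved element keeps the destination in source order, whereas
 * repeatedly using LIST_INSERT_HEAD would reverse it -- which is
 * exactly why the scans track lastbp.
 */
#if 0
#include <sys/queue.h>

struct elem {
	int val;
	LIST_ENTRY(elem) link;
};
LIST_HEAD(elist, elem);

static void
move_in_order(struct elist *src, struct elist *dst)
{
	struct elem *e, *last = NULL;

	while ((e = LIST_FIRST(src)) != NULL) {
		LIST_REMOVE(e, link);
		if (last == NULL) {
			LIST_INSERT_HEAD(dst, e, link);
		} else {
			LIST_INSERT_AFTER(last, e, link);
		}
		last = e;
	}
}
#endif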
5928
5929 /*
5930 * scan a directory buffer for the given name
5931 * Returns: ESRCH if not found, ENOENT if found invalid, 0 if found
5932 * Note: should only be called with RDIRPLUS directory buffers
5933 */
5934
5935 #define NDBS_PURGE 1
5936 #define NDBS_UPDATE 2
5937
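/*
 * Layout sketch of an RDIRPLUS direntry record, as packed by
 * nfs3_readdir_rpc() below and unpacked here (offsets relative to
 * d_name; fhlen is the raw file handle length byte):
 *
 *   d_name[0 .. d_namlen]                      name + terminating NUL
 *   d_name[d_namlen + 1]                       fhlen (one byte)
 *   d_name[d_namlen + 2 .. d_namlen+1+fhlen]   file handle data
 *   d_name[d_namlen + 2 + fhlen]               time_t attrstamp
 *
 * The matching nfs_vattr for entry i is stored separately in the
 * buffer and reached via NFS_DIR_BUF_NVATTR(bp, i).
 */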
5938 int
5939 nfs_dir_buf_search(
5940 struct nfsbuf *bp,
5941 struct componentname *cnp,
5942 fhandle_t *fhp,
5943 struct nfs_vattr *nvap,
5944 uint64_t *xidp,
5945 time_t *attrstampp,
5946 daddr64_t *nextlbnp,
5947 int flags)
5948 {
5949 struct direntry *dp;
5950 struct nfs_dir_buf_header *ndbhp;
5951 struct nfs_vattr *nvattrp;
5952 daddr64_t nextlbn = 0;
5953 int i, error = ESRCH;
5954 uint32_t fhlen;
5955
5956 /* scan the buffer for the name */
5957 ndbhp = (struct nfs_dir_buf_header*)bp->nb_data;
5958 dp = NFS_DIR_BUF_FIRST_DIRENTRY(bp);
5959 for (i = 0; i < ndbhp->ndbh_count; i++) {
5960 nextlbn = dp->d_seekoff;
5961 if ((cnp->cn_namelen == dp->d_namlen) && !strcmp(cnp->cn_nameptr, dp->d_name)) {
5962 fhlen = dp->d_name[dp->d_namlen + 1];
5963 nvattrp = NFS_DIR_BUF_NVATTR(bp, i);
5964 if ((ndbhp->ndbh_ncgen != bp->nb_np->n_ncgen) || (fhlen == 0) ||
5965 (nvattrp->nva_type == VNON) || (nvattrp->nva_fileid == 0)) {
5966 /* entry is not valid */
5967 error = ENOENT;
5968 break;
5969 }
5970 if (flags == NDBS_PURGE) {
5971 dp->d_fileno = 0;
5972 bzero(nvattrp, sizeof(*nvattrp));
5973 error = ENOENT;
5974 break;
5975 }
5976 if (flags == NDBS_UPDATE) {
5977 /* update direntry's attrs if fh matches */
5978 if ((fhp->fh_len == fhlen) && !bcmp(&dp->d_name[dp->d_namlen + 2], fhp->fh_data, fhlen)) {
5979 bcopy(nvap, nvattrp, sizeof(*nvap));
5980 dp->d_fileno = nvattrp->nva_fileid;
5981 nvattrp->nva_fileid = *xidp;
5982 *(time_t*)(&dp->d_name[dp->d_namlen + 2 + fhp->fh_len]) = *attrstampp;
5983 }
5984 error = 0;
5985 break;
5986 }
5987 /* copy out fh, attrs, attrstamp, and xid */
5988 fhp->fh_len = fhlen;
5989 bcopy(&dp->d_name[dp->d_namlen + 2], fhp->fh_data, MIN(fhp->fh_len, (int)sizeof(fhp->fh_data))); /* MIN: clamp the copy to fh_data's capacity */
5990 *attrstampp = *(time_t*)(&dp->d_name[dp->d_namlen + 2 + fhp->fh_len]);
5991 bcopy(nvattrp, nvap, sizeof(*nvap));
5992 *xidp = nvap->nva_fileid;
5993 nvap->nva_fileid = dp->d_fileno;
5994 error = 0;
5995 break;
5996 }
5997 dp = NFS_DIRENTRY_NEXT(dp);
5998 }
5999 if (nextlbnp) {
6000 *nextlbnp = nextlbn;
6001 }
6002 return error;
6003 }
6004
6005 /*
6006 * Look up a name in a directory's buffers.
6007 * Note: should only be called with RDIRPLUS directory buffers
6008 */
6009 int
6010 nfs_dir_buf_cache_lookup(nfsnode_t dnp, nfsnode_t *npp, struct componentname *cnp, vfs_context_t ctx, int purge)
6011 {
6012 nfsnode_t newnp;
6013 struct nfsmount *nmp;
6014 int error = 0, i, found = 0, count = 0;
6015 u_int64_t xid;
6016 struct nfs_vattr nvattr;
6017 fhandle_t fh;
6018 time_t attrstamp = 0;
6019 thread_t thd = vfs_context_thread(ctx);
6020 struct nfsbuf *bp, *lastbp, *foundbp;
6021 struct nfsbuflists blist;
6022 daddr64_t lbn, nextlbn;
6023 int dotunder = (cnp->cn_namelen > 2) && (cnp->cn_nameptr[0] == '.') && (cnp->cn_nameptr[1] == '_');
6024
6025 nmp = NFSTONMP(dnp);
6026 if (nfs_mount_gone(nmp)) {
6027 return ENXIO;
6028 }
6029 if (!purge) {
6030 *npp = NULL;
6031 }
6032
6033 /* first check most recent buffer (and next one too) */
6034 lbn = dnp->n_lastdbl;
6035 for (i = 0; i < 2; i++) {
6036 if ((error = nfs_buf_get(dnp, lbn, NFS_DIRBLKSIZ, thd, NBLK_READ | NBLK_ONLYVALID, &bp))) {
6037 return error;
6038 }
6039 if (!bp) {
6040 break;
6041 }
6042 count++;
6043 error = nfs_dir_buf_search(bp, cnp, &fh, &nvattr, &xid, &attrstamp, &nextlbn, purge ? NDBS_PURGE : 0);
6044 nfs_buf_release(bp, 0);
6045 if (error == ESRCH) {
6046 error = 0;
6047 } else {
6048 found = 1;
6049 break;
6050 }
6051 lbn = nextlbn;
6052 }
6053
6054 lck_mtx_lock(nfs_buf_mutex);
6055 if (found) {
6056 dnp->n_lastdbl = lbn;
6057 goto done;
6058 }
6059
6060 /*
6061 * Scan the list of buffers, keeping them in order.
6062 * Note that itercomplete inserts each of the remaining buffers
6063 * into the head of list (thus reversing the elements). So, we
6064 * make sure to iterate through all buffers, inserting them after
6065 * each other, to keep them in order.
6066 * Also note: the LIST_INSERT_AFTER(lastbp) is only safe because
6067 * we don't drop nfs_buf_mutex.
6068 */
6069 if (!nfs_buf_iterprepare(dnp, &blist, NBI_CLEAN)) {
6070 lastbp = foundbp = NULL;
6071 while ((bp = LIST_FIRST(&blist))) {
6072 LIST_REMOVE(bp, nb_vnbufs);
6073 if (!lastbp) {
6074 LIST_INSERT_HEAD(&dnp->n_cleanblkhd, bp, nb_vnbufs);
6075 } else {
6076 LIST_INSERT_AFTER(lastbp, bp, nb_vnbufs);
6077 }
6078 lastbp = bp;
6079 if (error || found) {
6080 continue;
6081 }
6082 if (!purge && dotunder && (count > 100)) { /* don't waste too much time looking for ._ files */
6083 continue;
6084 }
6085 nfs_buf_refget(bp);
6086 lbn = bp->nb_lblkno;
6087 if (nfs_buf_acquire(bp, NBAC_NOWAIT, 0, 0)) {
6088 /* just skip this buffer */
6089 nfs_buf_refrele(bp);
6090 continue;
6091 }
6092 nfs_buf_refrele(bp);
6093 count++;
6094 error = nfs_dir_buf_search(bp, cnp, &fh, &nvattr, &xid, &attrstamp, NULL, purge ? NDBS_PURGE : 0);
6095 if (error == ESRCH) {
6096 error = 0;
6097 } else {
6098 found = 1;
6099 foundbp = bp;
6100 }
6101 nfs_buf_drop(bp);
6102 }
6103 if (found) {
6104 LIST_REMOVE(foundbp, nb_vnbufs);
6105 LIST_INSERT_HEAD(&dnp->n_cleanblkhd, foundbp, nb_vnbufs);
6106 dnp->n_lastdbl = foundbp->nb_lblkno;
6107 }
6108 nfs_buf_itercomplete(dnp, &blist, NBI_CLEAN);
6109 }
6110 done:
6111 lck_mtx_unlock(nfs_buf_mutex);
6112
6113 if (!error && found && !purge) {
6114 error = nfs_nget(NFSTOMP(dnp), dnp, cnp, fh.fh_data, fh.fh_len,
6115 &nvattr, &xid, dnp->n_auth, NG_MAKEENTRY, &newnp);
6116 if (error) {
6117 return error;
6118 }
6119 newnp->n_attrstamp = attrstamp;
6120 *npp = newnp;
6121 nfs_node_unlock(newnp);
6122 /* check if the dir buffer's attrs are out of date */
6123 if (!nfs_getattr(newnp, &nvattr, ctx, NGA_CACHED) &&
6124 (newnp->n_attrstamp != attrstamp)) {
6125 /* they are, so update them */
6126 error = nfs_buf_get(dnp, lbn, NFS_DIRBLKSIZ, thd, NBLK_READ | NBLK_ONLYVALID, &bp);
6127 if (!error && bp) {
6128 attrstamp = newnp->n_attrstamp;
6129 xid = newnp->n_xid;
6130 nfs_dir_buf_search(bp, cnp, &fh, &nvattr, &xid, &attrstamp, NULL, NDBS_UPDATE);
6131 nfs_buf_release(bp, 0);
6132 }
6133 error = 0;
6134 }
6135 }
6136
6137 return error;
6138 }
6139
6140 /*
6141 * Purge name cache entries for the given node.
6142 * For RDIRPLUS, also invalidate the entry in the directory's buffers.
6143 */
6144 void
6145 nfs_name_cache_purge(nfsnode_t dnp, nfsnode_t np, struct componentname *cnp, vfs_context_t ctx)
6146 {
6147 struct nfsmount *nmp = NFSTONMP(dnp);
6148
6149 cache_purge(NFSTOV(np));
6150 if (nmp && (nmp->nm_vers > NFS_VER2) && NMFLAG(nmp, RDIRPLUS)) {
6151 nfs_dir_buf_cache_lookup(dnp, NULL, cnp, ctx, 1);
6152 }
6153 }
6154
6155 /*
6156 * NFS V3 readdir (plus) RPC.
6157 */
6158 int
6159 nfs3_readdir_rpc(nfsnode_t dnp, struct nfsbuf *bp, vfs_context_t ctx)
6160 {
6161 struct nfsmount *nmp;
6162 int error = 0, lockerror, nfsvers, rdirplus, bigcookies;
6163 int i, status, attrflag, fhflag, more_entries = 1, eof, bp_dropped = 0;
6164 uint32_t nmreaddirsize, nmrsize;
6165 uint32_t namlen, skiplen, fhlen, xlen, attrlen, reclen, space_free, space_needed;
6166 uint64_t cookie, lastcookie, xid, savedxid, fileno;
6167 struct nfsm_chain nmreq, nmrep, nmrepsave;
6168 fhandle_t fh;
6169 struct nfs_vattr *nvattrp;
6170 struct nfs_dir_buf_header *ndbhp;
6171 struct direntry *dp;
6172 char *padstart, padlen;
6173 struct timeval now;
6174
6175 nmp = NFSTONMP(dnp);
6176 if (nfs_mount_gone(nmp)) {
6177 return ENXIO;
6178 }
6179 nfsvers = nmp->nm_vers;
6180 nmreaddirsize = nmp->nm_readdirsize;
6181 nmrsize = nmp->nm_rsize;
6182 bigcookies = nmp->nm_state & NFSSTA_BIGCOOKIES;
6183 noplus:
6184 rdirplus = ((nfsvers > NFS_VER2) && NMFLAG(nmp, RDIRPLUS)) ? 1 : 0;
6185
6186 if ((lockerror = nfs_node_lock(dnp))) {
6187 return lockerror;
6188 }
6189
6190 /* determine cookie to use, and move dp to the right offset */
6191 ndbhp = (struct nfs_dir_buf_header*)bp->nb_data;
6192 dp = NFS_DIR_BUF_FIRST_DIRENTRY(bp);
6193 if (ndbhp->ndbh_count) {
6194 for (i = 0; i < ndbhp->ndbh_count - 1; i++) {
6195 dp = NFS_DIRENTRY_NEXT(dp);
6196 }
6197 cookie = dp->d_seekoff;
6198 dp = NFS_DIRENTRY_NEXT(dp);
6199 } else {
6200 cookie = bp->nb_lblkno;
6201 /* increment with every buffer read */
6202 OSAddAtomic64(1, &nfsstats.readdir_bios);
6203 }
6204 lastcookie = cookie;
6205
6206 /*
6207 * Loop around doing readdir(plus) RPCs of size nm_readdirsize until
6208 * the buffer is full (or we hit EOF). Then put the remainder of the
6209 * results in the next buffer(s).
6210 */
6211 nfsm_chain_null(&nmreq);
6212 nfsm_chain_null(&nmrep);
6213 while (nfs_dir_buf_freespace(bp, rdirplus) && !(ndbhp->ndbh_flags & NDB_FULL)) {
6214 nfsm_chain_build_alloc_init(error, &nmreq,
6215 NFSX_FH(nfsvers) + NFSX_READDIR(nfsvers) + NFSX_UNSIGNED);
6216 nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
6217 if (nfsvers == NFS_VER3) {
6218 /* opaque values don't need swapping, but as long */
6219 /* as we are consistent about it, it should be ok */
6220 nfsm_chain_add_64(error, &nmreq, cookie);
6221 nfsm_chain_add_64(error, &nmreq, dnp->n_cookieverf);
6222 } else {
6223 nfsm_chain_add_32(error, &nmreq, cookie);
6224 }
6225 nfsm_chain_add_32(error, &nmreq, nmreaddirsize);
6226 if (rdirplus) {
6227 nfsm_chain_add_32(error, &nmreq, nmrsize);
6228 }
6229 nfsm_chain_build_done(error, &nmreq);
6230 nfs_node_unlock(dnp);
6231 lockerror = ENOENT;
6232 nfsmout_if(error);
6233
6234 error = nfs_request(dnp, NULL, &nmreq,
6235 rdirplus ? NFSPROC_READDIRPLUS : NFSPROC_READDIR,
6236 ctx, NULL, &nmrep, &xid, &status);
6237
6238 if ((lockerror = nfs_node_lock(dnp))) {
6239 error = lockerror;
6240 }
6241
6242 savedxid = xid;
6243 if (nfsvers == NFS_VER3) {
6244 nfsm_chain_postop_attr_update(error, &nmrep, dnp, &xid);
6245 }
6246 if (!error) {
6247 error = status;
6248 }
6249 if (nfsvers == NFS_VER3) {
6250 nfsm_chain_get_64(error, &nmrep, dnp->n_cookieverf);
6251 }
6252 nfsm_chain_get_32(error, &nmrep, more_entries);
6253
6254 if (!lockerror) {
6255 nfs_node_unlock(dnp);
6256 lockerror = ENOENT;
6257 }
6258 if (error == NFSERR_NOTSUPP) {
6259 /* oops... it doesn't look like readdirplus is supported */
6260 lck_mtx_lock(&nmp->nm_lock);
6261 NFS_BITMAP_CLR(nmp->nm_flags, NFS_MFLAG_RDIRPLUS);
6262 lck_mtx_unlock(&nmp->nm_lock);
6263 goto noplus;
6264 }
6265 nfsmout_if(error);
6266
6267 if (rdirplus) {
6268 microuptime(&now);
6269 }
6270
6271 /* loop through the entries packing them into the buffer */
6272 while (more_entries) {
6273 if (nfsvers == NFS_VER3) {
6274 nfsm_chain_get_64(error, &nmrep, fileno);
6275 } else {
6276 nfsm_chain_get_32(error, &nmrep, fileno);
6277 }
6278 nfsm_chain_get_32(error, &nmrep, namlen);
6279 nfsmout_if(error);
6280 /* just truncate names that don't fit in direntry.d_name */
6281 if (namlen <= 0) {
6282 error = EBADRPC;
6283 goto nfsmout;
6284 }
6285 if (namlen > (sizeof(dp->d_name) - 1)) {
6286 skiplen = namlen - sizeof(dp->d_name) + 1;
6287 namlen = sizeof(dp->d_name) - 1;
6288 } else {
6289 skiplen = 0;
6290 }
6291 /* guess that fh size will be same as parent */
6292 fhlen = rdirplus ? (1 + dnp->n_fhsize) : 0;
6293 xlen = rdirplus ? (fhlen + sizeof(time_t)) : 0;
6294 attrlen = rdirplus ? sizeof(struct nfs_vattr) : 0;
6295 reclen = NFS_DIRENTRY_LEN(namlen + xlen);
6296 space_needed = reclen + attrlen;
6297 space_free = nfs_dir_buf_freespace(bp, rdirplus);
6298 if (space_needed > space_free) {
6299 /*
6300 * We still have entries to pack, but we've
6301 * run out of room in the current buffer.
6302 * So we need to move to the next buffer.
6303 * The block# for the next buffer is the
6304 * last cookie in the current buffer.
6305 */
6306 nextbuffer:
6307 ndbhp->ndbh_flags |= NDB_FULL;
6308 nfs_buf_release(bp, 0);
6309 bp_dropped = 1;
6310 bp = NULL;
6311 error = nfs_buf_get(dnp, lastcookie, NFS_DIRBLKSIZ, vfs_context_thread(ctx), NBLK_READ, &bp);
6312 nfsmout_if(error);
6313 /* initialize buffer */
6314 ndbhp = (struct nfs_dir_buf_header*)bp->nb_data;
6315 ndbhp->ndbh_flags = 0;
6316 ndbhp->ndbh_count = 0;
6317 ndbhp->ndbh_entry_end = sizeof(*ndbhp);
6318 ndbhp->ndbh_ncgen = dnp->n_ncgen;
6319 space_free = nfs_dir_buf_freespace(bp, rdirplus);
6320 dp = NFS_DIR_BUF_FIRST_DIRENTRY(bp);
6321 /* increment with every buffer read */
6322 OSAddAtomic64(1, &nfsstats.readdir_bios);
6323 }
6324 nmrepsave = nmrep;
6325 dp->d_fileno = fileno;
6326 dp->d_namlen = namlen;
6327 dp->d_reclen = reclen;
6328 dp->d_type = DT_UNKNOWN;
6329 nfsm_chain_get_opaque(error, &nmrep, namlen, dp->d_name);
6330 nfsmout_if(error);
6331 dp->d_name[namlen] = '\0';
6332 if (skiplen) {
6333 nfsm_chain_adv(error, &nmrep,
6334 nfsm_rndup(namlen + skiplen) - nfsm_rndup(namlen));
6335 }
6336 if (nfsvers == NFS_VER3) {
6337 nfsm_chain_get_64(error, &nmrep, cookie);
6338 } else {
6339 nfsm_chain_get_32(error, &nmrep, cookie);
6340 }
6341 nfsmout_if(error);
6342 dp->d_seekoff = cookie;
6343 if (!bigcookies && (cookie >> 32) && (nmp == NFSTONMP(dnp))) {
6344 /* we've got a big cookie, make sure flag is set */
6345 lck_mtx_lock(&nmp->nm_lock);
6346 nmp->nm_state |= NFSSTA_BIGCOOKIES;
6347 lck_mtx_unlock(&nmp->nm_lock);
6348 bigcookies = 1;
6349 }
6350 if (rdirplus) {
6351 nvattrp = NFS_DIR_BUF_NVATTR(bp, ndbhp->ndbh_count);
6352 /* check for attributes */
6353 nfsm_chain_get_32(error, &nmrep, attrflag);
6354 nfsmout_if(error);
6355 if (attrflag) {
6356 /* grab attributes */
6357 error = nfs_parsefattr(nmp, &nmrep, NFS_VER3, nvattrp);
6358 nfsmout_if(error);
6359 dp->d_type = IFTODT(VTTOIF(nvattrp->nva_type));
6360 /* fileid is already in d_fileno, so stash xid in attrs */
6361 nvattrp->nva_fileid = savedxid;
6362 } else {
6363 /* mark the attributes invalid */
6364 bzero(nvattrp, sizeof(struct nfs_vattr));
6365 }
6366 /* check for file handle */
6367 nfsm_chain_get_32(error, &nmrep, fhflag);
6368 nfsmout_if(error);
6369 if (fhflag) {
6370 nfsm_chain_get_fh(error, &nmrep, NFS_VER3, &fh);
6371 nfsmout_if(error);
6372 fhlen = fh.fh_len + 1;
6373 xlen = fhlen + sizeof(time_t);
6374 reclen = NFS_DIRENTRY_LEN(namlen + xlen);
6375 space_needed = reclen + attrlen;
6376 if (space_needed > space_free) {
6377 /* didn't actually have the room... move on to next buffer */
6378 nmrep = nmrepsave;
6379 goto nextbuffer;
6380 }
6381 /* pack the file handle into the record */
6382 dp->d_name[dp->d_namlen + 1] = fh.fh_len;
6383 bcopy(fh.fh_data, &dp->d_name[dp->d_namlen + 2], fh.fh_len);
6384 } else {
6385 /* mark the file handle invalid */
6386 fh.fh_len = 0;
6387 fhlen = fh.fh_len + 1;
6388 xlen = fhlen + sizeof(time_t);
6389 reclen = NFS_DIRENTRY_LEN(namlen + xlen);
6390 bzero(&dp->d_name[dp->d_namlen + 1], fhlen);
6391 }
6392 *(time_t*)(&dp->d_name[dp->d_namlen + 1 + fhlen]) = now.tv_sec;
6393 dp->d_reclen = reclen;
6394 }
6395 padstart = dp->d_name + dp->d_namlen + 1 + xlen;
6396 ndbhp->ndbh_count++;
6397 lastcookie = cookie;
6398 /* advance to next direntry in buffer */
6399 dp = NFS_DIRENTRY_NEXT(dp);
6400 ndbhp->ndbh_entry_end = (char*)dp - bp->nb_data;
6401 /* zero out the pad bytes */
6402 padlen = (char*)dp - padstart;
6403 if (padlen > 0) {
6404 bzero(padstart, padlen);
6405 }
6406 /* check for more entries */
6407 nfsm_chain_get_32(error, &nmrep, more_entries);
6408 nfsmout_if(error);
6409 }
6410 /* Finally, get the eof boolean */
6411 nfsm_chain_get_32(error, &nmrep, eof);
6412 nfsmout_if(error);
6413 if (eof) {
6414 ndbhp->ndbh_flags |= (NDB_FULL | NDB_EOF);
6415 nfs_node_lock_force(dnp);
6416 dnp->n_eofcookie = lastcookie;
6417 nfs_node_unlock(dnp);
6418 } else {
6419 more_entries = 1;
6420 }
6421 if (bp_dropped) {
6422 nfs_buf_release(bp, 0);
6423 bp = NULL;
6424 break;
6425 }
6426 if ((lockerror = nfs_node_lock(dnp))) {
6427 error = lockerror;
6428 }
6429 nfsmout_if(error);
6430 nfsm_chain_cleanup(&nmrep);
6431 nfsm_chain_null(&nmreq);
6432 }
6433 nfsmout:
6434 if (bp_dropped && bp) {
6435 nfs_buf_release(bp, 0);
6436 }
6437 if (!lockerror) {
6438 nfs_node_unlock(dnp);
6439 }
6440 nfsm_chain_cleanup(&nmreq);
6441 nfsm_chain_cleanup(&nmrep);
6442 return bp_dropped ? NFSERR_DIRBUFDROPPED : error;
6443 }
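/*
 * For reference (descriptive note, not normative): the reply stream
 * parsed above follows the RFC 1813 READDIRPLUS3 layout -- post-op dir
 * attributes, cookieverf, then a list of entries, each preceded by a
 * value-follows word:
 *
 *   fileid (64) | name | cookie (64) | attr flag [+ attrs] | fh flag [+ fh]
 *
 * The list ends with a zero value-follows word followed by the eof
 * boolean. Plain READDIR3 is the same minus the per-entry attributes
 * and file handle, and NFSv2 uses 32-bit fileids and cookies.
 */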
6444
6445 /*
6446 * Silly rename. To make the stateless NFS filesystem look a little more
6447 * like "ufs", a remove of an active vnode is translated to a rename to a
6448 * funny-looking filename that is removed by nfs_vnop_inactive on the
6449 * nfsnode. There is the potential for another process on a different client
6450 * to create the same funny name between the time the lookitup() fails and
6451 * the rename() completes, but...
6452 */
6453
6454 /* format of "random" silly names - includes a number and pid */
6455 /* (note: shouldn't exceed size of nfs_sillyrename.nsr_name) */
6456 #define NFS_SILLYNAME_FORMAT ".nfs.%08x.%04x"
6457 /* starting from zero isn't silly enough */
6458 static uint32_t nfs_sillyrename_number = 0x20051025;
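/*
 * For illustration (userland sketch, not part of the build): with the
 * format above, a counter value of 0x20051026 and a pid of 0x1a4
 * produce the name ".nfs.20051026.01a4".
 */
#if 0
#include <stdio.h>

int
main(void)
{
	char name[32];

	snprintf(name, sizeof(name), ".nfs.%08x.%04x", 0x20051026u, (0x1a4u & 0xffffu));
	printf("%s\n", name);	/* .nfs.20051026.01a4 */
	return 0;
}
#endif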
6459
6460 int
6461 nfs_sillyrename(
6462 nfsnode_t dnp,
6463 nfsnode_t np,
6464 struct componentname *cnp,
6465 vfs_context_t ctx)
6466 {
6467 struct nfs_sillyrename *nsp;
6468 int error;
6469 short pid;
6470 kauth_cred_t cred;
6471 uint32_t num;
6472 struct nfsmount *nmp;
6473
6474 nmp = NFSTONMP(dnp);
6475 if (nfs_mount_gone(nmp)) {
6476 return ENXIO;
6477 }
6478
6479 nfs_name_cache_purge(dnp, np, cnp, ctx);
6480
6481 MALLOC_ZONE(nsp, struct nfs_sillyrename *,
6482 sizeof(struct nfs_sillyrename), M_NFSREQ, M_WAITOK);
6483 if (!nsp) {
6484 return ENOMEM;
6485 }
6486 cred = vfs_context_ucred(ctx);
6487 kauth_cred_ref(cred);
6488 nsp->nsr_cred = cred;
6489 nsp->nsr_dnp = dnp;
6490 error = vnode_ref(NFSTOV(dnp));
6491 if (error) {
6492 goto bad_norele;
6493 }
6494
6495 /* Fudge together a funny name */
6496 pid = vfs_context_pid(ctx);
6497 num = OSAddAtomic(1, &nfs_sillyrename_number);
6498 nsp->nsr_namlen = snprintf(nsp->nsr_name, sizeof(nsp->nsr_name),
6499 NFS_SILLYNAME_FORMAT, num, (pid & 0xffff));
6500 if (nsp->nsr_namlen >= (int)sizeof(nsp->nsr_name)) {
6501 nsp->nsr_namlen = sizeof(nsp->nsr_name) - 1;
6502 }
6503
6504 /* Try lookitups until we get one that isn't there */
6505 while (nfs_lookitup(dnp, nsp->nsr_name, nsp->nsr_namlen, ctx, NULL) == 0) {
6506 num = OSAddAtomic(1, &nfs_sillyrename_number);
6507 nsp->nsr_namlen = snprintf(nsp->nsr_name, sizeof(nsp->nsr_name),
6508 NFS_SILLYNAME_FORMAT, num, (pid & 0xffff));
6509 if (nsp->nsr_namlen >= (int)sizeof(nsp->nsr_name)) {
6510 nsp->nsr_namlen = sizeof(nsp->nsr_name) - 1;
6511 }
6512 }
6513
6514 /* now, do the rename */
6515 error = nmp->nm_funcs->nf_rename_rpc(dnp, cnp->cn_nameptr, cnp->cn_namelen,
6516 dnp, nsp->nsr_name, nsp->nsr_namlen, ctx);
6517
6518 /* Kludge: Map ENOENT => 0 assuming that it is a reply to a retry. */
6519 if (error == ENOENT) {
6520 error = 0;
6521 }
6522 if (!error) {
6523 nfs_node_lock_force(dnp);
6524 if (dnp->n_flag & NNEGNCENTRIES) {
6525 dnp->n_flag &= ~NNEGNCENTRIES;
6526 cache_purge_negatives(NFSTOV(dnp));
6527 }
6528 nfs_node_unlock(dnp);
6529 }
6530 FSDBG(267, dnp, np, num, error);
6531 if (error) {
6532 goto bad;
6533 }
6534 error = nfs_lookitup(dnp, nsp->nsr_name, nsp->nsr_namlen, ctx, &np);
6535 nfs_node_lock_force(np);
6536 np->n_sillyrename = nsp;
6537 nfs_node_unlock(np);
6538 return 0;
6539 bad:
6540 vnode_rele(NFSTOV(dnp));
6541 bad_norele:
6542 nsp->nsr_cred = NOCRED;
6543 kauth_cred_unref(&cred);
6544 FREE_ZONE(nsp, sizeof(*nsp), M_NFSREQ);
6545 return error;
6546 }
6547
6548 int
6549 nfs3_lookup_rpc_async(
6550 nfsnode_t dnp,
6551 char *name,
6552 int namelen,
6553 vfs_context_t ctx,
6554 struct nfsreq **reqp)
6555 {
6556 struct nfsmount *nmp;
6557 struct nfsm_chain nmreq;
6558 int error = 0, nfsvers;
6559
6560 nmp = NFSTONMP(dnp);
6561 if (nfs_mount_gone(nmp)) {
6562 return ENXIO;
6563 }
6564 nfsvers = nmp->nm_vers;
6565
6566 nfsm_chain_null(&nmreq);
6567
6568 nfsm_chain_build_alloc_init(error, &nmreq,
6569 NFSX_FH(nfsvers) + NFSX_UNSIGNED + nfsm_rndup(namelen));
6570 nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
6571 nfsm_chain_add_name(error, &nmreq, name, namelen, nmp);
6572 nfsm_chain_build_done(error, &nmreq);
6573 nfsmout_if(error);
6574 error = nfs_request_async(dnp, NULL, &nmreq, NFSPROC_LOOKUP,
6575 vfs_context_thread(ctx), vfs_context_ucred(ctx), NULL, 0, NULL, reqp);
6576 nfsmout:
6577 nfsm_chain_cleanup(&nmreq);
6578 return error;
6579 }
6580
6581 int
6582 nfs3_lookup_rpc_async_finish(
6583 nfsnode_t dnp,
6584 __unused char *name,
6585 __unused int namelen,
6586 vfs_context_t ctx,
6587 struct nfsreq *req,
6588 u_int64_t *xidp,
6589 fhandle_t *fhp,
6590 struct nfs_vattr *nvap)
6591 {
6592 int error = 0, lockerror = ENOENT, status, nfsvers, attrflag;
6593 u_int64_t xid;
6594 struct nfsmount *nmp;
6595 struct nfsm_chain nmrep;
6596
6597 nmp = NFSTONMP(dnp);
6598 if (nmp == NULL) {
6599 return ENXIO;
6600 }
6601 nfsvers = nmp->nm_vers;
6602
6603 nfsm_chain_null(&nmrep);
6604
6605 error = nfs_request_async_finish(req, &nmrep, xidp, &status);
6606
6607 if ((lockerror = nfs_node_lock(dnp))) {
6608 error = lockerror;
6609 }
6610 xid = *xidp;
6611 if (error || status) {
6612 if (nfsvers == NFS_VER3) {
6613 nfsm_chain_postop_attr_update(error, &nmrep, dnp, &xid);
6614 }
6615 if (!error) {
6616 error = status;
6617 }
6618 goto nfsmout;
6619 }
6620
6621 nfsmout_if(error || !fhp || !nvap);
6622
6623 /* get the file handle */
6624 nfsm_chain_get_fh(error, &nmrep, nfsvers, fhp);
6625
6626 /* get the attributes */
6627 if (nfsvers == NFS_VER3) {
6628 nfsm_chain_postop_attr_get(nmp, error, &nmrep, attrflag, nvap);
6629 nfsm_chain_postop_attr_update(error, &nmrep, dnp, &xid);
6630 if (!error && !attrflag) {
6631 error = nfs3_getattr_rpc(NULL, NFSTOMP(dnp), fhp->fh_data, fhp->fh_len, 0, ctx, nvap, xidp);
6632 }
6633 } else {
6634 error = nfs_parsefattr(nmp, &nmrep, nfsvers, nvap);
6635 }
6636 nfsmout:
6637 if (!lockerror) {
6638 nfs_node_unlock(dnp);
6639 }
6640 nfsm_chain_cleanup(&nmrep);
6641 return error;
6642 }
6643
6644 /*
6645 * Look up a file name and optionally either update the file handle or
6646 * allocate an nfsnode, depending on the value of npp.
6647 * npp == NULL --> just do the lookup
6648 * *npp == NULL --> allocate a new nfsnode and make sure attributes are
6649 * handled too
6650 * *npp != NULL --> update the file handle in the vnode
6651 */
6652 int
6653 nfs_lookitup(
6654 nfsnode_t dnp,
6655 char *name,
6656 int namelen,
6657 vfs_context_t ctx,
6658 nfsnode_t *npp)
6659 {
6660 int error = 0;
6661 nfsnode_t np, newnp = NULL;
6662 u_int64_t xid;
6663 fhandle_t fh;
6664 struct nfsmount *nmp;
6665 struct nfs_vattr nvattr;
6666 struct nfsreq rq, *req = &rq;
6667
6668 nmp = NFSTONMP(dnp);
6669 if (nfs_mount_gone(nmp)) {
6670 return ENXIO;
6671 }
6672
6673 if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_MAXNAME) &&
6674 (namelen > (int)nmp->nm_fsattr.nfsa_maxname)) {
6675 return ENAMETOOLONG;
6676 }
6677
6678 NVATTR_INIT(&nvattr);
6679
6680 /* check for lookup of "." */
6681 if ((name[0] == '.') && (namelen == 1)) {
6682 /* skip lookup, we know who we are */
6683 fh.fh_len = 0;
6684 newnp = dnp;
6685 goto nfsmout;
6686 }
6687
6688 error = nmp->nm_funcs->nf_lookup_rpc_async(dnp, name, namelen, ctx, &req);
6689 nfsmout_if(error);
6690 error = nmp->nm_funcs->nf_lookup_rpc_async_finish(dnp, name, namelen, ctx, req, &xid, &fh, &nvattr);
6691 nfsmout_if(!npp || error);
6692
6693 if (*npp) {
6694 np = *npp;
6695 if (fh.fh_len != np->n_fhsize) {
6696 u_char *oldbuf = (np->n_fhsize > NFS_SMALLFH) ? np->n_fhp : NULL;
6697 if (fh.fh_len > NFS_SMALLFH) {
6698 MALLOC_ZONE(np->n_fhp, u_char *, fh.fh_len, M_NFSBIGFH, M_WAITOK);
6699 if (!np->n_fhp) {
6700 np->n_fhp = oldbuf;
6701 error = ENOMEM;
6702 goto nfsmout;
6703 }
6704 } else {
6705 np->n_fhp = &np->n_fh[0];
6706 }
6707 if (oldbuf) {
6708 FREE_ZONE(oldbuf, np->n_fhsize, M_NFSBIGFH);
6709 }
6710 }
6711 bcopy(fh.fh_data, np->n_fhp, fh.fh_len);
6712 np->n_fhsize = fh.fh_len;
6713 nfs_node_lock_force(np);
6714 error = nfs_loadattrcache(np, &nvattr, &xid, 0);
6715 nfs_node_unlock(np);
6716 nfsmout_if(error);
6717 newnp = np;
6718 } else if (NFS_CMPFH(dnp, fh.fh_data, fh.fh_len)) {
6719 nfs_node_lock_force(dnp);
6720 if (dnp->n_xid <= xid) {
6721 error = nfs_loadattrcache(dnp, &nvattr, &xid, 0);
6722 }
6723 nfs_node_unlock(dnp);
6724 nfsmout_if(error);
6725 newnp = dnp;
6726 } else {
6727 struct componentname cn, *cnp = &cn;
6728 bzero(cnp, sizeof(*cnp));
6729 cnp->cn_nameptr = name;
6730 cnp->cn_namelen = namelen;
6731 error = nfs_nget(NFSTOMP(dnp), dnp, cnp, fh.fh_data, fh.fh_len,
6732 &nvattr, &xid, rq.r_auth, NG_MAKEENTRY, &np);
6733 nfsmout_if(error);
6734 newnp = np;
6735 }
6736
6737 nfsmout:
6738 if (npp && !*npp && !error) {
6739 *npp = newnp;
6740 }
6741 NVATTR_CLEANUP(&nvattr);
6742 return error;
6743 }
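/*
 * Usage sketch for the npp contract documented above (hypothetical
 * caller, not part of the build):
 */
#if 0
static int
lookitup_usage(nfsnode_t dnp, char *name, int namelen, vfs_context_t ctx, nfsnode_t existing)
{
	nfsnode_t np = NULL;
	int error;

	/* npp == NULL: existence probe only, no node created or updated */
	error = nfs_lookitup(dnp, name, namelen, ctx, NULL);

	/* *npp == NULL: allocate a fresh nfsnode with attributes handled */
	if (!error) {
		error = nfs_lookitup(dnp, name, namelen, ctx, &np);
	}

	/* *npp != NULL: refresh the existing node's file handle */
	if (!error) {
		error = nfs_lookitup(dnp, name, namelen, ctx, &existing);
	}
	return error;
}
#endif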
6744
6745 /*
6746 * set up and initialize a "._" (AppleDouble sidecar) lookup structure
6747 * used for performing async "._" file lookups.
6748 */
6749 void
6750 nfs_dulookup_init(struct nfs_dulookup *dulp, nfsnode_t dnp, const char *name, int namelen, vfs_context_t ctx)
6751 {
6752 int error, du_namelen;
6753 vnode_t du_vp;
6754 struct nfsmount *nmp = NFSTONMP(dnp);
6755
6756 /* check for ._ file in name cache */
6757 dulp->du_flags = 0;
6758 bzero(&dulp->du_cn, sizeof(dulp->du_cn));
6759 du_namelen = namelen + 2;
6760 if (!nmp || NMFLAG(nmp, NONEGNAMECACHE)) {
6761 return;
6762 }
6763 if ((namelen >= 2) && (name[0] == '.') && (name[1] == '_')) {
6764 return;
6765 }
6766 if (du_namelen >= (int)sizeof(dulp->du_smallname)) {
6767 MALLOC(dulp->du_cn.cn_nameptr, char *, du_namelen + 1, M_TEMP, M_WAITOK);
6768 } else {
6769 dulp->du_cn.cn_nameptr = dulp->du_smallname;
6770 }
6771 if (!dulp->du_cn.cn_nameptr) {
6772 return;
6773 }
6774 dulp->du_cn.cn_namelen = du_namelen;
6775 snprintf(dulp->du_cn.cn_nameptr, du_namelen + 1, "._%s", name);
6776 dulp->du_cn.cn_nameptr[du_namelen] = '\0';
6777 dulp->du_cn.cn_nameiop = LOOKUP;
6778 dulp->du_cn.cn_flags = MAKEENTRY;
6779
6780 error = cache_lookup(NFSTOV(dnp), &du_vp, &dulp->du_cn);
6781 if (error == -1) {
6782 vnode_put(du_vp);
6783 } else if (!error) {
6784 nmp = NFSTONMP(dnp);
6785 if (nmp && (nmp->nm_vers > NFS_VER2) && NMFLAG(nmp, RDIRPLUS)) {
6786 /* if rdirplus, try dir buf cache lookup */
6787 nfsnode_t du_np = NULL;
6788 if (!nfs_dir_buf_cache_lookup(dnp, &du_np, &dulp->du_cn, ctx, 0) && du_np) {
6789 /* dir buf cache hit */
6790 du_vp = NFSTOV(du_np);
6791 vnode_put(du_vp);
6792 error = -1;
6793 }
6794 }
6795 if (!error) {
6796 dulp->du_flags |= NFS_DULOOKUP_DOIT;
6797 }
6798 }
6799 }
6800
6801 /*
6802 * start an async "._" file lookup request
6803 */
6804 void
6805 nfs_dulookup_start(struct nfs_dulookup *dulp, nfsnode_t dnp, vfs_context_t ctx)
6806 {
6807 struct nfsmount *nmp = NFSTONMP(dnp);
6808 struct nfsreq *req = &dulp->du_req;
6809
6810 if (!nmp || !(dulp->du_flags & NFS_DULOOKUP_DOIT) || (dulp->du_flags & NFS_DULOOKUP_INPROG)) {
6811 return;
6812 }
6813 if (!nmp->nm_funcs->nf_lookup_rpc_async(dnp, dulp->du_cn.cn_nameptr,
6814 dulp->du_cn.cn_namelen, ctx, &req)) {
6815 dulp->du_flags |= NFS_DULOOKUP_INPROG;
6816 }
6817 }
6818
6819 /*
6820 * finish an async "._" file lookup request and clean up the structure
6821 */
6822 void
6823 nfs_dulookup_finish(struct nfs_dulookup *dulp, nfsnode_t dnp, vfs_context_t ctx)
6824 {
6825 struct nfsmount *nmp = NFSTONMP(dnp);
6826 int error;
6827 nfsnode_t du_np;
6828 u_int64_t xid;
6829 fhandle_t fh;
6830 struct nfs_vattr nvattr;
6831
6832 if (!nmp || !(dulp->du_flags & NFS_DULOOKUP_INPROG)) {
6833 goto out;
6834 }
6835
6836 NVATTR_INIT(&nvattr);
6837 error = nmp->nm_funcs->nf_lookup_rpc_async_finish(dnp, dulp->du_cn.cn_nameptr,
6838 dulp->du_cn.cn_namelen, ctx, &dulp->du_req, &xid, &fh, &nvattr);
6839 dulp->du_flags &= ~NFS_DULOOKUP_INPROG;
6840 if (error == ENOENT) {
6841 /* add a negative entry in the name cache */
6842 nfs_node_lock_force(dnp);
6843 cache_enter(NFSTOV(dnp), NULL, &dulp->du_cn);
6844 dnp->n_flag |= NNEGNCENTRIES;
6845 nfs_node_unlock(dnp);
6846 } else if (!error) {
6847 error = nfs_nget(NFSTOMP(dnp), dnp, &dulp->du_cn, fh.fh_data, fh.fh_len,
6848 &nvattr, &xid, dulp->du_req.r_auth, NG_MAKEENTRY, &du_np);
6849 if (!error) {
6850 nfs_node_unlock(du_np);
6851 vnode_put(NFSTOV(du_np));
6852 }
6853 }
6854 NVATTR_CLEANUP(&nvattr);
6855 out:
6856 if (dulp->du_flags & NFS_DULOOKUP_INPROG) {
6857 nfs_request_async_cancel(&dulp->du_req);
6858 }
6859 if (dulp->du_cn.cn_nameptr && (dulp->du_cn.cn_nameptr != dulp->du_smallname)) {
6860 FREE(dulp->du_cn.cn_nameptr, M_TEMP);
6861 }
6862 }
6863
6864
6865 /*
6866 * NFS Version 3 commit RPC
6867 */
6868 int
6869 nfs3_commit_rpc(
6870 nfsnode_t np,
6871 uint64_t offset,
6872 uint64_t count,
6873 kauth_cred_t cred,
6874 uint64_t wverf)
6875 {
6876 struct nfsmount *nmp;
6877 int error = 0, lockerror, status, wccpostattr = 0, nfsvers;
6878 struct timespec premtime = { .tv_sec = 0, .tv_nsec = 0 };
6879 u_int64_t xid, newwverf;
6880 uint32_t count32;
6881 struct nfsm_chain nmreq, nmrep;
6882
6883 nmp = NFSTONMP(np);
6884 FSDBG(521, np, offset, count, nmp ? nmp->nm_state : 0);
6885 if (nfs_mount_gone(nmp)) {
6886 return ENXIO;
6887 }
6888 if (!(nmp->nm_state & NFSSTA_HASWRITEVERF)) {
6889 return 0;
6890 }
6891 nfsvers = nmp->nm_vers;
6892
6893 if (count > UINT32_MAX) {
6894 count32 = 0; /* per RFC 1813, a zero count means commit from offset through end of file */
6895 } else {
6896 count32 = count;
6897 }
6898
6899 nfsm_chain_null(&nmreq);
6900 nfsm_chain_null(&nmrep);
6901
6902 nfsm_chain_build_alloc_init(error, &nmreq, NFSX_FH(NFS_VER3));
6903 nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
6904 nfsm_chain_add_64(error, &nmreq, offset);
6905 nfsm_chain_add_32(error, &nmreq, count32);
6906 nfsm_chain_build_done(error, &nmreq);
6907 nfsmout_if(error);
6908 error = nfs_request2(np, NULL, &nmreq, NFSPROC_COMMIT,
6909 current_thread(), cred, NULL, 0, &nmrep, &xid, &status);
6910 if ((lockerror = nfs_node_lock(np))) {
6911 error = lockerror;
6912 }
6913 /* can we do anything useful with the wcc info? */
6914 nfsm_chain_get_wcc_data(error, &nmrep, np, &premtime, &wccpostattr, &xid);
6915 if (!lockerror) {
6916 nfs_node_unlock(np);
6917 }
6918 if (!error) {
6919 error = status;
6920 }
6921 nfsm_chain_get_64(error, &nmrep, newwverf);
6922 nfsmout_if(error);
6923 lck_mtx_lock(&nmp->nm_lock);
6924 if (nmp->nm_verf != newwverf) {
6925 nmp->nm_verf = newwverf;
6926 }
6927 if (wverf != newwverf) {
6928 error = NFSERR_STALEWRITEVERF;
6929 }
6930 lck_mtx_unlock(&nmp->nm_lock);
6931 nfsmout:
6932 nfsm_chain_cleanup(&nmreq);
6933 nfsm_chain_cleanup(&nmrep);
6934 return error;
6935 }
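/*
 * Sketch of the caller's side of the write-verifier protocol above
 * (hypothetical fragment, names assumed; not part of the build): the
 * server returns an opaque verifier with every UNSTABLE write and with
 * the COMMIT reply; if it changed in between, the server may have
 * rebooted and discarded uncommitted data, so the writes must be
 * re-sent.
 */
#if 0
uint64_t wverf = nmp->nm_verf;	/* snapshot the verifier at write time */
/* ... issue UNSTABLE writes ... */
error = nfs3_commit_rpc(np, offset, count, cred, wverf);
if (error == NFSERR_STALEWRITEVERF) {
	/* unstable data may be lost: mark the buffers dirty and rewrite */
}
#endif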
6936
6937
6938 int
6939 nfs_vnop_blockmap(
6940 __unused struct vnop_blockmap_args /* {
6941 * struct vnodeop_desc *a_desc;
6942 * vnode_t a_vp;
6943 * off_t a_foffset;
6944 * size_t a_size;
6945 * daddr64_t *a_bpn;
6946 * size_t *a_run;
6947 * void *a_poff;
6948 * int a_flags;
6949 * } */*ap)
6950 {
6951 return ENOTSUP;
6952 }
6953
6954
6955 /*
6956 * fsync vnode op. Just call nfs_flush().
6957 */
6958 /* ARGSUSED */
6959 int
6960 nfs_vnop_fsync(
6961 struct vnop_fsync_args /* {
6962 * struct vnodeop_desc *a_desc;
6963 * vnode_t a_vp;
6964 * int a_waitfor;
6965 * vfs_context_t a_context;
6966 * } */*ap)
6967 {
6968 return nfs_flush(VTONFS(ap->a_vp), ap->a_waitfor, vfs_context_thread(ap->a_context), 0);
6969 }
6970
6971
6972 /*
6973 * Do an NFS pathconf RPC.
6974 */
6975 int
6976 nfs3_pathconf_rpc(
6977 nfsnode_t np,
6978 struct nfs_fsattr *nfsap,
6979 vfs_context_t ctx)
6980 {
6981 u_int64_t xid;
6982 int error = 0, lockerror, status, nfsvers;
6983 struct nfsm_chain nmreq, nmrep;
6984 struct nfsmount *nmp = NFSTONMP(np);
6985 uint32_t val = 0;
6986
6987 if (nfs_mount_gone(nmp)) {
6988 return ENXIO;
6989 }
6990 nfsvers = nmp->nm_vers;
6991
6992 nfsm_chain_null(&nmreq);
6993 nfsm_chain_null(&nmrep);
6994
6995 /* fetch pathconf info from server */
6996 nfsm_chain_build_alloc_init(error, &nmreq, NFSX_FH(NFS_VER3));
6997 nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
6998 nfsm_chain_build_done(error, &nmreq);
6999 nfsmout_if(error);
7000 error = nfs_request(np, NULL, &nmreq, NFSPROC_PATHCONF, ctx, NULL, &nmrep, &xid, &status);
7001 if ((lockerror = nfs_node_lock(np))) {
7002 error = lockerror;
7003 }
7004 nfsm_chain_postop_attr_update(error, &nmrep, np, &xid);
7005 if (!lockerror) {
7006 nfs_node_unlock(np);
7007 }
7008 if (!error) {
7009 error = status;
7010 }
7011 nfsm_chain_get_32(error, &nmrep, nfsap->nfsa_maxlink);
7012 nfsm_chain_get_32(error, &nmrep, nfsap->nfsa_maxname);
7013 nfsap->nfsa_flags &= ~(NFS_FSFLAG_NO_TRUNC | NFS_FSFLAG_CHOWN_RESTRICTED | NFS_FSFLAG_CASE_INSENSITIVE | NFS_FSFLAG_CASE_PRESERVING);
7014 nfsm_chain_get_32(error, &nmrep, val);
7015 if (val) {
7016 nfsap->nfsa_flags |= NFS_FSFLAG_NO_TRUNC;
7017 }
7018 nfsm_chain_get_32(error, &nmrep, val);
7019 if (val) {
7020 nfsap->nfsa_flags |= NFS_FSFLAG_CHOWN_RESTRICTED;
7021 }
7022 nfsm_chain_get_32(error, &nmrep, val);
7023 if (val) {
7024 nfsap->nfsa_flags |= NFS_FSFLAG_CASE_INSENSITIVE;
7025 }
7026 nfsm_chain_get_32(error, &nmrep, val);
7027 if (val) {
7028 nfsap->nfsa_flags |= NFS_FSFLAG_CASE_PRESERVING;
7029 }
7030 NFS_BITMAP_SET(nfsap->nfsa_bitmap, NFS_FATTR_MAXLINK);
7031 NFS_BITMAP_SET(nfsap->nfsa_bitmap, NFS_FATTR_MAXNAME);
7032 NFS_BITMAP_SET(nfsap->nfsa_bitmap, NFS_FATTR_NO_TRUNC);
7033 NFS_BITMAP_SET(nfsap->nfsa_bitmap, NFS_FATTR_CHOWN_RESTRICTED);
7034 NFS_BITMAP_SET(nfsap->nfsa_bitmap, NFS_FATTR_CASE_INSENSITIVE);
7035 NFS_BITMAP_SET(nfsap->nfsa_bitmap, NFS_FATTR_CASE_PRESERVING);
7036 nfsmout:
7037 nfsm_chain_cleanup(&nmreq);
7038 nfsm_chain_cleanup(&nmrep);
7039 return error;
7040 }
7041
7042 /* save pathconf info for NFSv3 mount */
7043 void
7044 nfs3_pathconf_cache(struct nfsmount *nmp, struct nfs_fsattr *nfsap)
7045 {
7046 nmp->nm_fsattr.nfsa_maxlink = nfsap->nfsa_maxlink;
7047 nmp->nm_fsattr.nfsa_maxname = nfsap->nfsa_maxname;
7048 nmp->nm_fsattr.nfsa_flags &= ~(NFS_FSFLAG_NO_TRUNC | NFS_FSFLAG_CHOWN_RESTRICTED | NFS_FSFLAG_CASE_INSENSITIVE | NFS_FSFLAG_CASE_PRESERVING);
7049 nmp->nm_fsattr.nfsa_flags |= nfsap->nfsa_flags & NFS_FSFLAG_NO_TRUNC;
7050 nmp->nm_fsattr.nfsa_flags |= nfsap->nfsa_flags & NFS_FSFLAG_CHOWN_RESTRICTED;
7051 nmp->nm_fsattr.nfsa_flags |= nfsap->nfsa_flags & NFS_FSFLAG_CASE_INSENSITIVE;
7052 nmp->nm_fsattr.nfsa_flags |= nfsap->nfsa_flags & NFS_FSFLAG_CASE_PRESERVING;
7053 NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_MAXLINK);
7054 NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_MAXNAME);
7055 NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_NO_TRUNC);
7056 NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_CHOWN_RESTRICTED);
7057 NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_CASE_INSENSITIVE);
7058 NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_CASE_PRESERVING);
7059 nmp->nm_state |= NFSSTA_GOTPATHCONF;
7060 }
7061
7062 /*
7063 * Return POSIX pathconf information applicable to nfs.
7064 *
7065 * The NFS V2 protocol doesn't support this, so just return EINVAL
7066 * for V2.
7067 */
7068 /* ARGSUSED */
7069 int
7070 nfs_vnop_pathconf(
7071 struct vnop_pathconf_args /* {
7072 * struct vnodeop_desc *a_desc;
7073 * vnode_t a_vp;
7074 * int a_name;
7075 * int32_t *a_retval;
7076 * vfs_context_t a_context;
7077 * } */*ap)
7078 {
7079 vnode_t vp = ap->a_vp;
7080 nfsnode_t np = VTONFS(vp);
7081 struct nfsmount *nmp;
7082 struct nfs_fsattr nfsa, *nfsap;
7083 int error = 0;
7084 uint64_t maxFileSize;
7085 uint nbits;
7086
7087 nmp = VTONMP(vp);
7088 if (nfs_mount_gone(nmp)) {
7089 return ENXIO;
7090 }
7091
7092 switch (ap->a_name) {
7093 case _PC_LINK_MAX:
7094 case _PC_NAME_MAX:
7095 case _PC_CHOWN_RESTRICTED:
7096 case _PC_NO_TRUNC:
7097 case _PC_CASE_SENSITIVE:
7098 case _PC_CASE_PRESERVING:
7099 break;
7100 case _PC_FILESIZEBITS:
7101 if (nmp->nm_vers == NFS_VER2) {
7102 *ap->a_retval = 32;
7103 return 0;
7104 }
7105 break;
7106 case _PC_XATTR_SIZE_BITS:
7107 /* Do we support xattrs natively? */
7108 if (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR) {
7109 break; /* Yes */
7110 }
7111 /* No... so just return an error */
7112 /* FALLTHROUGH */
7113 default:
7114 /* don't bother contacting the server if we know the answer */
7115 return EINVAL;
7116 }
7117
7118 if (nmp->nm_vers == NFS_VER2) {
7119 return EINVAL;
7120 }
7121
7122 lck_mtx_lock(&nmp->nm_lock);
7123 if (nmp->nm_vers == NFS_VER3) {
7124 if (!(nmp->nm_state & NFSSTA_GOTPATHCONF)) {
7125 /* no pathconf info cached */
7126 lck_mtx_unlock(&nmp->nm_lock);
7127 NFS_CLEAR_ATTRIBUTES(nfsa.nfsa_bitmap);
7128 error = nfs3_pathconf_rpc(np, &nfsa, ap->a_context);
7129 if (error) {
7130 return error;
7131 }
7132 nmp = VTONMP(vp);
7133 if (nfs_mount_gone(nmp)) {
7134 return ENXIO;
7135 }
7136 lck_mtx_lock(&nmp->nm_lock);
7137 if (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_HOMOGENEOUS) {
7138 /* all files have the same pathconf info, */
7139 /* so cache a copy of the results */
7140 nfs3_pathconf_cache(nmp, &nfsa);
7141 }
7142 nfsap = &nfsa;
7143 } else {
7144 nfsap = &nmp->nm_fsattr;
7145 }
7146 }
7147 #if CONFIG_NFS4
7148 else if (!(nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_HOMOGENEOUS)) {
7149 /* no pathconf info cached */
7150 lck_mtx_unlock(&nmp->nm_lock);
7151 NFS_CLEAR_ATTRIBUTES(nfsa.nfsa_bitmap);
7152 error = nfs4_pathconf_rpc(np, &nfsa, ap->a_context);
7153 if (error) {
7154 return error;
7155 }
7156 nmp = VTONMP(vp);
7157 if (nfs_mount_gone(nmp)) {
7158 return ENXIO;
7159 }
7160 lck_mtx_lock(&nmp->nm_lock);
7161 nfsap = &nfsa;
7162 }
7163 #endif
7164 else {
7165 nfsap = &nmp->nm_fsattr;
7166 }
7167 switch (ap->a_name) {
7168 case _PC_LINK_MAX:
7169 if (NFS_BITMAP_ISSET(nfsap->nfsa_bitmap, NFS_FATTR_MAXLINK)) {
7170 *ap->a_retval = nfsap->nfsa_maxlink;
7171 #if CONFIG_NFS4
7172 } else if ((nmp->nm_vers == NFS_VER4) && NFS_BITMAP_ISSET(np->n_vattr.nva_bitmap, NFS_FATTR_MAXLINK)) {
7173 *ap->a_retval = np->n_vattr.nva_maxlink;
7174 #endif
7175 } else {
7176 error = EINVAL;
7177 }
7178 break;
7179 case _PC_NAME_MAX:
7180 if (NFS_BITMAP_ISSET(nfsap->nfsa_bitmap, NFS_FATTR_MAXNAME)) {
7181 *ap->a_retval = nfsap->nfsa_maxname;
7182 } else {
7183 error = EINVAL;
7184 }
7185 break;
7186 case _PC_CHOWN_RESTRICTED:
7187 if (NFS_BITMAP_ISSET(nfsap->nfsa_bitmap, NFS_FATTR_CHOWN_RESTRICTED)) {
7188 *ap->a_retval = (nfsap->nfsa_flags & NFS_FSFLAG_CHOWN_RESTRICTED) ? 200112 /* _POSIX_CHOWN_RESTRICTED */ : 0;
7189 } else {
7190 error = EINVAL;
7191 }
7192 break;
7193 case _PC_NO_TRUNC:
7194 if (NFS_BITMAP_ISSET(nfsap->nfsa_bitmap, NFS_FATTR_NO_TRUNC)) {
7195 *ap->a_retval = (nfsap->nfsa_flags & NFS_FSFLAG_NO_TRUNC) ? 200112 /* _POSIX_NO_TRUNC */ : 0;
7196 } else {
7197 error = EINVAL;
7198 }
7199 break;
7200 case _PC_CASE_SENSITIVE:
7201 if (NFS_BITMAP_ISSET(nfsap->nfsa_bitmap, NFS_FATTR_CASE_INSENSITIVE)) {
7202 *ap->a_retval = (nfsap->nfsa_flags & NFS_FSFLAG_CASE_INSENSITIVE) ? 0 : 1;
7203 } else {
7204 error = EINVAL;
7205 }
7206 break;
7207 case _PC_CASE_PRESERVING:
7208 if (NFS_BITMAP_ISSET(nfsap->nfsa_bitmap, NFS_FATTR_CASE_PRESERVING)) {
7209 *ap->a_retval = (nfsap->nfsa_flags & NFS_FSFLAG_CASE_PRESERVING) ? 1 : 0;
7210 } else {
7211 error = EINVAL;
7212 }
7213 break;
7214 case _PC_XATTR_SIZE_BITS: /* same as file size bits if named attrs supported */
7215 case _PC_FILESIZEBITS:
7216 if (!NFS_BITMAP_ISSET(nfsap->nfsa_bitmap, NFS_FATTR_MAXFILESIZE)) {
7217 *ap->a_retval = 64;
7218 error = 0;
7219 break;
7220 }
7221 maxFileSize = nfsap->nfsa_maxfilesize;
7222 nbits = 1;
7223 if (maxFileSize & 0xffffffff00000000ULL) {
7224 nbits += 32;
7225 maxFileSize >>= 32;
7226 }
7227 if (maxFileSize & 0xffff0000) {
7228 nbits += 16;
7229 maxFileSize >>= 16;
7230 }
7231 if (maxFileSize & 0xff00) {
7232 nbits += 8;
7233 maxFileSize >>= 8;
7234 }
7235 if (maxFileSize & 0xf0) {
7236 nbits += 4;
7237 maxFileSize >>= 4;
7238 }
7239 if (maxFileSize & 0xc) {
7240 nbits += 2;
7241 maxFileSize >>= 2;
7242 }
7243 if (maxFileSize & 0x2) {
7244 nbits += 1;
7245 }
7246 *ap->a_retval = nbits;
7247 break;
7248 default:
7249 error = EINVAL;
7250 }
7251
7252 lck_mtx_unlock(&nmp->nm_lock);
7253
7254 return error;
7255 }
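/*
 * [Editor's sketch] The _PC_FILESIZEBITS case above counts the bits
 * needed to represent nfsa_maxfilesize with a branchy binary search
 * (32/16/8/4/2/1).  A naive shift loop gives the same answer; the
 * stand-alone check below demonstrates the equivalence and is not
 * kernel code.
 */
#if 0   /* illustrative sketch only */
#include <assert.h>
#include <stdint.h>

static unsigned
bits_binary_search(uint64_t v)
{
        unsigned nbits = 1;

        if (v & 0xffffffff00000000ULL) { nbits += 32; v >>= 32; }
        if (v & 0xffff0000)            { nbits += 16; v >>= 16; }
        if (v & 0xff00)                { nbits += 8;  v >>= 8; }
        if (v & 0xf0)                  { nbits += 4;  v >>= 4; }
        if (v & 0xc)                   { nbits += 2;  v >>= 2; }
        if (v & 0x2)                   { nbits += 1; }
        return nbits;
}

static unsigned
bits_naive(uint64_t v)
{
        unsigned nbits = 1;

        while (v >>= 1) {
                nbits++;
        }
        return nbits;
}

static void
bits_check(void)
{
        /* 2GB-1 needs 31 bits; a 1TB boundary needs 41 */
        assert(bits_binary_search(0x7fffffffULL) == 31);
        assert(bits_naive(0x7fffffffULL) == 31);
        assert(bits_binary_search(1ULL << 40) == 41);
        assert(bits_naive(1ULL << 40) == 41);
}
#endif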
7256
7257 /*
7258 * Read wrapper for special devices.
7259 */
7260 int
7261 nfsspec_vnop_read(
7262 struct vnop_read_args /* {
7263 * struct vnodeop_desc *a_desc;
7264 * vnode_t a_vp;
7265 * struct uio *a_uio;
7266 * int a_ioflag;
7267 * vfs_context_t a_context;
7268 * } */*ap)
7269 {
7270 nfsnode_t np = VTONFS(ap->a_vp);
7271 struct timespec now;
7272 int error;
7273
7274 /*
7275 * Set access flag.
7276 */
7277 if ((error = nfs_node_lock(np))) {
7278 return error;
7279 }
7280 np->n_flag |= NACC;
7281 nanotime(&now);
7282 np->n_atim.tv_sec = now.tv_sec;
7283 np->n_atim.tv_nsec = now.tv_nsec;
7284 nfs_node_unlock(np);
7285 return VOCALL(spec_vnodeop_p, VOFFSET(vnop_read), ap);
7286 }
7287
7288 /*
7289 * Write wrapper for special devices.
7290 */
7291 int
7292 nfsspec_vnop_write(
7293 struct vnop_write_args /* {
7294 * struct vnodeop_desc *a_desc;
7295 * vnode_t a_vp;
7296 * struct uio *a_uio;
7297 * int a_ioflag;
7298 * vfs_context_t a_context;
7299 * } */*ap)
7300 {
7301 nfsnode_t np = VTONFS(ap->a_vp);
7302 struct timespec now;
7303 int error;
7304
7305 /*
7306 * Set update flag.
7307 */
7308 if ((error = nfs_node_lock(np))) {
7309 return error;
7310 }
7311 np->n_flag |= NUPD;
7312 nanotime(&now);
7313 np->n_mtim.tv_sec = now.tv_sec;
7314 np->n_mtim.tv_nsec = now.tv_nsec;
7315 nfs_node_unlock(np);
7316 return VOCALL(spec_vnodeop_p, VOFFSET(vnop_write), ap);
7317 }
7318
7319 /*
7320 * Close wrapper for special devices.
7321 *
7322 * Update the times on the nfsnode then do device close.
7323 */
7324 int
7325 nfsspec_vnop_close(
7326 struct vnop_close_args /* {
7327 * struct vnodeop_desc *a_desc;
7328 * vnode_t a_vp;
7329 * int a_fflag;
7330 * vfs_context_t a_context;
7331 * } */*ap)
7332 {
7333 vnode_t vp = ap->a_vp;
7334 nfsnode_t np = VTONFS(vp);
7335 struct vnode_attr vattr;
7336 mount_t mp;
7337 int error;
7338
7339 if ((error = nfs_node_lock(np))) {
7340 return error;
7341 }
7342 if (np->n_flag & (NACC | NUPD)) {
7343 np->n_flag |= NCHG;
7344 if (!vnode_isinuse(vp, 0) && (mp = vnode_mount(vp)) && !vfs_isrdonly(mp)) {
7345 VATTR_INIT(&vattr);
7346 if (np->n_flag & NACC) {
7347 vattr.va_access_time = np->n_atim;
7348 VATTR_SET_ACTIVE(&vattr, va_access_time);
7349 }
7350 if (np->n_flag & NUPD) {
7351 vattr.va_modify_time = np->n_mtim;
7352 VATTR_SET_ACTIVE(&vattr, va_modify_time);
7353 }
7354 nfs_node_unlock(np);
7355 vnode_setattr(vp, &vattr, ap->a_context);
7356 } else {
7357 nfs_node_unlock(np);
7358 }
7359 } else {
7360 nfs_node_unlock(np);
7361 }
7362 return VOCALL(spec_vnodeop_p, VOFFSET(vnop_close), ap);
7363 }
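/*
 * [Editor's sketch] The device (and fifo) wrappers above defer
 * timestamp maintenance: read/write only set a flag (NACC/NUPD) and
 * record the time in the nfsnode, and close pushes whatever
 * accumulated out in a single setattr.  A minimal user-space model of
 * that pattern follows; all names here are the editor's, not the
 * kernel's.
 */
#if 0   /* illustrative sketch only */
#include <stdbool.h>
#include <time.h>

struct deferred_times {
        bool            acc_dirty;      /* NACC analogue */
        bool            upd_dirty;      /* NUPD analogue */
        struct timespec atime, mtime;
};

static void
note_read(struct deferred_times *dt)
{
        clock_gettime(CLOCK_REALTIME, &dt->atime);
        dt->acc_dirty = true;           /* cheap: no RPC per read */
}

static void
note_write(struct deferred_times *dt)
{
        clock_gettime(CLOCK_REALTIME, &dt->mtime);
        dt->upd_dirty = true;
}

/* On close, issue at most one "setattr" covering whatever is dirty. */
static bool
flush_on_close(struct deferred_times *dt)
{
        bool did_work = dt->acc_dirty || dt->upd_dirty;

        dt->acc_dirty = dt->upd_dirty = false;
        return did_work;
}
#endif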
7364
7365 #if FIFO
7366 extern vnop_t **fifo_vnodeop_p;
7367
7368 /*
7369 * Read wrapper for fifos.
7370 */
7371 int
7372 nfsfifo_vnop_read(
7373 struct vnop_read_args /* {
7374 * struct vnodeop_desc *a_desc;
7375 * vnode_t a_vp;
7376 * struct uio *a_uio;
7377 * int a_ioflag;
7378 * vfs_context_t a_context;
7379 * } */*ap)
7380 {
7381 nfsnode_t np = VTONFS(ap->a_vp);
7382 struct timespec now;
7383 int error;
7384
7385 /*
7386 * Set access flag.
7387 */
7388 if ((error = nfs_node_lock(np))) {
7389 return error;
7390 }
7391 np->n_flag |= NACC;
7392 nanotime(&now);
7393 np->n_atim.tv_sec = now.tv_sec;
7394 np->n_atim.tv_nsec = now.tv_nsec;
7395 nfs_node_unlock(np);
7396 return VOCALL(fifo_vnodeop_p, VOFFSET(vnop_read), ap);
7397 }
7398
7399 /*
7400 * Write wrapper for fifos.
7401 */
7402 int
7403 nfsfifo_vnop_write(
7404 struct vnop_write_args /* {
7405 * struct vnodeop_desc *a_desc;
7406 * vnode_t a_vp;
7407 * struct uio *a_uio;
7408 * int a_ioflag;
7409 * vfs_context_t a_context;
7410 * } */*ap)
7411 {
7412 nfsnode_t np = VTONFS(ap->a_vp);
7413 struct timespec now;
7414 int error;
7415
7416 /*
7417 * Set update flag.
7418 */
7419 if ((error = nfs_node_lock(np))) {
7420 return error;
7421 }
7422 np->n_flag |= NUPD;
7423 nanotime(&now);
7424 np->n_mtim.tv_sec = now.tv_sec;
7425 np->n_mtim.tv_nsec = now.tv_nsec;
7426 nfs_node_unlock(np);
7427 return VOCALL(fifo_vnodeop_p, VOFFSET(vnop_write), ap);
7428 }
7429
7430 /*
7431 * Close wrapper for fifos.
7432 *
7433 * Update the times on the nfsnode then do fifo close.
7434 */
7435 int
7436 nfsfifo_vnop_close(
7437 struct vnop_close_args /* {
7438 * struct vnodeop_desc *a_desc;
7439 * vnode_t a_vp;
7440 * int a_fflag;
7441 * vfs_context_t a_context;
7442 * } */*ap)
7443 {
7444 vnode_t vp = ap->a_vp;
7445 nfsnode_t np = VTONFS(vp);
7446 struct vnode_attr vattr;
7447 struct timespec now;
7448 mount_t mp;
7449 int error;
7450
7451 if ((error = nfs_node_lock(np))) {
7452 return error;
7453 }
7454 if (np->n_flag & (NACC | NUPD)) {
7455 nanotime(&now);
7456 if (np->n_flag & NACC) {
7457 np->n_atim.tv_sec = now.tv_sec;
7458 np->n_atim.tv_nsec = now.tv_nsec;
7459 }
7460 if (np->n_flag & NUPD) {
7461 np->n_mtim.tv_sec = now.tv_sec;
7462 np->n_mtim.tv_nsec = now.tv_nsec;
7463 }
7464 np->n_flag |= NCHG;
7465 if (!vnode_isinuse(vp, 1) && (mp = vnode_mount(vp)) && !vfs_isrdonly(mp)) {
7466 VATTR_INIT(&vattr);
7467 if (np->n_flag & NACC) {
7468 vattr.va_access_time = np->n_atim;
7469 VATTR_SET_ACTIVE(&vattr, va_access_time);
7470 }
7471 if (np->n_flag & NUPD) {
7472 vattr.va_modify_time = np->n_mtim;
7473 VATTR_SET_ACTIVE(&vattr, va_modify_time);
7474 }
7475 nfs_node_unlock(np);
7476 vnode_setattr(vp, &vattr, ap->a_context);
7477 } else {
7478 nfs_node_unlock(np);
7479 }
7480 } else {
7481 nfs_node_unlock(np);
7482 }
7483 return VOCALL(fifo_vnodeop_p, VOFFSET(vnop_close), ap);
7484 }
7485 #endif /* FIFO */
7486
7487 /*ARGSUSED*/
7488 int
7489 nfs_vnop_ioctl(
7490 struct vnop_ioctl_args /* {
7491 * struct vnodeop_desc *a_desc;
7492 * vnode_t a_vp;
7493 * u_int32_t a_command;
7494 * caddr_t a_data;
7495 * int a_fflag;
7496 * vfs_context_t a_context;
7497 * } */*ap)
7498 {
7499 vfs_context_t ctx = ap->a_context;
7500 vnode_t vp = ap->a_vp;
7501 struct nfsmount *mp = VTONMP(vp);
7502 int error = ENOTTY;
7503 #if CONFIG_NFS_GSS
7504 struct user_nfs_gss_principal gprinc = {};
7505 uint32_t len;
7506 #endif
7507
7508 if (mp == NULL) {
7509 return ENXIO;
7510 }
7511 switch (ap->a_command) {
7512 case F_FULLFSYNC:
7513 if (vnode_vfsisrdonly(vp)) {
7514 return EROFS;
7515 }
7516 error = nfs_flush(VTONFS(vp), MNT_WAIT, vfs_context_thread(ctx), 0);
7517 break;
7518 #if CONFIG_NFS_GSS
7519 case NFS_IOC_DESTROY_CRED:
7520 if (!auth_is_kerberized(mp->nm_auth)) {
7521 return ENOTSUP;
7522 }
7523 error = nfs_gss_clnt_ctx_remove(mp, vfs_context_ucred(ctx));
7524 break;
7525 case NFS_IOC_SET_CRED:
7526 case NFS_IOC_SET_CRED64:
7527 if (!auth_is_kerberized(mp->nm_auth)) {
7528 return ENOTSUP;
7529 }
7530 if ((ap->a_command == NFS_IOC_SET_CRED && vfs_context_is64bit(ctx)) ||
7531 (ap->a_command == NFS_IOC_SET_CRED64 && !vfs_context_is64bit(ctx))) {
7532 return EINVAL;
7533 }
7534 if (vfs_context_is64bit(ctx)) {
7535 gprinc = *(struct user_nfs_gss_principal *)ap->a_data;
7536 } else {
7537 struct nfs_gss_principal *tp;
7538 tp = (struct nfs_gss_principal *)ap->a_data;
7539 gprinc.princlen = tp->princlen;
7540 gprinc.nametype = tp->nametype;
7541 gprinc.principal = CAST_USER_ADDR_T(tp->principal);
7542 }
7543 NFS_DBG(NFS_FAC_GSS, 7, "Enter NFS_FSCTL_SET_CRED (64-bit=%d): principal length %d name type %d usr pointer 0x%llx\n", vfs_context_is64bit(ctx), gprinc.princlen, gprinc.nametype, (unsigned long long)gprinc.principal);
7544 if (gprinc.princlen > MAXPATHLEN) {
7545 return EINVAL;
7546 }
7547 uint8_t *p;
7548 MALLOC(p, uint8_t *, gprinc.princlen + 1, M_TEMP, M_WAITOK | M_ZERO);
7549 if (p == NULL) {
7550 return ENOMEM;
7551 }
7552 error = copyin(gprinc.principal, p, gprinc.princlen);
7553 if (error) {
7554 NFS_DBG(NFS_FAC_GSS, 7, "NFS_FSCTL_SET_CRED could not copy in princiapl data of len %d: %d\n",
7555 gprinc.princlen, error);
7556 FREE(p, M_TEMP);
7557 return error;
7558 }
7559 NFS_DBG(NFS_FAC_GSS, 7, "Seting credential to principal %s\n", p);
7560 error = nfs_gss_clnt_ctx_set_principal(mp, ctx, p, gprinc.princlen, gprinc.nametype);
7561 NFS_DBG(NFS_FAC_GSS, 7, "Seting credential to principal %s returned %d\n", p, error);
7562 FREE(p, M_TEMP);
7563 break;
7564 case NFS_IOC_GET_CRED:
7565 case NFS_IOC_GET_CRED64:
7566 if (!auth_is_kerberized(mp->nm_auth)) {
7567 return ENOTSUP;
7568 }
7569 if ((ap->a_command == NFS_IOC_GET_CRED && vfs_context_is64bit(ctx)) ||
7570 (ap->a_command == NFS_IOC_GET_CRED64 && !vfs_context_is64bit(ctx))) {
7571 return EINVAL;
7572 }
7573 error = nfs_gss_clnt_ctx_get_principal(mp, ctx, &gprinc);
7574 if (error) {
7575 break;
7576 }
7577 if (vfs_context_is64bit(ctx)) {
7578 struct user_nfs_gss_principal *upp = (struct user_nfs_gss_principal *)ap->a_data;
7579 len = upp->princlen;
7580 if (gprinc.princlen < len) {
7581 len = gprinc.princlen;
7582 }
7583 upp->princlen = gprinc.princlen;
7584 upp->nametype = gprinc.nametype;
7585 upp->flags = gprinc.flags;
7586 if (gprinc.principal) {
7587 error = copyout((void *)gprinc.principal, upp->principal, len);
7588 } else {
7589 upp->principal = USER_ADDR_NULL;
7590 }
7591 } else {
7592 struct nfs_gss_principal *u32pp = (struct nfs_gss_principal *)ap->a_data;
7593 len = u32pp->princlen;
7594 if (gprinc.princlen < len) {
7595 len = gprinc.princlen;
7596 }
7597 u32pp->princlen = gprinc.princlen;
7598 u32pp->nametype = gprinc.nametype;
7599 u32pp->flags = gprinc.flags;
7600 if (gprinc.principal) {
7601 error = copyout((void *)gprinc.principal, u32pp->principal, len);
7602 } else {
7603 u32pp->principal = (user32_addr_t)0;
7604 }
7605 }
7606 if (error) {
7607 NFS_DBG(NFS_FAC_GSS, 7, "NFS_FSCTL_GET_CRED could not copy out princiapl data of len %d: %d\n",
7608 gprinc.princlen, error);
7609 }
7610 if (gprinc.principal) {
7611 FREE(gprinc.principal, M_TEMP);
7612 }
7613 #endif /* CONFIG_NFS_GSS */
7614 }
7615
7616 return error;
7617 }
7618
7619 /*ARGSUSED*/
7620 int
7621 nfs_vnop_select(
7622 __unused struct vnop_select_args /* {
7623 * struct vnodeop_desc *a_desc;
7624 * vnode_t a_vp;
7625 * int a_which;
7626 * int a_fflags;
7627 * void *a_wql;
7628 * vfs_context_t a_context;
7629 * } */*ap)
7630 {
7631 /*
7632 * We were once bogusly using seltrue(), which returns 1. Is this right?
7633 */
7634 return 1;
7635 }
7636
7637 /*
7638 * vnode OP for pagein using UPL
7639 *
7640 * No buffer I/O, just RPCs straight into the mapped pages.
7641 */
7642 int
7643 nfs_vnop_pagein(
7644 struct vnop_pagein_args /* {
7645 * struct vnodeop_desc *a_desc;
7646 * vnode_t a_vp;
7647 * upl_t a_pl;
7648 * vm_offset_t a_pl_offset;
7649 * off_t a_f_offset;
7650 * size_t a_size;
7651 * int a_flags;
7652 * vfs_context_t a_context;
7653 * } */*ap)
7654 {
7655 vnode_t vp = ap->a_vp;
7656 upl_t pl = ap->a_pl;
7657 size_t size = ap->a_size;
7658 off_t f_offset = ap->a_f_offset;
7659 vm_offset_t pl_offset = ap->a_pl_offset;
7660 int flags = ap->a_flags;
7661 thread_t thd;
7662 kauth_cred_t cred;
7663 nfsnode_t np = VTONFS(vp);
7664 size_t nmrsize, iosize, txsize, rxsize, retsize;
7665 off_t txoffset;
7666 struct nfsmount *nmp;
7667 int error = 0;
7668 vm_offset_t ioaddr, rxaddr;
7669 uio_t uio;
7670 char uio_buf[UIO_SIZEOF(1)];
7671 int nofreeupl = flags & UPL_NOCOMMIT;
7672 upl_page_info_t *plinfo;
7673 #define MAXPAGINGREQS 16 /* max outstanding RPCs for pagein/pageout */
7674 struct nfsreq *req[MAXPAGINGREQS];
7675 int nextsend, nextwait;
7676 #if CONFIG_NFS4
7677 uint32_t stategenid = 0;
7678 #endif
7679 uint32_t restart = 0;
7680 kern_return_t kret;
7681
7682 FSDBG(322, np, f_offset, size, flags);
7683 if (pl == (upl_t)NULL) {
7684 panic("nfs_pagein: no upl");
7685 }
7686
7687 if (size <= 0) {
7688 printf("nfs_pagein: invalid size %ld", size);
7689 if (!nofreeupl) {
7690 (void) ubc_upl_abort_range(pl, pl_offset, size, 0);
7691 }
7692 return EINVAL;
7693 }
7694 if (f_offset < 0 || f_offset >= (off_t)np->n_size || (f_offset & PAGE_MASK_64)) {
7695 if (!nofreeupl) {
7696 ubc_upl_abort_range(pl, pl_offset, size,
7697 UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY);
7698 }
7699 return EINVAL;
7700 }
7701
7702 thd = vfs_context_thread(ap->a_context);
7703 cred = ubc_getcred(vp);
7704 if (!IS_VALID_CRED(cred)) {
7705 cred = vfs_context_ucred(ap->a_context);
7706 }
7707
7708 uio = uio_createwithbuffer(1, f_offset, UIO_SYSSPACE, UIO_READ,
7709 &uio_buf, sizeof(uio_buf));
7710
7711 nmp = VTONMP(vp);
7712 if (nfs_mount_gone(nmp)) {
7713 if (!nofreeupl) {
7714 ubc_upl_abort_range(pl, pl_offset, size,
7715 UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY);
7716 }
7717 return ENXIO;
7718 }
7719 nmrsize = nmp->nm_rsize;
7720
7721 plinfo = ubc_upl_pageinfo(pl);
7722 kret = ubc_upl_map(pl, &ioaddr);
7723 if (kret != KERN_SUCCESS) {
7724 panic("nfs_vnop_pagein: ubc_upl_map() failed with (%d)", kret);
7725 }
7726 ioaddr += pl_offset;
7727
7728 tryagain:
7729 #if CONFIG_NFS4
7730 if (nmp->nm_vers >= NFS_VER4) {
7731 stategenid = nmp->nm_stategenid;
7732 }
7733 #endif
7734 txsize = rxsize = size;
7735 txoffset = f_offset;
7736 rxaddr = ioaddr;
7737
7738 bzero(req, sizeof(req));
7739 nextsend = nextwait = 0;
7740 do {
7741 if (np->n_flag & NREVOKE) {
7742 error = EIO;
7743 break;
7744 }
7745 /* send requests while we need to and have available slots */
7746 while ((txsize > 0) && (req[nextsend] == NULL)) {
7747 iosize = MIN(nmrsize, txsize);
7748 if ((error = nmp->nm_funcs->nf_read_rpc_async(np, txoffset, iosize, thd, cred, NULL, &req[nextsend]))) {
7749 req[nextsend] = NULL;
7750 break;
7751 }
7752 txoffset += iosize;
7753 txsize -= iosize;
7754 nextsend = (nextsend + 1) % MAXPAGINGREQS;
7755 }
7756 /* wait while we need to and break out if more requests to send */
7757 while ((rxsize > 0) && req[nextwait]) {
7758 iosize = retsize = MIN(nmrsize, rxsize);
7759 uio_reset(uio, uio_offset(uio), UIO_SYSSPACE, UIO_READ);
7760 uio_addiov(uio, CAST_USER_ADDR_T(rxaddr), iosize);
7761 FSDBG(322, uio_offset(uio), uio_resid(uio), rxaddr, rxsize);
7762 #if UPL_DEBUG
7763 upl_ubc_alias_set(pl, (uintptr_t) current_thread(), (uintptr_t) 2);
7764 #endif /* UPL_DEBUG */
7765 OSAddAtomic64(1, &nfsstats.pageins);
7766 error = nmp->nm_funcs->nf_read_rpc_async_finish(np, req[nextwait], uio, &retsize, NULL);
7767 req[nextwait] = NULL;
7768 nextwait = (nextwait + 1) % MAXPAGINGREQS;
7769 #if CONFIG_NFS4
7770 if ((nmp->nm_vers >= NFS_VER4) && nfs_mount_state_error_should_restart(error)) {
7771 lck_mtx_lock(&nmp->nm_lock);
7772 if ((error != NFSERR_GRACE) && (stategenid == nmp->nm_stategenid)) {
7773 NP(np, "nfs_vnop_pagein: error %d, initiating recovery", error);
7774 nfs_need_recover(nmp, error);
7775 }
7776 lck_mtx_unlock(&nmp->nm_lock);
7777 restart++;
7778 goto cancel;
7779 }
7780 #endif
7781 if (error) {
7782 FSDBG(322, uio_offset(uio), uio_resid(uio), error, -1);
7783 break;
7784 }
7785 if (retsize < iosize) {
7786 /* Just zero fill the rest of the valid area. */
7787 int zcnt = iosize - retsize;
7788 bzero((char *)rxaddr + retsize, zcnt);
7789 FSDBG(324, uio_offset(uio), retsize, zcnt, rxaddr);
7790 uio_update(uio, zcnt);
7791 }
7792 rxaddr += iosize;
7793 rxsize -= iosize;
7794 if (txsize) {
7795 break;
7796 }
7797 }
7798 } while (!error && (txsize || rxsize));
7799
7800 restart = 0;
7801
7802 if (error) {
7803 #if CONFIG_NFS4
7804 cancel:
7805 #endif
7806 /* cancel any outstanding requests */
7807 while (req[nextwait]) {
7808 nfs_request_async_cancel(req[nextwait]);
7809 req[nextwait] = NULL;
7810 nextwait = (nextwait + 1) % MAXPAGINGREQS;
7811 }
7812 if (np->n_flag & NREVOKE) {
7813 error = EIO;
7814 } else if (restart) {
7815 if (restart <= nfs_mount_state_max_restarts(nmp)) { /* guard against no progress */
7816 if (error == NFSERR_GRACE) {
7817 tsleep(&nmp->nm_state, (PZERO - 1), "nfsgrace", 2 * hz);
7818 }
7819 if (!(error = nfs_mount_state_wait_for_recovery(nmp))) {
7820 goto tryagain;
7821 }
7822 } else {
7823 NP(np, "nfs_pagein: too many restarts, aborting");
7824 }
7825 }
7826 }
7827
7828 ubc_upl_unmap(pl);
7829
7830 if (!nofreeupl) {
7831 if (error) {
7832 ubc_upl_abort_range(pl, pl_offset, size,
7833 UPL_ABORT_ERROR |
7834 UPL_ABORT_FREE_ON_EMPTY);
7835 } else {
7836 ubc_upl_commit_range(pl, pl_offset, size,
7837 UPL_COMMIT_CLEAR_DIRTY |
7838 UPL_COMMIT_FREE_ON_EMPTY);
7839 }
7840 }
7841 return error;
7842 }
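/*
 * [Editor's sketch] nfs_vnop_pagein() keeps up to MAXPAGINGREQS read
 * RPCs in flight using a fixed array as a ring: nextsend fills free
 * slots, nextwait reaps completions in issue order, and the reap loop
 * breaks early whenever there is more to send so the pipe stays full.
 * A schematic of that pattern with stubbed-out send/reap hooks; all
 * names below are the editor's.
 */
#if 0   /* illustrative sketch only */
#define RING 16

static void
pipeline(long total, long chunk,
    int (*send)(long off, long len, void **tokp),
    int (*reap)(void *tok))
{
        void *slot[RING] = { 0 };
        int nextsend = 0, nextwait = 0, error = 0;
        long txleft = total, rxleft = total, txoff = 0;

        do {
                /* issue while there is work and a free slot */
                while (txleft > 0 && slot[nextsend] == NULL) {
                        long io = txleft < chunk ? txleft : chunk;
                        if ((error = send(txoff, io, &slot[nextsend]))) {
                                break;
                        }
                        txoff += io;
                        txleft -= io;
                        nextsend = (nextsend + 1) % RING;
                }
                /* reap completions in order; go refill if more to send */
                while (rxleft > 0 && slot[nextwait] != NULL) {
                        long io = rxleft < chunk ? rxleft : chunk;
                        error = reap(slot[nextwait]);
                        slot[nextwait] = NULL;
                        nextwait = (nextwait + 1) % RING;
                        if (error) {
                                break;
                        }
                        rxleft -= io;
                        if (txleft) {
                                break;  /* keep the pipe full */
                        }
                }
        } while (!error && (txleft || rxleft));
}
#endif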
7843
7844
7845 /*
7846 * the following are needed only by nfs_pageout to know how to handle errors
7847 * see nfs_pageout comments on explanation of actions.
7848 * the errors here are copied from errno.h and errors returned by servers
7849 * are expected to match the same numbers here. If not, our actions may
7850 * be erroneous.
7851 */
7852 char nfs_pageouterrorhandler(int);
7853 enum actiontype {NOACTION, DUMP, DUMPANDLOG, RETRY, SEVER};
7854 #define NFS_ELAST 88
7855 static u_char errorcount[NFS_ELAST + 1]; /* better be zeros when initialized */
7856 static const char errortooutcome[NFS_ELAST + 1] = {
7857 NOACTION,
7858 DUMP, /* EPERM 1 Operation not permitted */
7859 DUMP, /* ENOENT 2 No such file or directory */
7860 DUMPANDLOG, /* ESRCH 3 No such process */
7861 RETRY, /* EINTR 4 Interrupted system call */
7862 DUMP, /* EIO 5 Input/output error */
7863 DUMP, /* ENXIO 6 Device not configured */
7864 DUMPANDLOG, /* E2BIG 7 Argument list too long */
7865 DUMPANDLOG, /* ENOEXEC 8 Exec format error */
7866 DUMPANDLOG, /* EBADF 9 Bad file descriptor */
7867 DUMPANDLOG, /* ECHILD 10 No child processes */
7868 DUMPANDLOG, /* EDEADLK 11 Resource deadlock avoided - was EAGAIN */
7869 RETRY, /* ENOMEM 12 Cannot allocate memory */
7870 DUMP, /* EACCES 13 Permission denied */
7871 DUMPANDLOG, /* EFAULT 14 Bad address */
7872 DUMPANDLOG, /* ENOTBLK 15 POSIX - Block device required */
7873 RETRY, /* EBUSY 16 Device busy */
7874 DUMP, /* EEXIST 17 File exists */
7875 DUMP, /* EXDEV 18 Cross-device link */
7876 DUMP, /* ENODEV 19 Operation not supported by device */
7877 DUMP, /* ENOTDIR 20 Not a directory */
7878 DUMP, /* EISDIR 21 Is a directory */
7879 DUMP, /* EINVAL 22 Invalid argument */
7880 DUMPANDLOG, /* ENFILE 23 Too many open files in system */
7881 DUMPANDLOG, /* EMFILE 24 Too many open files */
7882 DUMPANDLOG, /* ENOTTY 25 Inappropriate ioctl for device */
7883 DUMPANDLOG, /* ETXTBSY 26 Text file busy - POSIX */
7884 DUMP, /* EFBIG 27 File too large */
7885 DUMP, /* ENOSPC 28 No space left on device */
7886 DUMPANDLOG, /* ESPIPE 29 Illegal seek */
7887 DUMP, /* EROFS 30 Read-only file system */
7888 DUMP, /* EMLINK 31 Too many links */
7889 RETRY, /* EPIPE 32 Broken pipe */
7890 /* math software */
7891 DUMPANDLOG, /* EDOM 33 Numerical argument out of domain */
7892 DUMPANDLOG, /* ERANGE 34 Result too large */
7893 RETRY, /* EAGAIN/EWOULDBLOCK 35 Resource temporarily unavailable */
7894 DUMPANDLOG, /* EINPROGRESS 36 Operation now in progress */
7895 DUMPANDLOG, /* EALREADY 37 Operation already in progress */
7896 /* ipc/network software -- argument errors */
7897 DUMPANDLOG, /* ENOTSOCK 38 Socket operation on non-socket */
7898 DUMPANDLOG, /* EDESTADDRREQ 39 Destination address required */
7899 DUMPANDLOG, /* EMSGSIZE 40 Message too long */
7900 DUMPANDLOG, /* EPROTOTYPE 41 Protocol wrong type for socket */
7901 DUMPANDLOG, /* ENOPROTOOPT 42 Protocol not available */
7902 DUMPANDLOG, /* EPROTONOSUPPORT 43 Protocol not supported */
7903 DUMPANDLOG, /* ESOCKTNOSUPPORT 44 Socket type not supported */
7904 DUMPANDLOG, /* ENOTSUP 45 Operation not supported */
7905 DUMPANDLOG, /* EPFNOSUPPORT 46 Protocol family not supported */
7906 DUMPANDLOG, /* EAFNOSUPPORT 47 Address family not supported by protocol family */
7907 DUMPANDLOG, /* EADDRINUSE 48 Address already in use */
7908 DUMPANDLOG, /* EADDRNOTAVAIL 49 Can't assign requested address */
7909 /* ipc/network software -- operational errors */
7910 RETRY, /* ENETDOWN 50 Network is down */
7911 RETRY, /* ENETUNREACH 51 Network is unreachable */
7912 RETRY, /* ENETRESET 52 Network dropped connection on reset */
7913 RETRY, /* ECONNABORTED 53 Software caused connection abort */
7914 RETRY, /* ECONNRESET 54 Connection reset by peer */
7915 RETRY, /* ENOBUFS 55 No buffer space available */
7916 RETRY, /* EISCONN 56 Socket is already connected */
7917 RETRY, /* ENOTCONN 57 Socket is not connected */
7918 RETRY, /* ESHUTDOWN 58 Can't send after socket shutdown */
7919 RETRY, /* ETOOMANYREFS 59 Too many references: can't splice */
7920 RETRY, /* ETIMEDOUT 60 Operation timed out */
7921 RETRY, /* ECONNREFUSED 61 Connection refused */
7922
7923 DUMPANDLOG, /* ELOOP 62 Too many levels of symbolic links */
7924 DUMP, /* ENAMETOOLONG 63 File name too long */
7925 RETRY, /* EHOSTDOWN 64 Host is down */
7926 RETRY, /* EHOSTUNREACH 65 No route to host */
7927 DUMP, /* ENOTEMPTY 66 Directory not empty */
7928 /* quotas & mush */
7929 DUMPANDLOG, /* EPROCLIM 67 Too many processes */
7930 DUMPANDLOG, /* EUSERS 68 Too many users */
7931 DUMPANDLOG, /* EDQUOT 69 Disc quota exceeded */
7932 /* Network File System */
7933 DUMP, /* ESTALE 70 Stale NFS file handle */
7934 DUMP, /* EREMOTE 71 Too many levels of remote in path */
7935 DUMPANDLOG, /* EBADRPC 72 RPC struct is bad */
7936 DUMPANDLOG, /* ERPCMISMATCH 73 RPC version wrong */
7937 DUMPANDLOG, /* EPROGUNAVAIL 74 RPC prog. not avail */
7938 DUMPANDLOG, /* EPROGMISMATCH 75 Program version wrong */
7939 DUMPANDLOG, /* EPROCUNAVAIL 76 Bad procedure for program */
7940
7941 DUMPANDLOG, /* ENOLCK 77 No locks available */
7942 DUMPANDLOG, /* ENOSYS 78 Function not implemented */
7943 DUMPANDLOG, /* EFTYPE 79 Inappropriate file type or format */
7944 DUMPANDLOG, /* EAUTH 80 Authentication error */
7945 DUMPANDLOG, /* ENEEDAUTH 81 Need authenticator */
7946 /* Intelligent device errors */
7947 DUMPANDLOG, /* EPWROFF 82 Device power is off */
7948 DUMPANDLOG, /* EDEVERR 83 Device error, e.g. paper out */
7949 DUMPANDLOG, /* EOVERFLOW 84 Value too large to be stored in data type */
7950 /* Program loading errors */
7951 DUMPANDLOG, /* EBADEXEC 85 Bad executable */
7952 DUMPANDLOG, /* EBADARCH 86 Bad CPU type in executable */
7953 DUMPANDLOG, /* ESHLIBVERS 87 Shared library version mismatch */
7954 DUMPANDLOG, /* EBADMACHO 88 Malformed Macho file */
7955 };
7956
7957 char
7958 nfs_pageouterrorhandler(int error)
7959 {
7960 if (error > NFS_ELAST) {
7961 return DUMP;
7962 } else {
7963 return errortooutcome[error];
7964 }
7965 }
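/*
 * [Editor's sketch] errortooutcome[] is a table-driven policy: index
 * by errno, get an action; anything above NFS_ELAST (e.g. raw NFSv3
 * errors) defaults to DUMP.  A toy caller showing how the actions
 * read; the wrapper function below is the editor's, not the kernel's.
 */
#if 0   /* illustrative sketch only */
static const char *
describe_action(int error)
{
        switch (nfs_pageouterrorhandler(error)) {
        case NOACTION:   return "no action";
        case DUMP:       return "dump pages, drop the data";
        case DUMPANDLOG: return "dump pages and log (unexpected)";
        case RETRY:      return "leave pages dirty; let VM retry";
        case SEVER:      return "sever (reserved, not implemented)";
        default:         return "?";
        }
}
#endif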
7966
7967
7968 /*
7969 * vnode OP for pageout using UPL
7970 *
7971 * No buffer I/O, just RPCs straight from the mapped pages.
7972 * File size changes are not permitted in pageout.
7973 */
7974 int
7975 nfs_vnop_pageout(
7976 struct vnop_pageout_args /* {
7977 * struct vnodeop_desc *a_desc;
7978 * vnode_t a_vp;
7979 * upl_t a_pl;
7980 * vm_offset_t a_pl_offset;
7981 * off_t a_f_offset;
7982 * size_t a_size;
7983 * int a_flags;
7984 * vfs_context_t a_context;
7985 * } */*ap)
7986 {
7987 vnode_t vp = ap->a_vp;
7988 upl_t pl = ap->a_pl;
7989 size_t size = ap->a_size;
7990 off_t f_offset = ap->a_f_offset;
7991 vm_offset_t pl_offset = ap->a_pl_offset;
7992 int flags = ap->a_flags;
7993 nfsnode_t np = VTONFS(vp);
7994 thread_t thd;
7995 kauth_cred_t cred;
7996 struct nfsbuf *bp;
7997 struct nfsmount *nmp = VTONMP(vp);
7998 daddr64_t lbn;
7999 int error = 0, iomode;
8000 off_t off, txoffset, rxoffset;
8001 vm_offset_t ioaddr, txaddr, rxaddr;
8002 uio_t auio;
8003 char uio_buf[UIO_SIZEOF(1)];
8004 int nofreeupl = flags & UPL_NOCOMMIT;
8005 size_t nmwsize, biosize, iosize, pgsize, txsize, rxsize, xsize, remsize;
8006 struct nfsreq *req[MAXPAGINGREQS];
8007 int nextsend, nextwait, wverfset, commit;
8008 uint64_t wverf, wverf2;
8009 #if CONFIG_NFS4
8010 uint32_t stategenid = 0;
8011 #endif
8012 uint32_t vrestart = 0, restart = 0, vrestarts = 0, restarts = 0;
8013 kern_return_t kret;
8014
8015 FSDBG(323, f_offset, size, pl, pl_offset);
8016
8017 if (pl == (upl_t)NULL) {
8018 panic("nfs_pageout: no upl");
8019 }
8020
8021 if (size <= 0) {
8022 printf("nfs_pageout: invalid size %ld", size);
8023 if (!nofreeupl) {
8024 ubc_upl_abort_range(pl, pl_offset, size, 0);
8025 }
8026 return EINVAL;
8027 }
8028
8029 if (!nmp) {
8030 if (!nofreeupl) {
8031 ubc_upl_abort(pl, UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY);
8032 }
8033 return ENXIO;
8034 }
8035 biosize = nmp->nm_biosize;
8036 nmwsize = nmp->nm_wsize;
8037
8038 nfs_data_lock_noupdate(np, NFS_DATA_LOCK_SHARED);
8039
8040 /*
8041 * Check to see whether the buffer is incore.
8042 * If incore and not busy, invalidate it from the cache.
8043 */
8044 for (iosize = 0; iosize < size; iosize += xsize) {
8045 off = f_offset + iosize;
8046 /* need to make sure we do things on block boundaries */
8047 xsize = biosize - (off % biosize);
8048 if (off + xsize > f_offset + size) {
8049 xsize = f_offset + size - off;
8050 }
8051 lbn = (daddr64_t)(off / biosize);
8052 lck_mtx_lock(nfs_buf_mutex);
8053 if ((bp = nfs_buf_incore(np, lbn))) {
8054 FSDBG(323, off, bp, bp->nb_lflags, bp->nb_flags);
8055 if (nfs_buf_acquire(bp, NBAC_NOWAIT, 0, 0)) {
8056 lck_mtx_unlock(nfs_buf_mutex);
8057 nfs_data_unlock_noupdate(np);
8058 /* no panic. just tell vm we are busy */
8059 if (!nofreeupl) {
8060 ubc_upl_abort_range(pl, pl_offset, size, 0);
8061 }
8062 return EBUSY;
8063 }
8064 if (bp->nb_dirtyend > 0) {
8065 /*
8066 * if there's a dirty range in the buffer, check
8067 * to see if it extends beyond the pageout region
8068 *
8069 * if the dirty region lies completely within the
8070 * pageout region, we just invalidate the buffer
8071 * because it's all being written out now anyway.
8072 *
8073 * if any of the dirty region lies outside the
8074 * pageout region, we'll try to clip the dirty
8075 * region to eliminate the portion that's being
8076 * paged out. If that's not possible, because
8077 * the dirty region extends before and after the
8078 * pageout region, then we'll just return EBUSY.
8079 */
8080 off_t boff, start, end;
8081 boff = NBOFF(bp);
8082 start = off;
8083 end = off + xsize;
8084 /* clip end to EOF */
8085 if (end > (off_t)np->n_size) {
8086 end = np->n_size;
8087 }
8088 start -= boff;
8089 end -= boff;
8090 if ((bp->nb_dirtyoff < start) &&
8091 (bp->nb_dirtyend > end)) {
8092 /*
8093 * not gonna be able to clip the dirty region
8094 *
8095 * But before returning the bad news, move the
8096 * buffer to the start of the delwri list and
8097 * give the list a push to try to flush the
8098 * buffer out.
8099 */
8100 FSDBG(323, np, bp, 0xd00deebc, EBUSY);
8101 nfs_buf_remfree(bp);
8102 TAILQ_INSERT_HEAD(&nfsbufdelwri, bp, nb_free);
8103 nfsbufdelwricnt++;
8104 nfs_buf_drop(bp);
8105 nfs_buf_delwri_push(1);
8106 lck_mtx_unlock(nfs_buf_mutex);
8107 nfs_data_unlock_noupdate(np);
8108 if (!nofreeupl) {
8109 ubc_upl_abort_range(pl, pl_offset, size, 0);
8110 }
8111 return EBUSY;
8112 }
8113 if ((bp->nb_dirtyoff < start) ||
8114 (bp->nb_dirtyend > end)) {
8115 /* clip dirty region, if necessary */
8116 if (bp->nb_dirtyoff < start) {
8117 bp->nb_dirtyend = min(bp->nb_dirtyend, start);
8118 }
8119 if (bp->nb_dirtyend > end) {
8120 bp->nb_dirtyoff = max(bp->nb_dirtyoff, end);
8121 }
8122 FSDBG(323, bp, bp->nb_dirtyoff, bp->nb_dirtyend, 0xd00dee00);
8123 /* we're leaving this block dirty */
8124 nfs_buf_drop(bp);
8125 lck_mtx_unlock(nfs_buf_mutex);
8126 continue;
8127 }
8128 }
8129 nfs_buf_remfree(bp);
8130 lck_mtx_unlock(nfs_buf_mutex);
8131 SET(bp->nb_flags, NB_INVAL);
8132 nfs_node_lock_force(np);
8133 if (ISSET(bp->nb_flags, NB_NEEDCOMMIT)) {
8134 CLR(bp->nb_flags, NB_NEEDCOMMIT);
8135 np->n_needcommitcnt--;
8136 CHECK_NEEDCOMMITCNT(np);
8137 }
8138 nfs_node_unlock(np);
8139 nfs_buf_release(bp, 1);
8140 } else {
8141 lck_mtx_unlock(nfs_buf_mutex);
8142 }
8143 }
8144
8145 thd = vfs_context_thread(ap->a_context);
8146 cred = ubc_getcred(vp);
8147 if (!IS_VALID_CRED(cred)) {
8148 cred = vfs_context_ucred(ap->a_context);
8149 }
8150
8151 nfs_node_lock_force(np);
8152 if (np->n_flag & NWRITEERR) {
8153 error = np->n_error;
8154 nfs_node_unlock(np);
8155 nfs_data_unlock_noupdate(np);
8156 if (!nofreeupl) {
8157 ubc_upl_abort_range(pl, pl_offset, size,
8158 UPL_ABORT_FREE_ON_EMPTY);
8159 }
8160 return error;
8161 }
8162 nfs_node_unlock(np);
8163
8164 if (f_offset < 0 || f_offset >= (off_t)np->n_size ||
8165 f_offset & PAGE_MASK_64 || size & PAGE_MASK_64) {
8166 nfs_data_unlock_noupdate(np);
8167 if (!nofreeupl) {
8168 ubc_upl_abort_range(pl, pl_offset, size,
8169 UPL_ABORT_FREE_ON_EMPTY);
8170 }
8171 return EINVAL;
8172 }
8173
8174 kret = ubc_upl_map(pl, &ioaddr);
8175 if (kret != KERN_SUCCESS) {
8176 panic("nfs_vnop_pageout: ubc_upl_map() failed with (%d)", kret);
8177 }
8178 ioaddr += pl_offset;
8179
8180 if ((u_quad_t)f_offset + size > np->n_size) {
8181 xsize = np->n_size - f_offset;
8182 } else {
8183 xsize = size;
8184 }
8185
8186 pgsize = round_page_64(xsize);
8187 if ((size > pgsize) && !nofreeupl) {
8188 ubc_upl_abort_range(pl, pl_offset + pgsize, size - pgsize,
8189 UPL_ABORT_FREE_ON_EMPTY);
8190 }
8191
8192 /*
8193 * check for partial page and clear the
8194 * contents past end of the file before
8195 * releasing it in the VM page cache
8196 */
8197 if ((u_quad_t)f_offset < np->n_size && (u_quad_t)f_offset + size > np->n_size) {
8198 size_t io = np->n_size - f_offset;
8199 bzero((caddr_t)(ioaddr + io), size - io);
8200 FSDBG(321, np->n_size, f_offset, f_offset + io, size - io);
8201 }
8202 nfs_data_unlock_noupdate(np);
8203
8204 auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_WRITE,
8205 &uio_buf, sizeof(uio_buf));
8206
8207 tryagain:
8208 #if CONFIG_NFS4
8209 if (nmp->nm_vers >= NFS_VER4) {
8210 stategenid = nmp->nm_stategenid;
8211 }
8212 #endif
8213 wverf = wverf2 = wverfset = 0;
8214 txsize = rxsize = xsize;
8215 txoffset = rxoffset = f_offset;
8216 txaddr = rxaddr = ioaddr;
8217 commit = NFS_WRITE_FILESYNC;
8218
8219 bzero(req, sizeof(req));
8220 nextsend = nextwait = 0;
8221 do {
8222 if (np->n_flag & NREVOKE) {
8223 error = EIO;
8224 break;
8225 }
8226 /* send requests while we need to and have available slots */
8227 while ((txsize > 0) && (req[nextsend] == NULL)) {
8228 iosize = MIN(nmwsize, txsize);
8229 uio_reset(auio, txoffset, UIO_SYSSPACE, UIO_WRITE);
8230 uio_addiov(auio, CAST_USER_ADDR_T(txaddr), iosize);
8231 FSDBG(323, uio_offset(auio), iosize, txaddr, txsize);
8232 OSAddAtomic64(1, &nfsstats.pageouts);
8233 nfs_node_lock_force(np);
8234 np->n_numoutput++;
8235 nfs_node_unlock(np);
8236 vnode_startwrite(vp);
8237 iomode = NFS_WRITE_UNSTABLE;
8238 if ((error = nmp->nm_funcs->nf_write_rpc_async(np, auio, iosize, thd, cred, iomode, NULL, &req[nextsend]))) {
8239 req[nextsend] = NULL;
8240 vnode_writedone(vp);
8241 nfs_node_lock_force(np);
8242 np->n_numoutput--;
8243 nfs_node_unlock(np);
8244 break;
8245 }
8246 txaddr += iosize;
8247 txoffset += iosize;
8248 txsize -= iosize;
8249 nextsend = (nextsend + 1) % MAXPAGINGREQS;
8250 }
8251 /* wait while we need to and break out if more requests to send */
8252 while ((rxsize > 0) && req[nextwait]) {
8253 iosize = remsize = MIN(nmwsize, rxsize);
8254 error = nmp->nm_funcs->nf_write_rpc_async_finish(np, req[nextwait], &iomode, &iosize, &wverf2);
8255 req[nextwait] = NULL;
8256 nextwait = (nextwait + 1) % MAXPAGINGREQS;
8257 vnode_writedone(vp);
8258 nfs_node_lock_force(np);
8259 np->n_numoutput--;
8260 nfs_node_unlock(np);
8261 #if CONFIG_NFS4
8262 if ((nmp->nm_vers >= NFS_VER4) && nfs_mount_state_error_should_restart(error)) {
8263 lck_mtx_lock(&nmp->nm_lock);
8264 if ((error != NFSERR_GRACE) && (stategenid == nmp->nm_stategenid)) {
8265 NP(np, "nfs_vnop_pageout: error %d, initiating recovery", error);
8266 nfs_need_recover(nmp, error);
8267 }
8268 lck_mtx_unlock(&nmp->nm_lock);
8269 restart = 1;
8270 goto cancel;
8271 }
8272 #endif
8273 if (error) {
8274 FSDBG(323, rxoffset, rxsize, error, -1);
8275 break;
8276 }
8277 if (!wverfset) {
8278 wverf = wverf2;
8279 wverfset = 1;
8280 } else if (wverf != wverf2) {
8281 /* verifier changed, so we need to restart all the writes */
8282 vrestart = 1;
8283 goto cancel;
8284 }
8285 /* Retain the lowest commitment level returned. */
8286 if (iomode < commit) {
8287 commit = iomode;
8288 }
8289 rxaddr += iosize;
8290 rxoffset += iosize;
8291 rxsize -= iosize;
8292 remsize -= iosize;
8293 if (remsize > 0) {
8294 /* need to try sending the remainder */
8295 iosize = remsize;
8296 uio_reset(auio, rxoffset, UIO_SYSSPACE, UIO_WRITE);
8297 uio_addiov(auio, CAST_USER_ADDR_T(rxaddr), remsize);
8298 iomode = NFS_WRITE_UNSTABLE;
8299 error = nfs_write_rpc2(np, auio, thd, cred, &iomode, &wverf2);
8300 #if CONFIG_NFS4
8301 if ((nmp->nm_vers >= NFS_VER4) && nfs_mount_state_error_should_restart(error)) {
8302 NP(np, "nfs_vnop_pageout: restart: error %d", error);
8303 lck_mtx_lock(&nmp->nm_lock);
8304 if ((error != NFSERR_GRACE) && (stategenid == nmp->nm_stategenid)) {
8305 NP(np, "nfs_vnop_pageout: error %d, initiating recovery", error);
8306 nfs_need_recover(nmp, error);
8307 }
8308 lck_mtx_unlock(&nmp->nm_lock);
8309 restart = 1;
8310 goto cancel;
8311 }
8312 #endif
8313 if (error) {
8314 FSDBG(323, rxoffset, rxsize, error, -1);
8315 break;
8316 }
8317 if (wverf != wverf2) {
8318 /* verifier changed, so we need to restart all the writes */
8319 vrestart = 1;
8320 goto cancel;
8321 }
8322 if (iomode < commit) {
8323 commit = iomode;
8324 }
8325 rxaddr += iosize;
8326 rxoffset += iosize;
8327 rxsize -= iosize;
8328 }
8329 if (txsize) {
8330 break;
8331 }
8332 }
8333 } while (!error && (txsize || rxsize));
8334
8335 vrestart = 0;
8336
8337 if (!error && (commit != NFS_WRITE_FILESYNC)) {
8338 error = nmp->nm_funcs->nf_commit_rpc(np, f_offset, xsize, cred, wverf);
8339 if (error == NFSERR_STALEWRITEVERF) {
8340 vrestart = 1;
8341 error = EIO;
8342 }
8343 }
8344
8345 if (error) {
8346 cancel:
8347 /* cancel any outstanding requests */
8348 while (req[nextwait]) {
8349 nfs_request_async_cancel(req[nextwait]);
8350 req[nextwait] = NULL;
8351 nextwait = (nextwait + 1) % MAXPAGINGREQS;
8352 vnode_writedone(vp);
8353 nfs_node_lock_force(np);
8354 np->n_numoutput--;
8355 nfs_node_unlock(np);
8356 }
8357 if (np->n_flag & NREVOKE) {
8358 error = EIO;
8359 } else {
8360 if (vrestart) {
8361 if (++vrestarts <= 100) { /* guard against no progress */
8362 goto tryagain;
8363 }
8364 NP(np, "nfs_pageout: too many restarts, aborting");
8365 FSDBG(323, f_offset, xsize, ERESTART, -1);
8366 }
8367 if (restart) {
8368 if (restarts <= nfs_mount_state_max_restarts(nmp)) { /* guard against no progress */
8369 if (error == NFSERR_GRACE) {
8370 tsleep(&nmp->nm_state, (PZERO - 1), "nfsgrace", 2 * hz);
8371 }
8372 if (!(error = nfs_mount_state_wait_for_recovery(nmp))) {
8373 goto tryagain;
8374 }
8375 } else {
8376 NP(np, "nfs_pageout: too many restarts, aborting");
8377 FSDBG(323, f_offset, xsize, ERESTART, -1);
8378 }
8379 }
8380 }
8381 }
8382
8383 ubc_upl_unmap(pl);
8384
8385 /*
8386 * We've had several different solutions on what to do when the pageout
8387 * gets an error. If we don't handle it and return an error to the
8388 * caller, vm, it will retry. This can end in endless looping
8389 * between vm and here doing retries of the same page. Doing a dump
8390 * back to vm will get it out of vm's knowledge and we lose whatever
8391 * data existed. This is risky, but in some cases necessary. For
8392 * example, the initial fix here was to do that for ESTALE. In that case
8393 * the server is telling us that the file is no longer the same. We
8394 * would not want to keep paging out to that. We also saw some 151
8395 * errors from an Auspex server, and NFSv3 can return errors higher than
8396 * ELAST. Those, along with known NFS server errors, we will "dump" from
8397 * vm. Errors we don't expect to occur, we dump and log for further
8398 * analysis. Errors that could be transient, such as networking ones,
8399 * we let vm "retry". Lastly, errors that we retry but that could storm
8400 * the network get "retrywithsleep". "sever" will be used in the future
8401 * to dump all pages of an object for cases like ESTALE.
8402 * All this is the basis for the actions returned and first guesses on
8403 * error handling. Tweaking is expected as more statistics are gathered.
8404 * Note, in the long run we may need another, more robust solution:
8405 * some kind of persistent store for when vm can neither dump nor keep
8406 * retrying, but that would be a larger architectural change.
8407 */
8408 if (!nofreeupl) { /* otherwise stacked file system has to handle this */
8409 if (error) {
8410 int abortflags = 0;
8411 char action = nfs_pageouterrorhandler(error);
8412
8413 switch (action) {
8414 case DUMP:
8415 abortflags = UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY;
8416 break;
8417 case DUMPANDLOG:
8418 abortflags = UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY;
8419 if (error <= NFS_ELAST) {
8420 if ((errorcount[error] % 100) == 0) {
8421 NP(np, "nfs_pageout: unexpected error %d. dumping vm page", error);
8422 }
8423 errorcount[error]++;
8424 }
8425 break;
8426 case RETRY:
8427 abortflags = UPL_ABORT_FREE_ON_EMPTY;
8428 break;
8429 case SEVER: /* not implemented */
8430 default:
8431 NP(np, "nfs_pageout: action %d not expected", action);
8432 break;
8433 }
8434
8435 ubc_upl_abort_range(pl, pl_offset, pgsize, abortflags);
8436 /* return error in all cases above */
8437 } else {
8438 ubc_upl_commit_range(pl, pl_offset, pgsize,
8439 UPL_COMMIT_CLEAR_DIRTY |
8440 UPL_COMMIT_FREE_ON_EMPTY);
8441 }
8442 }
8443 return error;
8444 }
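/*
 * [Editor's sketch] Why pageout tracks wverf/wverf2: NFSv3+ UNSTABLE
 * writes are only durable after a COMMIT that returns the same write
 * verifier the WRITE replies carried.  If the verifier changes
 * mid-stream, the server lost its write cache (e.g. it rebooted), and
 * every uncommitted write must be re-sent -- hence the vrestart path
 * above.  A schematic of the verifier check; the struct and function
 * below are the editor's.
 */
#if 0   /* illustrative sketch only */
#include <stdint.h>

struct write_stream {
        uint64_t wverf;
        int      wverfset;
};

/*
 * Returns 0 if the reply's verifier is consistent, 1 if all
 * uncommitted writes must be restarted from scratch.
 */
static int
check_verifier(struct write_stream *ws, uint64_t wverf2)
{
        if (!ws->wverfset) {
                ws->wverf = wverf2;     /* first reply sets the baseline */
                ws->wverfset = 1;
                return 0;
        }
        return ws->wverf != wverf2;     /* changed => server restarted */
}
#endif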
8445
8446 /* Blktooff derives file offset given a logical block number */
8447 int
8448 nfs_vnop_blktooff(
8449 struct vnop_blktooff_args /* {
8450 * struct vnodeop_desc *a_desc;
8451 * vnode_t a_vp;
8452 * daddr64_t a_lblkno;
8453 * off_t *a_offset;
8454 * } */*ap)
8455 {
8456 int biosize;
8457 vnode_t vp = ap->a_vp;
8458 struct nfsmount *nmp = VTONMP(vp);
8459
8460 if (nfs_mount_gone(nmp)) {
8461 return ENXIO;
8462 }
8463 biosize = nmp->nm_biosize;
8464
8465 *ap->a_offset = (off_t)(ap->a_lblkno * biosize);
8466
8467 return 0;
8468 }
8469
8470 int
8471 nfs_vnop_offtoblk(
8472 struct vnop_offtoblk_args /* {
8473 * struct vnodeop_desc *a_desc;
8474 * vnode_t a_vp;
8475 * off_t a_offset;
8476 * daddr64_t *a_lblkno;
8477 * } */*ap)
8478 {
8479 int biosize;
8480 vnode_t vp = ap->a_vp;
8481 struct nfsmount *nmp = VTONMP(vp);
8482
8483 if (nfs_mount_gone(nmp)) {
8484 return ENXIO;
8485 }
8486 biosize = nmp->nm_biosize;
8487
8488 *ap->a_lblkno = (daddr64_t)(ap->a_offset / biosize);
8489
8490 return 0;
8491 }
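/*
 * [Editor's sketch] blktooff/offtoblk above are simple inverses over
 * the mount's biosize: block -> byte offset multiplies, byte offset ->
 * block divides (truncating), so every offset within a block maps back
 * to the same lblkno.  A round-trip check with an example biosize of
 * 32768 (a value chosen for illustration only):
 */
#if 0   /* illustrative sketch only */
#include <assert.h>
#include <stdint.h>

static void
blk_round_trip(void)
{
        const int biosize = 32768;
        const int64_t lblkno = 3;
        const int64_t offset = lblkno * biosize;

        assert(offset == 98304);                            /* blktooff */
        assert(offset / biosize == lblkno);                 /* offtoblk */
        assert((offset + biosize - 1) / biosize == lblkno); /* same block */
}
#endif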
8492
8493 /*
8494 * vnode change monitoring
8495 */
8496 int
8497 nfs_vnop_monitor(
8498 struct vnop_monitor_args /* {
8499 * struct vnodeop_desc *a_desc;
8500 * vnode_t a_vp;
8501 * uint32_t a_events;
8502 * uint32_t a_flags;
8503 * void *a_handle;
8504 * vfs_context_t a_context;
8505 * } */*ap)
8506 {
8507 nfsnode_t np = VTONFS(ap->a_vp);
8508 struct nfsmount *nmp = VTONMP(ap->a_vp);
8509 int error = 0;
8510
8511 if (nfs_mount_gone(nmp)) {
8512 return ENXIO;
8513 }
8514
8515 /* make sure that the vnode's monitoring status is up to date */
8516 lck_mtx_lock(&nmp->nm_lock);
8517 if (vnode_ismonitored(ap->a_vp)) {
8518 /* This vnode is currently being monitored, make sure we're tracking it. */
8519 if (np->n_monlink.le_next == NFSNOLIST) {
8520 LIST_INSERT_HEAD(&nmp->nm_monlist, np, n_monlink);
8521 nfs_mount_sock_thread_wake(nmp);
8522 }
8523 } else {
8524 /* This vnode is no longer being monitored, make sure we're not tracking it. */
8525 /* Wait for any in-progress getattr to complete first. */
8526 while (np->n_mflag & NMMONSCANINPROG) {
8527 struct timespec ts = { .tv_sec = 1, .tv_nsec = 0 };
8528 np->n_mflag |= NMMONSCANWANT;
8529 msleep(&np->n_mflag, &nmp->nm_lock, PZERO - 1, "nfswaitmonscan", &ts);
8530 }
8531 if (np->n_monlink.le_next != NFSNOLIST) {
8532 LIST_REMOVE(np, n_monlink);
8533 np->n_monlink.le_next = NFSNOLIST;
8534 }
8535 }
8536 lck_mtx_unlock(&nmp->nm_lock);
8537
8538 return error;
8539 }
8540
8541 /*
8542 * Send a vnode notification for the given events.
8543 */
8544 void
8545 nfs_vnode_notify(nfsnode_t np, uint32_t events)
8546 {
8547 struct nfsmount *nmp = NFSTONMP(np);
8548 struct nfs_vattr nvattr;
8549 struct vnode_attr vattr, *vap = NULL;
8550 struct timeval now;
8551
8552 microuptime(&now);
8553 if ((np->n_evtstamp == now.tv_sec) || !nmp) {
8554 /* delay sending this notify */
8555 np->n_events |= events;
8556 return;
8557 }
8558 events |= np->n_events;
8559 np->n_events = 0;
8560 np->n_evtstamp = now.tv_sec;
8561
8562 vfs_get_notify_attributes(&vattr);
8563 if (!nfs_getattrcache(np, &nvattr, 0)) {
8564 vap = &vattr;
8565 VATTR_INIT(vap);
8566
8567 VATTR_RETURN(vap, va_fsid, vfs_statfs(nmp->nm_mountp)->f_fsid.val[0]);
8568 VATTR_RETURN(vap, va_fileid, nvattr.nva_fileid);
8569 VATTR_RETURN(vap, va_mode, nvattr.nva_mode);
8570 VATTR_RETURN(vap, va_uid, nvattr.nva_uid);
8571 VATTR_RETURN(vap, va_gid, nvattr.nva_gid);
8572 VATTR_RETURN(vap, va_nlink, nvattr.nva_nlink);
8573 }
8574 vnode_notify(NFSTOV(np), events, vap);
8575 }
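/*
 * [Editor's sketch] nfs_vnode_notify() rate-limits to one notification
 * per node per second: if the last event went out during the current
 * second (n_evtstamp), new event bits are just OR-ed into n_events and
 * delivered with the next notify.  A minimal model of that coalescing;
 * the names below are the editor's.
 */
#if 0   /* illustrative sketch only */
#include <stdint.h>
#include <time.h>

struct coalescer {
        time_t   last_stamp;            /* n_evtstamp analogue */
        uint32_t pending;               /* n_events analogue */
};

/* Returns the event mask to deliver now, or 0 if coalesced. */
static uint32_t
coalesce(struct coalescer *c, uint32_t events, time_t now)
{
        if (c->last_stamp == now) {
                c->pending |= events;   /* too soon: batch for later */
                return 0;
        }
        events |= c->pending;           /* deliver batched + new */
        c->pending = 0;
        c->last_stamp = now;
        return events;
}
#endif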
8576