/*
 * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California. All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)nfs_vnops.c	8.16 (Berkeley) 5/27/95
 * FreeBSD-Id: nfs_vnops.c,v 1.72 1997/11/07 09:20:48 phk Exp $
 */


/*
 * vnode op calls for Sun NFS version 2 and 3
 */
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/mount_internal.h>
#include <sys/malloc.h>
#include <sys/kpi_mbuf.h>
#include <sys/conf.h>
#include <sys/vnode_internal.h>
#include <sys/dirent.h>
#include <sys/fcntl.h>
#include <sys/lockf.h>
#include <sys/ubc_internal.h>
#include <sys/attr.h>
#include <sys/signalvar.h>
#include <sys/uio_internal.h>
#include <sys/xattr.h>

#include <vfs/vfs_support.h>

#include <sys/vm.h>

#include <sys/time.h>
#include <kern/clock.h>
#include <libkern/OSAtomic.h>

#include <miscfs/fifofs/fifo.h>
#include <miscfs/specfs/specdev.h>

#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
#include <nfs/nfsnode.h>
#include <nfs/nfs_gss.h>
#include <nfs/nfsmount.h>
#include <nfs/nfs_lock.h>
#include <nfs/xdr_subs.h>
#include <nfs/nfsm_subs.h>

#include <net/if.h>
#include <netinet/in.h>
#include <netinet/in_var.h>

#include <vm/vm_kern.h>
#include <vm/vm_pageout.h>

#include <kern/task.h>
#include <kern/sched_prim.h>

#define NFS_VNOP_DBG(...) NFS_DBG(NFS_FAC_VNOP, 7, ## __VA_ARGS__)
#define DEFAULT_READLINK_NOCACHE 0

/*
 * NFS vnode ops
 */
int nfs_vnop_lookup(struct vnop_lookup_args *);
int nfsspec_vnop_read(struct vnop_read_args *);
int nfsspec_vnop_write(struct vnop_write_args *);
int nfsspec_vnop_close(struct vnop_close_args *);
#if FIFO
int nfsfifo_vnop_read(struct vnop_read_args *);
int nfsfifo_vnop_write(struct vnop_write_args *);
int nfsfifo_vnop_close(struct vnop_close_args *);
#endif
int nfs_vnop_ioctl(struct vnop_ioctl_args *);
int nfs_vnop_select(struct vnop_select_args *);
int nfs_vnop_setattr(struct vnop_setattr_args *);
int nfs_vnop_fsync(struct vnop_fsync_args *);
int nfs_vnop_rename(struct vnop_rename_args *);
int nfs_vnop_readdir(struct vnop_readdir_args *);
int nfs_vnop_readlink(struct vnop_readlink_args *);
int nfs_vnop_pathconf(struct vnop_pathconf_args *);
int nfs_vnop_pagein(struct vnop_pagein_args *);
int nfs_vnop_pageout(struct vnop_pageout_args *);
int nfs_vnop_blktooff(struct vnop_blktooff_args *);
int nfs_vnop_offtoblk(struct vnop_offtoblk_args *);
int nfs_vnop_blockmap(struct vnop_blockmap_args *);
int nfs_vnop_monitor(struct vnop_monitor_args *);

int nfs3_vnop_create(struct vnop_create_args *);
int nfs3_vnop_mknod(struct vnop_mknod_args *);
int nfs3_vnop_getattr(struct vnop_getattr_args *);
int nfs3_vnop_link(struct vnop_link_args *);
int nfs3_vnop_mkdir(struct vnop_mkdir_args *);
int nfs3_vnop_rmdir(struct vnop_rmdir_args *);
int nfs3_vnop_symlink(struct vnop_symlink_args *);


vnop_t **nfsv2_vnodeop_p;
static const struct vnodeopv_entry_desc nfsv2_vnodeop_entries[] = {
	{ .opve_op = &vnop_default_desc, .opve_impl = (vnop_t *)vn_default_error },
	{ .opve_op = &vnop_lookup_desc, .opve_impl = (vnop_t *)nfs_vnop_lookup },       /* lookup */
	{ .opve_op = &vnop_create_desc, .opve_impl = (vnop_t *)nfs3_vnop_create },      /* create */
	{ .opve_op = &vnop_mknod_desc, .opve_impl = (vnop_t *)nfs3_vnop_mknod },        /* mknod */
	{ .opve_op = &vnop_open_desc, .opve_impl = (vnop_t *)nfs_vnop_open },           /* open */
	{ .opve_op = &vnop_close_desc, .opve_impl = (vnop_t *)nfs_vnop_close },         /* close */
	{ .opve_op = &vnop_access_desc, .opve_impl = (vnop_t *)nfs_vnop_access },       /* access */
	{ .opve_op = &vnop_getattr_desc, .opve_impl = (vnop_t *)nfs3_vnop_getattr },    /* getattr */
	{ .opve_op = &vnop_setattr_desc, .opve_impl = (vnop_t *)nfs_vnop_setattr },     /* setattr */
	{ .opve_op = &vnop_read_desc, .opve_impl = (vnop_t *)nfs_vnop_read },           /* read */
	{ .opve_op = &vnop_write_desc, .opve_impl = (vnop_t *)nfs_vnop_write },         /* write */
	{ .opve_op = &vnop_ioctl_desc, .opve_impl = (vnop_t *)nfs_vnop_ioctl },         /* ioctl */
	{ .opve_op = &vnop_select_desc, .opve_impl = (vnop_t *)nfs_vnop_select },       /* select */
	{ .opve_op = &vnop_revoke_desc, .opve_impl = (vnop_t *)nfs_vnop_revoke },       /* revoke */
	{ .opve_op = &vnop_mmap_desc, .opve_impl = (vnop_t *)nfs_vnop_mmap },           /* mmap */
	{ .opve_op = &vnop_mnomap_desc, .opve_impl = (vnop_t *)nfs_vnop_mnomap },       /* mnomap */
	{ .opve_op = &vnop_fsync_desc, .opve_impl = (vnop_t *)nfs_vnop_fsync },         /* fsync */
	{ .opve_op = &vnop_remove_desc, .opve_impl = (vnop_t *)nfs_vnop_remove },       /* remove */
	{ .opve_op = &vnop_link_desc, .opve_impl = (vnop_t *)nfs3_vnop_link },          /* link */
	{ .opve_op = &vnop_rename_desc, .opve_impl = (vnop_t *)nfs_vnop_rename },       /* rename */
	{ .opve_op = &vnop_mkdir_desc, .opve_impl = (vnop_t *)nfs3_vnop_mkdir },        /* mkdir */
	{ .opve_op = &vnop_rmdir_desc, .opve_impl = (vnop_t *)nfs3_vnop_rmdir },        /* rmdir */
	{ .opve_op = &vnop_symlink_desc, .opve_impl = (vnop_t *)nfs3_vnop_symlink },    /* symlink */
	{ .opve_op = &vnop_readdir_desc, .opve_impl = (vnop_t *)nfs_vnop_readdir },     /* readdir */
	{ .opve_op = &vnop_readlink_desc, .opve_impl = (vnop_t *)nfs_vnop_readlink },   /* readlink */
	{ .opve_op = &vnop_inactive_desc, .opve_impl = (vnop_t *)nfs_vnop_inactive },   /* inactive */
	{ .opve_op = &vnop_reclaim_desc, .opve_impl = (vnop_t *)nfs_vnop_reclaim },     /* reclaim */
	{ .opve_op = &vnop_strategy_desc, .opve_impl = (vnop_t *)err_strategy },        /* strategy */
	{ .opve_op = &vnop_pathconf_desc, .opve_impl = (vnop_t *)nfs_vnop_pathconf },   /* pathconf */
	{ .opve_op = &vnop_advlock_desc, .opve_impl = (vnop_t *)nfs_vnop_advlock },     /* advlock */
	{ .opve_op = &vnop_bwrite_desc, .opve_impl = (vnop_t *)err_bwrite },            /* bwrite */
	{ .opve_op = &vnop_pagein_desc, .opve_impl = (vnop_t *)nfs_vnop_pagein },       /* Pagein */
	{ .opve_op = &vnop_pageout_desc, .opve_impl = (vnop_t *)nfs_vnop_pageout },     /* Pageout */
	{ .opve_op = &vnop_copyfile_desc, .opve_impl = (vnop_t *)err_copyfile },        /* Copyfile */
	{ .opve_op = &vnop_blktooff_desc, .opve_impl = (vnop_t *)nfs_vnop_blktooff },   /* blktooff */
	{ .opve_op = &vnop_offtoblk_desc, .opve_impl = (vnop_t *)nfs_vnop_offtoblk },   /* offtoblk */
	{ .opve_op = &vnop_blockmap_desc, .opve_impl = (vnop_t *)nfs_vnop_blockmap },   /* blockmap */
	{ .opve_op = &vnop_monitor_desc, .opve_impl = (vnop_t *)nfs_vnop_monitor },     /* monitor */
	{ .opve_op = NULL, .opve_impl = NULL }
};
const struct vnodeopv_desc nfsv2_vnodeop_opv_desc =
{ &nfsv2_vnodeop_p, nfsv2_vnodeop_entries };
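
/*
 * Note (added commentary, not in the original source): each vnodeopv_desc
 * pairs an entry table like the one above with a pointer (e.g.
 * nfsv2_vnodeop_p) that the VFS fills in when the descriptor is registered
 * during filesystem initialization.  Operations missing from a table fall
 * through to the vnop_default_desc entry -- vn_default_error here -- so
 * unsupported operations fail cleanly instead of dispatching through an
 * empty slot.
 */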


#if CONFIG_NFS4
vnop_t **nfsv4_vnodeop_p;
static const struct vnodeopv_entry_desc nfsv4_vnodeop_entries[] = {
	{ &vnop_default_desc, (vnop_t *)vn_default_error },
	{ &vnop_lookup_desc, (vnop_t *)nfs_vnop_lookup },       /* lookup */
	{ &vnop_create_desc, (vnop_t *)nfs4_vnop_create },      /* create */
	{ &vnop_mknod_desc, (vnop_t *)nfs4_vnop_mknod },        /* mknod */
	{ &vnop_open_desc, (vnop_t *)nfs_vnop_open },           /* open */
	{ &vnop_close_desc, (vnop_t *)nfs_vnop_close },         /* close */
	{ &vnop_access_desc, (vnop_t *)nfs_vnop_access },       /* access */
	{ &vnop_getattr_desc, (vnop_t *)nfs4_vnop_getattr },    /* getattr */
	{ &vnop_setattr_desc, (vnop_t *)nfs_vnop_setattr },     /* setattr */
	{ &vnop_read_desc, (vnop_t *)nfs_vnop_read },           /* read */
	{ &vnop_write_desc, (vnop_t *)nfs_vnop_write },         /* write */
	{ &vnop_ioctl_desc, (vnop_t *)nfs_vnop_ioctl },         /* ioctl */
	{ &vnop_select_desc, (vnop_t *)nfs_vnop_select },       /* select */
	{ &vnop_revoke_desc, (vnop_t *)nfs_vnop_revoke },       /* revoke */
	{ &vnop_mmap_desc, (vnop_t *)nfs_vnop_mmap },           /* mmap */
	{ &vnop_mnomap_desc, (vnop_t *)nfs_vnop_mnomap },       /* mnomap */
	{ &vnop_fsync_desc, (vnop_t *)nfs_vnop_fsync },         /* fsync */
	{ &vnop_remove_desc, (vnop_t *)nfs_vnop_remove },       /* remove */
	{ &vnop_link_desc, (vnop_t *)nfs4_vnop_link },          /* link */
	{ &vnop_rename_desc, (vnop_t *)nfs_vnop_rename },       /* rename */
	{ &vnop_mkdir_desc, (vnop_t *)nfs4_vnop_mkdir },        /* mkdir */
	{ &vnop_rmdir_desc, (vnop_t *)nfs4_vnop_rmdir },        /* rmdir */
	{ &vnop_symlink_desc, (vnop_t *)nfs4_vnop_symlink },    /* symlink */
	{ &vnop_readdir_desc, (vnop_t *)nfs_vnop_readdir },     /* readdir */
	{ &vnop_readlink_desc, (vnop_t *)nfs_vnop_readlink },   /* readlink */
	{ &vnop_inactive_desc, (vnop_t *)nfs_vnop_inactive },   /* inactive */
	{ &vnop_reclaim_desc, (vnop_t *)nfs_vnop_reclaim },     /* reclaim */
	{ &vnop_strategy_desc, (vnop_t *)err_strategy },        /* strategy */
	{ &vnop_pathconf_desc, (vnop_t *)nfs_vnop_pathconf },   /* pathconf */
	{ &vnop_advlock_desc, (vnop_t *)nfs_vnop_advlock },     /* advlock */
	{ &vnop_bwrite_desc, (vnop_t *)err_bwrite },            /* bwrite */
	{ &vnop_pagein_desc, (vnop_t *)nfs_vnop_pagein },       /* Pagein */
	{ &vnop_pageout_desc, (vnop_t *)nfs_vnop_pageout },     /* Pageout */
	{ &vnop_copyfile_desc, (vnop_t *)err_copyfile },        /* Copyfile */
	{ &vnop_blktooff_desc, (vnop_t *)nfs_vnop_blktooff },   /* blktooff */
	{ &vnop_offtoblk_desc, (vnop_t *)nfs_vnop_offtoblk },   /* offtoblk */
	{ &vnop_blockmap_desc, (vnop_t *)nfs_vnop_blockmap },   /* blockmap */
	{ &vnop_getxattr_desc, (vnop_t *)nfs4_vnop_getxattr },  /* getxattr */
	{ &vnop_setxattr_desc, (vnop_t *)nfs4_vnop_setxattr },  /* setxattr */
	{ &vnop_removexattr_desc, (vnop_t *)nfs4_vnop_removexattr },/* removexattr */
	{ &vnop_listxattr_desc, (vnop_t *)nfs4_vnop_listxattr },/* listxattr */
#if NAMEDSTREAMS
	{ &vnop_getnamedstream_desc, (vnop_t *)nfs4_vnop_getnamedstream },      /* getnamedstream */
	{ &vnop_makenamedstream_desc, (vnop_t *)nfs4_vnop_makenamedstream },    /* makenamedstream */
	{ &vnop_removenamedstream_desc, (vnop_t *)nfs4_vnop_removenamedstream },/* removenamedstream */
#endif
	{ &vnop_monitor_desc, (vnop_t *)nfs_vnop_monitor },     /* monitor */
	{ NULL, NULL }
};
const struct vnodeopv_desc nfsv4_vnodeop_opv_desc =
{ &nfsv4_vnodeop_p, nfsv4_vnodeop_entries };
#endif

/*
 * Special device vnode ops
 */
vnop_t **spec_nfsv2nodeop_p;
static const struct vnodeopv_entry_desc spec_nfsv2nodeop_entries[] = {
	{ &vnop_default_desc, (vnop_t *)vn_default_error },
	{ &vnop_lookup_desc, (vnop_t *)spec_lookup },           /* lookup */
	{ &vnop_create_desc, (vnop_t *)spec_create },           /* create */
	{ &vnop_mknod_desc, (vnop_t *)spec_mknod },             /* mknod */
	{ &vnop_open_desc, (vnop_t *)spec_open },               /* open */
	{ &vnop_close_desc, (vnop_t *)nfsspec_vnop_close },     /* close */
	{ &vnop_getattr_desc, (vnop_t *)nfs3_vnop_getattr },    /* getattr */
	{ &vnop_setattr_desc, (vnop_t *)nfs_vnop_setattr },     /* setattr */
	{ &vnop_read_desc, (vnop_t *)nfsspec_vnop_read },       /* read */
	{ &vnop_write_desc, (vnop_t *)nfsspec_vnop_write },     /* write */
	{ &vnop_ioctl_desc, (vnop_t *)spec_ioctl },             /* ioctl */
	{ &vnop_select_desc, (vnop_t *)spec_select },           /* select */
	{ &vnop_revoke_desc, (vnop_t *)spec_revoke },           /* revoke */
	{ &vnop_mmap_desc, (vnop_t *)spec_mmap },               /* mmap */
	{ &vnop_fsync_desc, (vnop_t *)nfs_vnop_fsync },         /* fsync */
	{ &vnop_remove_desc, (vnop_t *)spec_remove },           /* remove */
	{ &vnop_link_desc, (vnop_t *)spec_link },               /* link */
	{ &vnop_rename_desc, (vnop_t *)spec_rename },           /* rename */
	{ &vnop_mkdir_desc, (vnop_t *)spec_mkdir },             /* mkdir */
	{ &vnop_rmdir_desc, (vnop_t *)spec_rmdir },             /* rmdir */
	{ &vnop_symlink_desc, (vnop_t *)spec_symlink },         /* symlink */
	{ &vnop_readdir_desc, (vnop_t *)spec_readdir },         /* readdir */
	{ &vnop_readlink_desc, (vnop_t *)spec_readlink },       /* readlink */
	{ &vnop_inactive_desc, (vnop_t *)nfs_vnop_inactive },   /* inactive */
	{ &vnop_reclaim_desc, (vnop_t *)nfs_vnop_reclaim },     /* reclaim */
	{ &vnop_strategy_desc, (vnop_t *)spec_strategy },       /* strategy */
	{ &vnop_pathconf_desc, (vnop_t *)spec_pathconf },       /* pathconf */
	{ &vnop_advlock_desc, (vnop_t *)spec_advlock },         /* advlock */
	{ &vnop_bwrite_desc, (vnop_t *)vn_bwrite },             /* bwrite */
	{ &vnop_pagein_desc, (vnop_t *)nfs_vnop_pagein },       /* Pagein */
	{ &vnop_pageout_desc, (vnop_t *)nfs_vnop_pageout },     /* Pageout */
	{ &vnop_blktooff_desc, (vnop_t *)nfs_vnop_blktooff },   /* blktooff */
	{ &vnop_offtoblk_desc, (vnop_t *)nfs_vnop_offtoblk },   /* offtoblk */
	{ &vnop_blockmap_desc, (vnop_t *)nfs_vnop_blockmap },   /* blockmap */
	{ &vnop_monitor_desc, (vnop_t *)nfs_vnop_monitor },     /* monitor */
	{ NULL, NULL }
};
const struct vnodeopv_desc spec_nfsv2nodeop_opv_desc =
{ &spec_nfsv2nodeop_p, spec_nfsv2nodeop_entries };
#if CONFIG_NFS4
vnop_t **spec_nfsv4nodeop_p;
static const struct vnodeopv_entry_desc spec_nfsv4nodeop_entries[] = {
	{ &vnop_default_desc, (vnop_t *)vn_default_error },
	{ &vnop_lookup_desc, (vnop_t *)spec_lookup },           /* lookup */
	{ &vnop_create_desc, (vnop_t *)spec_create },           /* create */
	{ &vnop_mknod_desc, (vnop_t *)spec_mknod },             /* mknod */
	{ &vnop_open_desc, (vnop_t *)spec_open },               /* open */
	{ &vnop_close_desc, (vnop_t *)nfsspec_vnop_close },     /* close */
	{ &vnop_getattr_desc, (vnop_t *)nfs4_vnop_getattr },    /* getattr */
	{ &vnop_setattr_desc, (vnop_t *)nfs_vnop_setattr },     /* setattr */
	{ &vnop_read_desc, (vnop_t *)nfsspec_vnop_read },       /* read */
	{ &vnop_write_desc, (vnop_t *)nfsspec_vnop_write },     /* write */
	{ &vnop_ioctl_desc, (vnop_t *)spec_ioctl },             /* ioctl */
	{ &vnop_select_desc, (vnop_t *)spec_select },           /* select */
	{ &vnop_revoke_desc, (vnop_t *)spec_revoke },           /* revoke */
	{ &vnop_mmap_desc, (vnop_t *)spec_mmap },               /* mmap */
	{ &vnop_fsync_desc, (vnop_t *)nfs_vnop_fsync },         /* fsync */
	{ &vnop_remove_desc, (vnop_t *)spec_remove },           /* remove */
	{ &vnop_link_desc, (vnop_t *)spec_link },               /* link */
	{ &vnop_rename_desc, (vnop_t *)spec_rename },           /* rename */
	{ &vnop_mkdir_desc, (vnop_t *)spec_mkdir },             /* mkdir */
	{ &vnop_rmdir_desc, (vnop_t *)spec_rmdir },             /* rmdir */
	{ &vnop_symlink_desc, (vnop_t *)spec_symlink },         /* symlink */
	{ &vnop_readdir_desc, (vnop_t *)spec_readdir },         /* readdir */
	{ &vnop_readlink_desc, (vnop_t *)spec_readlink },       /* readlink */
	{ &vnop_inactive_desc, (vnop_t *)nfs_vnop_inactive },   /* inactive */
	{ &vnop_reclaim_desc, (vnop_t *)nfs_vnop_reclaim },     /* reclaim */
	{ &vnop_strategy_desc, (vnop_t *)spec_strategy },       /* strategy */
	{ &vnop_pathconf_desc, (vnop_t *)spec_pathconf },       /* pathconf */
	{ &vnop_advlock_desc, (vnop_t *)spec_advlock },         /* advlock */
	{ &vnop_bwrite_desc, (vnop_t *)vn_bwrite },             /* bwrite */
	{ &vnop_pagein_desc, (vnop_t *)nfs_vnop_pagein },       /* Pagein */
	{ &vnop_pageout_desc, (vnop_t *)nfs_vnop_pageout },     /* Pageout */
	{ &vnop_blktooff_desc, (vnop_t *)nfs_vnop_blktooff },   /* blktooff */
	{ &vnop_offtoblk_desc, (vnop_t *)nfs_vnop_offtoblk },   /* offtoblk */
	{ &vnop_blockmap_desc, (vnop_t *)nfs_vnop_blockmap },   /* blockmap */
	{ &vnop_getxattr_desc, (vnop_t *)nfs4_vnop_getxattr },  /* getxattr */
	{ &vnop_setxattr_desc, (vnop_t *)nfs4_vnop_setxattr },  /* setxattr */
	{ &vnop_removexattr_desc, (vnop_t *)nfs4_vnop_removexattr },/* removexattr */
	{ &vnop_listxattr_desc, (vnop_t *)nfs4_vnop_listxattr },/* listxattr */
#if NAMEDSTREAMS
	{ &vnop_getnamedstream_desc, (vnop_t *)nfs4_vnop_getnamedstream },      /* getnamedstream */
	{ &vnop_makenamedstream_desc, (vnop_t *)nfs4_vnop_makenamedstream },    /* makenamedstream */
	{ &vnop_removenamedstream_desc, (vnop_t *)nfs4_vnop_removenamedstream },/* removenamedstream */
#endif
	{ &vnop_monitor_desc, (vnop_t *)nfs_vnop_monitor },     /* monitor */
	{ NULL, NULL }
};
const struct vnodeopv_desc spec_nfsv4nodeop_opv_desc =
{ &spec_nfsv4nodeop_p, spec_nfsv4nodeop_entries };
#endif /* CONFIG_NFS4 */

#if FIFO
vnop_t **fifo_nfsv2nodeop_p;
static const struct vnodeopv_entry_desc fifo_nfsv2nodeop_entries[] = {
	{ &vnop_default_desc, (vnop_t *)vn_default_error },
	{ &vnop_lookup_desc, (vnop_t *)fifo_lookup },           /* lookup */
	{ &vnop_create_desc, (vnop_t *)fifo_create },           /* create */
	{ &vnop_mknod_desc, (vnop_t *)fifo_mknod },             /* mknod */
	{ &vnop_open_desc, (vnop_t *)fifo_open },               /* open */
	{ &vnop_close_desc, (vnop_t *)nfsfifo_vnop_close },     /* close */
	{ &vnop_getattr_desc, (vnop_t *)nfs3_vnop_getattr },    /* getattr */
	{ &vnop_setattr_desc, (vnop_t *)nfs_vnop_setattr },     /* setattr */
	{ &vnop_read_desc, (vnop_t *)nfsfifo_vnop_read },       /* read */
	{ &vnop_write_desc, (vnop_t *)nfsfifo_vnop_write },     /* write */
	{ &vnop_ioctl_desc, (vnop_t *)fifo_ioctl },             /* ioctl */
	{ &vnop_select_desc, (vnop_t *)fifo_select },           /* select */
	{ &vnop_revoke_desc, (vnop_t *)fifo_revoke },           /* revoke */
	{ &vnop_mmap_desc, (vnop_t *)fifo_mmap },               /* mmap */
	{ &vnop_fsync_desc, (vnop_t *)nfs_vnop_fsync },         /* fsync */
	{ &vnop_remove_desc, (vnop_t *)fifo_remove },           /* remove */
	{ &vnop_link_desc, (vnop_t *)fifo_link },               /* link */
	{ &vnop_rename_desc, (vnop_t *)fifo_rename },           /* rename */
	{ &vnop_mkdir_desc, (vnop_t *)fifo_mkdir },             /* mkdir */
	{ &vnop_rmdir_desc, (vnop_t *)fifo_rmdir },             /* rmdir */
	{ &vnop_symlink_desc, (vnop_t *)fifo_symlink },         /* symlink */
	{ &vnop_readdir_desc, (vnop_t *)fifo_readdir },         /* readdir */
	{ &vnop_readlink_desc, (vnop_t *)fifo_readlink },       /* readlink */
	{ &vnop_inactive_desc, (vnop_t *)nfs_vnop_inactive },   /* inactive */
	{ &vnop_reclaim_desc, (vnop_t *)nfs_vnop_reclaim },     /* reclaim */
	{ &vnop_strategy_desc, (vnop_t *)fifo_strategy },       /* strategy */
	{ &vnop_pathconf_desc, (vnop_t *)fifo_pathconf },       /* pathconf */
	{ &vnop_advlock_desc, (vnop_t *)fifo_advlock },         /* advlock */
	{ &vnop_bwrite_desc, (vnop_t *)vn_bwrite },             /* bwrite */
	{ &vnop_pagein_desc, (vnop_t *)nfs_vnop_pagein },       /* Pagein */
	{ &vnop_pageout_desc, (vnop_t *)nfs_vnop_pageout },     /* Pageout */
	{ &vnop_blktooff_desc, (vnop_t *)nfs_vnop_blktooff },   /* blktooff */
	{ &vnop_offtoblk_desc, (vnop_t *)nfs_vnop_offtoblk },   /* offtoblk */
	{ &vnop_blockmap_desc, (vnop_t *)nfs_vnop_blockmap },   /* blockmap */
	{ &vnop_monitor_desc, (vnop_t *)nfs_vnop_monitor },     /* monitor */
	{ NULL, NULL }
};
const struct vnodeopv_desc fifo_nfsv2nodeop_opv_desc =
{ &fifo_nfsv2nodeop_p, fifo_nfsv2nodeop_entries };
#endif

#if CONFIG_NFS4
#if FIFO
vnop_t **fifo_nfsv4nodeop_p;
static const struct vnodeopv_entry_desc fifo_nfsv4nodeop_entries[] = {
	{ &vnop_default_desc, (vnop_t *)vn_default_error },
	{ &vnop_lookup_desc, (vnop_t *)fifo_lookup },           /* lookup */
	{ &vnop_create_desc, (vnop_t *)fifo_create },           /* create */
	{ &vnop_mknod_desc, (vnop_t *)fifo_mknod },             /* mknod */
	{ &vnop_open_desc, (vnop_t *)fifo_open },               /* open */
	{ &vnop_close_desc, (vnop_t *)nfsfifo_vnop_close },     /* close */
	{ &vnop_getattr_desc, (vnop_t *)nfs4_vnop_getattr },    /* getattr */
	{ &vnop_setattr_desc, (vnop_t *)nfs_vnop_setattr },     /* setattr */
	{ &vnop_read_desc, (vnop_t *)nfsfifo_vnop_read },       /* read */
	{ &vnop_write_desc, (vnop_t *)nfsfifo_vnop_write },     /* write */
	{ &vnop_ioctl_desc, (vnop_t *)fifo_ioctl },             /* ioctl */
	{ &vnop_select_desc, (vnop_t *)fifo_select },           /* select */
	{ &vnop_revoke_desc, (vnop_t *)fifo_revoke },           /* revoke */
	{ &vnop_mmap_desc, (vnop_t *)fifo_mmap },               /* mmap */
	{ &vnop_fsync_desc, (vnop_t *)nfs_vnop_fsync },         /* fsync */
	{ &vnop_remove_desc, (vnop_t *)fifo_remove },           /* remove */
	{ &vnop_link_desc, (vnop_t *)fifo_link },               /* link */
	{ &vnop_rename_desc, (vnop_t *)fifo_rename },           /* rename */
	{ &vnop_mkdir_desc, (vnop_t *)fifo_mkdir },             /* mkdir */
	{ &vnop_rmdir_desc, (vnop_t *)fifo_rmdir },             /* rmdir */
	{ &vnop_symlink_desc, (vnop_t *)fifo_symlink },         /* symlink */
	{ &vnop_readdir_desc, (vnop_t *)fifo_readdir },         /* readdir */
	{ &vnop_readlink_desc, (vnop_t *)fifo_readlink },       /* readlink */
	{ &vnop_inactive_desc, (vnop_t *)nfs_vnop_inactive },   /* inactive */
	{ &vnop_reclaim_desc, (vnop_t *)nfs_vnop_reclaim },     /* reclaim */
	{ &vnop_strategy_desc, (vnop_t *)fifo_strategy },       /* strategy */
	{ &vnop_pathconf_desc, (vnop_t *)fifo_pathconf },       /* pathconf */
	{ &vnop_advlock_desc, (vnop_t *)fifo_advlock },         /* advlock */
	{ &vnop_bwrite_desc, (vnop_t *)vn_bwrite },             /* bwrite */
	{ &vnop_pagein_desc, (vnop_t *)nfs_vnop_pagein },       /* Pagein */
	{ &vnop_pageout_desc, (vnop_t *)nfs_vnop_pageout },     /* Pageout */
	{ &vnop_blktooff_desc, (vnop_t *)nfs_vnop_blktooff },   /* blktooff */
	{ &vnop_offtoblk_desc, (vnop_t *)nfs_vnop_offtoblk },   /* offtoblk */
	{ &vnop_blockmap_desc, (vnop_t *)nfs_vnop_blockmap },   /* blockmap */
	{ &vnop_getxattr_desc, (vnop_t *)nfs4_vnop_getxattr },  /* getxattr */
	{ &vnop_setxattr_desc, (vnop_t *)nfs4_vnop_setxattr },  /* setxattr */
	{ &vnop_removexattr_desc, (vnop_t *)nfs4_vnop_removexattr },/* removexattr */
	{ &vnop_listxattr_desc, (vnop_t *)nfs4_vnop_listxattr },/* listxattr */
#if NAMEDSTREAMS
	{ &vnop_getnamedstream_desc, (vnop_t *)nfs4_vnop_getnamedstream },      /* getnamedstream */
	{ &vnop_makenamedstream_desc, (vnop_t *)nfs4_vnop_makenamedstream },    /* makenamedstream */
	{ &vnop_removenamedstream_desc, (vnop_t *)nfs4_vnop_removenamedstream },/* removenamedstream */
#endif
	{ &vnop_monitor_desc, (vnop_t *)nfs_vnop_monitor },     /* monitor */
	{ NULL, NULL }
};
const struct vnodeopv_desc fifo_nfsv4nodeop_opv_desc =
{ &fifo_nfsv4nodeop_p, fifo_nfsv4nodeop_entries };
#endif /* FIFO */
#endif /* CONFIG_NFS4 */

int nfs_sillyrename(nfsnode_t, nfsnode_t, struct componentname *, vfs_context_t);
int nfs_getattr_internal(nfsnode_t, struct nfs_vattr *, vfs_context_t, int);
int nfs_refresh_fh(nfsnode_t, vfs_context_t);


/*
 * Find the slot in the access cache for this UID.
 * If adding and no existing slot is found, reuse slots in FIFO order.
 * The index of the next slot to use is kept in the last entry of the n_access array.
 */
int
nfs_node_access_slot(nfsnode_t np, uid_t uid, int add)
{
	int slot;

	for (slot = 0; slot < NFS_ACCESS_CACHE_SIZE; slot++) {
		if (np->n_accessuid[slot] == uid) {
			break;
		}
	}
	if (slot == NFS_ACCESS_CACHE_SIZE) {
		if (!add) {
			return -1;
		}
		slot = np->n_access[NFS_ACCESS_CACHE_SIZE];
		np->n_access[NFS_ACCESS_CACHE_SIZE] = (slot + 1) % NFS_ACCESS_CACHE_SIZE;
	}
	return slot;
}
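
/*
 * Illustrative sketch (added commentary, not original source): say
 * NFS_ACCESS_CACHE_SIZE were 3.  The cache then acts as a small ring
 * buffer keyed by uid, with n_access[3] holding the next slot to
 * recycle, so adds for unseen uids walk the slots in FIFO order:
 *
 *	slot = nfs_node_access_slot(np, 501, 1); // miss: returns 0, next-reuse index -> 1
 *	slot = nfs_node_access_slot(np, 502, 1); // miss: returns 1, next-reuse index -> 2
 *	slot = nfs_node_access_slot(np, 501, 0); // hit at slot 0, index unchanged
 *
 * Note the hits above assume the caller filled in n_accessuid[slot]
 * (along with n_accessstamp[slot] and n_access[slot]) after each add,
 * as nfs3_access_rpc() does below.
 */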

int
nfs3_access_rpc(nfsnode_t np, u_int32_t *access, int rpcflags, vfs_context_t ctx)
{
	int error = 0, lockerror = ENOENT, status, slot;
	uint32_t access_result = 0;
	u_int64_t xid;
	struct nfsm_chain nmreq, nmrep;
	struct nfsmount *nmp;
	struct timeval now;
	uid_t uid;

	nfsm_chain_null(&nmreq);
	nfsm_chain_null(&nmrep);

	nfsm_chain_build_alloc_init(error, &nmreq, NFSX_FH(NFS_VER3) + NFSX_UNSIGNED);
	nfsm_chain_add_fh(error, &nmreq, NFS_VER3, np->n_fhp, np->n_fhsize);
	nfsm_chain_add_32(error, &nmreq, *access);
	nfsm_chain_build_done(error, &nmreq);
	nfsmout_if(error);
	error = nfs_request2(np, NULL, &nmreq, NFSPROC_ACCESS,
	    vfs_context_thread(ctx), vfs_context_ucred(ctx),
	    NULL, rpcflags, &nmrep, &xid, &status);
	if ((lockerror = nfs_node_lock(np))) {
		error = lockerror;
	}
	nfsm_chain_postop_attr_update(error, &nmrep, np, &xid);
	if (!error) {
		error = status;
	}
	nfsm_chain_get_32(error, &nmrep, access_result);
	nfsmout_if(error);

	/* XXXab do we really need mount here, also why are we doing access cache management here? */
	nmp = NFSTONMP(np);
	if (nfs_mount_gone(nmp)) {
		error = ENXIO;
	}
	nfsmout_if(error);

#if CONFIG_NFS_GSS
	if (auth_is_kerberized(np->n_auth) || auth_is_kerberized(nmp->nm_auth)) {
		uid = nfs_cred_getasid2uid(vfs_context_ucred(ctx));
	} else {
		uid = kauth_cred_getuid(vfs_context_ucred(ctx));
	}
#else
	uid = kauth_cred_getuid(vfs_context_ucred(ctx));
#endif /* CONFIG_NFS_GSS */
	slot = nfs_node_access_slot(np, uid, 1);
	np->n_accessuid[slot] = uid;
	microuptime(&now);
	np->n_accessstamp[slot] = now.tv_sec;
	np->n_access[slot] = access_result;

	/*
	 * If we asked for DELETE but didn't get it, the server
	 * may simply not support returning that bit (possible
	 * on UNIX systems). So, we'll assume that it is OK,
	 * and just let any subsequent delete action fail if it
	 * really isn't deletable.
	 */
	if ((*access & NFS_ACCESS_DELETE) &&
	    !(np->n_access[slot] & NFS_ACCESS_DELETE)) {
		np->n_access[slot] |= NFS_ACCESS_DELETE;
	}
	/* ".zfs" subdirectories may erroneously give a denied answer for add/remove */
	if (nfs_access_dotzfs && (np->n_flag & NISDOTZFSCHILD)) {
		np->n_access[slot] |= (NFS_ACCESS_MODIFY | NFS_ACCESS_EXTEND | NFS_ACCESS_DELETE);
	}
	/* pass back the access returned with this request */
	*access = np->n_access[slot];
nfsmout:
	if (!lockerror) {
		nfs_node_unlock(np);
	}
	nfsm_chain_cleanup(&nmreq);
	nfsm_chain_cleanup(&nmrep);
	return error;
}


/*
 * NFS access vnode op.
 * For NFS version 2, just return ok. File accesses may fail later.
 * For NFS version 3+, use the access RPC to check accessibility. If file
 * permissions are changed on the server, accesses might still fail later.
 */
int
nfs_vnop_access(
	struct vnop_access_args /* {
	                         *  struct vnodeop_desc *a_desc;
	                         *  vnode_t a_vp;
	                         *  int a_action;
	                         *  vfs_context_t a_context;
	                         *  } */*ap)
{
	vfs_context_t ctx = ap->a_context;
	vnode_t vp = ap->a_vp;
	int error = 0, slot, dorpc, rpcflags = 0;
	u_int32_t access, waccess;
	nfsnode_t np = VTONFS(vp);
	struct nfsmount *nmp;
	int nfsvers;
	struct timeval now;
	uid_t uid;

	nmp = VTONMP(vp);
	if (nfs_mount_gone(nmp)) {
		return ENXIO;
	}
	nfsvers = nmp->nm_vers;


	if (nfsvers == NFS_VER2 || NMFLAG(nmp, NOOPAQUE_AUTH)) {
		if ((ap->a_action & KAUTH_VNODE_WRITE_RIGHTS) &&
		    vfs_isrdonly(vnode_mount(vp))) {
			return EROFS;
		}
		return 0;
	}

	/*
	 * For NFS v3, do an access rpc, otherwise you are stuck emulating
	 * ufs_access() locally using the vattr. This may not be correct,
	 * since the server may apply other access criteria such as
	 * client uid-->server uid mapping that we do not know about, but
	 * this is better than just returning anything that is lying about
	 * in the cache.
	 */

	/*
	 * Convert KAUTH primitives to NFS access rights.
	 */
	access = 0;
	if (vnode_isdir(vp)) {
		/* directory */
		if (ap->a_action &
		    (KAUTH_VNODE_LIST_DIRECTORY |
		    KAUTH_VNODE_READ_EXTATTRIBUTES)) {
			access |= NFS_ACCESS_READ;
		}
		if (ap->a_action & KAUTH_VNODE_SEARCH) {
			access |= NFS_ACCESS_LOOKUP;
		}
		if (ap->a_action &
		    (KAUTH_VNODE_ADD_FILE |
		    KAUTH_VNODE_ADD_SUBDIRECTORY)) {
			access |= NFS_ACCESS_MODIFY | NFS_ACCESS_EXTEND;
		}
		if (ap->a_action & KAUTH_VNODE_DELETE_CHILD) {
			access |= NFS_ACCESS_MODIFY;
		}
	} else {
		/* file */
		if (ap->a_action &
		    (KAUTH_VNODE_READ_DATA |
		    KAUTH_VNODE_READ_EXTATTRIBUTES)) {
			access |= NFS_ACCESS_READ;
		}
		if (ap->a_action & KAUTH_VNODE_WRITE_DATA) {
			access |= NFS_ACCESS_MODIFY | NFS_ACCESS_EXTEND;
		}
		if (ap->a_action & KAUTH_VNODE_APPEND_DATA) {
			access |= NFS_ACCESS_EXTEND;
		}
		if (ap->a_action & KAUTH_VNODE_EXECUTE) {
			access |= NFS_ACCESS_EXECUTE;
		}
	}
	/* common */
	if (ap->a_action & KAUTH_VNODE_DELETE) {
		access |= NFS_ACCESS_DELETE;
	}
	if (ap->a_action &
	    (KAUTH_VNODE_WRITE_ATTRIBUTES |
	    KAUTH_VNODE_WRITE_EXTATTRIBUTES |
	    KAUTH_VNODE_WRITE_SECURITY)) {
		access |= NFS_ACCESS_MODIFY;
	}
	/* XXX this is pretty dubious */
	if (ap->a_action & KAUTH_VNODE_CHANGE_OWNER) {
		access |= NFS_ACCESS_MODIFY;
	}
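
	/*
	 * Summary of the KAUTH -> NFS_ACCESS mapping above (a reading aid
	 * derived from the code itself, not original commentary):
	 *
	 *	LIST_DIRECTORY / READ_DATA / READ_EXTATTRIBUTES -> READ
	 *	SEARCH (dir)                                    -> LOOKUP
	 *	ADD_FILE / ADD_SUBDIRECTORY (dir),
	 *	    WRITE_DATA (file)                           -> MODIFY | EXTEND
	 *	DELETE_CHILD (dir), WRITE_ATTRIBUTES,
	 *	    WRITE_EXTATTRIBUTES, WRITE_SECURITY,
	 *	    CHANGE_OWNER                                -> MODIFY
	 *	APPEND_DATA (file)                              -> EXTEND
	 *	EXECUTE (file)                                  -> EXECUTE
	 *	DELETE                                          -> DELETE
	 */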

	/* if caching, always ask for every right */
	if (nfs_access_cache_timeout > 0) {
		waccess = NFS_ACCESS_READ | NFS_ACCESS_MODIFY |
		    NFS_ACCESS_EXTEND | NFS_ACCESS_EXECUTE |
		    NFS_ACCESS_DELETE | NFS_ACCESS_LOOKUP;
	} else {
		waccess = access;
	}

	if ((error = nfs_node_lock(np))) {
		return error;
	}

	/*
	 * Does our cached result allow us to give a definite yes to
	 * this request?
	 */
#if CONFIG_NFS_GSS
	if (auth_is_kerberized(np->n_auth) || auth_is_kerberized(nmp->nm_auth)) {
		uid = nfs_cred_getasid2uid(vfs_context_ucred(ctx));
	} else {
		uid = kauth_cred_getuid(vfs_context_ucred(ctx));
	}
#else
	uid = kauth_cred_getuid(vfs_context_ucred(ctx));
#endif /* CONFIG_NFS_GSS */
	slot = nfs_node_access_slot(np, uid, 0);
	dorpc = 1;
	if (access == 0) {
		/* not asking for any rights understood by NFS, so don't bother doing an RPC */
		/* OSAddAtomic(1, &nfsstats.accesscache_hits); */
		dorpc = 0;
		waccess = 0;
	} else if (NACCESSVALID(np, slot)) {
		microuptime(&now);
		if (((now.tv_sec < (np->n_accessstamp[slot] + nfs_access_cache_timeout)) &&
		    ((np->n_access[slot] & access) == access)) || nfs_use_cache(nmp)) {
			/* OSAddAtomic(1, &nfsstats.accesscache_hits); */
			dorpc = 0;
			waccess = np->n_access[slot];
		}
	}
	nfs_node_unlock(np);
	if (dorpc) {
		/* Either a no, or a don't know. Go to the wire. */
		/* OSAddAtomic(1, &nfsstats.accesscache_misses); */

		/*
		 * Allow an access call to timeout if we have it cached
		 * so we won't hang if the server isn't responding.
		 */
		if (NACCESSVALID(np, slot)) {
			rpcflags |= R_SOFT;
		}

		error = nmp->nm_funcs->nf_access_rpc(np, &waccess, rpcflags, ctx);

		/*
		 * If the server didn't respond return the cached access.
		 */
		if ((error == ETIMEDOUT) && (rpcflags & R_SOFT)) {
			error = 0;
			waccess = np->n_access[slot];
		}
	}
	if (!error && ((waccess & access) != access)) {
		error = EACCES;
	}

	return error;
}


/*
 * NFS open vnode op
 *
 * Perform various update/invalidation checks and then add the
 * open to the node. Regular files will have an open file structure
 * on the node and, for NFSv4, perform an OPEN request on the server.
 */
int
nfs_vnop_open(
	struct vnop_open_args /* {
	                       *  struct vnodeop_desc *a_desc;
	                       *  vnode_t a_vp;
	                       *  int a_mode;
	                       *  vfs_context_t a_context;
	                       *  } */*ap)
{
	vfs_context_t ctx = ap->a_context;
	vnode_t vp = ap->a_vp;
	nfsnode_t np = VTONFS(vp);
	struct nfsmount *nmp;
	int error, accessMode, denyMode, opened = 0;
	struct nfs_open_owner *noop = NULL;
	struct nfs_open_file *nofp = NULL;
	enum vtype vtype;

	if (!(ap->a_mode & (FREAD | FWRITE))) {
		return EINVAL;
	}

	nmp = VTONMP(vp);
	if (nfs_mount_gone(nmp)) {
		return ENXIO;
	}
	if (np->n_flag & NREVOKE) {
		return EIO;
	}

	vtype = vnode_vtype(vp);
	if ((vtype != VREG) && (vtype != VDIR) && (vtype != VLNK)) {
		return EACCES;
	}

	/* First, check if we need to update/invalidate */
	if (ISSET(np->n_flag, NUPDATESIZE)) {
		nfs_data_update_size(np, 0);
	}
	if ((error = nfs_node_lock(np))) {
		return error;
	}
	if (np->n_flag & NNEEDINVALIDATE) {
		np->n_flag &= ~NNEEDINVALIDATE;
		if (vtype == VDIR) {
			nfs_invaldir(np);
		}
		nfs_node_unlock(np);
		nfs_vinvalbuf(vp, V_SAVE | V_IGNORE_WRITEERR, ctx, 1);
		if ((error = nfs_node_lock(np))) {
			return error;
		}
	}
	if (vtype == VREG) {
		np->n_lastrahead = -1;
	}
	if (np->n_flag & NMODIFIED) {
		if (vtype == VDIR) {
			nfs_invaldir(np);
		}
		nfs_node_unlock(np);
		if ((error = nfs_vinvalbuf(vp, V_SAVE | V_IGNORE_WRITEERR, ctx, 1))) {
			return error;
		}
	} else {
		nfs_node_unlock(np);
	}

	/* nfs_getattr() will check changed and purge caches */
	if ((error = nfs_getattr(np, NULL, ctx, NGA_UNCACHED))) {
		return error;
	}

	if (vtype != VREG) {
		/* Just mark that it was opened */
		lck_mtx_lock(&np->n_openlock);
		np->n_openrefcnt++;
		lck_mtx_unlock(&np->n_openlock);
		return 0;
	}

	/* mode contains some combination of: FREAD, FWRITE, O_SHLOCK, O_EXLOCK */
	accessMode = 0;
	if (ap->a_mode & FREAD) {
		accessMode |= NFS_OPEN_SHARE_ACCESS_READ;
	}
	if (ap->a_mode & FWRITE) {
		accessMode |= NFS_OPEN_SHARE_ACCESS_WRITE;
	}
	if (ap->a_mode & O_EXLOCK) {
		denyMode = NFS_OPEN_SHARE_DENY_BOTH;
	} else if (ap->a_mode & O_SHLOCK) {
		denyMode = NFS_OPEN_SHARE_DENY_WRITE;
	} else {
		denyMode = NFS_OPEN_SHARE_DENY_NONE;
	}
	// XXX don't do deny modes just yet (and never do it for !v4)
	denyMode = NFS_OPEN_SHARE_DENY_NONE;

	noop = nfs_open_owner_find(nmp, vfs_context_ucred(ctx), 1);
	if (!noop) {
		return ENOMEM;
	}

restart:
	error = nfs_mount_state_in_use_start(nmp, vfs_context_thread(ctx));
	if (error) {
		nfs_open_owner_rele(noop);
		return error;
	}
	if (np->n_flag & NREVOKE) {
		error = EIO;
		nfs_mount_state_in_use_end(nmp, 0);
		nfs_open_owner_rele(noop);
		return error;
	}

	error = nfs_open_file_find(np, noop, &nofp, accessMode, denyMode, 1);
	if (!error && (nofp->nof_flags & NFS_OPEN_FILE_LOST)) {
		NP(np, "nfs_vnop_open: LOST %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
		error = EIO;
	}
#if CONFIG_NFS4
	if (!error && (nofp->nof_flags & NFS_OPEN_FILE_REOPEN)) {
		nfs_mount_state_in_use_end(nmp, 0);
		error = nfs4_reopen(nofp, vfs_context_thread(ctx));
		nofp = NULL;
		if (!error) {
			goto restart;
		}
	}
#endif
	if (!error) {
		error = nfs_open_file_set_busy(nofp, vfs_context_thread(ctx));
	}
	if (error) {
		nofp = NULL;
		goto out;
	}

	if (nmp->nm_vers < NFS_VER4) {
		/*
		 * NFS v2/v3 opens are always allowed - so just add it.
		 */
		nfs_open_file_add_open(nofp, accessMode, denyMode, 0);
		goto out;
	}

	/*
	 * If we just created the file and the modes match, then we simply use
	 * the open performed in the create. Otherwise, send the request.
	 */
	if ((nofp->nof_flags & NFS_OPEN_FILE_CREATE) &&
	    (nofp->nof_creator == current_thread()) &&
	    (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH) &&
	    (denyMode == NFS_OPEN_SHARE_DENY_NONE)) {
		nofp->nof_flags &= ~NFS_OPEN_FILE_CREATE;
		nofp->nof_creator = NULL;
	} else {
#if CONFIG_NFS4
		if (!opened) {
			error = nfs4_open(np, nofp, accessMode, denyMode, ctx);
		}
#endif
		if ((error == EACCES) && (nofp->nof_flags & NFS_OPEN_FILE_CREATE) &&
		    (nofp->nof_creator == current_thread())) {
			/*
			 * Ugh. This can happen if we just created the file with read-only
			 * perms and we're trying to open it for real with different modes
			 * (e.g. write-only or with a deny mode) and the server decides to
			 * not allow the second open because of the read-only perms.
			 * The best we can do is to just use the create's open.
			 * We may have access we don't need or we may not have a requested
			 * deny mode. We may log complaints later, but we'll try to avoid it.
			 */
			if (denyMode != NFS_OPEN_SHARE_DENY_NONE) {
				NP(np, "nfs_vnop_open: deny mode foregone on create, %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
			}
			nofp->nof_creator = NULL;
			error = 0;
		}
		if (error) {
			goto out;
		}
		opened = 1;
		/*
		 * If we had just created the file, we already had it open.
		 * If the actual open mode is less than what we grabbed at
		 * create time, then we'll downgrade the open here.
		 */
		if ((nofp->nof_flags & NFS_OPEN_FILE_CREATE) &&
		    (nofp->nof_creator == current_thread())) {
			error = nfs_close(np, nofp, NFS_OPEN_SHARE_ACCESS_BOTH, NFS_OPEN_SHARE_DENY_NONE, ctx);
			if (error) {
				NP(np, "nfs_vnop_open: create close error %d, %d", error, kauth_cred_getuid(nofp->nof_owner->noo_cred));
			}
			if (!nfs_mount_state_error_should_restart(error)) {
				error = 0;
				nofp->nof_flags &= ~NFS_OPEN_FILE_CREATE;
			}
		}
	}

out:
	if (nofp) {
		nfs_open_file_clear_busy(nofp);
	}
	if (nfs_mount_state_in_use_end(nmp, error)) {
		nofp = NULL;
		goto restart;
	}
	if (error) {
		NP(np, "nfs_vnop_open: error %d, %d", error, kauth_cred_getuid(noop->noo_cred));
	}
	if (noop) {
		nfs_open_owner_rele(noop);
	}
	if (!error && vtype == VREG && (ap->a_mode & FWRITE)) {
		lck_mtx_lock(&nmp->nm_lock);
		nmp->nm_state &= ~NFSSTA_SQUISHY;
		nmp->nm_curdeadtimeout = nmp->nm_deadtimeout;
		if (nmp->nm_curdeadtimeout <= 0) {
			nmp->nm_deadto_start = 0;
		}
		nmp->nm_writers++;
		lck_mtx_unlock(&nmp->nm_lock);
	}

	return error;
}

static uint32_t
nfs_no_of_open_file_writers(nfsnode_t np)
{
	uint32_t writers = 0;
	struct nfs_open_file *nofp;

	TAILQ_FOREACH(nofp, &np->n_opens, nof_link) {
		writers += nofp->nof_w + nofp->nof_rw + nofp->nof_w_dw + nofp->nof_rw_dw +
		    nofp->nof_w_drw + nofp->nof_rw_drw + nofp->nof_d_w_dw +
		    nofp->nof_d_rw_dw + nofp->nof_d_w_drw + nofp->nof_d_rw_drw +
		    nofp->nof_d_w + nofp->nof_d_rw;
	}

	return writers;
}
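
/*
 * Note on the counter naming used above (a reading aid, not original
 * commentary): each nfs_open_file keeps one count per access/deny
 * combination.  The base letters give the access mode (r, w, rw), a
 * "_dw"/"_drw" suffix marks opens with deny-write/deny-both share
 * modes, and a "d_" prefix marks opens held under a delegation.  Any
 * open that includes write access, delegated or not and regardless of
 * deny mode, counts as a writer here.
 */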

/*
 * NFS close vnode op
 *
 * What an NFS client should do upon close after writing is a debatable issue.
 * Most NFS clients push delayed writes to the server upon close, basically for
 * two reasons:
 * 1 - So that any write errors may be reported back to the client process
 *     doing the close system call. By far the two most likely errors are
 *     NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure.
 * 2 - To put a worst case upper bound on cache inconsistency between
 *     multiple clients for the file.
 * There is also a consistency problem for Version 2 of the protocol w.r.t.
 * not being able to tell if other clients are writing a file concurrently,
 * since there is no way of knowing if the changed modify time in the reply
 * is only due to the write for this client.
 * (NFS Version 3 provides weak cache consistency data in the reply that
 * should be sufficient to detect and handle this case.)
 *
 * The current code does the following:
 * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers
 * for NFS Version 3 - flush dirty buffers to the server but don't invalidate them.
 * for NFS Version 4 - basically the same as NFSv3
 */
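/*
 * A minimal sketch of the version-dependent flush policy described
 * above (illustrative only; the real logic lives in nfs_vnop_close()
 * below, which also checks NMODIFIED and FWRITE first):
 *
 *	if (nfsvers != NFS_VER2)
 *		error = nfs_flush(np, MNT_WAIT, thd, 0);   // flush, keep buffers
 *	else
 *		error = nfs_vinvalbuf(vp, V_SAVE, ctx, 1); // flush and invalidate
 */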
int
nfs_vnop_close(
	struct vnop_close_args /* {
	                        *  struct vnodeop_desc *a_desc;
	                        *  vnode_t a_vp;
	                        *  int a_fflag;
	                        *  vfs_context_t a_context;
	                        *  } */*ap)
{
	vfs_context_t ctx = ap->a_context;
	vnode_t vp = ap->a_vp;
	nfsnode_t np = VTONFS(vp);
	struct nfsmount *nmp;
	int error = 0, error1, nfsvers;
	int fflag = ap->a_fflag;
	enum vtype vtype;
	int accessMode, denyMode;
	struct nfs_open_owner *noop = NULL;
	struct nfs_open_file *nofp = NULL;

	nmp = VTONMP(vp);
	if (!nmp) {
		return ENXIO;
	}
	nfsvers = nmp->nm_vers;
	vtype = vnode_vtype(vp);

	/* First, check if we need to update/flush/invalidate */
	if (ISSET(np->n_flag, NUPDATESIZE)) {
		nfs_data_update_size(np, 0);
	}
	nfs_node_lock_force(np);
	if (np->n_flag & NNEEDINVALIDATE) {
		np->n_flag &= ~NNEEDINVALIDATE;
		nfs_node_unlock(np);
		nfs_vinvalbuf(vp, V_SAVE | V_IGNORE_WRITEERR, ctx, 1);
		nfs_node_lock_force(np);
	}
	if ((vtype == VREG) && (np->n_flag & NMODIFIED) && (fflag & FWRITE)) {
		/* we're closing an open for write and the file is modified, so flush it */
		nfs_node_unlock(np);
		if (nfsvers != NFS_VER2) {
			error = nfs_flush(np, MNT_WAIT, vfs_context_thread(ctx), 0);
		} else {
			error = nfs_vinvalbuf(vp, V_SAVE, ctx, 1);
		}
		nfs_node_lock_force(np);
		NATTRINVALIDATE(np);
	}
	if (np->n_flag & NWRITEERR) {
		np->n_flag &= ~NWRITEERR;
		error = np->n_error;
	}
	nfs_node_unlock(np);

	if (vtype != VREG) {
		/* Just mark that it was closed */
		lck_mtx_lock(&np->n_openlock);
		if (np->n_openrefcnt == 0) {
			if (fflag & (FREAD | FWRITE)) {
				NP(np, "nfs_vnop_close: open reference underrun");
				error = EINVAL;
			}
		} else if (fflag & (FREAD | FWRITE)) {
			np->n_openrefcnt--;
		} else {
			/* No FREAD/FWRITE set - probably the final close */
			np->n_openrefcnt = 0;
		}
		lck_mtx_unlock(&np->n_openlock);
		return error;
	}
	error1 = error;

	/* fflag should contain some combination of: FREAD, FWRITE, FHASLOCK */
	accessMode = 0;
	if (fflag & FREAD) {
		accessMode |= NFS_OPEN_SHARE_ACCESS_READ;
	}
	if (fflag & FWRITE) {
		accessMode |= NFS_OPEN_SHARE_ACCESS_WRITE;
	}
	// XXX It would be nice if we still had the O_EXLOCK/O_SHLOCK flags that were on the open
	// if (fflag & O_EXLOCK)
	//      denyMode = NFS_OPEN_SHARE_DENY_BOTH;
	// else if (fflag & O_SHLOCK)
	//      denyMode = NFS_OPEN_SHARE_DENY_WRITE;
	// else
	//      denyMode = NFS_OPEN_SHARE_DENY_NONE;
#if 0 // Not yet
	if (fflag & FHASLOCK) {
		/* XXX assume FHASLOCK is for the deny mode and not flock */
		/* FHASLOCK flock will be unlocked in the close path, but the flag is not cleared. */
		if (nofp->nof_deny & NFS_OPEN_SHARE_DENY_READ) {
			denyMode = NFS_OPEN_SHARE_DENY_BOTH;
		} else if (nofp->nof_deny & NFS_OPEN_SHARE_DENY_WRITE) {
			denyMode = NFS_OPEN_SHARE_DENY_WRITE;
		} else {
			denyMode = NFS_OPEN_SHARE_DENY_NONE;
		}
	} else {
		denyMode = NFS_OPEN_SHARE_DENY_NONE;
	}
#else
	// XXX don't do deny modes just yet (and never do it for !v4)
	denyMode = NFS_OPEN_SHARE_DENY_NONE;
#endif

	if (!accessMode) {
		/*
		 * No mode given to close?
		 * Guess this is the final close.
		 * We should unlock all locks and close all opens.
		 */
		uint32_t writers;
		mount_t mp = vnode_mount(vp);
		int force = (!mp || vfs_isforce(mp));

		writers = nfs_no_of_open_file_writers(np);
		nfs_release_open_state_for_node(np, force);
		if (writers) {
			lck_mtx_lock(&nmp->nm_lock);
			if (writers > nmp->nm_writers) {
				NP(np, "nfs_vnop_close: number of write opens for mount underrun. Node has %d"
				    " opens for write. Mount has total of %d opens for write\n",
				    writers, nmp->nm_writers);
				nmp->nm_writers = 0;
			} else {
				nmp->nm_writers -= writers;
			}
			lck_mtx_unlock(&nmp->nm_lock);
		}

		return error;
	} else if (fflag & FWRITE) {
		lck_mtx_lock(&nmp->nm_lock);
		if (nmp->nm_writers == 0) {
			NP(np, "nfs_vnop_close: removing open writer from mount, but mount has no files open for writing");
		} else {
			nmp->nm_writers--;
		}
		lck_mtx_unlock(&nmp->nm_lock);
	}


	noop = nfs_open_owner_find(nmp, vfs_context_ucred(ctx), 0);
	if (!noop) {
		// printf("nfs_vnop_close: can't get open owner!\n");
		return EIO;
	}

restart:
	error = nfs_mount_state_in_use_start(nmp, NULL);
	if (error) {
		nfs_open_owner_rele(noop);
		return error;
	}

	error = nfs_open_file_find(np, noop, &nofp, 0, 0, 0);
#if CONFIG_NFS4
	if (!error && (nofp->nof_flags & NFS_OPEN_FILE_REOPEN)) {
		nfs_mount_state_in_use_end(nmp, 0);
		error = nfs4_reopen(nofp, NULL);
		nofp = NULL;
		if (!error) {
			goto restart;
		}
	}
#endif
	if (error) {
		NP(np, "nfs_vnop_close: no open file for owner, error %d, %d", error, kauth_cred_getuid(noop->noo_cred));
		error = EBADF;
		goto out;
	}
	error = nfs_open_file_set_busy(nofp, NULL);
	if (error) {
		nofp = NULL;
		goto out;
	}

	error = nfs_close(np, nofp, accessMode, denyMode, ctx);
	if (error) {
		NP(np, "nfs_vnop_close: close error %d, %d", error, kauth_cred_getuid(noop->noo_cred));
	}

out:
	if (nofp) {
		nfs_open_file_clear_busy(nofp);
	}
	if (nfs_mount_state_in_use_end(nmp, error)) {
		nofp = NULL;
		goto restart;
	}
	if (!error) {
		error = error1;
	}
	if (error) {
		NP(np, "nfs_vnop_close: error %d, %d", error, kauth_cred_getuid(noop->noo_cred));
	}
	if (noop) {
		nfs_open_owner_rele(noop);
	}
	return error;
}

/*
 * nfs_close(): common function that does all the heavy lifting of file closure
 *
 * Takes an open file structure and a set of access/deny modes and figures out how
 * to update the open file structure (and the state on the server) appropriately.
 */
int
nfs_close(
	nfsnode_t np,
	struct nfs_open_file *nofp,
	uint32_t accessMode,
	uint32_t denyMode,
	__unused vfs_context_t ctx)
{
#if CONFIG_NFS4
	struct nfs_lock_owner *nlop;
#endif
	int error = 0, changed = 0, delegated = 0, closed = 0, downgrade = 0;
	uint32_t newAccessMode, newDenyMode;

	/* warn if modes don't match current state */
	if (((accessMode & nofp->nof_access) != accessMode) || ((denyMode & nofp->nof_deny) != denyMode)) {
		NP(np, "nfs_close: mode mismatch %d %d, current %d %d, %d",
		    accessMode, denyMode, nofp->nof_access, nofp->nof_deny,
		    kauth_cred_getuid(nofp->nof_owner->noo_cred));
	}

	/*
	 * If we're closing a write-only open, we may not have a write-only count
	 * if we also grabbed read access. So, check the read-write count.
	 */
	if (denyMode == NFS_OPEN_SHARE_DENY_NONE) {
		if ((accessMode == NFS_OPEN_SHARE_ACCESS_WRITE) &&
		    (nofp->nof_w == 0) && (nofp->nof_d_w == 0) &&
		    (nofp->nof_rw || nofp->nof_d_rw)) {
			accessMode = NFS_OPEN_SHARE_ACCESS_BOTH;
		}
	} else if (denyMode == NFS_OPEN_SHARE_DENY_WRITE) {
		if ((accessMode == NFS_OPEN_SHARE_ACCESS_WRITE) &&
		    (nofp->nof_w_dw == 0) && (nofp->nof_d_w_dw == 0) &&
		    (nofp->nof_rw_dw || nofp->nof_d_rw_dw)) {
			accessMode = NFS_OPEN_SHARE_ACCESS_BOTH;
		}
	} else { /* NFS_OPEN_SHARE_DENY_BOTH */
		if ((accessMode == NFS_OPEN_SHARE_ACCESS_WRITE) &&
		    (nofp->nof_w_drw == 0) && (nofp->nof_d_w_drw == 0) &&
		    (nofp->nof_rw_drw || nofp->nof_d_rw_drw)) {
			accessMode = NFS_OPEN_SHARE_ACCESS_BOTH;
		}
	}

	nfs_open_file_remove_open_find(nofp, accessMode, denyMode, &newAccessMode, &newDenyMode, &delegated);
	if ((newAccessMode != nofp->nof_access) || (newDenyMode != nofp->nof_deny)) {
		changed = 1;
	} else {
		changed = 0;
	}

	if (NFSTONMP(np)->nm_vers < NFS_VER4) {
		/* NFS v2/v3 closes simply need to remove the open. */
		goto v3close;
	}
#if CONFIG_NFS4
	if ((newAccessMode == 0) || (nofp->nof_opencnt == 1)) {
		/*
		 * No more access after this close, so clean up and close it.
		 * Don't send a close RPC if we're closing a delegated open.
		 */
		nfs_wait_bufs(np);
		closed = 1;
		if (!delegated && !(nofp->nof_flags & NFS_OPEN_FILE_LOST)) {
			error = nfs4_close_rpc(np, nofp, vfs_context_thread(ctx), vfs_context_ucred(ctx), 0);
		}
		if (error == NFSERR_LOCKS_HELD) {
			/*
			 * Hmm... the server says we have locks we need to release first
			 * Find the lock owner and try to unlock everything.
			 */
			nlop = nfs_lock_owner_find(np, vfs_context_proc(ctx), 0);
			if (nlop) {
				nfs4_unlock_rpc(np, nlop, F_WRLCK, 0, UINT64_MAX,
				    0, vfs_context_thread(ctx), vfs_context_ucred(ctx));
				nfs_lock_owner_rele(nlop);
			}
			error = nfs4_close_rpc(np, nofp, vfs_context_thread(ctx), vfs_context_ucred(ctx), 0);
		}
	} else if (changed) {
		/*
		 * File is still open but with less access, so downgrade the open.
		 * Don't send a downgrade RPC if we're closing a delegated open.
		 */
		if (!delegated && !(nofp->nof_flags & NFS_OPEN_FILE_LOST)) {
			downgrade = 1;
			/*
			 * If we have delegated opens, we should probably claim them before sending
			 * the downgrade because the server may not know the open we are downgrading to.
			 */
			if (nofp->nof_d_rw_drw || nofp->nof_d_w_drw || nofp->nof_d_r_drw ||
			    nofp->nof_d_rw_dw || nofp->nof_d_w_dw || nofp->nof_d_r_dw ||
			    nofp->nof_d_rw || nofp->nof_d_w || nofp->nof_d_r) {
				nfs4_claim_delegated_state_for_open_file(nofp, 0);
			}
			/* need to remove the open before sending the downgrade */
			nfs_open_file_remove_open(nofp, accessMode, denyMode);
			error = nfs4_open_downgrade_rpc(np, nofp, ctx);
			if (error) { /* Hmm.. that didn't work. Add the open back in. */
				nfs_open_file_add_open(nofp, accessMode, denyMode, delegated);
			}
		}
	}
#endif
v3close:
	if (error) {
		NP(np, "nfs_close: error %d, %d", error, kauth_cred_getuid(nofp->nof_owner->noo_cred));
		return error;
	}

	if (!downgrade) {
		nfs_open_file_remove_open(nofp, accessMode, denyMode);
	}

	if (closed) {
		lck_mtx_lock(&nofp->nof_lock);
		if (nofp->nof_r || nofp->nof_d_r || nofp->nof_w || nofp->nof_d_w || nofp->nof_d_rw ||
		    (nofp->nof_rw && !((nofp->nof_flags & NFS_OPEN_FILE_CREATE) && !nofp->nof_creator && (nofp->nof_rw == 1))) ||
		    nofp->nof_r_dw || nofp->nof_d_r_dw || nofp->nof_w_dw || nofp->nof_d_w_dw ||
		    nofp->nof_rw_dw || nofp->nof_d_rw_dw || nofp->nof_r_drw || nofp->nof_d_r_drw ||
		    nofp->nof_w_drw || nofp->nof_d_w_drw || nofp->nof_rw_drw || nofp->nof_d_rw_drw) {
			NP(np, "nfs_close: unexpected count: %u.%u %u.%u %u.%u dw %u.%u %u.%u %u.%u drw %u.%u %u.%u %u.%u flags 0x%x, %d",
			    nofp->nof_r, nofp->nof_d_r, nofp->nof_w, nofp->nof_d_w,
			    nofp->nof_rw, nofp->nof_d_rw, nofp->nof_r_dw, nofp->nof_d_r_dw,
			    nofp->nof_w_dw, nofp->nof_d_w_dw, nofp->nof_rw_dw, nofp->nof_d_rw_dw,
			    nofp->nof_r_drw, nofp->nof_d_r_drw, nofp->nof_w_drw, nofp->nof_d_w_drw,
			    nofp->nof_rw_drw, nofp->nof_d_rw_drw, nofp->nof_flags,
			    kauth_cred_getuid(nofp->nof_owner->noo_cred));
		}
		/* clear out all open info, just to be safe */
		nofp->nof_access = nofp->nof_deny = 0;
		nofp->nof_mmap_access = nofp->nof_mmap_deny = 0;
		nofp->nof_r = nofp->nof_d_r = 0;
		nofp->nof_w = nofp->nof_d_w = 0;
		nofp->nof_rw = nofp->nof_d_rw = 0;
		nofp->nof_r_dw = nofp->nof_d_r_dw = 0;
		nofp->nof_w_dw = nofp->nof_d_w_dw = 0;
		nofp->nof_rw_dw = nofp->nof_d_rw_dw = 0;
		nofp->nof_r_drw = nofp->nof_d_r_drw = 0;
		nofp->nof_w_drw = nofp->nof_d_w_drw = 0;
		nofp->nof_rw_drw = nofp->nof_d_rw_drw = 0;
		nofp->nof_flags &= ~NFS_OPEN_FILE_CREATE;
		lck_mtx_unlock(&nofp->nof_lock);
		/* XXX we may potentially want to clean up idle/unused open file structures */
	}
	if (nofp->nof_flags & NFS_OPEN_FILE_LOST) {
		error = EIO;
		NP(np, "nfs_close: LOST%s, %d", !nofp->nof_opencnt ? " (last)" : "",
		    kauth_cred_getuid(nofp->nof_owner->noo_cred));
	}

	return error;
}


int
nfs3_getattr_rpc(
	nfsnode_t np,
	mount_t mp,
	u_char *fhp,
	size_t fhsize,
	int flags,
	vfs_context_t ctx,
	struct nfs_vattr *nvap,
	u_int64_t *xidp)
{
	struct nfsmount *nmp = mp ? VFSTONFS(mp) : NFSTONMP(np);
	int error = 0, status, nfsvers, rpcflags = 0;
	struct nfsm_chain nmreq, nmrep;

	if (nfs_mount_gone(nmp)) {
		return ENXIO;
	}
	nfsvers = nmp->nm_vers;

	if (flags & NGA_MONITOR) { /* vnode monitor requests should be soft */
		rpcflags = R_RECOVER;
	}

	if (flags & NGA_SOFT) { /* Return ETIMEDOUT if server not responding */
		rpcflags |= R_SOFT;
	}

	nfsm_chain_null(&nmreq);
	nfsm_chain_null(&nmrep);

	nfsm_chain_build_alloc_init(error, &nmreq, NFSX_FH(nfsvers));
	if (nfsvers != NFS_VER2) {
		nfsm_chain_add_32(error, &nmreq, fhsize);
	}
	nfsm_chain_add_opaque(error, &nmreq, fhp, fhsize);
	nfsm_chain_build_done(error, &nmreq);
	nfsmout_if(error);
	error = nfs_request2(np, mp, &nmreq, NFSPROC_GETATTR,
	    vfs_context_thread(ctx), vfs_context_ucred(ctx),
	    NULL, rpcflags, &nmrep, xidp, &status);
	if (!error) {
		error = status;
	}
	nfsmout_if(error);
	error = nfs_parsefattr(nmp, &nmrep, nfsvers, nvap);
nfsmout:
	nfsm_chain_cleanup(&nmreq);
	nfsm_chain_cleanup(&nmrep);
	return error;
}

/*
 * nfs_refresh_fh will attempt to update the file handle for the node.
 *
 * It only does this for symbolic links and regular files that are not currently opened.
 *
 * On success returns 0 and the node's file handle is updated, or ESTALE on failure.
 */
1450 int
1451 nfs_refresh_fh(nfsnode_t np, vfs_context_t ctx)
1452 {
1453 vnode_t dvp, vp = NFSTOV(np);
1454 nfsnode_t dnp;
1455 const char *v_name = vnode_getname(vp);
1456 char *name;
1457 int namelen, fhsize, refreshed;
1458 int error, wanted = 0;
1459 uint8_t *fhp;
1460 struct timespec ts = {.tv_sec = 2, .tv_nsec = 0};
1461
1462 NFS_VNOP_DBG("vnode is %d\n", vnode_vtype(vp));
1463
1464 dvp = vnode_parent(vp);
1465 if ((vnode_vtype(vp) != VREG && vnode_vtype(vp) != VLNK) ||
1466 v_name == NULL || *v_name == '\0' || dvp == NULL) {
1467 if (v_name != NULL) {
1468 vnode_putname(v_name);
1469 }
1470 return ESTALE;
1471 }
1472 dnp = VTONFS(dvp);
1473
1474 namelen = strlen(v_name);
1475 MALLOC(name, char *, namelen + 1, M_TEMP, M_WAITOK);
1476 if (name == NULL) {
1477 vnode_putname(v_name);
1478 return ESTALE;
1479 }
1480 bcopy(v_name, name, namelen + 1);
1481 NFS_VNOP_DBG("Trying to refresh %s : %s\n", v_name, name);
1482 vnode_putname(v_name);
1483
1484 /* Allocate the maximum size file handle */
1485 MALLOC(fhp, uint8_t *, NFS4_FHSIZE, M_TEMP, M_WAITOK);
1486 if (fhp == NULL) {
1487 FREE(name, M_TEMP);
1488 return ESTALE;
1489 }
1490
1491 if ((error = nfs_node_lock(np))) {
1492 FREE(name, M_TEMP);
1493 FREE(fhp, M_TEMP);
1494 return ESTALE;
1495 }
1496
1497 fhsize = np->n_fhsize;
1498 bcopy(np->n_fhp, fhp, fhsize);
1499 while (ISSET(np->n_flag, NREFRESH)) {
1500 SET(np->n_flag, NREFRESHWANT);
1501 NFS_VNOP_DBG("Waiting for refresh of %s\n", name);
1502 msleep(np, &np->n_lock, PZERO - 1, "nfsrefreshwant", &ts);
1503 if ((error = nfs_sigintr(NFSTONMP(np), NULL, vfs_context_thread(ctx), 0))) {
1504 break;
1505 }
1506 }
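/* If the FH no longer matches the copy we saved above, another thread refreshed it while we slept. */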
1507 refreshed = error ? 0 : !NFS_CMPFH(np, fhp, fhsize);
1508 SET(np->n_flag, NREFRESH);
1509 nfs_node_unlock(np);
1510
1511 NFS_VNOP_DBG("error = %d, refreshed = %d\n", error, refreshed);
1512 if (error || refreshed) {
1513 goto nfsmout;
1514 }
1515
1516 /* Check that there are no open references for this file */
1517 lck_mtx_lock(&np->n_openlock);
1518 if (np->n_openrefcnt || !TAILQ_EMPTY(&np->n_opens) || !TAILQ_EMPTY(&np->n_lock_owners)) {
1519 int cnt = 0;
1520 struct nfs_open_file *ofp;
1521
1522 TAILQ_FOREACH(ofp, &np->n_opens, nof_link) {
1523 cnt += ofp->nof_opencnt;
1524 }
1525 if (cnt) {
1526 lck_mtx_unlock(&np->n_openlock);
1527 NFS_VNOP_DBG("Can not refresh file handle for %s with open state\n", name);
1528 NFS_VNOP_DBG("\topenrefcnt = %d, opens = %d lock_owners = %d\n",
1529 np->n_openrefcnt, cnt, !TAILQ_EMPTY(&np->n_lock_owners));
1530 error = ESTALE;
1531 goto nfsmout;
1532 }
1533 }
1534 lck_mtx_unlock(&np->n_openlock);
1535 /*
1536 * Since the FH is currently stale, we should not be able to
1537 * establish any open state until the FH is refreshed.
1538 */
1539
1540 error = nfs_node_lock(np);
1541 nfsmout_if(error);
1542 /*
1543 * Symlinks should never need invalidations and are holding
1544 * the one and only nfsbuf in an uncached acquired state
1545 * trying to do a readlink. So we will hang if we invalidate
1546 * in that case. Only in the VREG case do we need to
1547 * invalidate.
1548 */
1549 if (vnode_vtype(vp) == VREG) {
1550 np->n_flag &= ~NNEEDINVALIDATE;
1551 nfs_node_unlock(np);
1552 error = nfs_vinvalbuf(vp, V_IGNORE_WRITEERR, ctx, 1);
1553 if (error) {
1554 NFS_VNOP_DBG("nfs_vinvalbuf returned %d\n", error);
1555 }
1556 nfsmout_if(error);
1557 } else {
1558 nfs_node_unlock(np);
1559 }
1560
1561 NFS_VNOP_DBG("Looking up %s\n", name);
1562 error = nfs_lookitup(dnp, name, namelen, ctx, &np);
1563 if (error) {
1564 NFS_VNOP_DBG("nfs_lookitup returned %d\n", error);
1565 }
1566
1567 nfsmout:
1568 nfs_node_lock_force(np);
1569 wanted = ISSET(np->n_flag, NREFRESHWANT);
1570 CLR(np->n_flag, NREFRESH | NREFRESHWANT);
1571 nfs_node_unlock(np);
1572 if (wanted) {
1573 wakeup(np);
1574 }
1575
1576 if (error == 0) {
1577 NFS_VNOP_DBG("%s refreshed file handle\n", name);
1578 }
1579
1580 FREE(name, M_TEMP);
1581 FREE(fhp, M_TEMP);
1582
1583 return error ? ESTALE : 0;
1584 }
1585
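/*
 * Get attributes for the node, retrying after an attempt to
 * refresh the file handle whenever the server returns ESTALE.
 */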
1586 int
1587 nfs_getattr(nfsnode_t np, struct nfs_vattr *nvap, vfs_context_t ctx, int flags)
1588 {
1589 int error;
1590
1591 retry:
1592 error = nfs_getattr_internal(np, nvap, ctx, flags);
1593 if (error == ESTALE) {
1594 error = nfs_refresh_fh(np, ctx);
1595 if (!error) {
1596 goto retry;
1597 }
1598 }
1599 return error;
1600 }
1601
1602 int
1603 nfs_getattr_internal(nfsnode_t np, struct nfs_vattr *nvap, vfs_context_t ctx, int flags)
1604 {
1605 struct nfsmount *nmp;
1606 int error = 0, nfsvers, inprogset = 0, wanted = 0, avoidfloods;
1607 struct nfs_vattr nvattr;
1608 struct timespec ts = { .tv_sec = 2, .tv_nsec = 0 };
1609 u_int64_t xid;
1610
1611 FSDBG_TOP(513, np->n_size, np, np->n_vattr.nva_size, np->n_flag);
1612
1613 nmp = NFSTONMP(np);
1614
1615 if (nfs_mount_gone(nmp)) {
1616 return ENXIO;
1617 }
1618 nfsvers = nmp->nm_vers;
1619
1620 if (!nvap) {
1621 nvap = &nvattr;
1622 }
1623 NVATTR_INIT(nvap);
1624
1625 /* Update local times for special files. */
1626 if (np->n_flag & (NACC | NUPD)) {
1627 nfs_node_lock_force(np);
1628 np->n_flag |= NCHG;
1629 nfs_node_unlock(np);
1630 }
1631 /* Update size, if necessary */
1632 if (ISSET(np->n_flag, NUPDATESIZE)) {
1633 nfs_data_update_size(np, 0);
1634 }
1635
1636 error = nfs_node_lock(np);
1637 nfsmout_if(error);
1638 if (!(flags & (NGA_UNCACHED | NGA_MONITOR)) || ((nfsvers >= NFS_VER4) && (np->n_openflags & N_DELEG_MASK))) {
1639 /*
1640 * Use the cache or wait for any getattr in progress if:
1641 * - it's a cached request, or
1642 * - we have a delegation, or
1643 * - the server isn't responding
1644 */
1645 while (1) {
1646 error = nfs_getattrcache(np, nvap, flags);
1647 if (!error || (error != ENOENT)) {
1648 nfs_node_unlock(np);
1649 goto nfsmout;
1650 }
1651 error = 0;
1652 if (!ISSET(np->n_flag, NGETATTRINPROG)) {
1653 break;
1654 }
1655 if (flags & NGA_MONITOR) {
1656 /* no need to wait if a request is pending */
1657 error = EINPROGRESS;
1658 nfs_node_unlock(np);
1659 goto nfsmout;
1660 }
1661 SET(np->n_flag, NGETATTRWANT);
1662 msleep(np, &np->n_lock, PZERO - 1, "nfsgetattrwant", &ts);
1663 if ((error = nfs_sigintr(NFSTONMP(np), NULL, vfs_context_thread(ctx), 0))) {
1664 nfs_node_unlock(np);
1665 goto nfsmout;
1666 }
1667 }
1668 SET(np->n_flag, NGETATTRINPROG);
1669 inprogset = 1;
1670 } else if (!ISSET(np->n_flag, NGETATTRINPROG)) {
1671 SET(np->n_flag, NGETATTRINPROG);
1672 inprogset = 1;
1673 } else if (flags & NGA_MONITOR) {
1674 /* no need to make a request if one is pending */
1675 error = EINPROGRESS;
1676 }
1677 nfs_node_unlock(np);
1678
1679 nmp = NFSTONMP(np);
1680 if (nfs_mount_gone(nmp)) {
1681 error = ENXIO;
1682 }
1683 if (error) {
1684 goto nfsmout;
1685 }
1686
1687 /*
1688 * If we have valid cached attributes and this is some
1689 * softened-up style of mount, return the cached attributes
1690 * when the server doesn't respond.
1691 */
1692 if (NATTRVALID(np) && nfs_use_cache(nmp)) {
1693 flags |= NGA_SOFT;
1694 }
1695
1696 /*
1697 * We might want to try to get both the attributes and access info by
1698 * making an ACCESS call and seeing if it returns updated attributes.
1699 * But don't bother if we aren't caching access info or if the
1700 * attributes returned wouldn't be cached.
1701 */
1702 if (!(flags & NGA_ACL) && (nfsvers != NFS_VER2) && nfs_access_for_getattr && (nfs_access_cache_timeout > 0)) {
1703 if (nfs_attrcachetimeout(np) > 0) {
1704 /* OSAddAtomic(1, &nfsstats.accesscache_misses); */
1705 u_int32_t access = NFS_ACCESS_ALL;
1706 int rpcflags = 0;
1707
1708 /* Return cached attrs if server doesn't respond */
1709 if (flags & NGA_SOFT) {
1710 rpcflags |= R_SOFT;
1711 }
1712
1713 error = nmp->nm_funcs->nf_access_rpc(np, &access, rpcflags, ctx);
1714
1715 if (error == ETIMEDOUT) {
1716 goto returncached;
1717 }
1718
1719 if (error) {
1720 goto nfsmout;
1721 }
1722 nfs_node_lock_force(np);
1723 error = nfs_getattrcache(np, nvap, flags);
1724 nfs_node_unlock(np);
1725 if (!error || (error != ENOENT)) {
1726 goto nfsmout;
1727 }
1728 /* Well, that didn't work... just do a getattr... */
1729 error = 0;
1730 }
1731 }
1732
1733 avoidfloods = 0;
1734
1735 tryagain:
1736 error = nmp->nm_funcs->nf_getattr_rpc(np, NULL, np->n_fhp, np->n_fhsize, flags, ctx, nvap, &xid);
1737 if (!error) {
1738 nfs_node_lock_force(np);
1739 error = nfs_loadattrcache(np, nvap, &xid, 0);
1740 nfs_node_unlock(np);
1741 }
1742
1743 /*
1744 * If the server didn't respond, return cached attributes.
1745 */
1746 returncached:
1747 if ((flags & NGA_SOFT) && (error == ETIMEDOUT)) {
1748 nfs_node_lock_force(np);
1749 error = nfs_getattrcache(np, nvap, flags);
1750 if (!error || (error != ENOENT)) {
1751 nfs_node_unlock(np);
1752 goto nfsmout;
1753 }
1754 nfs_node_unlock(np);
1755 }
1756 nfsmout_if(error);
1757
1758 if (!xid) { /* out-of-order rpc - attributes were dropped */
1759 FSDBG(513, -1, np, np->n_xid >> 32, np->n_xid);
1760 if (avoidfloods++ < 20) {
1761 goto tryagain;
1762 }
1763 /* avoidfloods > 1 is bizarre; at 20, pull the plug */
1764 /* just return the last attributes we got */
1765 }
1766 nfsmout:
1767 nfs_node_lock_force(np);
1768 if (inprogset) {
1769 wanted = ISSET(np->n_flag, NGETATTRWANT);
1770 CLR(np->n_flag, (NGETATTRINPROG | NGETATTRWANT));
1771 }
1772 if (!error) {
1773 /* check if the node changed on us */
1774 vnode_t vp = NFSTOV(np);
1775 enum vtype vtype = vnode_vtype(vp);
1776 if ((vtype == VDIR) && NFS_CHANGED_NC(nfsvers, np, nvap)) {
1777 FSDBG(513, -1, np, 0, np);
1778 np->n_flag &= ~NNEGNCENTRIES;
1779 cache_purge(vp);
1780 np->n_ncgen++;
1781 NFS_CHANGED_UPDATE_NC(nfsvers, np, nvap);
1782 NFS_VNOP_DBG("Purge directory 0x%llx\n",
1783 (uint64_t)VM_KERNEL_ADDRPERM(vp));
1784 }
1785 if (NFS_CHANGED(nfsvers, np, nvap)) {
1786 FSDBG(513, -1, np, -1, np);
1787 if (vtype == VDIR) {
1788 NFS_VNOP_DBG("Invalidate directory 0x%llx\n",
1789 (uint64_t)VM_KERNEL_ADDRPERM(vp));
1790 nfs_invaldir(np);
1791 }
1792 nfs_node_unlock(np);
1793 if (wanted) {
1794 wakeup(np);
1795 }
1796 error = nfs_vinvalbuf(vp, V_SAVE, ctx, 1);
1797 FSDBG(513, -1, np, -2, error);
1798 if (!error) {
1799 nfs_node_lock_force(np);
1800 NFS_CHANGED_UPDATE(nfsvers, np, nvap);
1801 nfs_node_unlock(np);
1802 }
1803 } else {
1804 nfs_node_unlock(np);
1805 if (wanted) {
1806 wakeup(np);
1807 }
1808 }
1809 } else {
1810 nfs_node_unlock(np);
1811 if (wanted) {
1812 wakeup(np);
1813 }
1814 }
1815
1816 if (nvap == &nvattr) {
1817 NVATTR_CLEANUP(nvap);
1818 } else if (!(flags & NGA_ACL)) {
1819 /* make sure we don't return an ACL if it wasn't asked for */
1820 NFS_BITMAP_CLR(nvap->nva_bitmap, NFS_FATTR_ACL);
1821 if (nvap->nva_acl) {
1822 kauth_acl_free(nvap->nva_acl);
1823 nvap->nva_acl = NULL;
1824 }
1825 }
1826 FSDBG_BOT(513, np->n_size, error, np->n_vattr.nva_size, np->n_flag);
1827 return error;
1828 }
1829
1830
1831 /*
1832 * NFS getattr call from vfs.
1833 */
1834
1835 /*
1836 * The attributes we support over the wire.
1837 * We also get the fsid, but the vfs layer gets it out of the mount
1838 * structure after calling us, so there's no need to return it,
1839 * and Finder expects to call getattrlist just looking for the FSID
1840 * without hanging on a non-responsive server.
1841 */
1842 #define NFS3_SUPPORTED_VATTRS \
1843 (VNODE_ATTR_va_rdev | \
1844 VNODE_ATTR_va_nlink | \
1845 VNODE_ATTR_va_data_size | \
1846 VNODE_ATTR_va_data_alloc | \
1847 VNODE_ATTR_va_uid | \
1848 VNODE_ATTR_va_gid | \
1849 VNODE_ATTR_va_mode | \
1850 VNODE_ATTR_va_modify_time | \
1851 VNODE_ATTR_va_change_time | \
1852 VNODE_ATTR_va_access_time | \
1853 VNODE_ATTR_va_fileid | \
1854 VNODE_ATTR_va_type)
1855
1856
1857 int
1858 nfs3_vnop_getattr(
1859 struct vnop_getattr_args /* {
1860 * struct vnodeop_desc *a_desc;
1861 * vnode_t a_vp;
1862 * struct vnode_attr *a_vap;
1863 * vfs_context_t a_context;
1864 * } */*ap)
1865 {
1866 int error;
1867 struct nfs_vattr nva;
1868 struct vnode_attr *vap = ap->a_vap;
1869 struct nfsmount *nmp;
1870 dev_t rdev;
1871
1872 nmp = VTONMP(ap->a_vp);
1873
1874 /*
1875 * Let's not go over the wire if we don't support any of the attributes.
1876 * Just fall through at the VFS layer and let it cons up what it needs.
1877 */
1878 /* Return the io size no matter what, since we don't go over the wire for this */
1879 VATTR_RETURN(vap, va_iosize, nfs_iosize);
1880
1881 if ((vap->va_active & NFS3_SUPPORTED_VATTRS) == 0) {
1882 return 0;
1883 }
1884
1885 if (VATTR_IS_ACTIVE(ap->a_vap, va_name)) {
1886 NFS_VNOP_DBG("Getting attrs for 0x%llx, vname is %s\n",
1887 (uint64_t)VM_KERNEL_ADDRPERM(ap->a_vp),
1888 ap->a_vp->v_name ? ap->a_vp->v_name : "empty");
1889 }
1890 error = nfs_getattr(VTONFS(ap->a_vp), &nva, ap->a_context, NGA_CACHED);
1891 if (error) {
1892 return error;
1893 }
1894
1895 /* copy nva to *a_vap */
1896 VATTR_RETURN(vap, va_type, nva.nva_type);
1897 VATTR_RETURN(vap, va_mode, nva.nva_mode);
1898 rdev = makedev(nva.nva_rawdev.specdata1, nva.nva_rawdev.specdata2);
1899 VATTR_RETURN(vap, va_rdev, rdev);
1900 VATTR_RETURN(vap, va_uid, nva.nva_uid);
1901 VATTR_RETURN(vap, va_gid, nva.nva_gid);
1902 VATTR_RETURN(vap, va_nlink, nva.nva_nlink);
1903 VATTR_RETURN(vap, va_fileid, nva.nva_fileid);
1904 VATTR_RETURN(vap, va_data_size, nva.nva_size);
1905 VATTR_RETURN(vap, va_data_alloc, nva.nva_bytes);
1906 vap->va_access_time.tv_sec = nva.nva_timesec[NFSTIME_ACCESS];
1907 vap->va_access_time.tv_nsec = nva.nva_timensec[NFSTIME_ACCESS];
1908 VATTR_SET_SUPPORTED(vap, va_access_time);
1909 vap->va_modify_time.tv_sec = nva.nva_timesec[NFSTIME_MODIFY];
1910 vap->va_modify_time.tv_nsec = nva.nva_timensec[NFSTIME_MODIFY];
1911 VATTR_SET_SUPPORTED(vap, va_modify_time);
1912 vap->va_change_time.tv_sec = nva.nva_timesec[NFSTIME_CHANGE];
1913 vap->va_change_time.tv_nsec = nva.nva_timensec[NFSTIME_CHANGE];
1914 VATTR_SET_SUPPORTED(vap, va_change_time);
1915
1916
1917 // VATTR_RETURN(vap, va_encoding, 0xffff /* kTextEncodingUnknown */);
1918 return error;
1919 }
1920
1921 /*
1922 * NFS setattr call.
1923 */
1924 int
1925 nfs_vnop_setattr(
1926 struct vnop_setattr_args /* {
1927 * struct vnodeop_desc *a_desc;
1928 * vnode_t a_vp;
1929 * struct vnode_attr *a_vap;
1930 * vfs_context_t a_context;
1931 * } */*ap)
1932 {
1933 vfs_context_t ctx = ap->a_context;
1934 vnode_t vp = ap->a_vp;
1935 nfsnode_t np = VTONFS(vp);
1936 struct nfsmount *nmp;
1937 struct vnode_attr *vap = ap->a_vap;
1938 int error = 0;
1939 int biosize, nfsvers, namedattrs;
1940 u_quad_t origsize, vapsize;
1941 struct nfs_dulookup dul;
1942 nfsnode_t dnp = NULL;
1943 int dul_in_progress = 0;
1944 vnode_t dvp = NULL;
1945 const char *vname = NULL;
1946 #if CONFIG_NFS4
1947 struct nfs_open_owner *noop = NULL;
1948 struct nfs_open_file *nofp = NULL;
1949 #endif
1950 nmp = VTONMP(vp);
1951 if (nfs_mount_gone(nmp)) {
1952 return ENXIO;
1953 }
1954 nfsvers = nmp->nm_vers;
1955 namedattrs = (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR);
1956 biosize = nmp->nm_biosize;
1957
1958 /* Disallow write attempts if the filesystem is mounted read-only. */
1959 if (vnode_vfsisrdonly(vp)) {
1960 return EROFS;
1961 }
1962
1963 origsize = np->n_size;
1964 if (VATTR_IS_ACTIVE(vap, va_data_size)) {
1965 switch (vnode_vtype(vp)) {
1966 case VDIR:
1967 return EISDIR;
1968 case VCHR:
1969 case VBLK:
1970 case VSOCK:
1971 case VFIFO:
1972 if (!VATTR_IS_ACTIVE(vap, va_modify_time) &&
1973 !VATTR_IS_ACTIVE(vap, va_access_time) &&
1974 !VATTR_IS_ACTIVE(vap, va_mode) &&
1975 !VATTR_IS_ACTIVE(vap, va_uid) &&
1976 !VATTR_IS_ACTIVE(vap, va_gid)) {
1977 return 0;
1978 }
1979 VATTR_CLEAR_ACTIVE(vap, va_data_size);
1980 break;
1981 default:
1982 /*
1983 * Disallow write attempts if the filesystem is
1984 * mounted read-only.
1985 */
1986 if (vnode_vfsisrdonly(vp)) {
1987 return EROFS;
1988 }
1989 FSDBG_TOP(512, np->n_size, vap->va_data_size,
1990 np->n_vattr.nva_size, np->n_flag);
1991 /* clear NNEEDINVALIDATE, if set */
1992 if ((error = nfs_node_lock(np))) {
1993 return error;
1994 }
1995 if (np->n_flag & NNEEDINVALIDATE) {
1996 np->n_flag &= ~NNEEDINVALIDATE;
1997 }
1998 nfs_node_unlock(np);
1999 /* flush everything */
2000 error = nfs_vinvalbuf(vp, (vap->va_data_size ? V_SAVE : 0), ctx, 1);
2001 if (error) {
2002 NP(np, "nfs_setattr: nfs_vinvalbuf %d", error);
2003 FSDBG_BOT(512, np->n_size, vap->va_data_size, np->n_vattr.nva_size, -1);
2004 return error;
2005 }
2006 #if CONFIG_NFS4
2007 if (nfsvers >= NFS_VER4) {
2008 /* setting file size requires having the file open for write access */
2009 if (np->n_flag & NREVOKE) {
2010 return EIO;
2011 }
2012 noop = nfs_open_owner_find(nmp, vfs_context_ucred(ctx), 1);
2013 if (!noop) {
2014 return ENOMEM;
2015 }
2016 restart:
2017 error = nfs_mount_state_in_use_start(nmp, vfs_context_thread(ctx));
2018 if (error) {
2019 return error;
2020 }
2021 if (np->n_flag & NREVOKE) {
2022 nfs_mount_state_in_use_end(nmp, 0);
2023 return EIO;
2024 }
2025 error = nfs_open_file_find(np, noop, &nofp, 0, 0, 1);
2026 if (!error && (nofp->nof_flags & NFS_OPEN_FILE_LOST)) {
2027 error = EIO;
2028 }
2029 if (!error && (nofp->nof_flags & NFS_OPEN_FILE_REOPEN)) {
2030 nfs_mount_state_in_use_end(nmp, 0);
2031 error = nfs4_reopen(nofp, vfs_context_thread(ctx));
2032 nofp = NULL;
2033 if (!error) {
2034 goto restart;
2035 }
2036 }
2037 if (!error) {
2038 error = nfs_open_file_set_busy(nofp, vfs_context_thread(ctx));
2039 }
2040 if (error) {
2041 nfs_open_owner_rele(noop);
2042 return error;
2043 }
2044 if (!(nofp->nof_access & NFS_OPEN_SHARE_ACCESS_WRITE)) {
2045 /* we don't have the file open for write access, so open it */
2046 error = nfs4_open(np, nofp, NFS_OPEN_SHARE_ACCESS_WRITE, NFS_OPEN_SHARE_DENY_NONE, ctx);
2047 if (!error) {
2048 nofp->nof_flags |= NFS_OPEN_FILE_SETATTR;
2049 }
2050 if (nfs_mount_state_error_should_restart(error)) {
2051 nfs_open_file_clear_busy(nofp);
2052 nofp = NULL;
2053 if (nfs_mount_state_in_use_end(nmp, error)) {
2054 goto restart;
2055 }
2056 }
2057 }
2058 }
2059 #endif
2060 nfs_data_lock(np, NFS_DATA_LOCK_EXCLUSIVE);
2061 if (np->n_size > vap->va_data_size) { /* shrinking? */
2062 daddr64_t obn, bn;
2063 int neweofoff, mustwrite;
2064 struct nfsbuf *bp;
2065
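/*
 * Walk the cached buffers from the old EOF block down to the new
 * EOF block.  Buffers wholly past the new EOF are invalidated; the
 * buffer containing the new EOF has its dirty range clipped, and any
 * remaining dirty data is written out before it is discarded.
 */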
2066 obn = (np->n_size - 1) / biosize;
2067 bn = vap->va_data_size / biosize;
2068 for (; obn >= bn; obn--) {
2069 if (!nfs_buf_is_incore(np, obn)) {
2070 continue;
2071 }
2072 error = nfs_buf_get(np, obn, biosize, NULL, NBLK_READ, &bp);
2073 if (error) {
2074 continue;
2075 }
2076 if (obn != bn) {
2077 FSDBG(512, bp, bp->nb_flags, 0, obn);
2078 SET(bp->nb_flags, NB_INVAL);
2079 nfs_buf_release(bp, 1);
2080 continue;
2081 }
2082 mustwrite = 0;
2083 neweofoff = vap->va_data_size - NBOFF(bp);
2084 /* check for any dirty data before the new EOF */
2085 if ((bp->nb_dirtyend > 0) && (bp->nb_dirtyoff < neweofoff)) {
2086 /* clip dirty range to EOF */
2087 if (bp->nb_dirtyend > neweofoff) {
2088 bp->nb_dirtyend = neweofoff;
2089 if (bp->nb_dirtyoff >= bp->nb_dirtyend) {
2090 bp->nb_dirtyoff = bp->nb_dirtyend = 0;
2091 }
2092 }
2093 if ((bp->nb_dirtyend > 0) && (bp->nb_dirtyoff < neweofoff)) {
2094 mustwrite++;
2095 }
2096 }
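/* drop dirty-page bits for pages lying entirely past the new EOF */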
2097 bp->nb_dirty &= (1 << round_page_32(neweofoff) / PAGE_SIZE) - 1;
2098 if (bp->nb_dirty) {
2099 mustwrite++;
2100 }
2101 if (!mustwrite) {
2102 FSDBG(512, bp, bp->nb_flags, 0, obn);
2103 SET(bp->nb_flags, NB_INVAL);
2104 nfs_buf_release(bp, 1);
2105 continue;
2106 }
2107 /* gotta write out dirty data before invalidating */
2108 /* (NB_STABLE indicates that data writes should be FILESYNC) */
2109 /* (NB_NOCACHE indicates buffer should be discarded) */
2110 CLR(bp->nb_flags, (NB_DONE | NB_ERROR | NB_INVAL | NB_ASYNC | NB_READ));
2111 SET(bp->nb_flags, NB_STABLE | NB_NOCACHE);
2112 if (!IS_VALID_CRED(bp->nb_wcred)) {
2113 kauth_cred_t cred = vfs_context_ucred(ctx);
2114 kauth_cred_ref(cred);
2115 bp->nb_wcred = cred;
2116 }
2117 error = nfs_buf_write(bp);
2118 // Note: bp has been released
2119 if (error) {
2120 FSDBG(512, bp, 0xd00dee, 0xbad, error);
2121 nfs_node_lock_force(np);
2122 np->n_error = error;
2123 np->n_flag |= NWRITEERR;
2124 /*
2125 * There was a write error and we need to
2126 * invalidate attrs and flush buffers in
2127 * order to sync up with the server.
2128 * (if this write was extending the file,
2129 * we may no longer know the correct size)
2130 */
2131 NATTRINVALIDATE(np);
2132 nfs_node_unlock(np);
2133 nfs_data_unlock(np);
2134 nfs_vinvalbuf(vp, V_SAVE | V_IGNORE_WRITEERR, ctx, 1);
2135 nfs_data_lock(np, NFS_DATA_LOCK_EXCLUSIVE);
2136 error = 0;
2137 }
2138 }
2139 }
2140 if (vap->va_data_size != np->n_size) {
2141 ubc_setsize(vp, (off_t)vap->va_data_size); /* XXX error? */
2142 }
2143 origsize = np->n_size;
2144 np->n_size = np->n_vattr.nva_size = vap->va_data_size;
2145 nfs_node_lock_force(np);
2146 CLR(np->n_flag, NUPDATESIZE);
2147 nfs_node_unlock(np);
2148 FSDBG(512, np, np->n_size, np->n_vattr.nva_size, 0xf00d0001);
2149 }
2150 } else if (VATTR_IS_ACTIVE(vap, va_modify_time) ||
2151 VATTR_IS_ACTIVE(vap, va_access_time) ||
2152 (vap->va_vaflags & VA_UTIMES_NULL)) {
2153 if ((error = nfs_node_lock(np))) {
2154 return error;
2155 }
2156 if ((np->n_flag & NMODIFIED) && (vnode_vtype(vp) == VREG)) {
2157 nfs_node_unlock(np);
2158 error = nfs_vinvalbuf(vp, V_SAVE, ctx, 1);
2159 if (error == EINTR) {
2160 return error;
2161 }
2162 } else {
2163 nfs_node_unlock(np);
2164 }
2165 }
2166 if ((VATTR_IS_ACTIVE(vap, va_mode) || VATTR_IS_ACTIVE(vap, va_uid) || VATTR_IS_ACTIVE(vap, va_gid) ||
2167 VATTR_IS_ACTIVE(vap, va_acl) || VATTR_IS_ACTIVE(vap, va_uuuid) || VATTR_IS_ACTIVE(vap, va_guuid)) &&
2168 !(error = nfs_node_lock(np))) {
2169 NACCESSINVALIDATE(np);
2170 nfs_node_unlock(np);
2171 if (!namedattrs) {
2172 dvp = vnode_getparent(vp);
2173 vname = vnode_getname(vp);
2174 dnp = (dvp && vname) ? VTONFS(dvp) : NULL;
2175 if (dnp) {
2176 if (nfs_node_set_busy(dnp, vfs_context_thread(ctx))) {
2177 vnode_put(dvp);
2178 vnode_putname(vname);
2179 } else {
2180 nfs_dulookup_init(&dul, dnp, vname, strlen(vname), ctx);
2181 nfs_dulookup_start(&dul, dnp, ctx);
2182 dul_in_progress = 1;
2183 }
2184 } else {
2185 if (dvp) {
2186 vnode_put(dvp);
2187 }
2188 if (vname) {
2189 vnode_putname(vname);
2190 }
2191 }
2192 }
2193 }
2194
2195 if (!error) {
2196 error = nmp->nm_funcs->nf_setattr_rpc(np, vap, ctx);
2197 }
2198
2199 if (dul_in_progress) {
2200 nfs_dulookup_finish(&dul, dnp, ctx);
2201 nfs_node_clear_busy(dnp);
2202 vnode_put(dvp);
2203 vnode_putname(vname);
2204 }
2205
2206 FSDBG_BOT(512, np->n_size, vap->va_data_size, np->n_vattr.nva_size, error);
2207 if (VATTR_IS_ACTIVE(vap, va_data_size)) {
2208 if (error && (origsize != np->n_size) &&
2209 ((nfsvers < NFS_VER4) || !nfs_mount_state_error_should_restart(error))) {
2210 /* make every effort to resync file size w/ server... */
2211 /* (don't bother if we'll be restarting the operation) */
2212 int err; /* preserve "error" for return */
2213 np->n_size = np->n_vattr.nva_size = origsize;
2214 nfs_node_lock_force(np);
2215 CLR(np->n_flag, NUPDATESIZE);
2216 nfs_node_unlock(np);
2217 FSDBG(512, np, np->n_size, np->n_vattr.nva_size, 0xf00d0002);
2218 ubc_setsize(vp, (off_t)np->n_size); /* XXX check error */
2219 vapsize = vap->va_data_size;
2220 vap->va_data_size = origsize;
2221 err = nmp->nm_funcs->nf_setattr_rpc(np, vap, ctx);
2222 if (err) {
2223 NP(np, "nfs_vnop_setattr: nfs%d_setattr_rpc %d %d", nfsvers, error, err);
2224 }
2225 vap->va_data_size = vapsize;
2226 }
2227 nfs_node_lock_force(np);
2228 /*
2229 * The size was just set. If the size is already marked for update, don't
2230 * trust the newsize (it may have been set while the setattr was in progress).
2231 * Clear the update flag and make sure we fetch new attributes so we are sure
2232 * we have the latest size.
2233 */
2234 if (ISSET(np->n_flag, NUPDATESIZE)) {
2235 CLR(np->n_flag, NUPDATESIZE);
2236 NATTRINVALIDATE(np);
2237 nfs_node_unlock(np);
2238 nfs_getattr(np, NULL, ctx, NGA_UNCACHED);
2239 } else {
2240 nfs_node_unlock(np);
2241 }
2242 nfs_data_unlock(np);
2243 #if CONFIG_NFS4
2244 if (nfsvers >= NFS_VER4) {
2245 if (nofp) {
2246 /* don't close our setattr open if we'll be restarting... */
2247 if (!nfs_mount_state_error_should_restart(error) &&
2248 (nofp->nof_flags & NFS_OPEN_FILE_SETATTR)) {
2249 int err = nfs_close(np, nofp, NFS_OPEN_SHARE_ACCESS_WRITE, NFS_OPEN_SHARE_DENY_NONE, ctx);
2250 if (err) {
2251 NP(np, "nfs_vnop_setattr: close error: %d", err);
2252 }
2253 nofp->nof_flags &= ~NFS_OPEN_FILE_SETATTR;
2254 }
2255 nfs_open_file_clear_busy(nofp);
2256 nofp = NULL;
2257 }
2258 if (nfs_mount_state_in_use_end(nmp, error)) {
2259 goto restart;
2260 }
2261 nfs_open_owner_rele(noop);
2262 }
2263 #endif
2264 }
2265 return error;
2266 }
2267
2268 /*
2269 * Do an NFS setattr RPC.
2270 */
2271 int
2272 nfs3_setattr_rpc(
2273 nfsnode_t np,
2274 struct vnode_attr *vap,
2275 vfs_context_t ctx)
2276 {
2277 struct nfsmount *nmp = NFSTONMP(np);
2278 int error = 0, lockerror = ENOENT, status, wccpostattr = 0, nfsvers;
2279 u_int64_t xid, nextxid;
2280 struct nfsm_chain nmreq, nmrep;
2281
2282 if (nfs_mount_gone(nmp)) {
2283 return ENXIO;
2284 }
2285 nfsvers = nmp->nm_vers;
2286
2287 VATTR_SET_SUPPORTED(vap, va_mode);
2288 VATTR_SET_SUPPORTED(vap, va_uid);
2289 VATTR_SET_SUPPORTED(vap, va_gid);
2290 VATTR_SET_SUPPORTED(vap, va_data_size);
2291 VATTR_SET_SUPPORTED(vap, va_access_time);
2292 VATTR_SET_SUPPORTED(vap, va_modify_time);
2293
2294
2295 if (VATTR_IS_ACTIVE(vap, va_flags)
2296 ) {
2297 if (vap->va_flags) { /* we don't support setting flags */
2298 if (vap->va_active & ~VNODE_ATTR_va_flags) {
2299 return EINVAL; /* return EINVAL if other attributes also set */
2300 } else {
2301 return ENOTSUP; /* return ENOTSUP for chflags(2) */
2302 }
2303 }
2304 /* no flags set, so we'll just ignore it */
2305 if (!(vap->va_active & ~VNODE_ATTR_va_flags)) {
2306 return 0; /* no (other) attributes to set, so nothing to do */
2307 }
2308 }
2309
2310 nfsm_chain_null(&nmreq);
2311 nfsm_chain_null(&nmrep);
2312
2313 nfsm_chain_build_alloc_init(error, &nmreq,
2314 NFSX_FH(nfsvers) + NFSX_SATTR(nfsvers));
2315 nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
2316 if (nfsvers == NFS_VER3) {
2317 if (VATTR_IS_ACTIVE(vap, va_mode)) {
2318 nfsm_chain_add_32(error, &nmreq, TRUE);
2319 nfsm_chain_add_32(error, &nmreq, vap->va_mode);
2320 } else {
2321 nfsm_chain_add_32(error, &nmreq, FALSE);
2322 }
2323 if (VATTR_IS_ACTIVE(vap, va_uid)) {
2324 nfsm_chain_add_32(error, &nmreq, TRUE);
2325 nfsm_chain_add_32(error, &nmreq, vap->va_uid);
2326 } else {
2327 nfsm_chain_add_32(error, &nmreq, FALSE);
2328 }
2329 if (VATTR_IS_ACTIVE(vap, va_gid)) {
2330 nfsm_chain_add_32(error, &nmreq, TRUE);
2331 nfsm_chain_add_32(error, &nmreq, vap->va_gid);
2332 } else {
2333 nfsm_chain_add_32(error, &nmreq, FALSE);
2334 }
2335 if (VATTR_IS_ACTIVE(vap, va_data_size)) {
2336 nfsm_chain_add_32(error, &nmreq, TRUE);
2337 nfsm_chain_add_64(error, &nmreq, vap->va_data_size);
2338 } else {
2339 nfsm_chain_add_32(error, &nmreq, FALSE);
2340 }
2341 if (vap->va_vaflags & VA_UTIMES_NULL) {
2342 nfsm_chain_add_32(error, &nmreq, NFS_TIME_SET_TO_SERVER);
2343 nfsm_chain_add_32(error, &nmreq, NFS_TIME_SET_TO_SERVER);
2344 } else {
2345 if (VATTR_IS_ACTIVE(vap, va_access_time)) {
2346 nfsm_chain_add_32(error, &nmreq, NFS_TIME_SET_TO_CLIENT);
2347 nfsm_chain_add_32(error, &nmreq, vap->va_access_time.tv_sec);
2348 nfsm_chain_add_32(error, &nmreq, vap->va_access_time.tv_nsec);
2349 } else {
2350 nfsm_chain_add_32(error, &nmreq, NFS_TIME_DONT_CHANGE);
2351 }
2352 if (VATTR_IS_ACTIVE(vap, va_modify_time)) {
2353 nfsm_chain_add_32(error, &nmreq, NFS_TIME_SET_TO_CLIENT);
2354 nfsm_chain_add_32(error, &nmreq, vap->va_modify_time.tv_sec);
2355 nfsm_chain_add_32(error, &nmreq, vap->va_modify_time.tv_nsec);
2356 } else {
2357 nfsm_chain_add_32(error, &nmreq, NFS_TIME_DONT_CHANGE);
2358 }
2359 }
2360 nfsm_chain_add_32(error, &nmreq, FALSE);
2361 } else {
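/* V2 SETATTR: attribute fields set to -1 are left unchanged by the server */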
2362 nfsm_chain_add_32(error, &nmreq, VATTR_IS_ACTIVE(vap, va_mode) ?
2363 vtonfsv2_mode(vnode_vtype(NFSTOV(np)), vap->va_mode) : -1);
2364 nfsm_chain_add_32(error, &nmreq, VATTR_IS_ACTIVE(vap, va_uid) ?
2365 vap->va_uid : (uint32_t)-1);
2366 nfsm_chain_add_32(error, &nmreq, VATTR_IS_ACTIVE(vap, va_gid) ?
2367 vap->va_gid : (uint32_t)-1);
2368 nfsm_chain_add_32(error, &nmreq, VATTR_IS_ACTIVE(vap, va_data_size) ?
2369 vap->va_data_size : (uint32_t)-1);
2370 if (VATTR_IS_ACTIVE(vap, va_access_time)) {
2371 nfsm_chain_add_32(error, &nmreq, vap->va_access_time.tv_sec);
2372 nfsm_chain_add_32(error, &nmreq, (vap->va_access_time.tv_nsec != -1) ?
2373 ((uint32_t)vap->va_access_time.tv_nsec / 1000) : 0xffffffff);
2374 } else {
2375 nfsm_chain_add_32(error, &nmreq, -1);
2376 nfsm_chain_add_32(error, &nmreq, -1);
2377 }
2378 if (VATTR_IS_ACTIVE(vap, va_modify_time)) {
2379 nfsm_chain_add_32(error, &nmreq, vap->va_modify_time.tv_sec);
2380 nfsm_chain_add_32(error, &nmreq, (vap->va_modify_time.tv_nsec != -1) ?
2381 ((uint32_t)vap->va_modify_time.tv_nsec / 1000) : 0xffffffff);
2382 } else {
2383 nfsm_chain_add_32(error, &nmreq, -1);
2384 nfsm_chain_add_32(error, &nmreq, -1);
2385 }
2386 }
2387 nfsm_chain_build_done(error, &nmreq);
2388 nfsmout_if(error);
2389 error = nfs_request(np, NULL, &nmreq, NFSPROC_SETATTR, ctx, NULL, &nmrep, &xid, &status);
2390 if ((lockerror = nfs_node_lock(np))) {
2391 error = lockerror;
2392 }
2393 if (nfsvers == NFS_VER3) {
2394 struct timespec premtime = { .tv_sec = 0, .tv_nsec = 0 };
2395 nfsm_chain_get_wcc_data(error, &nmrep, np, &premtime, &wccpostattr, &xid);
2396 nfsmout_if(error);
2397 /* if file hadn't changed, update cached mtime */
2398 if (nfstimespeccmp(&np->n_mtime, &premtime, ==)) {
2399 NFS_CHANGED_UPDATE(nfsvers, np, &np->n_vattr);
2400 }
2401 /* if directory hadn't changed, update namecache mtime */
2402 if ((vnode_vtype(NFSTOV(np)) == VDIR) &&
2403 nfstimespeccmp(&np->n_ncmtime, &premtime, ==)) {
2404 NFS_CHANGED_UPDATE_NC(nfsvers, np, &np->n_vattr);
2405 }
2406 if (!wccpostattr) {
2407 NATTRINVALIDATE(np);
2408 }
2409 error = status;
2410 } else {
2411 if (!error) {
2412 error = status;
2413 }
2414 nfsm_chain_loadattr(error, &nmrep, np, nfsvers, &xid);
2415 }
2416 /*
2417 * We just changed the attributes and we want to make sure that we
2418 * see the latest attributes. Get the next XID. If it's not the
2419 * next XID after the SETATTR XID, then it's possible that another
2420 * RPC was in flight at the same time and it might put stale attributes
2421 * in the cache. In that case, we invalidate the attributes and set
2422 * the attribute cache XID to guarantee that newer attributes will
2423 * get loaded next.
2424 */
2425 nextxid = 0;
2426 nfs_get_xid(&nextxid);
2427 if (nextxid != (xid + 1)) {
2428 np->n_xid = nextxid;
2429 NATTRINVALIDATE(np);
2430 }
2431 nfsmout:
2432 if (!lockerror) {
2433 nfs_node_unlock(np);
2434 }
2435 nfsm_chain_cleanup(&nmreq);
2436 nfsm_chain_cleanup(&nmrep);
2437 return error;
2438 }
2439
2440 /*
2441 * NFS lookup call, one step at a time...
2442 * First look in the cache.
2443 * If not found, unlock the directory nfsnode and do the RPC.
2444 */
2445 int
2446 nfs_vnop_lookup(
2447 struct vnop_lookup_args /* {
2448 * struct vnodeop_desc *a_desc;
2449 * vnode_t a_dvp;
2450 * vnode_t *a_vpp;
2451 * struct componentname *a_cnp;
2452 * vfs_context_t a_context;
2453 * } */*ap)
2454 {
2455 vfs_context_t ctx = ap->a_context;
2456 struct componentname *cnp = ap->a_cnp;
2457 vnode_t dvp = ap->a_dvp;
2458 vnode_t *vpp = ap->a_vpp;
2459 int flags = cnp->cn_flags;
2460 vnode_t newvp;
2461 nfsnode_t dnp, np;
2462 struct nfsmount *nmp;
2463 mount_t mp;
2464 int nfsvers, error, busyerror = ENOENT, isdot, isdotdot, negnamecache;
2465 u_int64_t xid;
2466 struct nfs_vattr nvattr;
2467 int ngflags;
2468 struct vnop_access_args naa;
2469 fhandle_t fh;
2470 struct nfsreq rq, *req = &rq;
2471
2472 *vpp = NULLVP;
2473
2474 dnp = VTONFS(dvp);
2475 NVATTR_INIT(&nvattr);
2476
2477 mp = vnode_mount(dvp);
2478 nmp = VFSTONFS(mp);
2479 if (nfs_mount_gone(nmp)) {
2480 error = ENXIO;
2481 goto error_return;
2482 }
2483 nfsvers = nmp->nm_vers;
2484 negnamecache = !NMFLAG(nmp, NONEGNAMECACHE);
2485
2486 if ((error = busyerror = nfs_node_set_busy(dnp, vfs_context_thread(ctx)))) {
2487 goto error_return;
2488 }
2489 /* nfs_getattr() will check changed and purge caches */
2490 if ((error = nfs_getattr(dnp, NULL, ctx, NGA_CACHED))) {
2491 goto error_return;
2492 }
2493
2494 error = cache_lookup(dvp, vpp, cnp);
2495 switch (error) {
2496 case ENOENT:
2497 /* negative cache entry */
2498 goto error_return;
2499 case 0:
2500 /* cache miss */
2501 if ((nfsvers > NFS_VER2) && NMFLAG(nmp, RDIRPLUS)) {
2502 /* if rdirplus, try dir buf cache lookup */
2503 error = nfs_dir_buf_cache_lookup(dnp, &np, cnp, ctx, 0);
2504 if (!error && np) {
2505 /* dir buf cache hit */
2506 *vpp = NFSTOV(np);
2507 error = -1;
2508 }
2509 }
2510 if (error != -1) { /* cache miss */
2511 break;
2512 }
2513 /* FALLTHROUGH */
2514 case -1:
2515 /* cache hit, not really an error */
2516 OSAddAtomic64(1, &nfsstats.lookupcache_hits);
2517
2518 nfs_node_clear_busy(dnp);
2519 busyerror = ENOENT;
2520
2521 /* check for directory access */
2522 naa.a_desc = &vnop_access_desc;
2523 naa.a_vp = dvp;
2524 naa.a_action = KAUTH_VNODE_SEARCH;
2525 naa.a_context = ctx;
2526
2527 /* compute actual success/failure based on accessibility */
2528 error = nfs_vnop_access(&naa);
2529 /* FALLTHROUGH */
2530 default:
2531 /* unexpected error from cache_lookup */
2532 goto error_return;
2533 }
2534
2535 /* skip the lookup if we know who we are: "." or ".." */
2536 isdot = isdotdot = 0;
2537 if (cnp->cn_nameptr[0] == '.') {
2538 if (cnp->cn_namelen == 1) {
2539 isdot = 1;
2540 }
2541 if ((cnp->cn_namelen == 2) && (cnp->cn_nameptr[1] == '.')) {
2542 isdotdot = 1;
2543 }
2544 }
2545 if (isdotdot || isdot) {
2546 fh.fh_len = 0;
2547 goto found;
2548 }
2549 #if CONFIG_NFS4
2550 if ((nfsvers >= NFS_VER4) && (dnp->n_vattr.nva_flags & NFS_FFLAG_TRIGGER)) {
2551 /* we should never be looking things up in a trigger directory, return nothing */
2552 error = ENOENT;
2553 goto error_return;
2554 }
2555 #endif
2556
2557 /* do we know this name is too long? */
2558 nmp = VTONMP(dvp);
2559 if (nfs_mount_gone(nmp)) {
2560 error = ENXIO;
2561 goto error_return;
2562 }
2563 if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_MAXNAME) &&
2564 (cnp->cn_namelen > (int)nmp->nm_fsattr.nfsa_maxname)) {
2565 error = ENAMETOOLONG;
2566 goto error_return;
2567 }
2568
2569 error = 0;
2570 newvp = NULLVP;
2571
2572 OSAddAtomic64(1, &nfsstats.lookupcache_misses);
2573
2574 error = nmp->nm_funcs->nf_lookup_rpc_async(dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx, &req);
2575 nfsmout_if(error);
2576 error = nmp->nm_funcs->nf_lookup_rpc_async_finish(dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx, req, &xid, &fh, &nvattr);
2577 nfsmout_if(error);
2578
2579 /* is the file handle the same as this directory's file handle? */
2580 isdot = NFS_CMPFH(dnp, fh.fh_data, fh.fh_len);
2581
2582 found:
2583 if (flags & ISLASTCN) {
2584 switch (cnp->cn_nameiop) {
2585 case DELETE:
2586 cnp->cn_flags &= ~MAKEENTRY;
2587 break;
2588 case RENAME:
2589 cnp->cn_flags &= ~MAKEENTRY;
2590 if (isdot) {
2591 error = EISDIR;
2592 goto error_return;
2593 }
2594 break;
2595 }
2596 }
2597
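/* ".." returns the parent vnode; "." returns this directory itself; otherwise get/create an nfsnode for the file handle we looked up */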
2598 if (isdotdot) {
2599 newvp = vnode_getparent(dvp);
2600 if (!newvp) {
2601 error = ENOENT;
2602 goto error_return;
2603 }
2604 } else if (isdot) {
2605 error = vnode_get(dvp);
2606 if (error) {
2607 goto error_return;
2608 }
2609 newvp = dvp;
2610 nfs_node_lock_force(dnp);
2611 if (fh.fh_len && (dnp->n_xid <= xid)) {
2612 nfs_loadattrcache(dnp, &nvattr, &xid, 0);
2613 }
2614 nfs_node_unlock(dnp);
2615 } else {
2616 ngflags = (cnp->cn_flags & MAKEENTRY) ? NG_MAKEENTRY : 0;
2617 error = nfs_nget(mp, dnp, cnp, fh.fh_data, fh.fh_len, &nvattr, &xid, rq.r_auth, ngflags, &np);
2618 if (error) {
2619 goto error_return;
2620 }
2621 newvp = NFSTOV(np);
2622 nfs_node_unlock(np);
2623 }
2624 *vpp = newvp;
2625
2626 nfsmout:
2627 if (error) {
2628 if (((cnp->cn_nameiop == CREATE) || (cnp->cn_nameiop == RENAME)) &&
2629 (flags & ISLASTCN) && (error == ENOENT)) {
2630 if (vnode_mount(dvp) && vnode_vfsisrdonly(dvp)) {
2631 error = EROFS;
2632 } else {
2633 error = EJUSTRETURN;
2634 }
2635 }
2636 }
2637 if ((error == ENOENT) && (cnp->cn_flags & MAKEENTRY) &&
2638 (cnp->cn_nameiop != CREATE) && negnamecache) {
2639 /* add a negative entry in the name cache */
2640 nfs_node_lock_force(dnp);
2641 cache_enter(dvp, NULL, cnp);
2642 dnp->n_flag |= NNEGNCENTRIES;
2643 nfs_node_unlock(dnp);
2644 }
2645 error_return:
2646 NVATTR_CLEANUP(&nvattr);
2647 if (!busyerror) {
2648 nfs_node_clear_busy(dnp);
2649 }
2650 if (error && *vpp) {
2651 vnode_put(*vpp);
2652 *vpp = NULLVP;
2653 }
2654 return error;
2655 }
2656
2657 int nfs_readlink_nocache = DEFAULT_READLINK_NOCACHE;
2658
2659 /*
2660 * NFS readlink call
2661 */
2662 int
2663 nfs_vnop_readlink(
2664 struct vnop_readlink_args /* {
2665 * struct vnodeop_desc *a_desc;
2666 * vnode_t a_vp;
2667 * struct uio *a_uio;
2668 * vfs_context_t a_context;
2669 * } */*ap)
2670 {
2671 vfs_context_t ctx = ap->a_context;
2672 nfsnode_t np = VTONFS(ap->a_vp);
2673 struct nfsmount *nmp;
2674 int error = 0, nfsvers;
2675 uint32_t buflen;
2676 uio_t uio = ap->a_uio;
2677 struct nfsbuf *bp = NULL;
2678 struct timespec ts;
2679 int timeo;
2680
2681 if (vnode_vtype(ap->a_vp) != VLNK) {
2682 return EPERM;
2683 }
2684
2685 if (uio_resid(uio) == 0) {
2686 return 0;
2687 }
2688 if (uio_offset(uio) < 0) {
2689 return EINVAL;
2690 }
2691
2692 nmp = VTONMP(ap->a_vp);
2693 if (nfs_mount_gone(nmp)) {
2694 return ENXIO;
2695 }
2696 nfsvers = nmp->nm_vers;
2697
2698
2699 /* nfs_getattr() will check changed and purge caches */
2700 if ((error = nfs_getattr(np, NULL, ctx, nfs_readlink_nocache ? NGA_UNCACHED : NGA_CACHED))) {
2701 FSDBG(531, np, 0xd1e0001, 0, error);
2702 return error;
2703 }
2704
2705 if (nfs_readlink_nocache) {
2706 timeo = nfs_attrcachetimeout(np);
2707 nanouptime(&ts);
2708 }
2709
2710 retry:
2711 OSAddAtomic64(1, &nfsstats.biocache_readlinks);
2712 error = nfs_buf_get(np, 0, NFS_MAXPATHLEN, vfs_context_thread(ctx), NBLK_META, &bp);
2713 if (error) {
2714 FSDBG(531, np, 0xd1e0002, 0, error);
2715 return error;
2716 }
2717
2718 if (nfs_readlink_nocache) {
2719 NFS_VNOP_DBG("timeo = %d ts.tv_sec = %ld need refresh = %d cached = %d\n", timeo, ts.tv_sec,
2720 (np->n_rltim.tv_sec + timeo) < ts.tv_sec || nfs_readlink_nocache > 1,
2721 ISSET(bp->nb_flags, NB_CACHE) == NB_CACHE);
2722 /* n_rltim is synchronized by the associated nfs buf */
2723 if (ISSET(bp->nb_flags, NB_CACHE) && ((nfs_readlink_nocache > 1) || ((np->n_rltim.tv_sec + timeo) < ts.tv_sec))) {
2724 SET(bp->nb_flags, NB_INVAL);
2725 nfs_buf_release(bp, 0);
2726 goto retry;
2727 }
2728 }
2729 if (!ISSET(bp->nb_flags, NB_CACHE)) {
2730 readagain:
2731 OSAddAtomic64(1, &nfsstats.readlink_bios);
2732 buflen = bp->nb_bufsize;
2733 error = nmp->nm_funcs->nf_readlink_rpc(np, bp->nb_data, &buflen, ctx);
2734 if (error) {
2735 if (error == ESTALE) {
2736 NFS_VNOP_DBG("Stale FH from readlink rpc\n");
2737 error = nfs_refresh_fh(np, ctx);
2738 if (error == 0) {
2739 goto readagain;
2740 }
2741 }
2742 SET(bp->nb_flags, NB_ERROR);
2743 bp->nb_error = error;
2744 NFS_VNOP_DBG("readlink failed %d\n", error);
2745 } else {
2746 bp->nb_validoff = 0;
2747 bp->nb_validend = buflen;
2748 np->n_rltim = ts;
2749 NFS_VNOP_DBG("readlink of %.*s\n", bp->nb_validend, (char *)bp->nb_data);
2750 }
2751 } else {
2752 NFS_VNOP_DBG("got cached link of %.*s\n", bp->nb_validend, (char *)bp->nb_data);
2753 }
2754
2755 if (!error && (bp->nb_validend > 0)) {
2756 error = uiomove(bp->nb_data, bp->nb_validend, uio);
2757 }
2758 FSDBG(531, np, bp->nb_validend, 0, error);
2759 nfs_buf_release(bp, 1);
2760 return error;
2761 }
2762
2763 /*
2764 * Do a readlink RPC.
2765 */
2766 int
2767 nfs3_readlink_rpc(nfsnode_t np, char *buf, uint32_t *buflenp, vfs_context_t ctx)
2768 {
2769 struct nfsmount *nmp;
2770 int error = 0, lockerror = ENOENT, nfsvers, status;
2771 uint32_t len;
2772 u_int64_t xid;
2773 struct nfsm_chain nmreq, nmrep;
2774
2775 nmp = NFSTONMP(np);
2776 if (nfs_mount_gone(nmp)) {
2777 return ENXIO;
2778 }
2779 nfsvers = nmp->nm_vers;
2780 nfsm_chain_null(&nmreq);
2781 nfsm_chain_null(&nmrep);
2782
2783 nfsm_chain_build_alloc_init(error, &nmreq, NFSX_FH(nfsvers));
2784 nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
2785 nfsm_chain_build_done(error, &nmreq);
2786 nfsmout_if(error);
2787 error = nfs_request(np, NULL, &nmreq, NFSPROC_READLINK, ctx, NULL, &nmrep, &xid, &status);
2788 if ((lockerror = nfs_node_lock(np))) {
2789 error = lockerror;
2790 }
2791 if (nfsvers == NFS_VER3) {
2792 nfsm_chain_postop_attr_update(error, &nmrep, np, &xid);
2793 }
2794 if (!error) {
2795 error = status;
2796 }
2797 nfsm_chain_get_32(error, &nmrep, len);
2798 nfsmout_if(error);
2799 if ((nfsvers == NFS_VER2) && (len > *buflenp)) {
2800 error = EBADRPC;
2801 goto nfsmout;
2802 }
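/* Clamp the link length so the result fits in the caller's buffer. */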
2803 if (len >= *buflenp) {
2804 if (np->n_size && (np->n_size < *buflenp)) {
2805 len = np->n_size;
2806 } else {
2807 len = *buflenp - 1;
2808 }
2809 }
2810 nfsm_chain_get_opaque(error, &nmrep, len, buf);
2811 if (!error) {
2812 *buflenp = len;
2813 }
2814 nfsmout:
2815 if (!lockerror) {
2816 nfs_node_unlock(np);
2817 }
2818 nfsm_chain_cleanup(&nmreq);
2819 nfsm_chain_cleanup(&nmrep);
2820 return error;
2821 }
2822
2823 /*
2824 * NFS read RPC call
2825 * Ditto above
2826 */
2827 int
2828 nfs_read_rpc(nfsnode_t np, uio_t uio, vfs_context_t ctx)
2829 {
2830 struct nfsmount *nmp;
2831 int error = 0, nfsvers, eof = 0;
2832 size_t nmrsize, len, retlen;
2833 user_ssize_t tsiz;
2834 off_t txoffset;
2835 struct nfsreq rq, *req = &rq;
2836 #if CONFIG_NFS4
2837 uint32_t stategenid = 0, restart = 0;
2838 #endif
2839 FSDBG_TOP(536, np, uio_offset(uio), uio_resid(uio), 0);
2840 nmp = NFSTONMP(np);
2841 if (nfs_mount_gone(nmp)) {
2842 return ENXIO;
2843 }
2844 nfsvers = nmp->nm_vers;
2845 nmrsize = nmp->nm_rsize;
2846
2847 txoffset = uio_offset(uio);
2848 tsiz = uio_resid(uio);
2849 if ((nfsvers == NFS_VER2) && ((uint64_t)(txoffset + tsiz) > 0xffffffffULL)) {
2850 FSDBG_BOT(536, np, uio_offset(uio), uio_resid(uio), EFBIG);
2851 return EFBIG;
2852 }
2853
2854 while (tsiz > 0) {
2855 len = retlen = (tsiz > (user_ssize_t)nmrsize) ? nmrsize : (size_t)tsiz;
2856 FSDBG(536, np, txoffset, len, 0);
2857 if (np->n_flag & NREVOKE) {
2858 error = EIO;
2859 break;
2860 }
2861 #if CONFIG_NFS4
2862 if (nmp->nm_vers >= NFS_VER4) {
2863 stategenid = nmp->nm_stategenid;
2864 }
2865 #endif
2866 error = nmp->nm_funcs->nf_read_rpc_async(np, txoffset, len,
2867 vfs_context_thread(ctx), vfs_context_ucred(ctx), NULL, &req);
2868 if (!error) {
2869 error = nmp->nm_funcs->nf_read_rpc_async_finish(np, req, uio, &retlen, &eof);
2870 }
2871 #if CONFIG_NFS4
2872 if ((nmp->nm_vers >= NFS_VER4) && nfs_mount_state_error_should_restart(error) &&
2873 (++restart <= nfs_mount_state_max_restarts(nmp))) { /* guard against no progress */
2874 lck_mtx_lock(&nmp->nm_lock);
2875 if ((error != NFSERR_GRACE) && (stategenid == nmp->nm_stategenid)) {
2876 NP(np, "nfs_read_rpc: error %d, initiating recovery", error);
2877 nfs_need_recover(nmp, error);
2878 }
2879 lck_mtx_unlock(&nmp->nm_lock);
2880 if (np->n_flag & NREVOKE) {
2881 error = EIO;
2882 } else {
2883 if (error == NFSERR_GRACE) {
2884 tsleep(&nmp->nm_state, (PZERO - 1), "nfsgrace", 2 * hz);
2885 }
2886 if (!(error = nfs_mount_state_wait_for_recovery(nmp))) {
2887 continue;
2888 }
2889 }
2890 }
2891 #endif
2892 if (error) {
2893 break;
2894 }
2895 txoffset += retlen;
2896 tsiz -= retlen;
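/* V3/V4 signal EOF explicitly (or via a zero-length read); for V2, a short read implies EOF */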
2897 if (nfsvers != NFS_VER2) {
2898 if (eof || (retlen == 0)) {
2899 tsiz = 0;
2900 }
2901 } else if (retlen < len) {
2902 tsiz = 0;
2903 }
2904 }
2905
2906 FSDBG_BOT(536, np, eof, uio_resid(uio), error);
2907 return error;
2908 }
2909
2910 int
2911 nfs3_read_rpc_async(
2912 nfsnode_t np,
2913 off_t offset,
2914 size_t len,
2915 thread_t thd,
2916 kauth_cred_t cred,
2917 struct nfsreq_cbinfo *cb,
2918 struct nfsreq **reqp)
2919 {
2920 struct nfsmount *nmp;
2921 int error = 0, nfsvers;
2922 struct nfsm_chain nmreq;
2923
2924 nmp = NFSTONMP(np);
2925 if (nfs_mount_gone(nmp)) {
2926 return ENXIO;
2927 }
2928 nfsvers = nmp->nm_vers;
2929
2930 nfsm_chain_null(&nmreq);
2931 nfsm_chain_build_alloc_init(error, &nmreq, NFSX_FH(nfsvers) + 3 * NFSX_UNSIGNED);
2932 nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
2933 if (nfsvers == NFS_VER3) {
2934 nfsm_chain_add_64(error, &nmreq, offset);
2935 nfsm_chain_add_32(error, &nmreq, len);
2936 } else {
2937 nfsm_chain_add_32(error, &nmreq, offset);
2938 nfsm_chain_add_32(error, &nmreq, len);
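/* V2 READ args carry a third, unused "totalcount" field */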
2939 nfsm_chain_add_32(error, &nmreq, 0);
2940 }
2941 nfsm_chain_build_done(error, &nmreq);
2942 nfsmout_if(error);
2943 error = nfs_request_async(np, NULL, &nmreq, NFSPROC_READ, thd, cred, NULL, 0, cb, reqp);
2944 nfsmout:
2945 nfsm_chain_cleanup(&nmreq);
2946 return error;
2947 }
2948
2949 int
2950 nfs3_read_rpc_async_finish(
2951 nfsnode_t np,
2952 struct nfsreq *req,
2953 uio_t uio,
2954 size_t *lenp,
2955 int *eofp)
2956 {
2957 int error = 0, lockerror, nfsvers, status, eof = 0;
2958 size_t retlen = 0;
2959 uint64_t xid;
2960 struct nfsmount *nmp;
2961 struct nfsm_chain nmrep;
2962
2963 nmp = NFSTONMP(np);
2964 if (nfs_mount_gone(nmp)) {
2965 nfs_request_async_cancel(req);
2966 return ENXIO;
2967 }
2968 nfsvers = nmp->nm_vers;
2969
2970 nfsm_chain_null(&nmrep);
2971
2972 error = nfs_request_async_finish(req, &nmrep, &xid, &status);
2973 if (error == EINPROGRESS) { /* async request restarted */
2974 return error;
2975 }
2976
2977 if ((lockerror = nfs_node_lock(np))) {
2978 error = lockerror;
2979 }
2980 if (nfsvers == NFS_VER3) {
2981 nfsm_chain_postop_attr_update(error, &nmrep, np, &xid);
2982 }
2983 if (!error) {
2984 error = status;
2985 }
2986 if (nfsvers == NFS_VER3) {
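/* V3 READ reply: skip the count word; the opaque data length read below gives the bytes returned */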
2987 nfsm_chain_adv(error, &nmrep, NFSX_UNSIGNED);
2988 nfsm_chain_get_32(error, &nmrep, eof);
2989 } else {
2990 nfsm_chain_loadattr(error, &nmrep, np, nfsvers, &xid);
2991 }
2992 if (!lockerror) {
2993 nfs_node_unlock(np);
2994 }
2995 nfsm_chain_get_32(error, &nmrep, retlen);
2996 if ((nfsvers == NFS_VER2) && (retlen > *lenp)) {
2997 error = EBADRPC;
2998 }
2999 nfsmout_if(error);
3000 error = nfsm_chain_get_uio(&nmrep, MIN(retlen, *lenp), uio);
3001 if (eofp) {
3002 if (nfsvers == NFS_VER3) {
3003 if (!eof && !retlen) {
3004 eof = 1;
3005 }
3006 } else if (retlen < *lenp) {
3007 eof = 1;
3008 }
3009 *eofp = eof;
3010 }
3011 *lenp = MIN(retlen, *lenp);
3012 nfsmout:
3013 nfsm_chain_cleanup(&nmrep);
3014 return error;
3015 }
3016
3017 /*
3018 * NFS write call
3019 */
3020 int
3021 nfs_vnop_write(
3022 struct vnop_write_args /* {
3023 * struct vnodeop_desc *a_desc;
3024 * vnode_t a_vp;
3025 * struct uio *a_uio;
3026 * int a_ioflag;
3027 * vfs_context_t a_context;
3028 * } */*ap)
3029 {
3030 vfs_context_t ctx = ap->a_context;
3031 uio_t uio = ap->a_uio;
3032 vnode_t vp = ap->a_vp;
3033 nfsnode_t np = VTONFS(vp);
3034 int ioflag = ap->a_ioflag;
3035 struct nfsbuf *bp;
3036 struct nfsmount *nmp = VTONMP(vp);
3037 daddr64_t lbn;
3038 int biosize;
3039 int n, on, error = 0;
3040 off_t boff, start, end;
3041 uio_t auio;
3042 char auio_buf[UIO_SIZEOF(1)];
3043 thread_t thd;
3044 kauth_cred_t cred;
3045
3046 FSDBG_TOP(515, np, uio_offset(uio), uio_resid(uio), ioflag);
3047
3048 if (vnode_vtype(vp) != VREG) {
3049 FSDBG_BOT(515, np, uio_offset(uio), uio_resid(uio), EIO);
3050 return EIO;
3051 }
3052
3053 thd = vfs_context_thread(ctx);
3054 cred = vfs_context_ucred(ctx);
3055
3056 nfs_data_lock(np, NFS_DATA_LOCK_SHARED);
3057
3058 if ((error = nfs_node_lock(np))) {
3059 nfs_data_unlock(np);
3060 FSDBG_BOT(515, np, uio_offset(uio), uio_resid(uio), error);
3061 return error;
3062 }
3063 np->n_wrbusy++;
3064
3065 if (np->n_flag & NWRITEERR) {
3066 error = np->n_error;
3067 np->n_flag &= ~NWRITEERR;
3068 }
3069 if (np->n_flag & NNEEDINVALIDATE) {
3070 np->n_flag &= ~NNEEDINVALIDATE;
3071 nfs_node_unlock(np);
3072 nfs_data_unlock(np);
3073 nfs_vinvalbuf(vp, V_SAVE | V_IGNORE_WRITEERR, ctx, 1);
3074 nfs_data_lock(np, NFS_DATA_LOCK_SHARED);
3075 } else {
3076 nfs_node_unlock(np);
3077 }
3078 if (error) {
3079 goto out;
3080 }
3081
3082 biosize = nmp->nm_biosize;
3083
3084 if (ioflag & (IO_APPEND | IO_SYNC)) {
3085 nfs_node_lock_force(np);
3086 if (np->n_flag & NMODIFIED) {
3087 NATTRINVALIDATE(np);
3088 nfs_node_unlock(np);
3089 nfs_data_unlock(np);
3090 error = nfs_vinvalbuf(vp, V_SAVE, ctx, 1);
3091 nfs_data_lock(np, NFS_DATA_LOCK_SHARED);
3092 if (error) {
3093 FSDBG(515, np, uio_offset(uio), 0x10bad01, error);
3094 goto out;
3095 }
3096 } else {
3097 nfs_node_unlock(np);
3098 }
3099 if (ioflag & IO_APPEND) {
3100 nfs_data_unlock(np);
3101 /* nfs_getattr() will check changed and purge caches */
3102 error = nfs_getattr(np, NULL, ctx, NGA_UNCACHED);
3103 /* we'll be extending the file, so take the data lock exclusive */
3104 nfs_data_lock(np, NFS_DATA_LOCK_EXCLUSIVE);
3105 if (error) {
3106 FSDBG(515, np, uio_offset(uio), 0x10bad02, error);
3107 goto out;
3108 }
3109 uio_setoffset(uio, np->n_size);
3110 }
3111 }
3112 if (uio_offset(uio) < 0) {
3113 error = EINVAL;
3114 FSDBG_BOT(515, np, uio_offset(uio), 0xbad0ff, error);
3115 goto out;
3116 }
3117 if (uio_resid(uio) == 0) {
3118 goto out;
3119 }
3120
3121 if (((uio_offset(uio) + uio_resid(uio)) > (off_t)np->n_size) && !(ioflag & IO_APPEND)) {
3122 /*
3123 * It looks like we'll be extending the file, so take the data lock exclusive.
3124 */
3125 nfs_data_unlock(np);
3126 nfs_data_lock(np, NFS_DATA_LOCK_EXCLUSIVE);
3127
3128 /*
3129 * Also, if the write begins after the previous EOF buffer, make sure to zero
3130 * and validate the new bytes in that buffer.
3131 */
3132 struct nfsbuf *eofbp = NULL;
3133 daddr64_t eofbn = np->n_size / biosize;
3134 int eofoff = np->n_size % biosize;
3135 lbn = uio_offset(uio) / biosize;
3136
3137 if (eofoff && (eofbn < lbn)) {
3138 if ((error = nfs_buf_get(np, eofbn, biosize, thd, NBLK_WRITE | NBLK_ONLYVALID, &eofbp))) {
3139 goto out;
3140 }
3141 np->n_size += (biosize - eofoff);
3142 nfs_node_lock_force(np);
3143 CLR(np->n_flag, NUPDATESIZE);
3144 np->n_flag |= NMODIFIED;
3145 nfs_node_unlock(np);
3146 FSDBG(516, np, np->n_size, np->n_vattr.nva_size, 0xf00d0001);
3147 ubc_setsize(vp, (off_t)np->n_size); /* XXX errors */
3148 if (eofbp) {
3149 /*
3150 * For the old last page, don't zero bytes if there
3151 * are invalid bytes in that page (i.e. the page isn't
3152 * currently valid).
3153 * For pages after the old last page, zero them and
3154 * mark them as valid.
3155 */
3156 char *d;
3157 int i;
3158 if (ioflag & IO_NOCACHE) {
3159 SET(eofbp->nb_flags, NB_NOCACHE);
3160 }
3161 NFS_BUF_MAP(eofbp);
3162 FSDBG(516, eofbp, eofoff, biosize - eofoff, 0xe0fff01e);
3163 d = eofbp->nb_data;
3164 i = eofoff / PAGE_SIZE;
3165 while (eofoff < biosize) {
3166 int poff = eofoff & PAGE_MASK;
3167 if (!poff || NBPGVALID(eofbp, i)) {
3168 bzero(d + eofoff, PAGE_SIZE - poff);
3169 NBPGVALID_SET(eofbp, i);
3170 }
3171 eofoff += PAGE_SIZE - poff;
3172 i++;
3173 }
3174 nfs_buf_release(eofbp, 1);
3175 }
3176 }
3177 }
3178
3179 do {
3180 OSAddAtomic64(1, &nfsstats.biocache_writes);
3181 lbn = uio_offset(uio) / biosize;
3182 on = uio_offset(uio) % biosize;
3183 n = biosize - on;
3184 if (uio_resid(uio) < n) {
3185 n = uio_resid(uio);
3186 }
3187 again:
3188 /*
3189 * Get a cache block for writing. The range to be written is
3190 * (off..off+n) within the block. We ensure that the block
3191 * either has no dirty region or that the given range is
3192 * contiguous with the existing dirty region.
3193 */
3194 error = nfs_buf_get(np, lbn, biosize, thd, NBLK_WRITE, &bp);
3195 if (error) {
3196 goto out;
3197 }
3198 /* map the block because we know we're going to write to it */
3199 NFS_BUF_MAP(bp);
3200
3201 if (ioflag & IO_NOCACHE) {
3202 SET(bp->nb_flags, NB_NOCACHE);
3203 }
3204
3205 if (!IS_VALID_CRED(bp->nb_wcred)) {
3206 kauth_cred_ref(cred);
3207 bp->nb_wcred = cred;
3208 }
3209
3210 /*
3211 * If there's already a dirty range AND dirty pages in this block we
3212 * need to send a commit AND write the dirty pages before continuing.
3213 *
3214 * If there's already a dirty range OR dirty pages in this block
3215 * and the new write range is not contiguous with the existing range,
3216 * then force the buffer to be written out now.
3217 * (We used to just extend the dirty range to cover the valid,
3218 * but unwritten, data in between also. But writing ranges
3219 * of data that weren't actually written by an application
3220 * risks overwriting some other client's data with stale data
3221 * that's just masquerading as new written data.)
3222 */
3223 if (bp->nb_dirtyend > 0) {
3224 if (on > bp->nb_dirtyend || (on + n) < bp->nb_dirtyoff || bp->nb_dirty) {
3225 FSDBG(515, np, uio_offset(uio), bp, 0xd15c001);
3226 /* write/commit buffer "synchronously" */
3227 /* (NB_STABLE indicates that data writes should be FILESYNC) */
3228 CLR(bp->nb_flags, (NB_DONE | NB_ERROR | NB_INVAL));
3229 SET(bp->nb_flags, (NB_ASYNC | NB_STABLE));
3230 error = nfs_buf_write(bp);
3231 if (error) {
3232 goto out;
3233 }
3234 goto again;
3235 }
3236 } else if (bp->nb_dirty) {
3237 int firstpg, lastpg;
3238 u_int32_t pagemask;
3239 /* calculate write range pagemask */
3240 firstpg = on / PAGE_SIZE;
3241 lastpg = (on + n - 1) / PAGE_SIZE;
3242 pagemask = ((1 << (lastpg + 1)) - 1) & ~((1 << firstpg) - 1);
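/* e.g. firstpg 1, lastpg 2: ((1 << 3) - 1) & ~((1 << 1) - 1) = 0b111 & ~0b001 = 0b110 */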
3243 /* check if there are dirty pages outside the write range */
3244 if (bp->nb_dirty & ~pagemask) {
3245 FSDBG(515, np, uio_offset(uio), bp, 0xd15c002);
3246 /* write/commit buffer "synchronously" */
3247 /* (NB_STABLE indicates that data writes should be FILESYNC) */
3248 CLR(bp->nb_flags, (NB_DONE | NB_ERROR | NB_INVAL));
3249 SET(bp->nb_flags, (NB_ASYNC | NB_STABLE));
3250 error = nfs_buf_write(bp);
3251 if (error) {
3252 goto out;
3253 }
3254 goto again;
3255 }
3256 /* if the first or last pages are already dirty */
3257 /* make sure that the dirty range encompasses those pages */
3258 if (NBPGDIRTY(bp, firstpg) || NBPGDIRTY(bp, lastpg)) {
3259 FSDBG(515, np, uio_offset(uio), bp, 0xd15c003);
3260 bp->nb_dirtyoff = min(on, firstpg * PAGE_SIZE);
3261 if (NBPGDIRTY(bp, lastpg)) {
3262 bp->nb_dirtyend = (lastpg + 1) * PAGE_SIZE;
3263 /* clip to EOF */
3264 if (NBOFF(bp) + bp->nb_dirtyend > (off_t)np->n_size) {
3265 bp->nb_dirtyend = np->n_size - NBOFF(bp);
3266 if (bp->nb_dirtyoff >= bp->nb_dirtyend) {
3267 bp->nb_dirtyoff = bp->nb_dirtyend = 0;
3268 }
3269 }
3270 } else {
3271 bp->nb_dirtyend = on + n;
3272 }
3273 }
3274 }
3275
3276 /*
3277 * Are we extending the size of the file with this write?
3278 * If so, update file size now that we have the block.
3279 * If there was a partial buf at the old eof, validate
3280 * and zero the new bytes.
3281 */
3282 if ((uio_offset(uio) + n) > (off_t)np->n_size) {
3283 daddr64_t eofbn = np->n_size / biosize;
3284 int neweofoff = (uio_offset(uio) + n) % biosize;
3285
3286 FSDBG(515, 0xb1ffa000, uio_offset(uio) + n, eofoff, neweofoff);
3287
3288 /* if we're extending within the same last block */
3289 /* and the block is flagged as being cached... */
3290 if ((lbn == eofbn) && ISSET(bp->nb_flags, NB_CACHE)) {
3291 /* ...check that all pages in buffer are valid */
3292 int endpg = ((neweofoff ? neweofoff : biosize) - 1) / PAGE_SIZE;
3293 u_int32_t pagemask;
3294 /* pagemask only has to extend to last page being written to */
3295 pagemask = (1 << (endpg + 1)) - 1;
3296 FSDBG(515, 0xb1ffa001, bp->nb_valid, pagemask, 0);
3297 if ((bp->nb_valid & pagemask) != pagemask) {
3298 /* zerofill any hole */
3299 if (on > bp->nb_validend) {
3300 int i;
3301 for (i = bp->nb_validend / PAGE_SIZE; i <= (on - 1) / PAGE_SIZE; i++) {
3302 NBPGVALID_SET(bp, i);
3303 }
3304 NFS_BUF_MAP(bp);
3305 FSDBG(516, bp, bp->nb_validend, on - bp->nb_validend, 0xf01e);
3306 bzero((char *)bp->nb_data + bp->nb_validend,
3307 on - bp->nb_validend);
3308 }
3309 /* zerofill any trailing data in the last page */
3310 if (neweofoff) {
3311 NFS_BUF_MAP(bp);
3312 FSDBG(516, bp, neweofoff, PAGE_SIZE - (neweofoff & PAGE_MASK), 0xe0f);
3313 bzero((char *)bp->nb_data + neweofoff,
3314 PAGE_SIZE - (neweofoff & PAGE_MASK));
3315 }
3316 }
3317 }
3318 np->n_size = uio_offset(uio) + n;
3319 nfs_node_lock_force(np);
3320 CLR(np->n_flag, NUPDATESIZE);
3321 np->n_flag |= NMODIFIED;
3322 nfs_node_unlock(np);
3323 FSDBG(516, np, np->n_size, np->n_vattr.nva_size, 0xf00d0001);
3324 ubc_setsize(vp, (off_t)np->n_size); /* XXX errors */
3325 }
3326 /*
3327 * If dirtyend exceeds file size, chop it down. This should
3328 * not occur unless there is a race.
3329 */
3330 if (NBOFF(bp) + bp->nb_dirtyend > (off_t)np->n_size) {
3331 bp->nb_dirtyend = np->n_size - NBOFF(bp);
3332 if (bp->nb_dirtyoff >= bp->nb_dirtyend) {
3333 bp->nb_dirtyoff = bp->nb_dirtyend = 0;
3334 }
3335 }
3336 /*
3337 * UBC doesn't handle partial pages, so we need to make sure
3338 * that any pages left in the page cache are completely valid.
3339 *
3340 * Writes that are smaller than a block are delayed if they
3341 * don't extend to the end of the block.
3342 *
3343 * If the block isn't (completely) cached, we may need to read
3344 * in some parts of pages that aren't covered by the write.
3345 * If the write offset (on) isn't page aligned, we'll need to
3346 * read the start of the first page being written to. Likewise,
3347 * if the offset of the end of the write (on+n) isn't page aligned,
3348 * we'll need to read the end of the last page being written to.
3349 *
3350 * Notes:
3351 * We don't want to read anything we're just going to write over.
3352 * We don't want to read anything we're just going to drop when the
3353 * I/O is complete (i.e. don't do reads for NOCACHE requests).
3354 * We don't want to issue multiple I/Os if we don't have to
3355 * (because they're synchronous rpcs).
3356 * We don't want to read anything we already have modified in the
3357 * page cache.
3358 */
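/*
 * Illustrative walkthrough (annotation, not from the original source),
 * assuming PAGE_SIZE is 4096: for a write at on = 5000 with n = 2000,
 *	firstpg = 1, firstpgoff = 904	(5000 = 4096 + 904)
 *	lastpg = 1, lastpgoff = 2904	(on + n = 7000 = 4096 + 2904)
 * If page 1 isn't valid yet, start/end become 4096/8192 and a single
 * read RPC fills that span; the middle bytes (5000..6999) are simply
 * overwritten by the uiomove() further below.
 */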
3359 if (!ISSET(bp->nb_flags, NB_CACHE) && (n < biosize)) {
3360 int firstpg, lastpg, dirtypg;
3361 int firstpgoff, lastpgoff;
3362 start = end = -1;
3363 firstpg = on / PAGE_SIZE;
3364 firstpgoff = on & PAGE_MASK;
3365 lastpg = (on + n - 1) / PAGE_SIZE;
3366 lastpgoff = (on + n) & PAGE_MASK;
3367 if (firstpgoff && !NBPGVALID(bp, firstpg)) {
3368 /* need to read start of first page */
3369 start = firstpg * PAGE_SIZE;
3370 end = start + firstpgoff;
3371 }
3372 if (lastpgoff && !NBPGVALID(bp, lastpg)) {
3373 /* need to read end of last page */
3374 if (start < 0) {
3375 start = (lastpg * PAGE_SIZE) + lastpgoff;
3376 }
3377 end = (lastpg + 1) * PAGE_SIZE;
3378 }
3379 if (ISSET(bp->nb_flags, NB_NOCACHE)) {
3380 /*
3381 * For nocache writes, if there is any partial page at the
3382 * start or end of the write range, then we do the write
3383 * synchronously to make sure that we can drop the data
3384 * from the cache as soon as the WRITE finishes. Normally,
3385 * we would do an unstable write and not drop the data until
3386 * it was committed. But doing that here would risk allowing
3387 * invalid data to be read from the cache between the WRITE
3388 * and the COMMIT.
3389 * (NB_STABLE indicates that data writes should be FILESYNC)
3390 */
3391 if (end > start) {
3392 SET(bp->nb_flags, NB_STABLE);
3393 }
3394 goto skipread;
3395 }
3396 if (end > start) {
3397 /* need to read the data in range: start...end-1 */
3398
3399 /* first, check for dirty pages in between */
3400 /* if there are, we'll have to do two reads because */
3401 /* we don't want to overwrite the dirty pages. */
3402 for (dirtypg = start / PAGE_SIZE; dirtypg <= (end - 1) / PAGE_SIZE; dirtypg++) {
3403 if (NBPGDIRTY(bp, dirtypg)) {
3404 break;
3405 }
3406 }
3407
3408 /* if start is at beginning of page, try */
3409 /* to get any preceding pages as well. */
3410 if (!(start & PAGE_MASK)) {
3411 /* stop at next dirty/valid page or start of block */
3412 for (; start > 0; start -= PAGE_SIZE) {
3413 if (NBPGVALID(bp, ((start - 1) / PAGE_SIZE))) {
3414 break;
3415 }
3416 }
3417 }
3418
3419 NFS_BUF_MAP(bp);
3420 /* setup uio for read(s) */
3421 boff = NBOFF(bp);
3422 auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ,
3423 &auio_buf, sizeof(auio_buf));
3424
3425 if (dirtypg <= (end - 1) / PAGE_SIZE) {
3426 /* there's a dirty page in the way, so just do two reads */
3427 /* we'll read the preceding data here */
3428 uio_reset(auio, boff + start, UIO_SYSSPACE, UIO_READ);
3429 uio_addiov(auio, CAST_USER_ADDR_T(bp->nb_data + start), on - start);
3430 error = nfs_read_rpc(np, auio, ctx);
3431 if (error) {
3432 /* couldn't read the data, so treat buffer as synchronous NOCACHE */
3433 SET(bp->nb_flags, (NB_NOCACHE | NB_STABLE));
3434 goto skipread;
3435 }
3436 if (uio_resid(auio) > 0) {
3437 FSDBG(516, bp, (caddr_t)uio_curriovbase(auio) - bp->nb_data, uio_resid(auio), 0xd00dee01);
3438 bzero(CAST_DOWN(caddr_t, uio_curriovbase(auio)), uio_resid(auio));
3439 }
3440 if (!error) {
3441 /* update validoff/validend if necessary */
3442 if ((bp->nb_validoff < 0) || (bp->nb_validoff > start)) {
3443 bp->nb_validoff = start;
3444 }
3445 if ((bp->nb_validend < 0) || (bp->nb_validend < on)) {
3446 bp->nb_validend = on;
3447 }
3448 if ((off_t)np->n_size > boff + bp->nb_validend) {
3449 bp->nb_validend = min(np->n_size - (boff + start), biosize);
3450 }
3451 /* validate any pages before the write offset */
3452 for (; start < on / PAGE_SIZE; start += PAGE_SIZE) {
3453 NBPGVALID_SET(bp, start / PAGE_SIZE);
3454 }
3455 }
3456 /* adjust start to read any trailing data */
3457 start = on + n;
3458 }
3459
3460 /* if end is at end of page, try to */
3461 /* get any following pages as well. */
3462 if (!(end & PAGE_MASK)) {
3463 /* stop at next valid page or end of block */
3464 for (; end < biosize; end += PAGE_SIZE) {
3465 if (NBPGVALID(bp, end / PAGE_SIZE)) {
3466 break;
3467 }
3468 }
3469 }
3470
3471 if (((boff + start) >= (off_t)np->n_size) ||
3472 ((start >= on) && ((boff + on + n) >= (off_t)np->n_size))) {
3473 /*
3474 * Either this entire read is beyond the current EOF
3475 * or the range that we won't be modifying (on+n...end)
3476 * is all beyond the current EOF.
3477 * No need to make a trip across the network to
3478 * read nothing. So, just zero the buffer instead.
3479 */
3480 FSDBG(516, bp, start, end - start, 0xd00dee00);
3481 bzero(bp->nb_data + start, end - start);
3482 error = 0;
3483 } else {
3484 /* now we'll read the (rest of the) data */
3485 uio_reset(auio, boff + start, UIO_SYSSPACE, UIO_READ);
3486 uio_addiov(auio, CAST_USER_ADDR_T(bp->nb_data + start), end - start);
3487 error = nfs_read_rpc(np, auio, ctx);
3488 if (error) {
3489 /* couldn't read the data, so treat buffer as synchronous NOCACHE */
3490 SET(bp->nb_flags, (NB_NOCACHE | NB_STABLE));
3491 goto skipread;
3492 }
3493 if (uio_resid(auio) > 0) {
3494 FSDBG(516, bp, (caddr_t)uio_curriovbase(auio) - bp->nb_data, uio_resid(auio), 0xd00dee02);
3495 bzero(CAST_DOWN(caddr_t, uio_curriovbase(auio)), uio_resid(auio));
3496 }
3497 }
3498 if (!error) {
3499 /* update validoff/validend if necessary */
3500 if ((bp->nb_validoff < 0) || (bp->nb_validoff > start)) {
3501 bp->nb_validoff = start;
3502 }
3503 if ((bp->nb_validend < 0) || (bp->nb_validend < end)) {
3504 bp->nb_validend = end;
3505 }
3506 if ((off_t)np->n_size > boff + bp->nb_validend) {
3507 bp->nb_validend = min(np->n_size - (boff + start), biosize);
3508 }
3509 /* validate any pages before the write offset's page */
3510 for (; start < (off_t)trunc_page_32(on); start += PAGE_SIZE) {
3511 NBPGVALID_SET(bp, start / PAGE_SIZE);
3512 }
3513 /* validate any pages after the range of pages being written to */
3514 for (; (end - 1) > (off_t)round_page_32(on + n - 1); end -= PAGE_SIZE) {
3515 NBPGVALID_SET(bp, (end - 1) / PAGE_SIZE);
3516 }
3517 }
3518 /* Note: pages being written to will be validated when written */
3519 }
3520 }
3521 skipread:
3522
3523 if (ISSET(bp->nb_flags, NB_ERROR)) {
3524 error = bp->nb_error;
3525 nfs_buf_release(bp, 1);
3526 goto out;
3527 }
3528
3529 nfs_node_lock_force(np);
3530 np->n_flag |= NMODIFIED;
3531 nfs_node_unlock(np);
3532
3533 NFS_BUF_MAP(bp);
3534 error = uiomove((char *)bp->nb_data + on, n, uio);
3535 if (error) {
3536 SET(bp->nb_flags, NB_ERROR);
3537 nfs_buf_release(bp, 1);
3538 goto out;
3539 }
3540
3541 /* validate any pages written to */
3542 start = on & ~PAGE_MASK;
3543 for (; start < on + n; start += PAGE_SIZE) {
3544 NBPGVALID_SET(bp, start / PAGE_SIZE);
3545 /*
3546 * This may seem a little weird, but we don't actually set the
3547 * dirty bits for writes. This is because we keep the dirty range
3548 * in the nb_dirtyoff/nb_dirtyend fields. Also, particularly for
3549 * delayed writes, when we give the pages back to the VM we don't
3550 * want to keep them marked dirty, because when we later write the
3551 * buffer we won't be able to tell which pages were written dirty
3552 * and which pages were mmapped and dirtied.
3553 */
3554 }
3555 if (bp->nb_dirtyend > 0) {
3556 bp->nb_dirtyoff = min(on, bp->nb_dirtyoff);
3557 bp->nb_dirtyend = max((on + n), bp->nb_dirtyend);
3558 } else {
3559 bp->nb_dirtyoff = on;
3560 bp->nb_dirtyend = on + n;
3561 }
3562 if (bp->nb_validend <= 0 || bp->nb_validend < bp->nb_dirtyoff ||
3563 bp->nb_validoff > bp->nb_dirtyend) {
3564 bp->nb_validoff = bp->nb_dirtyoff;
3565 bp->nb_validend = bp->nb_dirtyend;
3566 } else {
3567 bp->nb_validoff = min(bp->nb_validoff, bp->nb_dirtyoff);
3568 bp->nb_validend = max(bp->nb_validend, bp->nb_dirtyend);
3569 }
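/*
 * Annotation (not from the original source): example of the range
 * merging above. If a previous delayed write left nb_dirtyoff = 0,
 * nb_dirtyend = 512 and this write has on = 1024, n = 512, the dirty
 * range widens to min(1024, 0)..max(1536, 512) = 0..1536. The gap
 * bytes (512..1023) will be written out with the rest of the span,
 * which is exactly why the valid range is stretched to at least cover
 * the dirty range: a flush must never push bytes that were never made
 * valid.
 */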
3570 if (!ISSET(bp->nb_flags, NB_CACHE)) {
3571 nfs_buf_normalize_valid_range(np, bp);
3572 }
3573
3574 /*
3575 * Since this block is being modified, it must be written
3576 * again and not just committed.
3577 */
3578 if (ISSET(bp->nb_flags, NB_NEEDCOMMIT)) {
3579 nfs_node_lock_force(np);
3580 if (ISSET(bp->nb_flags, NB_NEEDCOMMIT)) {
3581 np->n_needcommitcnt--;
3582 CHECK_NEEDCOMMITCNT(np);
3583 }
3584 CLR(bp->nb_flags, NB_NEEDCOMMIT);
3585 nfs_node_unlock(np);
3586 }
3587
3588 if (ioflag & IO_SYNC) {
3589 error = nfs_buf_write(bp);
3590 if (error) {
3591 goto out;
3592 }
3593 } else if (((n + on) == biosize) || (ioflag & IO_APPEND) ||
3594 (ioflag & IO_NOCACHE) || ISSET(bp->nb_flags, NB_NOCACHE)) {
3595 SET(bp->nb_flags, NB_ASYNC);
3596 error = nfs_buf_write(bp);
3597 if (error) {
3598 goto out;
3599 }
3600 } else {
3601 /* If the block wasn't already delayed: charge for the write */
3602 if (!ISSET(bp->nb_flags, NB_DELWRI)) {
3603 proc_t p = vfs_context_proc(ctx);
3604 if (p && p->p_stats) {
3605 OSIncrementAtomicLong(&p->p_stats->p_ru.ru_oublock);
3606 }
3607 }
3608 nfs_buf_write_delayed(bp);
3609 }
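/*
 * Annotation (not from the original source): the chain above picks one
 * of three strategies. IO_SYNC waits for the WRITE to finish; a fully
 * written block, an append, or a nocache write is issued immediately
 * but asynchronously; anything else (a partial, cacheable block) is
 * marked delayed-write (NB_DELWRI) so a later flush can coalesce
 * adjacent small writes into fewer RPCs.
 */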
3610
3611
3612 if (np->n_needcommitcnt >= NFS_A_LOT_OF_NEEDCOMMITS) {
3613 nfs_flushcommits(np, 1);
3614 }
3615 } while (uio_resid(uio) > 0 && n > 0);
3616
3617 out:
3618 nfs_node_lock_force(np);
3619 np->n_wrbusy--;
3620 nfs_node_unlock(np);
3621 nfs_data_unlock(np);
3622 FSDBG_BOT(515, np, uio_offset(uio), uio_resid(uio), error);
3623 return error;
3624 }
3625
3626
3627 /*
3628 * NFS write call
3629 */
3630 int
3631 nfs_write_rpc(
3632 nfsnode_t np,
3633 uio_t uio,
3634 vfs_context_t ctx,
3635 int *iomodep,
3636 uint64_t *wverfp)
3637 {
3638 return nfs_write_rpc2(np, uio, vfs_context_thread(ctx), vfs_context_ucred(ctx), iomodep, wverfp);
3639 }
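/*
 * Hypothetical caller sketch (annotation, not from the original source;
 * shown only to illustrate the in/out parameters):
 *
 *	int iomode = NFS_WRITE_UNSTABLE;	// weakest acceptable level
 *	uint64_t wverf = 0;
 *	error = nfs_write_rpc(np, uio, ctx, &iomode, &wverf);
 *	// on return, iomode holds the weakest stability level the server
 *	// actually granted; if still UNSTABLE, the caller must COMMIT
 *	// later and compare wverf against subsequent verifiers.
 */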
3640
3641 int
3642 nfs_write_rpc2(
3643 nfsnode_t np,
3644 uio_t uio,
3645 thread_t thd,
3646 kauth_cred_t cred,
3647 int *iomodep,
3648 uint64_t *wverfp)
3649 {
3650 struct nfsmount *nmp;
3651 int error = 0, nfsvers;
3652 int wverfset, commit, committed;
3653 uint64_t wverf = 0, wverf2;
3654 size_t nmwsize, totalsize, tsiz, len, rlen;
3655 struct nfsreq rq, *req = &rq;
3656 #if CONFIG_NFS4
3657 uint32_t stategenid = 0, restart = 0;
3658 #endif
3659 uint32_t vrestart = 0;
3660 uio_t uio_save = NULL;
3661
3662 #if DIAGNOSTIC
3663 /* XXX limitation based on need to back up uio on short write */
3664 if (uio_iovcnt(uio) != 1) {
3665 panic("nfs_write_rpc2: iovcnt > 1");
3666 }
3667 #endif
3668 FSDBG_TOP(537, np, uio_offset(uio), uio_resid(uio), *iomodep);
3669 nmp = NFSTONMP(np);
3670 if (nfs_mount_gone(nmp)) {
3671 return ENXIO;
3672 }
3673 nfsvers = nmp->nm_vers;
3674 nmwsize = nmp->nm_wsize;
3675
3676 wverfset = 0;
3677 committed = NFS_WRITE_FILESYNC;
3678
3679 totalsize = tsiz = uio_resid(uio);
3680 if ((nfsvers == NFS_VER2) && ((uint64_t)(uio_offset(uio) + tsiz) > 0xffffffffULL)) {
3681 FSDBG_BOT(537, np, uio_offset(uio), uio_resid(uio), EFBIG);
3682 return EFBIG;
3683 }
3684
3685 uio_save = uio_duplicate(uio);
3686 if (uio_save == NULL) {
3687 return EIO;
3688 }
3689
3690 while (tsiz > 0) {
3691 len = (tsiz > nmwsize) ? nmwsize : tsiz;
3692 FSDBG(537, np, uio_offset(uio), len, 0);
3693 if (np->n_flag & NREVOKE) {
3694 error = EIO;
3695 break;
3696 }
3697 #if CONFIG_NFS4
3698 if (nmp->nm_vers >= NFS_VER4) {
3699 stategenid = nmp->nm_stategenid;
3700 }
3701 #endif
3702 error = nmp->nm_funcs->nf_write_rpc_async(np, uio, len, thd, cred, *iomodep, NULL, &req);
3703 if (!error) {
3704 error = nmp->nm_funcs->nf_write_rpc_async_finish(np, req, &commit, &rlen, &wverf2);
3705 }
3706 nmp = NFSTONMP(np);
3707 if (nfs_mount_gone(nmp)) {
3708 error = ENXIO;
3709 }
3710 #if CONFIG_NFS4
3711 if (nmp && (nmp->nm_vers >= NFS_VER4) && nfs_mount_state_error_should_restart(error) &&
3712 (++restart <= nfs_mount_state_max_restarts(nmp))) { /* guard against no progress */
3713 lck_mtx_lock(&nmp->nm_lock);
3714 if ((error != NFSERR_GRACE) && (stategenid == nmp->nm_stategenid)) {
3715 NP(np, "nfs_write_rpc: error %d, initiating recovery", error);
3716 nfs_need_recover(nmp, error);
3717 }
3718 lck_mtx_unlock(&nmp->nm_lock);
3719 if (np->n_flag & NREVOKE) {
3720 error = EIO;
3721 } else {
3722 if (error == NFSERR_GRACE) {
3723 tsleep(&nmp->nm_state, (PZERO - 1), "nfsgrace", 2 * hz);
3724 }
3725 if (!(error = nfs_mount_state_wait_for_recovery(nmp))) {
3726 continue;
3727 }
3728 }
3729 }
3730 #endif
3731 if (error) {
3732 break;
3733 }
3734 if (nfsvers == NFS_VER2) {
3735 tsiz -= len;
3736 continue;
3737 }
3738
3739 /* check for a short write */
3740 if (rlen < len) {
3741 /* Reset the uio to reflect the actual transfer */
3742 *uio = *uio_save;
3743 uio_update(uio, totalsize - (tsiz - rlen));
3744 len = rlen;
3745 }
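/*
 * Annotation (not from the original source): short-write example for
 * the rewind above. If len = 32768 but the server only accepted
 * rlen = 16384, the uio is restored from uio_save and advanced by
 * totalsize - (tsiz - rlen), i.e. to just past the bytes the server
 * actually took, and len is trimmed so the accounting below only
 * subtracts what was really written.
 */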
3746
3747 /* return lowest commit level returned */
3748 if (commit < committed) {
3749 committed = commit;
3750 }
3751
3752 tsiz -= len;
3753
3754 /* check write verifier */
3755 if (!wverfset) {
3756 wverf = wverf2;
3757 wverfset = 1;
3758 } else if (wverf != wverf2) {
3759 /* verifier changed, so we need to restart all the writes */
3760 if (++vrestart > 100) {
3761 /* give up after too many restarts */
3762 error = EIO;
3763 break;
3764 }
3765 *uio = *uio_save; // Reset the uio back to the start
3766 committed = NFS_WRITE_FILESYNC;
3767 wverfset = 0;
3768 tsiz = totalsize;
3769 }
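/*
 * Annotation (not from the original source): the verifier check above
 * protects UNSTABLE writes against a server reboot. If the server
 * restarts mid-transfer, the verifier in the next reply changes, so
 * the entire range is re-sent from the saved uio; after 100 such
 * restarts we give up with EIO rather than loop forever.
 */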
3770 }
3771 if (uio_save) {
3772 uio_free(uio_save);
3773 }
3774 if (wverfset && wverfp) {
3775 *wverfp = wverf;
3776 }
3777 *iomodep = committed;
3778 if (error) {
3779 uio_setresid(uio, tsiz);
3780 }
3781 FSDBG_BOT(537, np, committed, uio_resid(uio), error);
3782 return error;
3783 }
3784
3785 int
3786 nfs3_write_rpc_async(
3787 nfsnode_t np,
3788 uio_t uio,
3789 size_t len,
3790 thread_t thd,
3791 kauth_cred_t cred,
3792 int iomode,
3793 struct nfsreq_cbinfo *cb,
3794 struct nfsreq **reqp)
3795 {
3796 struct nfsmount *nmp;
3797 mount_t mp;
3798 int error = 0, nfsvers;
3799 struct nfsm_chain nmreq;
3800
3801 nmp = NFSTONMP(np);
3802 if (nfs_mount_gone(nmp)) {
3803 return ENXIO;
3804 }
3805 nfsvers = nmp->nm_vers;
3806
3807 /* for async mounts, don't bother sending sync write requests */
3808 if ((iomode != NFS_WRITE_UNSTABLE) && nfs_allow_async &&
3809 ((mp = NFSTOMP(np))) && (vfs_flags(mp) & MNT_ASYNC)) {
3810 iomode = NFS_WRITE_UNSTABLE;
3811 }
3812
3813 nfsm_chain_null(&nmreq);
3814 nfsm_chain_build_alloc_init(error, &nmreq,
3815 NFSX_FH(nfsvers) + 5 * NFSX_UNSIGNED + nfsm_rndup(len));
3816 nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
3817 if (nfsvers == NFS_VER3) {
3818 nfsm_chain_add_64(error, &nmreq, uio_offset(uio));
3819 nfsm_chain_add_32(error, &nmreq, len);
3820 nfsm_chain_add_32(error, &nmreq, iomode);
3821 } else {
3822 nfsm_chain_add_32(error, &nmreq, 0);
3823 nfsm_chain_add_32(error, &nmreq, uio_offset(uio));
3824 nfsm_chain_add_32(error, &nmreq, 0);
3825 }
3826 nfsm_chain_add_32(error, &nmreq, len);
3827 nfsmout_if(error);
3828 error = nfsm_chain_add_uio(&nmreq, uio, len);
3829 nfsm_chain_build_done(error, &nmreq);
3830 nfsmout_if(error);
3831 error = nfs_request_async(np, NULL, &nmreq, NFSPROC_WRITE, thd, cred, NULL, 0, cb, reqp);
3832 nfsmout:
3833 nfsm_chain_cleanup(&nmreq);
3834 return error;
3835 }
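/*
 * Annotation (not from the original source): the v3 branch above emits
 * the WRITE3args layout: file handle, 64-bit offset, count, stable_how
 * (UNSTABLE/DATA_SYNC/FILE_SYNC), then the opaque data length and the
 * data itself. The v2 branch sends the legacy beginoffset, offset,
 * totalcount fields instead, where beginoffset and totalcount are
 * ignored by servers and are sent as zero here.
 */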
3836
3837 int
3838 nfs3_write_rpc_async_finish(
3839 nfsnode_t np,
3840 struct nfsreq *req,
3841 int *iomodep,
3842 size_t *rlenp,
3843 uint64_t *wverfp)
3844 {
3845 struct nfsmount *nmp;
3846 int error = 0, lockerror = ENOENT, nfsvers, status;
3847 int updatemtime = 0, wccpostattr = 0, rlen, committed = NFS_WRITE_FILESYNC;
3848 u_int64_t xid, wverf;
3849 mount_t mp;
3850 struct nfsm_chain nmrep;
3851
3852 nmp = NFSTONMP(np);
3853 if (nfs_mount_gone(nmp)) {
3854 nfs_request_async_cancel(req);
3855 return ENXIO;
3856 }
3857 nfsvers = nmp->nm_vers;
3858
3859 nfsm_chain_null(&nmrep);
3860
3861 error = nfs_request_async_finish(req, &nmrep, &xid, &status);
3862 if (error == EINPROGRESS) { /* async request restarted */
3863 return error;
3864 }
3865 nmp = NFSTONMP(np);
3866 if (nfs_mount_gone(nmp)) {
3867 error = ENXIO;
3868 }
3869 if (!error && (lockerror = nfs_node_lock(np))) {
3870 error = lockerror;
3871 }
3872 if (nfsvers == NFS_VER3) {
3873 struct timespec premtime = { .tv_sec = 0, .tv_nsec = 0 };
3874 nfsm_chain_get_wcc_data(error, &nmrep, np, &premtime, &wccpostattr, &xid);
3875 if (nfstimespeccmp(&np->n_mtime, &premtime, ==)) {
3876 updatemtime = 1;
3877 }
3878 if (!error) {
3879 error = status;
3880 }
3881 nfsm_chain_get_32(error, &nmrep, rlen);
3882 nfsmout_if(error);
3883 *rlenp = rlen;
3884 if (rlen <= 0) {
3885 error = NFSERR_IO;
3886 }
3887 nfsm_chain_get_32(error, &nmrep, committed);
3888 nfsm_chain_get_64(error, &nmrep, wverf);
3889 nfsmout_if(error);
3890 if (wverfp) {
3891 *wverfp = wverf;
3892 }
3893 lck_mtx_lock(&nmp->nm_lock);
3894 if (!(nmp->nm_state & NFSSTA_HASWRITEVERF)) {
3895 nmp->nm_verf = wverf;
3896 nmp->nm_state |= NFSSTA_HASWRITEVERF;
3897 } else if (nmp->nm_verf != wverf) {
3898 nmp->nm_verf = wverf;
3899 }
3900 lck_mtx_unlock(&nmp->nm_lock);
3901 } else {
3902 if (!error) {
3903 error = status;
3904 }
3905 nfsm_chain_loadattr(error, &nmrep, np, nfsvers, &xid);
3906 nfsmout_if(error);
3907 }
3908 if (updatemtime) {
3909 NFS_CHANGED_UPDATE(nfsvers, np, &np->n_vattr);
3910 }
3911 nfsmout:
3912 if (!lockerror) {
3913 nfs_node_unlock(np);
3914 }
3915 nfsm_chain_cleanup(&nmrep);
3916 if ((committed != NFS_WRITE_FILESYNC) && nfs_allow_async &&
3917 ((mp = NFSTOMP(np))) && (vfs_flags(mp) & MNT_ASYNC)) {
3918 committed = NFS_WRITE_FILESYNC;
3919 }
3920 *iomodep = committed;
3921 return error;
3922 }
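/*
 * Annotation (not from the original source): committed reports the
 * stability the server granted, with FILESYNC(2) > DATASYNC(1) >
 * UNSTABLE(0); nfs_write_rpc2() keeps the minimum across a multi-RPC
 * write. The MNT_ASYNC override above deliberately reports FILESYNC
 * so async mounts skip the COMMIT step, trading safety across a
 * server crash for fewer round trips.
 */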
3923
3924 /*
3925 * NFS mknod vnode op
3926 *
3927 * For NFS v2 this is a kludge. Use a create RPC but with the IFMT bits of the
3928 * mode set to specify the file type and the size field for rdev.
3929 */
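/*
 * Annotation (not from the original source): e.g. a character device
 * with major 14, minor 3 would, under v2, be created with S_IFCHR in
 * the sattr mode and makedev(14, 3) in the size field; only v3 has a
 * real MKNOD procedure that carries the major/minor pair explicitly
 * (see the NFS_VER3 branch below).
 */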
3930 int
3931 nfs3_vnop_mknod(
3932 struct vnop_mknod_args /* {
3933 * struct vnodeop_desc *a_desc;
3934 * vnode_t a_dvp;
3935 * vnode_t *a_vpp;
3936 * struct componentname *a_cnp;
3937 * struct vnode_attr *a_vap;
3938 * vfs_context_t a_context;
3939 * } */*ap)
3940 {
3941 vnode_t dvp = ap->a_dvp;
3942 vnode_t *vpp = ap->a_vpp;
3943 struct componentname *cnp = ap->a_cnp;
3944 struct vnode_attr *vap = ap->a_vap;
3945 vfs_context_t ctx = ap->a_context;
3946 vnode_t newvp = NULL;
3947 nfsnode_t np = NULL;
3948 struct nfsmount *nmp;
3949 nfsnode_t dnp = VTONFS(dvp);
3950 struct nfs_vattr nvattr;
3951 fhandle_t fh;
3952 int error = 0, lockerror = ENOENT, busyerror = ENOENT, status, wccpostattr = 0;
3953 struct timespec premtime = { .tv_sec = 0, .tv_nsec = 0 };
3954 u_int32_t rdev;
3955 u_int64_t xid = 0, dxid;
3956 int nfsvers, gotuid, gotgid;
3957 struct nfsm_chain nmreq, nmrep;
3958 struct nfsreq rq, *req = &rq;
3959
3960 nmp = VTONMP(dvp);
3961 if (nfs_mount_gone(nmp)) {
3962 return ENXIO;
3963 }
3964 nfsvers = nmp->nm_vers;
3965
3966 if (!VATTR_IS_ACTIVE(vap, va_type)) {
3967 return EINVAL;
3968 }
3969 if (vap->va_type == VCHR || vap->va_type == VBLK) {
3970 if (!VATTR_IS_ACTIVE(vap, va_rdev)) {
3971 return EINVAL;
3972 }
3973 rdev = vap->va_rdev;
3974 } else if (vap->va_type == VFIFO || vap->va_type == VSOCK) {
3975 rdev = 0xffffffff;
3976 } else {
3977 return ENOTSUP;
3978 }
3979 if ((nfsvers == NFS_VER2) && (cnp->cn_namelen > NFS_MAXNAMLEN)) {
3980 return ENAMETOOLONG;
3981 }
3982
3983 nfs_avoid_needless_id_setting_on_create(dnp, vap, ctx);
3984
3985 VATTR_SET_SUPPORTED(vap, va_mode);
3986 VATTR_SET_SUPPORTED(vap, va_uid);
3987 VATTR_SET_SUPPORTED(vap, va_gid);
3988 VATTR_SET_SUPPORTED(vap, va_data_size);
3989 VATTR_SET_SUPPORTED(vap, va_access_time);
3990 VATTR_SET_SUPPORTED(vap, va_modify_time);
3991 gotuid = VATTR_IS_ACTIVE(vap, va_uid);
3992 gotgid = VATTR_IS_ACTIVE(vap, va_gid);
3993
3994 nfsm_chain_null(&nmreq);
3995 nfsm_chain_null(&nmrep);
3996
3997 nfsm_chain_build_alloc_init(error, &nmreq,
3998 NFSX_FH(nfsvers) + 4 * NFSX_UNSIGNED +
3999 nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(nfsvers));
4000 nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
4001 nfsm_chain_add_name(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen, nmp);
4002 if (nfsvers == NFS_VER3) {
4003 nfsm_chain_add_32(error, &nmreq, vtonfs_type(vap->va_type, nfsvers));
4004 nfsm_chain_add_v3sattr(nmp, error, &nmreq, vap);
4005 if (vap->va_type == VCHR || vap->va_type == VBLK) {
4006 nfsm_chain_add_32(error, &nmreq, major(vap->va_rdev));
4007 nfsm_chain_add_32(error, &nmreq, minor(vap->va_rdev));
4008 }
4009 } else {
4010 nfsm_chain_add_v2sattr(error, &nmreq, vap, rdev);
4011 }
4012 nfsm_chain_build_done(error, &nmreq);
4013 if (!error) {
4014 error = busyerror = nfs_node_set_busy(dnp, vfs_context_thread(ctx));
4015 }
4016 nfsmout_if(error);
4017
4018 error = nfs_request_async(dnp, NULL, &nmreq, NFSPROC_MKNOD,
4019 vfs_context_thread(ctx), vfs_context_ucred(ctx), NULL, 0, NULL, &req);
4020 if (!error) {
4021 error = nfs_request_async_finish(req, &nmrep, &xid, &status);
4022 }
4023
4024 if ((lockerror = nfs_node_lock(dnp))) {
4025 error = lockerror;
4026 }
4027 /* XXX no EEXIST kludge here? */
4028 dxid = xid;
4029 if (!error && !status) {
4030 if (dnp->n_flag & NNEGNCENTRIES) {
4031 dnp->n_flag &= ~NNEGNCENTRIES;
4032 cache_purge_negatives(dvp);
4033 }
4034 error = nfsm_chain_get_fh_attr(nmp, &nmrep, dnp, ctx, nfsvers, &xid, &fh, &nvattr);
4035 }
4036 if (nfsvers == NFS_VER3) {
4037 nfsm_chain_get_wcc_data(error, &nmrep, dnp, &premtime, &wccpostattr, &dxid);
4038 }
4039 if (!error) {
4040 error = status;
4041 }
4042 nfsmout:
4043 nfsm_chain_cleanup(&nmreq);
4044 nfsm_chain_cleanup(&nmrep);
4045
4046 if (!lockerror) {
4047 dnp->n_flag |= NMODIFIED;
4048 /* if directory hadn't changed, update namecache mtime */
4049 if (nfstimespeccmp(&dnp->n_ncmtime, &premtime, ==)) {
4050 NFS_CHANGED_UPDATE_NC(nfsvers, dnp, &dnp->n_vattr);
4051 }
4052 nfs_node_unlock(dnp);
4053 /* nfs_getattr() will check changed and purge caches */
4054 nfs_getattr(dnp, NULL, ctx, wccpostattr ? NGA_CACHED : NGA_UNCACHED);
4055 }
4056
4057 if (!error && fh.fh_len) {
4058 error = nfs_nget(NFSTOMP(dnp), dnp, cnp, fh.fh_data, fh.fh_len, &nvattr, &xid, rq.r_auth, NG_MAKEENTRY, &np);
4059 }
4060 if (!error && !np) {
4061 error = nfs_lookitup(dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx, &np);
4062 }
4063 if (!error && np) {
4064 newvp = NFSTOV(np);
4065 }
4066 if (!busyerror) {
4067 nfs_node_clear_busy(dnp);
4068 }
4069
4070 if (!error && (gotuid || gotgid) &&
4071 (!newvp || nfs_getattrcache(np, &nvattr, 0) ||
4072 (gotuid && (nvattr.nva_uid != vap->va_uid)) ||
4073 (gotgid && (nvattr.nva_gid != vap->va_gid)))) {
4074 /* clear ID bits if server didn't use them (or we can't tell) */
4075 VATTR_CLEAR_SUPPORTED(vap, va_uid);
4076 VATTR_CLEAR_SUPPORTED(vap, va_gid);
4077 }
4078 if (error) {
4079 if (newvp) {
4080 nfs_node_unlock(np);
4081 vnode_put(newvp);
4082 }
4083 } else {
4084 *vpp = newvp;
4085 nfs_node_unlock(np);
4086 }
4087 return error;
4088 }
4089
4090 static uint32_t create_verf;
4091 /*
4092 * NFS file create call
4093 */
4094 int
4095 nfs3_vnop_create(
4096 struct vnop_create_args /* {
4097 * struct vnodeop_desc *a_desc;
4098 * vnode_t a_dvp;
4099 * vnode_t *a_vpp;
4100 * struct componentname *a_cnp;
4101 * struct vnode_attr *a_vap;
4102 * vfs_context_t a_context;
4103 * } */*ap)
4104 {
4105 vfs_context_t ctx = ap->a_context;
4106 vnode_t dvp = ap->a_dvp;
4107 struct vnode_attr *vap = ap->a_vap;
4108 struct componentname *cnp = ap->a_cnp;
4109 struct nfs_vattr nvattr;
4110 fhandle_t fh;
4111 nfsnode_t np = NULL;
4112 struct nfsmount *nmp;
4113 nfsnode_t dnp = VTONFS(dvp);
4114 vnode_t newvp = NULL;
4115 int error = 0, lockerror = ENOENT, busyerror = ENOENT, status, wccpostattr = 0, fmode = 0;
4116 struct timespec premtime = { .tv_sec = 0, .tv_nsec = 0 };
4117 int nfsvers, gotuid, gotgid;
4118 u_int64_t xid, dxid;
4119 uint32_t val;
4120 struct nfsm_chain nmreq, nmrep;
4121 struct nfsreq rq, *req = &rq;
4122 struct nfs_dulookup dul;
4123 int dul_in_progress = 0;
4124 int namedattrs;
4125
4126 nmp = VTONMP(dvp);
4127 if (nfs_mount_gone(nmp)) {
4128 return ENXIO;
4129 }
4130 nfsvers = nmp->nm_vers;
4131 namedattrs = (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR);
4132
4133 if ((nfsvers == NFS_VER2) && (cnp->cn_namelen > NFS_MAXNAMLEN)) {
4134 return ENAMETOOLONG;
4135 }
4136
4137 nfs_avoid_needless_id_setting_on_create(dnp, vap, ctx);
4138
4139 VATTR_SET_SUPPORTED(vap, va_mode);
4140 VATTR_SET_SUPPORTED(vap, va_uid);
4141 VATTR_SET_SUPPORTED(vap, va_gid);
4142 VATTR_SET_SUPPORTED(vap, va_data_size);
4143 VATTR_SET_SUPPORTED(vap, va_access_time);
4144 VATTR_SET_SUPPORTED(vap, va_modify_time);
4145 gotuid = VATTR_IS_ACTIVE(vap, va_uid);
4146 gotgid = VATTR_IS_ACTIVE(vap, va_gid);
4147
4148 if (vap->va_vaflags & VA_EXCLUSIVE) {
4150 fmode |= O_EXCL;
4151 if (!VATTR_IS_ACTIVE(vap, va_access_time) || !VATTR_IS_ACTIVE(vap, va_modify_time)) {
4152 vap->va_vaflags |= VA_UTIMES_NULL;
4153 }
4154 }
4155
4156 again:
4157 error = busyerror = nfs_node_set_busy(dnp, vfs_context_thread(ctx));
4158 if (!namedattrs) {
4159 nfs_dulookup_init(&dul, dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx);
4160 }
4161
4162 nfsm_chain_null(&nmreq);
4163 nfsm_chain_null(&nmrep);
4164
4165 nfsm_chain_build_alloc_init(error, &nmreq,
4166 NFSX_FH(nfsvers) + 2 * NFSX_UNSIGNED +
4167 nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(nfsvers));
4168 nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
4169 nfsm_chain_add_name(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen, nmp);
4170 if (nfsvers == NFS_VER3) {
4171 if (fmode & O_EXCL) {
4172 nfsm_chain_add_32(error, &nmreq, NFS_CREATE_EXCLUSIVE);
4173 lck_rw_lock_shared(in_ifaddr_rwlock);
4174 if (!TAILQ_EMPTY(&in_ifaddrhead)) {
4175 val = IA_SIN(in_ifaddrhead.tqh_first)->sin_addr.s_addr;
4176 } else {
4177 val = create_verf;
4178 }
4179 lck_rw_done(in_ifaddr_rwlock);
4180 nfsm_chain_add_32(error, &nmreq, val);
4181 ++create_verf;
4182 nfsm_chain_add_32(error, &nmreq, create_verf);
4183 } else {
4184 nfsm_chain_add_32(error, &nmreq, NFS_CREATE_UNCHECKED);
4185 nfsm_chain_add_v3sattr(nmp, error, &nmreq, vap);
4186 }
4187 } else {
4188 nfsm_chain_add_v2sattr(error, &nmreq, vap, 0);
4189 }
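/*
 * Annotation (not from the original source): for O_EXCL the two words
 * added above form the 8-byte exclusive-create verifier: the primary
 * IP address (or the bare counter if no address is configured) plus
 * an incrementing counter. The server records the verifier with the
 * new file, so a retransmitted CREATE carrying the same verifier
 * succeeds instead of failing with EEXIST.
 */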
4190 nfsm_chain_build_done(error, &nmreq);
4191 nfsmout_if(error);
4192
4193 error = nfs_request_async(dnp, NULL, &nmreq, NFSPROC_CREATE,
4194 vfs_context_thread(ctx), vfs_context_ucred(ctx), NULL, 0, NULL, &req);
4195 if (!error) {
4196 if (!namedattrs) {
4197 nfs_dulookup_start(&dul, dnp, ctx);
4198 dul_in_progress = 1;
4199 }
4200 error = nfs_request_async_finish(req, &nmrep, &xid, &status);
4201 }
4202
4203 if ((lockerror = nfs_node_lock(dnp))) {
4204 error = lockerror;
4205 }
4206 dxid = xid;
4207 if (!error && !status) {
4208 if (dnp->n_flag & NNEGNCENTRIES) {
4209 dnp->n_flag &= ~NNEGNCENTRIES;
4210 cache_purge_negatives(dvp);
4211 }
4212 error = nfsm_chain_get_fh_attr(nmp, &nmrep, dnp, ctx, nfsvers, &xid, &fh, &nvattr);
4213 }
4214 if (nfsvers == NFS_VER3) {
4215 nfsm_chain_get_wcc_data(error, &nmrep, dnp, &premtime, &wccpostattr, &dxid);
4216 }
4217 if (!error) {
4218 error = status;
4219 }
4220 nfsmout:
4221 nfsm_chain_cleanup(&nmreq);
4222 nfsm_chain_cleanup(&nmrep);
4223
4224 if (!lockerror) {
4225 dnp->n_flag |= NMODIFIED;
4226 /* if directory hadn't changed, update namecache mtime */
4227 if (nfstimespeccmp(&dnp->n_ncmtime, &premtime, ==)) {
4228 NFS_CHANGED_UPDATE_NC(nfsvers, dnp, &dnp->n_vattr);
4229 }
4230 nfs_node_unlock(dnp);
4231 /* nfs_getattr() will check changed and purge caches */
4232 nfs_getattr(dnp, NULL, ctx, wccpostattr ? NGA_CACHED : NGA_UNCACHED);
4233 }
4234
4235 if (!error && fh.fh_len) {
4236 error = nfs_nget(NFSTOMP(dnp), dnp, cnp, fh.fh_data, fh.fh_len, &nvattr, &xid, rq.r_auth, NG_MAKEENTRY, &np);
4237 }
4238 if (!error && !np) {
4239 error = nfs_lookitup(dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx, &np);
4240 }
4241 if (!error && np) {
4242 newvp = NFSTOV(np);
4243 }
4244
4245 if (dul_in_progress) {
4246 nfs_dulookup_finish(&dul, dnp, ctx);
4247 }
4248 if (!busyerror) {
4249 nfs_node_clear_busy(dnp);
4250 }
4251
4252 if (error) {
4253 if ((nfsvers == NFS_VER3) && (fmode & O_EXCL) && (error == NFSERR_NOTSUPP)) {
4254 fmode &= ~O_EXCL;
4255 goto again;
4256 }
4257 if (newvp) {
4258 nfs_node_unlock(np);
4259 vnode_put(newvp);
4260 }
4261 } else if ((nfsvers == NFS_VER3) && (fmode & O_EXCL)) {
4262 nfs_node_unlock(np);
4263 error = nfs3_setattr_rpc(np, vap, ctx);
4264 if (error && (gotuid || gotgid)) {
4265 /* it's possible the server didn't like our attempt to set IDs. */
4266 /* so, let's try it again without those */
4267 VATTR_CLEAR_ACTIVE(vap, va_uid);
4268 VATTR_CLEAR_ACTIVE(vap, va_gid);
4269 error = nfs3_setattr_rpc(np, vap, ctx);
4270 }
4271 if (error) {
4272 vnode_put(newvp);
4273 } else {
4274 nfs_node_lock_force(np);
4275 }
4276 }
4277 if (!error) {
4278 *ap->a_vpp = newvp;
4279 }
4280 if (!error && (gotuid || gotgid) &&
4281 (!newvp || nfs_getattrcache(np, &nvattr, 0) ||
4282 (gotuid && (nvattr.nva_uid != vap->va_uid)) ||
4283 (gotgid && (nvattr.nva_gid != vap->va_gid)))) {
4284 /* clear ID bits if server didn't use them (or we can't tell) */
4285 VATTR_CLEAR_SUPPORTED(vap, va_uid);
4286 VATTR_CLEAR_SUPPORTED(vap, va_gid);
4287 }
4288 if (!error) {
4289 nfs_node_unlock(np);
4290 }
4291 return error;
4292 }
4293
4294 /*
4295 * NFS file remove call
4296 * To try and make NFS semantics closer to UFS semantics, a file that has
4297 * other processes using the vnode is renamed instead of removed and then
4298 * removed later on the last close.
4299 * - If vnode_isinuse()
4300 * If a rename is not already in the works
4301 * call nfs_sillyrename() to set it up
4302 * else
4303 * do the remove RPC
4304 */
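/*
 * Annotation (not from the original source): a sillyrename renames the
 * busy file to a hidden ".nfs"-style name in the same directory and
 * records it in np->n_sillyrename; nfs_vnop_inactive() later uses
 * nfs_removeit() (below) to delete the placeholder once the last
 * user of the vnode has gone away.
 */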
4305 int
4306 nfs_vnop_remove(
4307 struct vnop_remove_args /* {
4308 * struct vnodeop_desc *a_desc;
4309 * vnode_t a_dvp;
4310 * vnode_t a_vp;
4311 * struct componentname *a_cnp;
4312 * int a_flags;
4313 * vfs_context_t a_context;
4314 * } */*ap)
4315 {
4316 vfs_context_t ctx = ap->a_context;
4317 vnode_t vp = ap->a_vp;
4318 vnode_t dvp = ap->a_dvp;
4319 struct componentname *cnp = ap->a_cnp;
4320 nfsnode_t dnp = VTONFS(dvp);
4321 nfsnode_t np = VTONFS(vp);
4322 int error = 0, nfsvers, namedattrs, inuse, gotattr = 0, flushed = 0, setsize = 0;
4323 struct nfs_vattr nvattr;
4324 struct nfsmount *nmp;
4325 struct nfs_dulookup dul;
4326
4327 /* XXX prevent removing a sillyrenamed file? */
4328
4329 nmp = NFSTONMP(dnp);
4330 if (nfs_mount_gone(nmp)) {
4331 return ENXIO;
4332 }
4333 nfsvers = nmp->nm_vers;
4334 namedattrs = (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR);
4335
4336 again_relock:
4337 error = nfs_node_set_busy2(dnp, np, vfs_context_thread(ctx));
4338 if (error) {
4339 return error;
4340 }
4341
4342 /* lock the node while we remove the file */
4343 lck_mtx_lock(nfs_node_hash_mutex);
4344 while (np->n_hflag & NHLOCKED) {
4345 np->n_hflag |= NHLOCKWANT;
4346 msleep(np, nfs_node_hash_mutex, PINOD, "nfs_remove", NULL);
4347 }
4348 np->n_hflag |= NHLOCKED;
4349 lck_mtx_unlock(nfs_node_hash_mutex);
4350
4351 if (!namedattrs) {
4352 nfs_dulookup_init(&dul, dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx);
4353 }
4354 again:
4355 inuse = vnode_isinuse(vp, 0);
4356 if ((ap->a_flags & VNODE_REMOVE_NODELETEBUSY) && inuse) {
4357 /* Caller requested Carbon delete semantics, but file is busy */
4358 error = EBUSY;
4359 goto out;
4360 }
4361 if (inuse && !gotattr) {
4362 if (nfs_getattr(np, &nvattr, ctx, NGA_CACHED)) {
4363 nvattr.nva_nlink = 1;
4364 }
4365 gotattr = 1;
4366 goto again;
4367 }
4368 if (!inuse || (np->n_sillyrename && (nvattr.nva_nlink > 1))) {
4369 if (!inuse && !flushed) { /* flush all the buffers first */
4370 /* unlock the node */
4371 lck_mtx_lock(nfs_node_hash_mutex);
4372 np->n_hflag &= ~NHLOCKED;
4373 if (np->n_hflag & NHLOCKWANT) {
4374 np->n_hflag &= ~NHLOCKWANT;
4375 wakeup(np);
4376 }
4377 lck_mtx_unlock(nfs_node_hash_mutex);
4378 nfs_node_clear_busy2(dnp, np);
4379 error = nfs_vinvalbuf(vp, V_SAVE, ctx, 1);
4380 FSDBG(260, np, np->n_size, np->n_vattr.nva_size, 0xf00d0011);
4381 flushed = 1;
4382 if (error == EINTR) {
4383 nfs_node_lock_force(np);
4384 NATTRINVALIDATE(np);
4385 nfs_node_unlock(np);
4386 return error;
4387 }
4388 if (!namedattrs) {
4389 nfs_dulookup_finish(&dul, dnp, ctx);
4390 }
4391 goto again_relock;
4392 }
4393 #if CONFIG_NFS4
4394 if ((nmp->nm_vers >= NFS_VER4) && (np->n_openflags & N_DELEG_MASK)) {
4395 nfs4_delegation_return(np, 0, vfs_context_thread(ctx), vfs_context_ucred(ctx));
4396 }
4397 #endif
4398 /*
4399 * Purge the name cache so that the chance of a lookup for
4400 * the name succeeding while the remove is in progress is
4401 * minimized.
4402 */
4403 nfs_name_cache_purge(dnp, np, cnp, ctx);
4404
4405 if (!namedattrs) {
4406 nfs_dulookup_start(&dul, dnp, ctx);
4407 }
4408
4409 /* Do the rpc */
4410 error = nmp->nm_funcs->nf_remove_rpc(dnp, cnp->cn_nameptr, cnp->cn_namelen,
4411 vfs_context_thread(ctx), vfs_context_ucred(ctx));
4412
4413 /*
4414 * Kludge City: If the first reply to the remove rpc is lost,
4415 * the reply to the retransmitted request will be ENOENT
4416 * since the file was in fact removed.
4417 * Therefore, we cheat and return success.
4418 */
4419 if (error == ENOENT) {
4420 error = 0;
4421 }
4422
4423 if (!error && !inuse && !np->n_sillyrename) {
4424 /*
4425 * removal succeeded, it's not in use, and not silly renamed so
4426 * remove nfsnode from hash now so we can't accidentally find it
4427 * again if another object gets created with the same filehandle
4428 * before this vnode gets reclaimed
4429 */
4430 lck_mtx_lock(nfs_node_hash_mutex);
4431 if (np->n_hflag & NHHASHED) {
4432 LIST_REMOVE(np, n_hash);
4433 np->n_hflag &= ~NHHASHED;
4434 FSDBG(266, 0, np, np->n_flag, 0xb1eb1e);
4435 }
4436 lck_mtx_unlock(nfs_node_hash_mutex);
4437 /* clear flags now: won't get nfs_vnop_inactive for recycled vnode */
4438 /* clear all flags other than these */
4439 nfs_node_lock_force(np);
4440 np->n_flag &= (NMODIFIED);
4441 NATTRINVALIDATE(np);
4442 nfs_node_unlock(np);
4443 vnode_recycle(vp);
4444 setsize = 1;
4445 } else {
4446 nfs_node_lock_force(np);
4447 NATTRINVALIDATE(np);
4448 nfs_node_unlock(np);
4449 }
4450 } else if (!np->n_sillyrename) {
4451 if (!namedattrs) {
4452 nfs_dulookup_start(&dul, dnp, ctx);
4453 }
4454 error = nfs_sillyrename(dnp, np, cnp, ctx);
4455 nfs_node_lock_force(np);
4456 NATTRINVALIDATE(np);
4457 nfs_node_unlock(np);
4458 } else {
4459 nfs_node_lock_force(np);
4460 NATTRINVALIDATE(np);
4461 nfs_node_unlock(np);
4462 if (!namedattrs) {
4463 nfs_dulookup_start(&dul, dnp, ctx);
4464 }
4465 }
4466
4467 /* nfs_getattr() will check changed and purge caches */
4468 nfs_getattr(dnp, NULL, ctx, NGA_CACHED);
4469 if (!namedattrs) {
4470 nfs_dulookup_finish(&dul, dnp, ctx);
4471 }
4472 out:
4473 /* unlock the node */
4474 lck_mtx_lock(nfs_node_hash_mutex);
4475 np->n_hflag &= ~NHLOCKED;
4476 if (np->n_hflag & NHLOCKWANT) {
4477 np->n_hflag &= ~NHLOCKWANT;
4478 wakeup(np);
4479 }
4480 lck_mtx_unlock(nfs_node_hash_mutex);
4481 nfs_node_clear_busy2(dnp, np);
4482 if (setsize) {
4483 ubc_setsize(vp, 0);
4484 }
4485 return error;
4486 }
4487
4488 /*
4489 * NFS silly-renamed file removal function called from nfs_vnop_inactive
4490 */
4491 int
4492 nfs_removeit(struct nfs_sillyrename *nsp)
4493 {
4494 struct nfsmount *nmp = NFSTONMP(nsp->nsr_dnp);
4495 if (nfs_mount_gone(nmp)) {
4496 return ENXIO;
4497 }
4498 return nmp->nm_funcs->nf_remove_rpc(nsp->nsr_dnp, nsp->nsr_name, nsp->nsr_namlen, NULL, nsp->nsr_cred);
4499 }
4500
4501 /*
4502 * NFS remove rpc, called from nfs_remove() and nfs_removeit().
4503 */
4504 int
4505 nfs3_remove_rpc(
4506 nfsnode_t dnp,
4507 char *name,
4508 int namelen,
4509 thread_t thd,
4510 kauth_cred_t cred)
4511 {
4512 int error = 0, lockerror = ENOENT, status, wccpostattr = 0;
4513 struct timespec premtime = { .tv_sec = 0, .tv_nsec = 0 };
4514 struct nfsmount *nmp;
4515 int nfsvers;
4516 u_int64_t xid;
4517 struct nfsm_chain nmreq, nmrep;
4518
4519 nmp = NFSTONMP(dnp);
4520 if (nfs_mount_gone(nmp)) {
4521 return ENXIO;
4522 }
4523 nfsvers = nmp->nm_vers;
4524 if ((nfsvers == NFS_VER2) && (namelen > NFS_MAXNAMLEN)) {
4525 return ENAMETOOLONG;
4526 }
4527
4528 nfsm_chain_null(&nmreq);
4529 nfsm_chain_null(&nmrep);
4530
4531 nfsm_chain_build_alloc_init(error, &nmreq,
4532 NFSX_FH(nfsvers) + NFSX_UNSIGNED + nfsm_rndup(namelen));
4533 nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
4534 nfsm_chain_add_name(error, &nmreq, name, namelen, nmp);
4535 nfsm_chain_build_done(error, &nmreq);
4536 nfsmout_if(error);
4537
4538 error = nfs_request2(dnp, NULL, &nmreq, NFSPROC_REMOVE, thd, cred, NULL, 0, &nmrep, &xid, &status);
4539
4540 if ((lockerror = nfs_node_lock(dnp))) {
4541 error = lockerror;
4542 }
4543 if (nfsvers == NFS_VER3) {
4544 nfsm_chain_get_wcc_data(error, &nmrep, dnp, &premtime, &wccpostattr, &xid);
4545 }
4546 nfsmout_if(error);
4547 dnp->n_flag |= NMODIFIED;
4548 /* if directory hadn't changed, update namecache mtime */
4549 if (nfstimespeccmp(&dnp->n_ncmtime, &premtime, ==)) {
4550 NFS_CHANGED_UPDATE_NC(nfsvers, dnp, &dnp->n_vattr);
4551 }
4552 if (!wccpostattr) {
4553 NATTRINVALIDATE(dnp);
4554 }
4555 if (!error) {
4556 error = status;
4557 }
4558 nfsmout:
4559 if (!lockerror) {
4560 nfs_node_unlock(dnp);
4561 }
4562 nfsm_chain_cleanup(&nmreq);
4563 nfsm_chain_cleanup(&nmrep);
4564 return error;
4565 }
4566
4567 /*
4568 * NFS file rename call
4569 */
4570 int
4571 nfs_vnop_rename(
4572 struct vnop_rename_args /* {
4573 * struct vnodeop_desc *a_desc;
4574 * vnode_t a_fdvp;
4575 * vnode_t a_fvp;
4576 * struct componentname *a_fcnp;
4577 * vnode_t a_tdvp;
4578 * vnode_t a_tvp;
4579 * struct componentname *a_tcnp;
4580 * vfs_context_t a_context;
4581 * } */*ap)
4582 {
4583 vfs_context_t ctx = ap->a_context;
4584 vnode_t fdvp = ap->a_fdvp;
4585 vnode_t fvp = ap->a_fvp;
4586 vnode_t tdvp = ap->a_tdvp;
4587 vnode_t tvp = ap->a_tvp;
4588 nfsnode_t fdnp, fnp, tdnp, tnp;
4589 struct componentname *tcnp = ap->a_tcnp;
4590 struct componentname *fcnp = ap->a_fcnp;
4591 int error, nfsvers, inuse = 0, tvprecycle = 0, locked = 0;
4592 mount_t fmp, tdmp, tmp;
4593 struct nfs_vattr nvattr;
4594 struct nfsmount *nmp;
4595
4596 fdnp = VTONFS(fdvp);
4597 fnp = VTONFS(fvp);
4598 tdnp = VTONFS(tdvp);
4599 tnp = tvp ? VTONFS(tvp) : NULL;
4600
4601 nmp = NFSTONMP(fdnp);
4602 if (nfs_mount_gone(nmp)) {
4603 return ENXIO;
4604 }
4605 nfsvers = nmp->nm_vers;
4606
4607 error = nfs_node_set_busy4(fdnp, fnp, tdnp, tnp, vfs_context_thread(ctx));
4608 if (error) {
4609 return error;
4610 }
4611
4612 if (tvp && (tvp != fvp)) {
4613 /* lock the node while we rename over the existing file */
4614 lck_mtx_lock(nfs_node_hash_mutex);
4615 while (tnp->n_hflag & NHLOCKED) {
4616 tnp->n_hflag |= NHLOCKWANT;
4617 msleep(tnp, nfs_node_hash_mutex, PINOD, "nfs_rename", NULL);
4618 }
4619 tnp->n_hflag |= NHLOCKED;
4620 lck_mtx_unlock(nfs_node_hash_mutex);
4621 locked = 1;
4622 }
4623
4624 /* Check for cross-device rename */
4625 fmp = vnode_mount(fvp);
4626 tmp = tvp ? vnode_mount(tvp) : NULL;
4627 tdmp = vnode_mount(tdvp);
4628 if ((fmp != tdmp) || (tvp && (fmp != tmp))) {
4629 error = EXDEV;
4630 goto out;
4631 }
4632
4633 /* XXX prevent renaming from/over a sillyrenamed file? */
4634
4635 /*
4636 * If the tvp exists and is in use, sillyrename it before doing the
4637 * rename of the new file over it.
4638 * XXX Can't sillyrename a directory.
4639 * Don't sillyrename if source and target are same vnode (hard
4640 * links or case-variants)
4641 */
4642 if (tvp && (tvp != fvp)) {
4643 inuse = vnode_isinuse(tvp, 0);
4644 }
4645 if (inuse && !tnp->n_sillyrename && (vnode_vtype(tvp) != VDIR)) {
4646 error = nfs_sillyrename(tdnp, tnp, tcnp, ctx);
4647 if (error) {
4648 /* sillyrename failed. Instead of pressing on, return error */
4649 goto out; /* should not be ENOENT. */
4650 } else {
4651 /* sillyrename succeeded. */
4652 tvp = NULL;
4653 }
4654 }
4655 #if CONFIG_NFS4
4656 else if (tvp && (nmp->nm_vers >= NFS_VER4) && (tnp->n_openflags & N_DELEG_MASK)) {
4657 nfs4_delegation_return(tnp, 0, vfs_context_thread(ctx), vfs_context_ucred(ctx));
4658 }
4659 #endif
4660 error = nmp->nm_funcs->nf_rename_rpc(fdnp, fcnp->cn_nameptr, fcnp->cn_namelen,
4661 tdnp, tcnp->cn_nameptr, tcnp->cn_namelen, ctx);
4662
4663 /*
4664 * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry.
4665 */
4666 if (error == ENOENT) {
4667 error = 0;
4668 }
4669
4670 if (tvp && (tvp != fvp) && !tnp->n_sillyrename) {
4671 nfs_node_lock_force(tnp);
4672 tvprecycle = (!error && !vnode_isinuse(tvp, 0) &&
4673 (nfs_getattrcache(tnp, &nvattr, 0) || (nvattr.nva_nlink == 1)));
4674 nfs_node_unlock(tnp);
4675 lck_mtx_lock(nfs_node_hash_mutex);
4676 if (tvprecycle && (tnp->n_hflag & NHHASHED)) {
4677 /*
4678 * remove nfsnode from hash now so we can't accidentally find it
4679 * again if another object gets created with the same filehandle
4680 * before this vnode gets reclaimed
4681 */
4682 LIST_REMOVE(tnp, n_hash);
4683 tnp->n_hflag &= ~NHHASHED;
4684 FSDBG(266, 0, tnp, tnp->n_flag, 0xb1eb1e);
4685 }
4686 lck_mtx_unlock(nfs_node_hash_mutex);
4687 }
4688
4689 /* purge the old name cache entries and enter the new one */
4690 nfs_name_cache_purge(fdnp, fnp, fcnp, ctx);
4691 if (tvp) {
4692 nfs_name_cache_purge(tdnp, tnp, tcnp, ctx);
4693 if (tvprecycle) {
4694 /* clear flags now: won't get nfs_vnop_inactive for recycled vnode */
4695 /* clear all flags other than these */
4696 nfs_node_lock_force(tnp);
4697 tnp->n_flag &= (NMODIFIED);
4698 nfs_node_unlock(tnp);
4699 vnode_recycle(tvp);
4700 }
4701 }
4702 if (!error) {
4703 nfs_node_lock_force(tdnp);
4704 if (tdnp->n_flag & NNEGNCENTRIES) {
4705 tdnp->n_flag &= ~NNEGNCENTRIES;
4706 cache_purge_negatives(tdvp);
4707 }
4708 nfs_node_unlock(tdnp);
4709 nfs_node_lock_force(fnp);
4710 cache_enter(tdvp, fvp, tcnp);
4711 if (tdvp != fdvp) { /* update parent pointer */
4712 if (fnp->n_parent && !vnode_get(fnp->n_parent)) {
4713 /* remove ref from old parent */
4714 vnode_rele(fnp->n_parent);
4715 vnode_put(fnp->n_parent);
4716 }
4717 fnp->n_parent = tdvp;
4718 if (tdvp && !vnode_get(tdvp)) {
4719 /* add ref to new parent */
4720 vnode_ref(tdvp);
4721 vnode_put(tdvp);
4722 } else {
4723 fnp->n_parent = NULL;
4724 }
4725 }
4726 nfs_node_unlock(fnp);
4727 }
4728 out:
4729 /* nfs_getattr() will check changed and purge caches */
4730 nfs_getattr(fdnp, NULL, ctx, NGA_CACHED);
4731 nfs_getattr(tdnp, NULL, ctx, NGA_CACHED);
4732 if (locked) {
4733 /* unlock node */
4734 lck_mtx_lock(nfs_node_hash_mutex);
4735 tnp->n_hflag &= ~NHLOCKED;
4736 if (tnp->n_hflag & NHLOCKWANT) {
4737 tnp->n_hflag &= ~NHLOCKWANT;
4738 wakeup(tnp);
4739 }
4740 lck_mtx_unlock(nfs_node_hash_mutex);
4741 }
4742 nfs_node_clear_busy4(fdnp, fnp, tdnp, tnp);
4743 return error;
4744 }
4745
4746 /*
4747 * Do an NFS rename rpc. Called from nfs_vnop_rename() and nfs_sillyrename().
4748 */
4749 int
4750 nfs3_rename_rpc(
4751 nfsnode_t fdnp,
4752 char *fnameptr,
4753 int fnamelen,
4754 nfsnode_t tdnp,
4755 char *tnameptr,
4756 int tnamelen,
4757 vfs_context_t ctx)
4758 {
4759 int error = 0, lockerror = ENOENT, status, fwccpostattr = 0, twccpostattr = 0;
4760 struct timespec fpremtime = { .tv_sec = 0, .tv_nsec = 0 }, tpremtime = { .tv_sec = 0, .tv_nsec = 0 };
4761 struct nfsmount *nmp;
4762 int nfsvers;
4763 u_int64_t xid, txid;
4764 struct nfsm_chain nmreq, nmrep;
4765
4766 nmp = NFSTONMP(fdnp);
4767 if (nfs_mount_gone(nmp)) {
4768 return ENXIO;
4769 }
4770 nfsvers = nmp->nm_vers;
4771 if ((nfsvers == NFS_VER2) &&
4772 ((fnamelen > NFS_MAXNAMLEN) || (tnamelen > NFS_MAXNAMLEN))) {
4773 return ENAMETOOLONG;
4774 }
4775
4776 nfsm_chain_null(&nmreq);
4777 nfsm_chain_null(&nmrep);
4778
4779 nfsm_chain_build_alloc_init(error, &nmreq,
4780 (NFSX_FH(nfsvers) + NFSX_UNSIGNED) * 2 +
4781 nfsm_rndup(fnamelen) + nfsm_rndup(tnamelen));
4782 nfsm_chain_add_fh(error, &nmreq, nfsvers, fdnp->n_fhp, fdnp->n_fhsize);
4783 nfsm_chain_add_name(error, &nmreq, fnameptr, fnamelen, nmp);
4784 nfsm_chain_add_fh(error, &nmreq, nfsvers, tdnp->n_fhp, tdnp->n_fhsize);
4785 nfsm_chain_add_name(error, &nmreq, tnameptr, tnamelen, nmp);
4786 nfsm_chain_build_done(error, &nmreq);
4787 nfsmout_if(error);
4788
4789 error = nfs_request(fdnp, NULL, &nmreq, NFSPROC_RENAME, ctx, NULL, &nmrep, &xid, &status);
4790
4791 if ((lockerror = nfs_node_lock2(fdnp, tdnp))) {
4792 error = lockerror;
4793 }
4794 if (nfsvers == NFS_VER3) {
4795 txid = xid;
4796 nfsm_chain_get_wcc_data(error, &nmrep, fdnp, &fpremtime, &fwccpostattr, &xid);
4797 nfsm_chain_get_wcc_data(error, &nmrep, tdnp, &tpremtime, &twccpostattr, &txid);
4798 }
4799 if (!error) {
4800 error = status;
4801 }
4802 nfsmout:
4803 nfsm_chain_cleanup(&nmreq);
4804 nfsm_chain_cleanup(&nmrep);
4805 if (!lockerror) {
4806 fdnp->n_flag |= NMODIFIED;
4807 /* if directory hadn't changed, update namecache mtime */
4808 if (nfstimespeccmp(&fdnp->n_ncmtime, &fpremtime, ==)) {
4809 NFS_CHANGED_UPDATE_NC(nfsvers, fdnp, &fdnp->n_vattr);
4810 }
4811 if (!fwccpostattr) {
4812 NATTRINVALIDATE(fdnp);
4813 }
4814 tdnp->n_flag |= NMODIFIED;
4815 /* if directory hadn't changed, update namecache mtime */
4816 if (nfstimespeccmp(&tdnp->n_ncmtime, &tpremtime, ==)) {
4817 NFS_CHANGED_UPDATE_NC(nfsvers, tdnp, &tdnp->n_vattr);
4818 }
4819 if (!twccpostattr) {
4820 NATTRINVALIDATE(tdnp);
4821 }
4822 nfs_node_unlock2(fdnp, tdnp);
4823 }
4824 return error;
4825 }
4826
4827 /*
4828 * NFS hard link create call
4829 */
4830 int
4831 nfs3_vnop_link(
4832 struct vnop_link_args /* {
4833 * struct vnodeop_desc *a_desc;
4834 * vnode_t a_vp;
4835 * vnode_t a_tdvp;
4836 * struct componentname *a_cnp;
4837 * vfs_context_t a_context;
4838 * } */*ap)
4839 {
4840 vfs_context_t ctx = ap->a_context;
4841 vnode_t vp = ap->a_vp;
4842 vnode_t tdvp = ap->a_tdvp;
4843 struct componentname *cnp = ap->a_cnp;
4844 int error = 0, lockerror = ENOENT, status, wccpostattr = 0, attrflag = 0;
4845 struct timespec premtime = { .tv_sec = 0, .tv_nsec = 0 };
4846 struct nfsmount *nmp;
4847 nfsnode_t np = VTONFS(vp);
4848 nfsnode_t tdnp = VTONFS(tdvp);
4849 int nfsvers;
4850 u_int64_t xid, txid;
4851 struct nfsm_chain nmreq, nmrep;
4852
4853 if (vnode_mount(vp) != vnode_mount(tdvp)) {
4854 return EXDEV;
4855 }
4856
4857 nmp = VTONMP(vp);
4858 if (nfs_mount_gone(nmp)) {
4859 return ENXIO;
4860 }
4861 nfsvers = nmp->nm_vers;
4862 if ((nfsvers == NFS_VER2) && (cnp->cn_namelen > NFS_MAXNAMLEN)) {
4863 return ENAMETOOLONG;
4864 }
4865
4866 /*
4867 * Push all writes to the server, so that the attribute cache
4868 * doesn't get "out of sync" with the server.
4869 * XXX There should be a better way!
4870 */
4871 nfs_flush(np, MNT_WAIT, vfs_context_thread(ctx), V_IGNORE_WRITEERR);
4872
4873 error = nfs_node_set_busy2(tdnp, np, vfs_context_thread(ctx));
4874 if (error) {
4875 return error;
4876 }
4877
4878 nfsm_chain_null(&nmreq);
4879 nfsm_chain_null(&nmrep);
4880
4881 nfsm_chain_build_alloc_init(error, &nmreq,
4882 NFSX_FH(nfsvers) * 2 + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen));
4883 nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
4884 nfsm_chain_add_fh(error, &nmreq, nfsvers, tdnp->n_fhp, tdnp->n_fhsize);
4885 nfsm_chain_add_name(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen, nmp);
4886 nfsm_chain_build_done(error, &nmreq);
4887 nfsmout_if(error);
4888 error = nfs_request(np, NULL, &nmreq, NFSPROC_LINK, ctx, NULL, &nmrep, &xid, &status);
4889
4890 if ((lockerror = nfs_node_lock2(tdnp, np))) {
4891 error = lockerror;
4892 goto nfsmout;
4893 }
4894 if (nfsvers == NFS_VER3) {
4895 txid = xid;
4896 nfsm_chain_postop_attr_update_flag(error, &nmrep, np, attrflag, &xid);
4897 nfsm_chain_get_wcc_data(error, &nmrep, tdnp, &premtime, &wccpostattr, &txid);
4898 }
4899 if (!error) {
4900 error = status;
4901 }
4902 nfsmout:
4903 nfsm_chain_cleanup(&nmreq);
4904 nfsm_chain_cleanup(&nmrep);
4905 if (!lockerror) {
4906 if (!attrflag) {
4907 NATTRINVALIDATE(np);
4908 }
4909 tdnp->n_flag |= NMODIFIED;
4910 /* if directory hadn't changed, update namecache mtime */
4911 if (nfstimespeccmp(&tdnp->n_ncmtime, &premtime, ==)) {
4912 NFS_CHANGED_UPDATE_NC(nfsvers, tdnp, &tdnp->n_vattr);
4913 }
4914 if (!wccpostattr) {
4915 NATTRINVALIDATE(tdnp);
4916 }
4917 if (!error && (tdnp->n_flag & NNEGNCENTRIES)) {
4918 tdnp->n_flag &= ~NNEGNCENTRIES;
4919 cache_purge_negatives(tdvp);
4920 }
4921 nfs_node_unlock2(tdnp, np);
4922 }
4923 nfs_node_clear_busy2(tdnp, np);
4924 /*
4925 * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
4926 */
4927 if (error == EEXIST) {
4928 error = 0;
4929 }
4930 return error;
4931 }
4932
4933 /*
4934 * NFS symbolic link create call
4935 */
4936 int
4937 nfs3_vnop_symlink(
4938 struct vnop_symlink_args /* {
4939 * struct vnodeop_desc *a_desc;
4940 * vnode_t a_dvp;
4941 * vnode_t *a_vpp;
4942 * struct componentname *a_cnp;
4943 * struct vnode_attr *a_vap;
4944 * char *a_target;
4945 * vfs_context_t a_context;
4946 * } */*ap)
4947 {
4948 vfs_context_t ctx = ap->a_context;
4949 vnode_t dvp = ap->a_dvp;
4950 struct vnode_attr *vap = ap->a_vap;
4951 struct componentname *cnp = ap->a_cnp;
4952 struct nfs_vattr nvattr;
4953 fhandle_t fh;
4954 int slen, error = 0, lockerror = ENOENT, busyerror = ENOENT, status, wccpostattr = 0;
4955 struct timespec premtime = { .tv_sec = 0, .tv_nsec = 0 };
4956 vnode_t newvp = NULL;
4957 int nfsvers, gotuid, gotgid;
4958 u_int64_t xid = 0, dxid;
4959 nfsnode_t np = NULL;
4960 nfsnode_t dnp = VTONFS(dvp);
4961 struct nfsmount *nmp;
4962 struct nfsm_chain nmreq, nmrep;
4963 struct nfsreq rq, *req = &rq;
4964 struct nfs_dulookup dul;
4965 int namedattrs;
4966 int dul_in_progress = 0;
4967
4968 nmp = VTONMP(dvp);
4969 if (nfs_mount_gone(nmp)) {
4970 return ENXIO;
4971 }
4972 nfsvers = nmp->nm_vers;
4973 namedattrs = (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR);
4974
4975 slen = strlen(ap->a_target);
4976 if ((nfsvers == NFS_VER2) &&
4977 ((cnp->cn_namelen > NFS_MAXNAMLEN) || (slen > NFS_MAXPATHLEN))) {
4978 return ENAMETOOLONG;
4979 }
4980
4981 nfs_avoid_needless_id_setting_on_create(dnp, vap, ctx);
4982
4983 VATTR_SET_SUPPORTED(vap, va_mode);
4984 VATTR_SET_SUPPORTED(vap, va_uid);
4985 VATTR_SET_SUPPORTED(vap, va_gid);
4986 VATTR_SET_SUPPORTED(vap, va_data_size);
4987 VATTR_SET_SUPPORTED(vap, va_access_time);
4988 VATTR_SET_SUPPORTED(vap, va_modify_time);
4989 gotuid = VATTR_IS_ACTIVE(vap, va_uid);
4990 gotgid = VATTR_IS_ACTIVE(vap, va_gid);
4991
4992 error = busyerror = nfs_node_set_busy(dnp, vfs_context_thread(ctx));
4993 if (!namedattrs) {
4994 nfs_dulookup_init(&dul, dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx);
4995 }
4996
4997 nfsm_chain_null(&nmreq);
4998 nfsm_chain_null(&nmrep);
4999
5000 nfsm_chain_build_alloc_init(error, &nmreq,
5001 NFSX_FH(nfsvers) + 2 * NFSX_UNSIGNED +
5002 nfsm_rndup(cnp->cn_namelen) + nfsm_rndup(slen) + NFSX_SATTR(nfsvers));
5003 nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
5004 nfsm_chain_add_name(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen, nmp);
5005 if (nfsvers == NFS_VER3) {
5006 nfsm_chain_add_v3sattr(nmp, error, &nmreq, vap);
5007 }
5008 nfsm_chain_add_name(error, &nmreq, ap->a_target, slen, nmp);
5009 if (nfsvers == NFS_VER2) {
5010 nfsm_chain_add_v2sattr(error, &nmreq, vap, -1);
5011 }
5012 nfsm_chain_build_done(error, &nmreq);
5013 nfsmout_if(error);
5014
5015 error = nfs_request_async(dnp, NULL, &nmreq, NFSPROC_SYMLINK,
5016 vfs_context_thread(ctx), vfs_context_ucred(ctx), NULL, 0, NULL, &req);
5017 if (!error) {
5018 if (!namedattrs) {
5019 nfs_dulookup_start(&dul, dnp, ctx);
5020 dul_in_progress = 1;
5021 }
5022 error = nfs_request_async_finish(req, &nmrep, &xid, &status);
5023 }
5024
5025 if ((lockerror = nfs_node_lock(dnp))) {
5026 error = lockerror;
5027 }
5028 dxid = xid;
5029 if (!error && !status) {
5030 if (dnp->n_flag & NNEGNCENTRIES) {
5031 dnp->n_flag &= ~NNEGNCENTRIES;
5032 cache_purge_negatives(dvp);
5033 }
5034 if (nfsvers == NFS_VER3) {
5035 error = nfsm_chain_get_fh_attr(nmp, &nmrep, dnp, ctx, nfsvers, &xid, &fh, &nvattr);
5036 } else {
5037 fh.fh_len = 0;
5038 }
5039 }
5040 if (nfsvers == NFS_VER3) {
5041 nfsm_chain_get_wcc_data(error, &nmrep, dnp, &premtime, &wccpostattr, &dxid);
5042 }
5043 if (!error) {
5044 error = status;
5045 }
5046 nfsmout:
5047 nfsm_chain_cleanup(&nmreq);
5048 nfsm_chain_cleanup(&nmrep);
5049
5050 if (!lockerror) {
5051 dnp->n_flag |= NMODIFIED;
5052 /* if directory hadn't changed, update namecache mtime */
5053 if (nfstimespeccmp(&dnp->n_ncmtime, &premtime, ==)) {
5054 NFS_CHANGED_UPDATE_NC(nfsvers, dnp, &dnp->n_vattr);
5055 }
5056 nfs_node_unlock(dnp);
5057 /* nfs_getattr() will check changed and purge caches */
5058 nfs_getattr(dnp, NULL, ctx, wccpostattr ? NGA_CACHED : NGA_UNCACHED);
5059 }
5060
5061 if (!error && fh.fh_len) {
5062 error = nfs_nget(NFSTOMP(dnp), dnp, cnp, fh.fh_data, fh.fh_len, &nvattr, &xid, rq.r_auth, NG_MAKEENTRY, &np);
5063 }
5064 if (!error && np) {
5065 newvp = NFSTOV(np);
5066 }
5067
5068 if (dul_in_progress) {
5069 nfs_dulookup_finish(&dul, dnp, ctx);
5070 }
5071
5072 /*
5073 * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry
5074 * if we can succeed in looking up the symlink.
5075 */
5076 if ((error == EEXIST) || (!error && !newvp)) {
5077 if (newvp) {
5078 nfs_node_unlock(np);
5079 vnode_put(newvp);
5080 newvp = NULL;
5081 }
5082 error = nfs_lookitup(dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx, &np);
5083 if (!error) {
5084 newvp = NFSTOV(np);
5085 if (vnode_vtype(newvp) != VLNK) {
5086 error = EEXIST;
5087 }
5088 }
5089 }
5090 if (!busyerror) {
5091 nfs_node_clear_busy(dnp);
5092 }
5093 if (!error && (gotuid || gotgid) &&
5094 (!newvp || nfs_getattrcache(np, &nvattr, 0) ||
5095 (gotuid && (nvattr.nva_uid != vap->va_uid)) ||
5096 (gotgid && (nvattr.nva_gid != vap->va_gid)))) {
5097 /* clear ID bits if server didn't use them (or we can't tell) */
5098 VATTR_CLEAR_SUPPORTED(vap, va_uid);
5099 VATTR_CLEAR_SUPPORTED(vap, va_gid);
5100 }
5101 if (error) {
5102 if (newvp) {
5103 nfs_node_unlock(np);
5104 vnode_put(newvp);
5105 }
5106 } else {
5107 nfs_node_unlock(np);
5108 *ap->a_vpp = newvp;
5109 }
5110 return error;
5111 }
5112
5113 /*
5114 * NFS make dir call
5115 */
5116 int
5117 nfs3_vnop_mkdir(
5118 struct vnop_mkdir_args /* {
5119 * struct vnodeop_desc *a_desc;
5120 * vnode_t a_dvp;
5121 * vnode_t *a_vpp;
5122 * struct componentname *a_cnp;
5123 * struct vnode_attr *a_vap;
5124 * vfs_context_t a_context;
5125 * } */*ap)
5126 {
5127 vfs_context_t ctx = ap->a_context;
5128 vnode_t dvp = ap->a_dvp;
5129 struct vnode_attr *vap = ap->a_vap;
5130 struct componentname *cnp = ap->a_cnp;
5131 struct nfs_vattr nvattr;
5132 nfsnode_t np = NULL;
5133 struct nfsmount *nmp;
5134 nfsnode_t dnp = VTONFS(dvp);
5135 vnode_t newvp = NULL;
5136 int error = 0, lockerror = ENOENT, busyerror = ENOENT, status, wccpostattr = 0;
5137 struct timespec premtime = { .tv_sec = 0, .tv_nsec = 0 };
5138 int nfsvers, gotuid, gotgid;
5139 u_int64_t xid = 0, dxid;
5140 fhandle_t fh;
5141 struct nfsm_chain nmreq, nmrep;
5142 struct nfsreq rq, *req = &rq;
5143 struct nfs_dulookup dul;
5144 int namedattrs;
5145 int dul_in_progress = 0;
5146
5147 nmp = VTONMP(dvp);
5148 if (nfs_mount_gone(nmp)) {
5149 return ENXIO;
5150 }
5151 nfsvers = nmp->nm_vers;
5152 namedattrs = (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR);
5153
5154 if ((nfsvers == NFS_VER2) && (cnp->cn_namelen > NFS_MAXNAMLEN)) {
5155 return ENAMETOOLONG;
5156 }
5157
5158 nfs_avoid_needless_id_setting_on_create(dnp, vap, ctx);
5159
5160 VATTR_SET_SUPPORTED(vap, va_mode);
5161 VATTR_SET_SUPPORTED(vap, va_uid);
5162 VATTR_SET_SUPPORTED(vap, va_gid);
5163 VATTR_SET_SUPPORTED(vap, va_data_size);
5164 VATTR_SET_SUPPORTED(vap, va_access_time);
5165 VATTR_SET_SUPPORTED(vap, va_modify_time);
5166 gotuid = VATTR_IS_ACTIVE(vap, va_uid);
5167 gotgid = VATTR_IS_ACTIVE(vap, va_gid);
5168
5169 error = busyerror = nfs_node_set_busy(dnp, vfs_context_thread(ctx));
5170 if (!namedattrs) {
5171 nfs_dulookup_init(&dul, dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx);
5172 }
5173
5174 nfsm_chain_null(&nmreq);
5175 nfsm_chain_null(&nmrep);
5176
5177 nfsm_chain_build_alloc_init(error, &nmreq,
5178 NFSX_FH(nfsvers) + NFSX_UNSIGNED +
5179 nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(nfsvers));
5180 nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
5181 nfsm_chain_add_name(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen, nmp);
5182 if (nfsvers == NFS_VER3) {
5183 nfsm_chain_add_v3sattr(nmp, error, &nmreq, vap);
5184 } else {
5185 nfsm_chain_add_v2sattr(error, &nmreq, vap, -1);
5186 }
5187 nfsm_chain_build_done(error, &nmreq);
5188 nfsmout_if(error);
5189
5190 error = nfs_request_async(dnp, NULL, &nmreq, NFSPROC_MKDIR,
5191 vfs_context_thread(ctx), vfs_context_ucred(ctx), NULL, 0, NULL, &req);
5192 if (!error) {
5193 if (!namedattrs) {
5194 nfs_dulookup_start(&dul, dnp, ctx);
5195 dul_in_progress = 1;
5196 }
5197 error = nfs_request_async_finish(req, &nmrep, &xid, &status);
5198 }
5199
5200 if ((lockerror = nfs_node_lock(dnp))) {
5201 error = lockerror;
5202 }
5203 dxid = xid;
5204 if (!error && !status) {
5205 if (dnp->n_flag & NNEGNCENTRIES) {
5206 dnp->n_flag &= ~NNEGNCENTRIES;
5207 cache_purge_negatives(dvp);
5208 }
5209 error = nfsm_chain_get_fh_attr(nmp, &nmrep, dnp, ctx, nfsvers, &xid, &fh, &nvattr);
5210 }
5211 if (nfsvers == NFS_VER3) {
5212 nfsm_chain_get_wcc_data(error, &nmrep, dnp, &premtime, &wccpostattr, &dxid);
5213 }
5214 if (!error) {
5215 error = status;
5216 }
5217 nfsmout:
5218 nfsm_chain_cleanup(&nmreq);
5219 nfsm_chain_cleanup(&nmrep);
5220
5221 if (!lockerror) {
5222 dnp->n_flag |= NMODIFIED;
5223 /* if directory hadn't changed, update namecache mtime */
5224 if (nfstimespeccmp(&dnp->n_ncmtime, &premtime, ==)) {
5225 NFS_CHANGED_UPDATE_NC(nfsvers, dnp, &dnp->n_vattr);
5226 }
5227 nfs_node_unlock(dnp);
5228 /* nfs_getattr() will check changed and purge caches */
5229 nfs_getattr(dnp, NULL, ctx, wccpostattr ? NGA_CACHED : NGA_UNCACHED);
5230 }
5231
5232 if (!error && fh.fh_len) {
5233 error = nfs_nget(NFSTOMP(dnp), dnp, cnp, fh.fh_data, fh.fh_len, &nvattr, &xid, rq.r_auth, NG_MAKEENTRY, &np);
5234 }
5235 if (!error && np) {
5236 newvp = NFSTOV(np);
5237 }
5238
5239 if (dul_in_progress) {
5240 nfs_dulookup_finish(&dul, dnp, ctx);
5241 }
5242
5243 /*
5244 * Kludge: map EEXIST => 0, on the assumption that the reply is for a
5245 * retried request, provided we can successfully look up the directory.
5246 */
5247 if ((error == EEXIST) || (!error && !newvp)) {
5248 if (newvp) {
5249 nfs_node_unlock(np);
5250 vnode_put(newvp);
5251 newvp = NULL;
5252 }
5253 error = nfs_lookitup(dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx, &np);
5254 if (!error) {
5255 newvp = NFSTOV(np);
5256 if (vnode_vtype(newvp) != VDIR) {
5257 error = EEXIST;
5258 }
5259 }
5260 }
5261 if (!busyerror) {
5262 nfs_node_clear_busy(dnp);
5263 }
5264 if (!error && (gotuid || gotgid) &&
5265 (!newvp || nfs_getattrcache(np, &nvattr, 0) ||
5266 (gotuid && (nvattr.nva_uid != vap->va_uid)) ||
5267 (gotgid && (nvattr.nva_gid != vap->va_gid)))) {
5268 /* clear ID bits if server didn't use them (or we can't tell) */
5269 VATTR_CLEAR_SUPPORTED(vap, va_uid);
5270 VATTR_CLEAR_SUPPORTED(vap, va_gid);
5271 }
5272 if (error) {
5273 if (newvp) {
5274 nfs_node_unlock(np);
5275 vnode_put(newvp);
5276 }
5277 } else {
5278 nfs_node_unlock(np);
5279 *ap->a_vpp = newvp;
5280 }
5281 return error;
5282 }
5283
5284 /*
5285 * NFS remove directory call
5286 */
5287 int
5288 nfs3_vnop_rmdir(
5289 struct vnop_rmdir_args /* {
5290 * struct vnodeop_desc *a_desc;
5291 * vnode_t a_dvp;
5292 * vnode_t a_vp;
5293 * struct componentname *a_cnp;
5294 * vfs_context_t a_context;
5295 * } */*ap)
5296 {
5297 vfs_context_t ctx = ap->a_context;
5298 vnode_t vp = ap->a_vp;
5299 vnode_t dvp = ap->a_dvp;
5300 struct componentname *cnp = ap->a_cnp;
5301 int error = 0, lockerror = ENOENT, status, wccpostattr = 0;
5302 struct timespec premtime = { .tv_sec = 0, .tv_nsec = 0 };
5303 struct nfsmount *nmp;
5304 nfsnode_t np = VTONFS(vp);
5305 nfsnode_t dnp = VTONFS(dvp);
5306 int nfsvers;
5307 u_int64_t xid;
5308 struct nfsm_chain nmreq, nmrep;
5309 struct nfsreq rq, *req = &rq;
5310 struct nfs_dulookup dul;
5311 int namedattrs;
5312 int dul_in_progress = 0;
5313
5314 nmp = VTONMP(vp);
5315 if (nfs_mount_gone(nmp)) {
5316 return ENXIO;
5317 }
5318 nfsvers = nmp->nm_vers;
5319 namedattrs = (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR);
5320
5321 if ((nfsvers == NFS_VER2) && (cnp->cn_namelen > NFS_MAXNAMLEN)) {
5322 return ENAMETOOLONG;
5323 }
5324
5325 if ((error = nfs_node_set_busy2(dnp, np, vfs_context_thread(ctx)))) {
5326 return error;
5327 }
5328
5329 if (!namedattrs) {
5330 nfs_dulookup_init(&dul, dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx);
5331 }
5332
5333 nfsm_chain_null(&nmreq);
5334 nfsm_chain_null(&nmrep);
5335
5336 nfsm_chain_build_alloc_init(error, &nmreq,
5337 NFSX_FH(nfsvers) + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen));
5338 nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
5339 nfsm_chain_add_name(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen, nmp);
5340 nfsm_chain_build_done(error, &nmreq);
5341 nfsmout_if(error);
5342
5343 error = nfs_request_async(dnp, NULL, &nmreq, NFSPROC_RMDIR,
5344 vfs_context_thread(ctx), vfs_context_ucred(ctx), NULL, 0, NULL, &req);
5345 if (!error) {
5346 if (!namedattrs) {
5347 nfs_dulookup_start(&dul, dnp, ctx);
5348 dul_in_progress = 1;
5349 }
5350 error = nfs_request_async_finish(req, &nmrep, &xid, &status);
5351 }
5352
5353 if ((lockerror = nfs_node_lock(dnp))) {
5354 error = lockerror;
5355 }
5356 if (nfsvers == NFS_VER3) {
5357 nfsm_chain_get_wcc_data(error, &nmrep, dnp, &premtime, &wccpostattr, &xid);
5358 }
5359 if (!error) {
5360 error = status;
5361 }
5362 nfsmout:
5363 nfsm_chain_cleanup(&nmreq);
5364 nfsm_chain_cleanup(&nmrep);
5365
5366 if (!lockerror) {
5367 dnp->n_flag |= NMODIFIED;
5368 /* if directory hadn't changed, update namecache mtime */
5369 if (nfstimespeccmp(&dnp->n_ncmtime, &premtime, ==)) {
5370 NFS_CHANGED_UPDATE_NC(nfsvers, dnp, &dnp->n_vattr);
5371 }
5372 nfs_node_unlock(dnp);
5373 nfs_name_cache_purge(dnp, np, cnp, ctx);
5374 /* nfs_getattr() will check changed and purge caches */
5375 nfs_getattr(dnp, NULL, ctx, wccpostattr ? NGA_CACHED : NGA_UNCACHED);
5376 }
5377 if (dul_in_progress) {
5378 nfs_dulookup_finish(&dul, dnp, ctx);
5379 }
5380 nfs_node_clear_busy2(dnp, np);
5381
5382 /*
5383 * Kludge: map ENOENT => 0, on the assumption that the reply is for a retried request.
5384 */
5385 if (error == ENOENT) {
5386 error = 0;
5387 }
5388 if (!error) {
5389 /*
5390 * remove nfsnode from hash now so we can't accidentally find it
5391 * again if another object gets created with the same filehandle
5392 * before this vnode gets reclaimed
5393 */
5394 lck_mtx_lock(nfs_node_hash_mutex);
5395 if (np->n_hflag & NHHASHED) {
5396 LIST_REMOVE(np, n_hash);
5397 np->n_hflag &= ~NHHASHED;
5398 FSDBG(266, 0, np, np->n_flag, 0xb1eb1e);
5399 }
5400 lck_mtx_unlock(nfs_node_hash_mutex);
5401 }
5402 return error;
5403 }
5404
5405 /*
5406 * NFS readdir call
5407 *
5408 * The incoming "offset" is a directory cookie indicating the point in the
5409 * directory from which entries should be read. A zero cookie means start at
5410 * the beginning of the directory. Any other cookie will be a cookie
5411 * returned from the server.
5412 *
5413 * Using that cookie, determine which buffer (and where in that buffer)
5414 * to start returning entries from. Buffer logical block numbers are
5415 * the cookies they start at. If a buffer is found that is not full,
5416 * call into the bio/RPC code to fill it. The RPC code will probably
5417 * fill several buffers (dropping the first, requiring a re-get).
5418 *
5419 * When done copying entries to the buffer, set the offset to the current
5420 * entry's cookie and enter that cookie in the cookie cache.
5421 *
5422 * Note: because the getdirentries(2) API returns a long-typed offset,
5423 * the incoming offset is a potentially truncated cookie (ptc).
5424 * The cookie matching code is aware of this and will fall back to
5425 * matching only 32 bits of the cookie.
5426 */
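/*
 * A minimal userland sketch of the truncated-cookie fallback described
 * above (not part of the build). COOKIE_SAME32 is a hypothetical
 * stand-in for the real NFS_DIR_COOKIE_SAME32 macro, assuming the
 * fallback compares only the low 32 bits of each cookie.
 */
#if 0
#include <stdint.h>
#include <stdio.h>

#define COOKIE_SAME32(a, b)	(((a) & 0xffffffffULL) == ((b) & 0xffffffffULL))

int
main(void)
{
	uint64_t server_cookie = 0x123456789abcdef0ULL;
	/* getdirentries(2) hands the cookie back through a long; an ILP32
	 * process silently truncates it to the low 32 bits */
	uint32_t truncated = (uint32_t)server_cookie;

	printf("exact match:  %d\n", server_cookie == (uint64_t)truncated);    /* 0 */
	printf("32-bit match: %d\n", COOKIE_SAME32(server_cookie, truncated)); /* 1 */
	return 0;
}
#endif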
5427 int
5428 nfs_vnop_readdir(
5429 struct vnop_readdir_args /* {
5430 * struct vnodeop_desc *a_desc;
5431 * vnode_t a_vp;
5432 * struct uio *a_uio;
5433 * int a_flags;
5434 * int *a_eofflag;
5435 * int *a_numdirent;
5436 * vfs_context_t a_context;
5437 * } */*ap)
5438 {
5439 vfs_context_t ctx = ap->a_context;
5440 vnode_t dvp = ap->a_vp;
5441 nfsnode_t dnp = VTONFS(dvp);
5442 struct nfsmount *nmp;
5443 uio_t uio = ap->a_uio;
5444 int error, nfsvers, extended, numdirent, bigcookies, ptc, done;
5445 uint16_t i, iptc, rlen, nlen;
5446 uint64_t cookie, nextcookie, lbn = 0;
5447 struct nfsbuf *bp = NULL;
5448 struct nfs_dir_buf_header *ndbhp;
5449 struct direntry *dp, *dpptc;
5450 struct dirent dent;
5451 char *cp = NULL;
5452 thread_t thd;
5453
5454 nmp = VTONMP(dvp);
5455 if (nfs_mount_gone(nmp)) {
5456 return ENXIO;
5457 }
5458 nfsvers = nmp->nm_vers;
5459 bigcookies = (nmp->nm_state & NFSSTA_BIGCOOKIES);
5460 extended = (ap->a_flags & VNODE_READDIR_EXTENDED);
5461
5462 if (vnode_vtype(dvp) != VDIR) {
5463 return EPERM;
5464 }
5465
5466 if (ap->a_eofflag) {
5467 *ap->a_eofflag = 0;
5468 }
5469
5470 if (uio_resid(uio) == 0) {
5471 return 0;
5472 }
5473 #if CONFIG_NFS4
5474 if ((nfsvers >= NFS_VER4) && (dnp->n_vattr.nva_flags & NFS_FFLAG_TRIGGER)) {
5475 /* trigger directories should never be read, return nothing */
5476 return 0;
5477 }
5478 #endif
5479 thd = vfs_context_thread(ctx);
5480 numdirent = done = 0;
5481 nextcookie = uio_offset(uio);
5482 ptc = bigcookies && NFS_DIR_COOKIE_POTENTIALLY_TRUNCATED(nextcookie);
5483
5484 if ((error = nfs_node_lock(dnp))) {
5485 goto out;
5486 }
5487
5488 if (dnp->n_flag & NNEEDINVALIDATE) {
5489 dnp->n_flag &= ~NNEEDINVALIDATE;
5490 nfs_invaldir(dnp);
5491 nfs_node_unlock(dnp);
5492 error = nfs_vinvalbuf(dvp, 0, ctx, 1);
5493 if (!error) {
5494 error = nfs_node_lock(dnp);
5495 }
5496 if (error) {
5497 goto out;
5498 }
5499 }
5500
5501 /*
5502 * check for need to invalidate when (re)starting at beginning
5503 */
5504 if (!nextcookie) {
5505 if (dnp->n_flag & NMODIFIED) {
5506 nfs_invaldir(dnp);
5507 nfs_node_unlock(dnp);
5508 if ((error = nfs_vinvalbuf(dvp, 0, ctx, 1))) {
5509 goto out;
5510 }
5511 } else {
5512 nfs_node_unlock(dnp);
5513 }
5514 /* nfs_getattr() will check changed and purge caches */
5515 if ((error = nfs_getattr(dnp, NULL, ctx, NGA_UNCACHED))) {
5516 goto out;
5517 }
5518 } else {
5519 nfs_node_unlock(dnp);
5520 }
5521
5522 error = nfs_dir_cookie_to_lbn(dnp, nextcookie, &ptc, &lbn);
5523 if (error) {
5524 if (error < 0) { /* just hit EOF cookie */
5525 done = 1;
5526 error = 0;
5527 }
5528 if (ap->a_eofflag) {
5529 *ap->a_eofflag = 1;
5530 }
5531 }
5532
5533 while (!error && !done) {
5534 OSAddAtomic64(1, &nfsstats.biocache_readdirs);
5535 cookie = nextcookie;
5536 getbuffer:
5537 error = nfs_buf_get(dnp, lbn, NFS_DIRBLKSIZ, thd, NBLK_READ, &bp);
5538 if (error) {
5539 goto out;
5540 }
5541 ndbhp = (struct nfs_dir_buf_header*)bp->nb_data;
5542 if (!ISSET(bp->nb_flags, NB_CACHE) || !ISSET(ndbhp->ndbh_flags, NDB_FULL)) {
5543 if (!ISSET(bp->nb_flags, NB_CACHE)) { /* initialize the buffer */
5544 ndbhp->ndbh_flags = 0;
5545 ndbhp->ndbh_count = 0;
5546 ndbhp->ndbh_entry_end = sizeof(*ndbhp);
5547 ndbhp->ndbh_ncgen = dnp->n_ncgen;
5548 }
5549 error = nfs_buf_readdir(bp, ctx);
5550 if (error == NFSERR_DIRBUFDROPPED) {
5551 goto getbuffer;
5552 }
5553 if (error) {
5554 nfs_buf_release(bp, 1);
5555 }
5556 if (error && (error != ENXIO) && (error != ETIMEDOUT) && (error != EINTR) && (error != ERESTART)) {
5557 if (!nfs_node_lock(dnp)) {
5558 nfs_invaldir(dnp);
5559 nfs_node_unlock(dnp);
5560 }
5561 nfs_vinvalbuf(dvp, 0, ctx, 1);
5562 if (error == NFSERR_BAD_COOKIE) {
5563 error = ENOENT;
5564 }
5565 }
5566 if (error) {
5567 goto out;
5568 }
5569 }
5570
5571 /* find next entry to return */
5572 dp = NFS_DIR_BUF_FIRST_DIRENTRY(bp);
5573 i = 0;
5574 if ((lbn != cookie) && !(ptc && NFS_DIR_COOKIE_SAME32(lbn, cookie))) {
5575 dpptc = NULL;
5576 iptc = 0;
5577 for (; (i < ndbhp->ndbh_count) && (cookie != dp->d_seekoff); i++) {
5578 if (ptc && !dpptc && NFS_DIR_COOKIE_SAME32(cookie, dp->d_seekoff)) {
5579 iptc = i;
5580 dpptc = dp;
5581 }
5582 nextcookie = dp->d_seekoff;
5583 dp = NFS_DIRENTRY_NEXT(dp);
5584 }
5585 if ((i == ndbhp->ndbh_count) && dpptc) {
5586 i = iptc;
5587 dp = dpptc;
5588 }
5589 if (i < ndbhp->ndbh_count) {
5590 nextcookie = dp->d_seekoff;
5591 dp = NFS_DIRENTRY_NEXT(dp);
5592 i++;
5593 }
5594 }
5595 ptc = 0; /* only have to deal with ptc on first cookie */
5596
5597 /* return as many entries as we can */
5598 for (; i < ndbhp->ndbh_count; i++) {
5599 if (extended) {
5600 rlen = dp->d_reclen;
5601 cp = (char*)dp;
5602 } else {
5603 if (!cp) {
5604 cp = (char*)&dent;
5605 bzero(cp, sizeof(dent));
5606 }
5607 if (dp->d_namlen > (sizeof(dent.d_name) - 1)) {
5608 nlen = sizeof(dent.d_name) - 1;
5609 } else {
5610 nlen = dp->d_namlen;
5611 }
5612 rlen = NFS_DIRENT_LEN(nlen);
5613 dent.d_reclen = rlen;
5614 dent.d_ino = dp->d_ino;
5615 dent.d_type = dp->d_type;
5616 dent.d_namlen = nlen;
5617 strlcpy(dent.d_name, dp->d_name, nlen + 1);
5618 }
5619 /* check that the record fits */
5620 if (rlen > uio_resid(uio)) {
5621 done = 1;
5622 break;
5623 }
5624 if ((error = uiomove(cp, rlen, uio))) {
5625 break;
5626 }
5627 numdirent++;
5628 nextcookie = dp->d_seekoff;
5629 dp = NFS_DIRENTRY_NEXT(dp);
5630 }
5631
5632 if (i == ndbhp->ndbh_count) {
5633 /* hit end of buffer, move to next buffer */
5634 lbn = nextcookie;
5635 /* if we also hit EOF, we're done */
5636 if (ISSET(ndbhp->ndbh_flags, NDB_EOF)) {
5637 done = 1;
5638 if (ap->a_eofflag) {
5639 *ap->a_eofflag = 1;
5640 }
5641 }
5642 }
5643 if (!error) {
5644 uio_setoffset(uio, nextcookie);
5645 }
5646 if (!error && !done && (nextcookie == cookie)) {
5647 printf("nfs readdir cookie didn't change 0x%llx, %d/%d\n", cookie, i, ndbhp->ndbh_count);
5648 error = EIO;
5649 }
5650 nfs_buf_release(bp, 1);
5651 }
5652
5653 if (!error) {
5654 nfs_dir_cookie_cache(dnp, nextcookie, lbn);
5655 }
5656
5657 if (ap->a_numdirent) {
5658 *ap->a_numdirent = numdirent;
5659 }
5660 out:
5661 return error;
5662 }
5663
5664
5665 /*
5666 * Invalidate cached directory information, except for the actual directory
5667 * blocks (which are invalidated separately).
5668 */
5669 void
5670 nfs_invaldir(nfsnode_t dnp)
5671 {
5672 if (vnode_vtype(NFSTOV(dnp)) != VDIR) {
5673 return;
5674 }
5675 dnp->n_eofcookie = 0;
5676 dnp->n_cookieverf = 0;
5677 if (!dnp->n_cookiecache) {
5678 return;
5679 }
5680 dnp->n_cookiecache->free = 0;
5681 dnp->n_cookiecache->mru = -1;
5682 memset(dnp->n_cookiecache->next, -1, NFSNUMCOOKIES);
5683 }
5684
5685 /*
5686 * calculate how much space is available for additional directory entries.
5687 */
5688 uint32_t
5689 nfs_dir_buf_freespace(struct nfsbuf *bp, int rdirplus)
5690 {
5691 struct nfs_dir_buf_header *ndbhp = (struct nfs_dir_buf_header*)bp->nb_data;
5692 uint32_t space;
5693
5694 if (!ndbhp) {
5695 return 0;
5696 }
5697 space = bp->nb_bufsize - ndbhp->ndbh_entry_end;
5698 if (rdirplus) {
5699 space -= ndbhp->ndbh_count * sizeof(struct nfs_vattr);
5700 }
5701 return space;
5702 }
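/*
 * A descriptive sketch of the accounting above (not part of the build):
 * directory entries grow forward from the buffer header, while for
 * readdirplus one struct nfs_vattr per entry is reserved working back
 * from the buffer's tail (this assumes NFS_DIR_BUF_NVATTR addresses
 * attributes from the tail); free space is what remains in between.
 *
 *   +--------+-------------------- nb_bufsize --------------------+
 *   | header | direntries ... ->     free     <- ... nfs_vattrs   |
 *   +--------+---^-------------------------------------------^----+
 *         ndbh_entry_end                 ndbh_count * sizeof(nfs_vattr)
 */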
5703
5704 /*
5705 * add/update a cookie->lbn entry in the directory cookie cache
5706 */
5707 void
5708 nfs_dir_cookie_cache(nfsnode_t dnp, uint64_t cookie, uint64_t lbn)
5709 {
5710 struct nfsdmap *ndcc;
5711 int8_t i, prev;
5712
5713 if (!cookie) {
5714 return;
5715 }
5716
5717 if (nfs_node_lock(dnp)) {
5718 return;
5719 }
5720
5721 if (cookie == dnp->n_eofcookie) { /* EOF cookie */
5722 nfs_node_unlock(dnp);
5723 return;
5724 }
5725
5726 ndcc = dnp->n_cookiecache;
5727 if (!ndcc) {
5728 /* allocate the cookie cache structure */
5729 MALLOC_ZONE(dnp->n_cookiecache, struct nfsdmap *,
5730 sizeof(struct nfsdmap), M_NFSDIROFF, M_WAITOK);
5731 if (!dnp->n_cookiecache) {
5732 nfs_node_unlock(dnp);
5733 return;
5734 }
5735 ndcc = dnp->n_cookiecache;
5736 ndcc->free = 0;
5737 ndcc->mru = -1;
5738 memset(ndcc->next, -1, NFSNUMCOOKIES);
5739 }
5740
5741 /*
5742 * Search the list for this cookie.
5743 * Keep track of previous and last entries.
5744 */
5745 prev = -1;
5746 i = ndcc->mru;
5747 while ((i != -1) && (cookie != ndcc->cookies[i].key)) {
5748 if (ndcc->next[i] == -1) { /* stop on last entry so we can reuse */
5749 break;
5750 }
5751 prev = i;
5752 i = ndcc->next[i];
5753 }
5754 if ((i != -1) && (cookie == ndcc->cookies[i].key)) {
5755 /* found it, remove from list */
5756 if (prev != -1) {
5757 ndcc->next[prev] = ndcc->next[i];
5758 } else {
5759 ndcc->mru = ndcc->next[i];
5760 }
5761 } else {
5762 /* not found, use next free entry or reuse last entry */
5763 if (ndcc->free != NFSNUMCOOKIES) {
5764 i = ndcc->free++;
5765 } else {
5766 ndcc->next[prev] = -1;
5767 }
5768 ndcc->cookies[i].key = cookie;
5769 ndcc->cookies[i].lbn = lbn;
5770 }
5771 /* insert cookie at head of MRU list */
5772 ndcc->next[i] = ndcc->mru;
5773 ndcc->mru = i;
5774 nfs_node_unlock(dnp);
5775 }
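/*
 * A compact userland sketch of the array-backed MRU list managed above
 * (not part of the build). The struct below is a hypothetical
 * reduction of struct nfsdmap: next[] holds array indices with -1 as
 * the terminator, mru is the head, and free counts slots handed out.
 */
#if 0
#include <stdint.h>

#define NUMCOOKIES	14	/* assumed small fixed capacity */

struct dmap {
	uint64_t keys[NUMCOOKIES];
	int8_t next[NUMCOOKIES];
	int8_t mru;	/* index of most-recently-used entry, -1 if empty */
	int8_t free;	/* number of slots allocated so far */
};

/* look up key, moving it to the front; reuse the tail slot when full */
static void
dmap_touch(struct dmap *m, uint64_t key)
{
	int8_t i = m->mru, prev = -1;

	while ((i != -1) && (m->keys[i] != key)) {
		if (m->next[i] == -1) {	/* stop on the last entry so it can be reused */
			break;
		}
		prev = i;
		i = m->next[i];
	}
	if ((i != -1) && (m->keys[i] == key)) {
		/* found: unlink it from its current position */
		if (prev != -1) {
			m->next[prev] = m->next[i];
		} else {
			m->mru = m->next[i];
		}
	} else if (m->free != NUMCOOKIES) {
		i = m->free++;		/* grab a fresh slot */
	} else {
		m->next[prev] = -1;	/* full: cut the last entry loose and reuse it */
	}
	m->keys[i] = key;
	m->next[i] = m->mru;	/* push onto the head of the MRU list */
	m->mru = i;
}
#endif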
5776
5777 /*
5778 * Try to map the given directory cookie to a directory buffer (return lbn).
5779 * If we have a possibly truncated cookie (ptc), check for 32-bit matches too.
5780 */
5781 int
5782 nfs_dir_cookie_to_lbn(nfsnode_t dnp, uint64_t cookie, int *ptc, uint64_t *lbnp)
5783 {
5784 struct nfsdmap *ndcc = dnp->n_cookiecache;
5785 int8_t eofptc, found;
5786 int i, iptc;
5787 struct nfsmount *nmp;
5788 struct nfsbuf *bp, *lastbp;
5789 struct nfsbuflists blist;
5790 struct direntry *dp, *dpptc;
5791 struct nfs_dir_buf_header *ndbhp;
5792
5793 if (!cookie) { /* initial cookie */
5794 *lbnp = 0;
5795 *ptc = 0;
5796 return 0;
5797 }
5798
5799 if (nfs_node_lock(dnp)) {
5800 return ENOENT;
5801 }
5802
5803 if (cookie == dnp->n_eofcookie) { /* EOF cookie */
5804 nfs_node_unlock(dnp);
5805 OSAddAtomic64(1, &nfsstats.direofcache_hits);
5806 *ptc = 0;
5807 return -1;
5808 }
5809 /* note if cookie is a 32-bit match with the EOF cookie */
5810 eofptc = *ptc ? NFS_DIR_COOKIE_SAME32(cookie, dnp->n_eofcookie) : 0;
5811 iptc = -1;
5812
5813 /* search the list for the cookie */
5814 for (i = ndcc ? ndcc->mru : -1; i >= 0; i = ndcc->next[i]) {
5815 if (ndcc->cookies[i].key == cookie) {
5816 /* found a match for this cookie */
5817 *lbnp = ndcc->cookies[i].lbn;
5818 nfs_node_unlock(dnp);
5819 OSAddAtomic64(1, &nfsstats.direofcache_hits);
5820 *ptc = 0;
5821 return 0;
5822 }
5823 /* check for 32-bit match */
5824 if (*ptc && (iptc == -1) && NFS_DIR_COOKIE_SAME32(ndcc->cookies[i].key, cookie)) {
5825 iptc = i;
5826 }
5827 }
5828 /* exact match not found */
5829 if (eofptc) {
5830 /* but 32-bit match hit the EOF cookie */
5831 nfs_node_unlock(dnp);
5832 OSAddAtomic64(1, &nfsstats.direofcache_hits);
5833 return -1;
5834 }
5835 if (iptc >= 0) {
5836 /* but 32-bit match got a hit */
5837 *lbnp = ndcc->cookies[iptc].lbn;
5838 nfs_node_unlock(dnp);
5839 OSAddAtomic64(1, &nfsstats.direofcache_hits);
5840 return 0;
5841 }
5842 nfs_node_unlock(dnp);
5843
5844 /*
5845 * No match found in the cookie cache... hmm...
5846 * Let's search the directory's buffers for the cookie.
5847 */
5848 nmp = NFSTONMP(dnp);
5849 if (nfs_mount_gone(nmp)) {
5850 return ENXIO;
5851 }
5852 dpptc = NULL;
5853 found = 0;
5854
5855 lck_mtx_lock(nfs_buf_mutex);
5856 /*
5857 * Scan the list of buffers, keeping them in order.
5858 * Note that itercomplete inserts each of the remaining buffers
5859 * into the head of list (thus reversing the elements). So, we
5860 * make sure to iterate through all buffers, inserting them after
5861 * each other, to keep them in order.
5862 * Also note: the LIST_INSERT_AFTER(lastbp) is only safe because
5863 * we don't drop nfs_buf_mutex.
5864 */
5865 if (!nfs_buf_iterprepare(dnp, &blist, NBI_CLEAN)) {
5866 lastbp = NULL;
5867 while ((bp = LIST_FIRST(&blist))) {
5868 LIST_REMOVE(bp, nb_vnbufs);
5869 if (!lastbp) {
5870 LIST_INSERT_HEAD(&dnp->n_cleanblkhd, bp, nb_vnbufs);
5871 } else {
5872 LIST_INSERT_AFTER(lastbp, bp, nb_vnbufs);
5873 }
5874 lastbp = bp;
5875 if (found) {
5876 continue;
5877 }
5878 nfs_buf_refget(bp);
5879 if (nfs_buf_acquire(bp, NBAC_NOWAIT, 0, 0)) {
5880 /* just skip this buffer */
5881 nfs_buf_refrele(bp);
5882 continue;
5883 }
5884 nfs_buf_refrele(bp);
5885
5886 /* scan the buffer for the cookie */
5887 ndbhp = (struct nfs_dir_buf_header*)bp->nb_data;
5888 dp = NFS_DIR_BUF_FIRST_DIRENTRY(bp);
5889 dpptc = NULL;
5890 for (i = 0; (i < ndbhp->ndbh_count) && (cookie != dp->d_seekoff); i++) {
5891 if (*ptc && !dpptc && NFS_DIR_COOKIE_SAME32(cookie, dp->d_seekoff)) {
5892 dpptc = dp;
5893 iptc = i;
5894 }
5895 dp = NFS_DIRENTRY_NEXT(dp);
5896 }
5897 if ((i == ndbhp->ndbh_count) && dpptc) {
5898 /* found only a PTC match */
5899 dp = dpptc;
5900 i = iptc;
5901 } else if (i < ndbhp->ndbh_count) {
5902 *ptc = 0;
5903 }
5904 if (i < (ndbhp->ndbh_count - 1)) {
5905 /* next entry is *in* this buffer: return this block */
5906 *lbnp = bp->nb_lblkno;
5907 found = 1;
5908 } else if (i == (ndbhp->ndbh_count - 1)) {
5909 /* next entry refers to *next* buffer: return next block */
5910 *lbnp = dp->d_seekoff;
5911 found = 1;
5912 }
5913 nfs_buf_drop(bp);
5914 }
5915 nfs_buf_itercomplete(dnp, &blist, NBI_CLEAN);
5916 }
5917 lck_mtx_unlock(nfs_buf_mutex);
5918 if (found) {
5919 OSAddAtomic64(1, &nfsstats.direofcache_hits);
5920 return 0;
5921 }
5922
5923 /* still not found... oh well, just start a new block */
5924 *lbnp = cookie;
5925 OSAddAtomic64(1, &nfsstats.direofcache_misses);
5926 return 0;
5927 }
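/*
 * A userland sketch (not part of the build) of the order-preserving
 * move used by the buffer-list scans above and below: popping each
 * element off a source list and re-inserting it after the previously
 * moved element keeps the destination in source order, whereas
 * repeatedly using LIST_INSERT_HEAD would reverse it -- which is
 * exactly why the scans track lastbp.
 */
#if 0
#include <sys/queue.h>

struct elem {
	int val;
	LIST_ENTRY(elem) link;
};
LIST_HEAD(elist, elem);

static void
move_in_order(struct elist *src, struct elist *dst)
{
	struct elem *e, *last = NULL;

	while ((e = LIST_FIRST(src)) != NULL) {
		LIST_REMOVE(e, link);
		if (last == NULL) {
			LIST_INSERT_HEAD(dst, e, link);
		} else {
			LIST_INSERT_AFTER(last, e, link);
		}
		last = e;
	}
}
#endif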
5928
5929 /*
5930 * scan a directory buffer for the given name
5931 * Returns: ESRCH if not found, ENOENT if found invalid, 0 if found
5932 * Note: should only be called with RDIRPLUS directory buffers
5933 */
5934
5935 #define NDBS_PURGE 1
5936 #define NDBS_UPDATE 2
5937
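/*
 * Layout sketch of an RDIRPLUS direntry record, as packed by
 * nfs3_readdir_rpc() below and unpacked here (offsets relative to
 * d_name; fhlen is the raw file handle length byte):
 *
 *   d_name[0 .. d_namlen]                      name + terminating NUL
 *   d_name[d_namlen + 1]                       fhlen (one byte)
 *   d_name[d_namlen + 2 .. d_namlen+1+fhlen]   file handle data
 *   d_name[d_namlen + 2 + fhlen]               time_t attrstamp
 *
 * The matching nfs_vattr for entry i is stored separately in the
 * buffer and reached via NFS_DIR_BUF_NVATTR(bp, i).
 */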
5938 int
5939 nfs_dir_buf_search(
5940 struct nfsbuf *bp,
5941 struct componentname *cnp,
5942 fhandle_t *fhp,
5943 struct nfs_vattr *nvap,
5944 uint64_t *xidp,
5945 time_t *attrstampp,
5946 daddr64_t *nextlbnp,
5947 int flags)
5948 {
5949 struct direntry *dp;
5950 struct nfs_dir_buf_header *ndbhp;
5951 struct nfs_vattr *nvattrp;
5952 daddr64_t nextlbn = 0;
5953 int i, error = ESRCH;
5954 uint32_t fhlen;
5955
5956 /* scan the buffer for the name */
5957 ndbhp = (struct nfs_dir_buf_header*)bp->nb_data;
5958 dp = NFS_DIR_BUF_FIRST_DIRENTRY(bp);
5959 for (i = 0; i < ndbhp->ndbh_count; i++) {
5960 nextlbn = dp->d_seekoff;
5961 if ((cnp->cn_namelen == dp->d_namlen) && !strcmp(cnp->cn_nameptr, dp->d_name)) {
5962 fhlen = dp->d_name[dp->d_namlen + 1];
5963 nvattrp = NFS_DIR_BUF_NVATTR(bp, i);
5964 if ((ndbhp->ndbh_ncgen != bp->nb_np->n_ncgen) || (fhlen == 0) ||
5965 (nvattrp->nva_type == VNON) || (nvattrp->nva_fileid == 0)) {
5966 /* entry is not valid */
5967 error = ENOENT;
5968 break;
5969 }
5970 if (flags == NDBS_PURGE) {
5971 dp->d_fileno = 0;
5972 bzero(nvattrp, sizeof(*nvattrp));
5973 error = ENOENT;
5974 break;
5975 }
5976 if (flags == NDBS_UPDATE) {
5977 /* update direntry's attrs if fh matches */
5978 if ((fhp->fh_len == fhlen) && !bcmp(&dp->d_name[dp->d_namlen + 2], fhp->fh_data, fhlen)) {
5979 bcopy(nvap, nvattrp, sizeof(*nvap));
5980 dp->d_fileno = nvattrp->nva_fileid;
5981 nvattrp->nva_fileid = *xidp;
5982 *(time_t*)(&dp->d_name[dp->d_namlen + 2 + fhp->fh_len]) = *attrstampp;
5983 }
5984 error = 0;
5985 break;
5986 }
5987 /* copy out fh, attrs, attrstamp, and xid */
5988 fhp->fh_len = fhlen;
5989 bcopy(&dp->d_name[dp->d_namlen + 2], fhp->fh_data, MIN(fhp->fh_len, (int)sizeof(fhp->fh_data))); /* MIN: clamp the copy to fh_data's capacity */
5990 *attrstampp = *(time_t*)(&dp->d_name[dp->d_namlen + 2 + fhp->fh_len]);
5991 bcopy(nvattrp, nvap, sizeof(*nvap));
5992 *xidp = nvap->nva_fileid;
5993 nvap->nva_fileid = dp->d_fileno;
5994 error = 0;
5995 break;
5996 }
5997 dp = NFS_DIRENTRY_NEXT(dp);
5998 }
5999 if (nextlbnp) {
6000 *nextlbnp = nextlbn;
6001 }
6002 return error;
6003 }
6004
6005 /*
6006 * Look up a name in a directory's buffers.
6007 * Note: should only be called with RDIRPLUS directory buffers
6008 */
6009 int
6010 nfs_dir_buf_cache_lookup(nfsnode_t dnp, nfsnode_t *npp, struct componentname *cnp, vfs_context_t ctx, int purge)
6011 {
6012 nfsnode_t newnp;
6013 struct nfsmount *nmp;
6014 int error = 0, i, found = 0, count = 0;
6015 u_int64_t xid;
6016 struct nfs_vattr nvattr;
6017 fhandle_t fh;
6018 time_t attrstamp = 0;
6019 thread_t thd = vfs_context_thread(ctx);
6020 struct nfsbuf *bp, *lastbp, *foundbp;
6021 struct nfsbuflists blist;
6022 daddr64_t lbn, nextlbn;
6023 int dotunder = (cnp->cn_namelen > 2) && (cnp->cn_nameptr[0] == '.') && (cnp->cn_nameptr[1] == '_');
6024
6025 nmp = NFSTONMP(dnp);
6026 if (nfs_mount_gone(nmp)) {
6027 return ENXIO;
6028 }
6029 if (!purge) {
6030 *npp = NULL;
6031 }
6032
6033 /* first check most recent buffer (and next one too) */
6034 lbn = dnp->n_lastdbl;
6035 for (i = 0; i < 2; i++) {
6036 if ((error = nfs_buf_get(dnp, lbn, NFS_DIRBLKSIZ, thd, NBLK_READ | NBLK_ONLYVALID, &bp))) {
6037 return error;
6038 }
6039 if (!bp) {
6040 break;
6041 }
6042 count++;
6043 error = nfs_dir_buf_search(bp, cnp, &fh, &nvattr, &xid, &attrstamp, &nextlbn, purge ? NDBS_PURGE : 0);
6044 nfs_buf_release(bp, 0);
6045 if (error == ESRCH) {
6046 error = 0;
6047 } else {
6048 found = 1;
6049 break;
6050 }
6051 lbn = nextlbn;
6052 }
6053
6054 lck_mtx_lock(nfs_buf_mutex);
6055 if (found) {
6056 dnp->n_lastdbl = lbn;
6057 goto done;
6058 }
6059
6060 /*
6061 * Scan the list of buffers, keeping them in order.
6062 * Note that itercomplete inserts each of the remaining buffers
6063 * into the head of list (thus reversing the elements). So, we
6064 * make sure to iterate through all buffers, inserting them after
6065 * each other, to keep them in order.
6066 * Also note: the LIST_INSERT_AFTER(lastbp) is only safe because
6067 * we don't drop nfs_buf_mutex.
6068 */
6069 if (!nfs_buf_iterprepare(dnp, &blist, NBI_CLEAN)) {
6070 lastbp = foundbp = NULL;
6071 while ((bp = LIST_FIRST(&blist))) {
6072 LIST_REMOVE(bp, nb_vnbufs);
6073 if (!lastbp) {
6074 LIST_INSERT_HEAD(&dnp->n_cleanblkhd, bp, nb_vnbufs);
6075 } else {
6076 LIST_INSERT_AFTER(lastbp, bp, nb_vnbufs);
6077 }
6078 lastbp = bp;
6079 if (error || found) {
6080 continue;
6081 }
6082 if (!purge && dotunder && (count > 100)) { /* don't waste too much time looking for ._ files */
6083 continue;
6084 }
6085 nfs_buf_refget(bp);
6086 lbn = bp->nb_lblkno;
6087 if (nfs_buf_acquire(bp, NBAC_NOWAIT, 0, 0)) {
6088 /* just skip this buffer */
6089 nfs_buf_refrele(bp);
6090 continue;
6091 }
6092 nfs_buf_refrele(bp);
6093 count++;
6094 error = nfs_dir_buf_search(bp, cnp, &fh, &nvattr, &xid, &attrstamp, NULL, purge ? NDBS_PURGE : 0);
6095 if (error == ESRCH) {
6096 error = 0;
6097 } else {
6098 found = 1;
6099 foundbp = bp;
6100 }
6101 nfs_buf_drop(bp);
6102 }
6103 if (found) {
6104 LIST_REMOVE(foundbp, nb_vnbufs);
6105 LIST_INSERT_HEAD(&dnp->n_cleanblkhd, foundbp, nb_vnbufs);
6106 dnp->n_lastdbl = foundbp->nb_lblkno;
6107 }
6108 nfs_buf_itercomplete(dnp, &blist, NBI_CLEAN);
6109 }
6110 done:
6111 lck_mtx_unlock(nfs_buf_mutex);
6112
6113 if (!error && found && !purge) {
6114 error = nfs_nget(NFSTOMP(dnp), dnp, cnp, fh.fh_data, fh.fh_len,
6115 &nvattr, &xid, dnp->n_auth, NG_MAKEENTRY, &newnp);
6116 if (error) {
6117 return error;
6118 }
6119 newnp->n_attrstamp = attrstamp;
6120 *npp = newnp;
6121 nfs_node_unlock(newnp);
6122 /* check if the dir buffer's attrs are out of date */
6123 if (!nfs_getattr(newnp, &nvattr, ctx, NGA_CACHED) &&
6124 (newnp->n_attrstamp != attrstamp)) {
6125 /* they are, so update them */
6126 error = nfs_buf_get(dnp, lbn, NFS_DIRBLKSIZ, thd, NBLK_READ | NBLK_ONLYVALID, &bp);
6127 if (!error && bp) {
6128 attrstamp = newnp->n_attrstamp;
6129 xid = newnp->n_xid;
6130 nfs_dir_buf_search(bp, cnp, &fh, &nvattr, &xid, &attrstamp, NULL, NDBS_UPDATE);
6131 nfs_buf_release(bp, 0);
6132 }
6133 error = 0;
6134 }
6135 }
6136
6137 return error;
6138 }
6139
6140 /*
6141 * Purge name cache entries for the given node.
6142 * For RDIRPLUS, also invalidate the entry in the directory's buffers.
6143 */
6144 void
6145 nfs_name_cache_purge(nfsnode_t dnp, nfsnode_t np, struct componentname *cnp, vfs_context_t ctx)
6146 {
6147 struct nfsmount *nmp = NFSTONMP(dnp);
6148
6149 cache_purge(NFSTOV(np));
6150 if (nmp && (nmp->nm_vers > NFS_VER2) && NMFLAG(nmp, RDIRPLUS)) {
6151 nfs_dir_buf_cache_lookup(dnp, NULL, cnp, ctx, 1);
6152 }
6153 }
6154
6155 /*
6156 * NFS V3 readdir (plus) RPC.
6157 */
6158 int
6159 nfs3_readdir_rpc(nfsnode_t dnp, struct nfsbuf *bp, vfs_context_t ctx)
6160 {
6161 struct nfsmount *nmp;
6162 int error = 0, lockerror, nfsvers, rdirplus, bigcookies;
6163 int i, status, attrflag, fhflag, more_entries = 1, eof, bp_dropped = 0;
6164 uint32_t nmreaddirsize, nmrsize;
6165 uint32_t namlen, skiplen, fhlen, xlen, attrlen, reclen, space_free, space_needed;
6166 uint64_t cookie, lastcookie, xid, savedxid, fileno;
6167 struct nfsm_chain nmreq, nmrep, nmrepsave;
6168 fhandle_t fh;
6169 struct nfs_vattr *nvattrp;
6170 struct nfs_dir_buf_header *ndbhp;
6171 struct direntry *dp;
6172 char *padstart, padlen;
6173 struct timeval now;
6174
6175 nmp = NFSTONMP(dnp);
6176 if (nfs_mount_gone(nmp)) {
6177 return ENXIO;
6178 }
6179 nfsvers = nmp->nm_vers;
6180 nmreaddirsize = nmp->nm_readdirsize;
6181 nmrsize = nmp->nm_rsize;
6182 bigcookies = nmp->nm_state & NFSSTA_BIGCOOKIES;
6183 noplus:
6184 rdirplus = ((nfsvers > NFS_VER2) && NMFLAG(nmp, RDIRPLUS)) ? 1 : 0;
6185
6186 if ((lockerror = nfs_node_lock(dnp))) {
6187 return lockerror;
6188 }
6189
6190 /* determine cookie to use, and move dp to the right offset */
6191 ndbhp = (struct nfs_dir_buf_header*)bp->nb_data;
6192 dp = NFS_DIR_BUF_FIRST_DIRENTRY(bp);
6193 if (ndbhp->ndbh_count) {
6194 for (i = 0; i < ndbhp->ndbh_count - 1; i++) {
6195 dp = NFS_DIRENTRY_NEXT(dp);
6196 }
6197 cookie = dp->d_seekoff;
6198 dp = NFS_DIRENTRY_NEXT(dp);
6199 } else {
6200 cookie = bp->nb_lblkno;
6201 /* increment with every buffer read */
6202 OSAddAtomic64(1, &nfsstats.readdir_bios);
6203 }
6204 lastcookie = cookie;
6205
6206 /*
6207 * Loop around doing readdir(plus) RPCs of size nm_readdirsize until
6208 * the buffer is full (or we hit EOF). Then put the remainder of the
6209 * results in the next buffer(s).
6210 */
6211 nfsm_chain_null(&nmreq);
6212 nfsm_chain_null(&nmrep);
6213 while (nfs_dir_buf_freespace(bp, rdirplus) && !(ndbhp->ndbh_flags & NDB_FULL)) {
6214 nfsm_chain_build_alloc_init(error, &nmreq,
6215 NFSX_FH(nfsvers) + NFSX_READDIR(nfsvers) + NFSX_UNSIGNED);
6216 nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
6217 if (nfsvers == NFS_VER3) {
6218 /* opaque values don't need swapping, but as long */
6219 /* as we are consistent about it, it should be ok */
6220 nfsm_chain_add_64(error, &nmreq, cookie);
6221 nfsm_chain_add_64(error, &nmreq, dnp->n_cookieverf);
6222 } else {
6223 nfsm_chain_add_32(error, &nmreq, cookie);
6224 }
6225 nfsm_chain_add_32(error, &nmreq, nmreaddirsize);
6226 if (rdirplus) {
6227 nfsm_chain_add_32(error, &nmreq, nmrsize);
6228 }
6229 nfsm_chain_build_done(error, &nmreq);
6230 nfs_node_unlock(dnp);
6231 lockerror = ENOENT;
6232 nfsmout_if(error);
6233
6234 error = nfs_request(dnp, NULL, &nmreq,
6235 rdirplus ? NFSPROC_READDIRPLUS : NFSPROC_READDIR,
6236 ctx, NULL, &nmrep, &xid, &status);
6237
6238 if ((lockerror = nfs_node_lock(dnp))) {
6239 error = lockerror;
6240 }
6241
6242 savedxid = xid;
6243 if (nfsvers == NFS_VER3) {
6244 nfsm_chain_postop_attr_update(error, &nmrep, dnp, &xid);
6245 }
6246 if (!error) {
6247 error = status;
6248 }
6249 if (nfsvers == NFS_VER3) {
6250 nfsm_chain_get_64(error, &nmrep, dnp->n_cookieverf);
6251 }
6252 nfsm_chain_get_32(error, &nmrep, more_entries);
6253
6254 if (!lockerror) {
6255 nfs_node_unlock(dnp);
6256 lockerror = ENOENT;
6257 }
6258 if (error == NFSERR_NOTSUPP) {
6259 /* oops... it doesn't look like readdirplus is supported */
6260 lck_mtx_lock(&nmp->nm_lock);
6261 NFS_BITMAP_CLR(nmp->nm_flags, NFS_MFLAG_RDIRPLUS);
6262 lck_mtx_unlock(&nmp->nm_lock);
6263 goto noplus;
6264 }
6265 nfsmout_if(error);
6266
6267 if (rdirplus) {
6268 microuptime(&now);
6269 }
6270
6271 /* loop through the entries packing them into the buffer */
6272 while (more_entries) {
6273 if (nfsvers == NFS_VER3) {
6274 nfsm_chain_get_64(error, &nmrep, fileno);
6275 } else {
6276 nfsm_chain_get_32(error, &nmrep, fileno);
6277 }
6278 nfsm_chain_get_32(error, &nmrep, namlen);
6279 nfsmout_if(error);
6280 /* just truncate names that don't fit in direntry.d_name */
6281 if (namlen <= 0) {
6282 error = EBADRPC;
6283 goto nfsmout;
6284 }
6285 if (namlen > (sizeof(dp->d_name) - 1)) {
6286 skiplen = namlen - sizeof(dp->d_name) + 1;
6287 namlen = sizeof(dp->d_name) - 1;
6288 } else {
6289 skiplen = 0;
6290 }
6291 /* guess that fh size will be same as parent */
6292 fhlen = rdirplus ? (1 + dnp->n_fhsize) : 0;
6293 xlen = rdirplus ? (fhlen + sizeof(time_t)) : 0;
6294 attrlen = rdirplus ? sizeof(struct nfs_vattr) : 0;
6295 reclen = NFS_DIRENTRY_LEN(namlen + xlen);
6296 space_needed = reclen + attrlen;
6297 space_free = nfs_dir_buf_freespace(bp, rdirplus);
6298 if (space_needed > space_free) {
6299 /*
6300 * We still have entries to pack, but we've
6301 * run out of room in the current buffer.
6302 * So we need to move to the next buffer.
6303 * The block# for the next buffer is the
6304 * last cookie in the current buffer.
6305 */
6306 nextbuffer:
6307 ndbhp->ndbh_flags |= NDB_FULL;
6308 nfs_buf_release(bp, 0);
6309 bp_dropped = 1;
6310 bp = NULL;
6311 error = nfs_buf_get(dnp, lastcookie, NFS_DIRBLKSIZ, vfs_context_thread(ctx), NBLK_READ, &bp);
6312 nfsmout_if(error);
6313 /* initialize buffer */
6314 ndbhp = (struct nfs_dir_buf_header*)bp->nb_data;
6315 ndbhp->ndbh_flags = 0;
6316 ndbhp->ndbh_count = 0;
6317 ndbhp->ndbh_entry_end = sizeof(*ndbhp);
6318 ndbhp->ndbh_ncgen = dnp->n_ncgen;
6319 space_free = nfs_dir_buf_freespace(bp, rdirplus);
6320 dp = NFS_DIR_BUF_FIRST_DIRENTRY(bp);
6321 /* increment with every buffer read */
6322 OSAddAtomic64(1, &nfsstats.readdir_bios);
6323 }
6324 nmrepsave = nmrep;
6325 dp->d_fileno = fileno;
6326 dp->d_namlen = namlen;
6327 dp->d_reclen = reclen;
6328 dp->d_type = DT_UNKNOWN;
6329 nfsm_chain_get_opaque(error, &nmrep, namlen, dp->d_name);
6330 nfsmout_if(error);
6331 dp->d_name[namlen] = '\0';
6332 if (skiplen) {
6333 nfsm_chain_adv(error, &nmrep,
6334 nfsm_rndup(namlen + skiplen) - nfsm_rndup(namlen));
6335 }
6336 if (nfsvers == NFS_VER3) {
6337 nfsm_chain_get_64(error, &nmrep, cookie);
6338 } else {
6339 nfsm_chain_get_32(error, &nmrep, cookie);
6340 }
6341 nfsmout_if(error);
6342 dp->d_seekoff = cookie;
6343 if (!bigcookies && (cookie >> 32) && (nmp == NFSTONMP(dnp))) {
6344 /* we've got a big cookie, make sure flag is set */
6345 lck_mtx_lock(&nmp->nm_lock);
6346 nmp->nm_state |= NFSSTA_BIGCOOKIES;
6347 lck_mtx_unlock(&nmp->nm_lock);
6348 bigcookies = 1;
6349 }
6350 if (rdirplus) {
6351 nvattrp = NFS_DIR_BUF_NVATTR(bp, ndbhp->ndbh_count);
6352 /* check for attributes */
6353 nfsm_chain_get_32(error, &nmrep, attrflag);
6354 nfsmout_if(error);
6355 if (attrflag) {
6356 /* grab attributes */
6357 error = nfs_parsefattr(nmp, &nmrep, NFS_VER3, nvattrp);
6358 nfsmout_if(error);
6359 dp->d_type = IFTODT(VTTOIF(nvattrp->nva_type));
6360 /* fileid is already in d_fileno, so stash xid in attrs */
6361 nvattrp->nva_fileid = savedxid;
6362 } else {
6363 /* mark the attributes invalid */
6364 bzero(nvattrp, sizeof(struct nfs_vattr));
6365 }
6366 /* check for file handle */
6367 nfsm_chain_get_32(error, &nmrep, fhflag);
6368 nfsmout_if(error);
6369 if (fhflag) {
6370 nfsm_chain_get_fh(error, &nmrep, NFS_VER3, &fh);
6371 nfsmout_if(error);
6372 fhlen = fh.fh_len + 1;
6373 xlen = fhlen + sizeof(time_t);
6374 reclen = NFS_DIRENTRY_LEN(namlen + xlen);
6375 space_needed = reclen + attrlen;
6376 if (space_needed > space_free) {
6377 /* didn't actually have the room... move on to next buffer */
6378 nmrep = nmrepsave;
6379 goto nextbuffer;
6380 }
6381 /* pack the file handle into the record */
6382 dp->d_name[dp->d_namlen + 1] = fh.fh_len;
6383 bcopy(fh.fh_data, &dp->d_name[dp->d_namlen + 2], fh.fh_len);
6384 } else {
6385 /* mark the file handle invalid */
6386 fh.fh_len = 0;
6387 fhlen = fh.fh_len + 1;
6388 xlen = fhlen + sizeof(time_t);
6389 reclen = NFS_DIRENTRY_LEN(namlen + xlen);
6390 bzero(&dp->d_name[dp->d_namlen + 1], fhlen);
6391 }
6392 *(time_t*)(&dp->d_name[dp->d_namlen + 1 + fhlen]) = now.tv_sec;
6393 dp->d_reclen = reclen;
6394 }
6395 padstart = dp->d_name + dp->d_namlen + 1 + xlen;
6396 ndbhp->ndbh_count++;
6397 lastcookie = cookie;
6398 /* advance to next direntry in buffer */
6399 dp = NFS_DIRENTRY_NEXT(dp);
6400 ndbhp->ndbh_entry_end = (char*)dp - bp->nb_data;
6401 /* zero out the pad bytes */
6402 padlen = (char*)dp - padstart;
6403 if (padlen > 0) {
6404 bzero(padstart, padlen);
6405 }
6406 /* check for more entries */
6407 nfsm_chain_get_32(error, &nmrep, more_entries);
6408 nfsmout_if(error);
6409 }
6410 /* Finally, get the eof boolean */
6411 nfsm_chain_get_32(error, &nmrep, eof);
6412 nfsmout_if(error);
6413 if (eof) {
6414 ndbhp->ndbh_flags |= (NDB_FULL | NDB_EOF);
6415 nfs_node_lock_force(dnp);
6416 dnp->n_eofcookie = lastcookie;
6417 nfs_node_unlock(dnp);
6418 } else {
6419 more_entries = 1;
6420 }
6421 if (bp_dropped) {
6422 nfs_buf_release(bp, 0);
6423 bp = NULL;
6424 break;
6425 }
6426 if ((lockerror = nfs_node_lock(dnp))) {
6427 error = lockerror;
6428 }
6429 nfsmout_if(error);
6430 nfsm_chain_cleanup(&nmrep);
6431 nfsm_chain_null(&nmreq);
6432 }
6433 nfsmout:
6434 if (bp_dropped && bp) {
6435 nfs_buf_release(bp, 0);
6436 }
6437 if (!lockerror) {
6438 nfs_node_unlock(dnp);
6439 }
6440 nfsm_chain_cleanup(&nmreq);
6441 nfsm_chain_cleanup(&nmrep);
6442 return bp_dropped ? NFSERR_DIRBUFDROPPED : error;
6443 }
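/*
 * For reference (descriptive note, not normative): the reply stream
 * parsed above follows the RFC 1813 READDIRPLUS3 layout -- post-op dir
 * attributes, cookieverf, then a list of entries, each preceded by a
 * value-follows word:
 *
 *   fileid (64) | name | cookie (64) | attr flag [+ attrs] | fh flag [+ fh]
 *
 * The list ends with a zero value-follows word followed by the eof
 * boolean. Plain READDIR3 is the same minus the per-entry attributes
 * and file handle, and NFSv2 uses 32-bit fileids and cookies.
 */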
6444
6445 /*
6446 * Silly rename. To make the stateless NFS filesystem look a little more
6447 * like "ufs", a remove of an active vnode is translated to a rename to a
6448 * funny-looking filename that is removed by nfs_vnop_inactive on the
6449 * nfsnode. There is the potential for another process on a different client
6450 * to create the same funny name between the time the lookitup() fails and
6451 * the rename() completes, but...
6452 */
6453
6454 /* format of "random" silly names - includes a number and pid */
6455 /* (note: shouldn't exceed size of nfs_sillyrename.nsr_name) */
6456 #define NFS_SILLYNAME_FORMAT ".nfs.%08x.%04x"
6457 /* starting from zero isn't silly enough */
6458 static uint32_t nfs_sillyrename_number = 0x20051025;
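/*
 * For illustration (userland sketch, not part of the build): with the
 * format above, a counter value of 0x20051026 and a pid of 0x1a4
 * produce the name ".nfs.20051026.01a4".
 */
#if 0
#include <stdio.h>

int
main(void)
{
	char name[32];

	snprintf(name, sizeof(name), ".nfs.%08x.%04x", 0x20051026u, (0x1a4u & 0xffffu));
	printf("%s\n", name);	/* .nfs.20051026.01a4 */
	return 0;
}
#endif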
6459
6460 int
6461 nfs_sillyrename(
6462 nfsnode_t dnp,
6463 nfsnode_t np,
6464 struct componentname *cnp,
6465 vfs_context_t ctx)
6466 {
6467 struct nfs_sillyrename *nsp;
6468 int error;
6469 short pid;
6470 kauth_cred_t cred;
6471 uint32_t num;
6472 struct nfsmount *nmp;
6473
6474 nmp = NFSTONMP(dnp);
6475 if (nfs_mount_gone(nmp)) {
6476 return ENXIO;
6477 }
6478
6479 nfs_name_cache_purge(dnp, np, cnp, ctx);
6480
6481 MALLOC_ZONE(nsp, struct nfs_sillyrename *,
6482 sizeof(struct nfs_sillyrename), M_NFSREQ, M_WAITOK);
6483 if (!nsp) {
6484 return ENOMEM;
6485 }
6486 cred = vfs_context_ucred(ctx);
6487 kauth_cred_ref(cred);
6488 nsp->nsr_cred = cred;
6489 nsp->nsr_dnp = dnp;
6490 error = vnode_ref(NFSTOV(dnp));
6491 if (error) {
6492 goto bad_norele;
6493 }
6494
6495 /* Fudge together a funny name */
6496 pid = vfs_context_pid(ctx);
6497 num = OSAddAtomic(1, &nfs_sillyrename_number);
6498 nsp->nsr_namlen = snprintf(nsp->nsr_name, sizeof(nsp->nsr_name),
6499 NFS_SILLYNAME_FORMAT, num, (pid & 0xffff));
6500 if (nsp->nsr_namlen >= (int)sizeof(nsp->nsr_name)) {
6501 nsp->nsr_namlen = sizeof(nsp->nsr_name) - 1;
6502 }
6503
6504 /* Try lookitups until we get one that isn't there */
6505 while (nfs_lookitup(dnp, nsp->nsr_name, nsp->nsr_namlen, ctx, NULL) == 0) {
6506 num = OSAddAtomic(1, &nfs_sillyrename_number);
6507 nsp->nsr_namlen = snprintf(nsp->nsr_name, sizeof(nsp->nsr_name),
6508 NFS_SILLYNAME_FORMAT, num, (pid & 0xffff));
6509 if (nsp->nsr_namlen >= (int)sizeof(nsp->nsr_name)) {
6510 nsp->nsr_namlen = sizeof(nsp->nsr_name) - 1;
6511 }
6512 }
6513
6514 /* now, do the rename */
6515 error = nmp->nm_funcs->nf_rename_rpc(dnp, cnp->cn_nameptr, cnp->cn_namelen,
6516 dnp, nsp->nsr_name, nsp->nsr_namlen, ctx);
6517
6518 /* Kludge: Map ENOENT => 0 assuming that it is a reply to a retry. */
6519 if (error == ENOENT) {
6520 error = 0;
6521 }
6522 if (!error) {
6523 nfs_node_lock_force(dnp);
6524 if (dnp->n_flag & NNEGNCENTRIES) {
6525 dnp->n_flag &= ~NNEGNCENTRIES;
6526 cache_purge_negatives(NFSTOV(dnp));
6527 }
6528 nfs_node_unlock(dnp);
6529 }
6530 FSDBG(267, dnp, np, num, error);
6531 if (error) {
6532 goto bad;
6533 }
6534 error = nfs_lookitup(dnp, nsp->nsr_name, nsp->nsr_namlen, ctx, &np);
6535 nfs_node_lock_force(np);
6536 np->n_sillyrename = nsp;
6537 nfs_node_unlock(np);
6538 return 0;
6539 bad:
6540 vnode_rele(NFSTOV(dnp));
6541 bad_norele:
6542 nsp->nsr_cred = NOCRED;
6543 kauth_cred_unref(&cred);
6544 FREE_ZONE(nsp, sizeof(*nsp), M_NFSREQ);
6545 return error;
6546 }
6547
6548 int
6549 nfs3_lookup_rpc_async(
6550 nfsnode_t dnp,
6551 char *name,
6552 int namelen,
6553 vfs_context_t ctx,
6554 struct nfsreq **reqp)
6555 {
6556 struct nfsmount *nmp;
6557 struct nfsm_chain nmreq;
6558 int error = 0, nfsvers;
6559
6560 nmp = NFSTONMP(dnp);
6561 if (nfs_mount_gone(nmp)) {
6562 return ENXIO;
6563 }
6564 nfsvers = nmp->nm_vers;
6565
6566 nfsm_chain_null(&nmreq);
6567
6568 nfsm_chain_build_alloc_init(error, &nmreq,
6569 NFSX_FH(nfsvers) + NFSX_UNSIGNED + nfsm_rndup(namelen));
6570 nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
6571 nfsm_chain_add_name(error, &nmreq, name, namelen, nmp);
6572 nfsm_chain_build_done(error, &nmreq);
6573 nfsmout_if(error);
6574 error = nfs_request_async(dnp, NULL, &nmreq, NFSPROC_LOOKUP,
6575 vfs_context_thread(ctx), vfs_context_ucred(ctx), NULL, 0, NULL, reqp);
6576 nfsmout:
6577 nfsm_chain_cleanup(&nmreq);
6578 return error;
6579 }
6580
6581 int
6582 nfs3_lookup_rpc_async_finish(
6583 nfsnode_t dnp,
6584 __unused char *name,
6585 __unused int namelen,
6586 vfs_context_t ctx,
6587 struct nfsreq *req,
6588 u_int64_t *xidp,
6589 fhandle_t *fhp,
6590 struct nfs_vattr *nvap)
6591 {
6592 int error = 0, lockerror = ENOENT, status, nfsvers, attrflag;
6593 u_int64_t xid;
6594 struct nfsmount *nmp;
6595 struct nfsm_chain nmrep;
6596
6597 nmp = NFSTONMP(dnp);
6598 if (nmp == NULL) {
6599 return ENXIO;
6600 }
6601 nfsvers = nmp->nm_vers;
6602
6603 nfsm_chain_null(&nmrep);
6604
6605 error = nfs_request_async_finish(req, &nmrep, xidp, &status);
6606
6607 if ((lockerror = nfs_node_lock(dnp))) {
6608 error = lockerror;
6609 }
6610 xid = *xidp;
6611 if (error || status) {
6612 if (nfsvers == NFS_VER3) {
6613 nfsm_chain_postop_attr_update(error, &nmrep, dnp, &xid);
6614 }
6615 if (!error) {
6616 error = status;
6617 }
6618 goto nfsmout;
6619 }
6620
6621 nfsmout_if(error || !fhp || !nvap);
6622
6623 /* get the file handle */
6624 nfsm_chain_get_fh(error, &nmrep, nfsvers, fhp);
6625
6626 /* get the attributes */
6627 if (nfsvers == NFS_VER3) {
6628 nfsm_chain_postop_attr_get(nmp, error, &nmrep, attrflag, nvap);
6629 nfsm_chain_postop_attr_update(error, &nmrep, dnp, &xid);
6630 if (!error && !attrflag) {
6631 error = nfs3_getattr_rpc(NULL, NFSTOMP(dnp), fhp->fh_data, fhp->fh_len, 0, ctx, nvap, xidp);
6632 }
6633 } else {
6634 error = nfs_parsefattr(nmp, &nmrep, nfsvers, nvap);
6635 }
6636 nfsmout:
6637 if (!lockerror) {
6638 nfs_node_unlock(dnp);
6639 }
6640 nfsm_chain_cleanup(&nmrep);
6641 return error;
6642 }
6643
6644 /*
6645 * Look up a file name and optionally either update the file handle or
6646 * allocate an nfsnode, depending on the value of npp.
6647 * npp == NULL --> just do the lookup
6648 * *npp == NULL --> allocate a new nfsnode and make sure attributes are
6649 * handled too
6650 * *npp != NULL --> update the file handle in the vnode
6651 */
6652 int
6653 nfs_lookitup(
6654 nfsnode_t dnp,
6655 char *name,
6656 int namelen,
6657 vfs_context_t ctx,
6658 nfsnode_t *npp)
6659 {
6660 int error = 0;
6661 nfsnode_t np, newnp = NULL;
6662 u_int64_t xid;
6663 fhandle_t fh;
6664 struct nfsmount *nmp;
6665 struct nfs_vattr nvattr;
6666 struct nfsreq rq, *req = &rq;
6667
6668 nmp = NFSTONMP(dnp);
6669 if (nfs_mount_gone(nmp)) {
6670 return ENXIO;
6671 }
6672
6673 if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_MAXNAME) &&
6674 (namelen > (int)nmp->nm_fsattr.nfsa_maxname)) {
6675 return ENAMETOOLONG;
6676 }
6677
6678 NVATTR_INIT(&nvattr);
6679
6680 /* check for lookup of "." */
6681 if ((name[0] == '.') && (namelen == 1)) {
6682 /* skip lookup, we know who we are */
6683 fh.fh_len = 0;
6684 newnp = dnp;
6685 goto nfsmout;
6686 }
6687
6688 error = nmp->nm_funcs->nf_lookup_rpc_async(dnp, name, namelen, ctx, &req);
6689 nfsmout_if(error);
6690 error = nmp->nm_funcs->nf_lookup_rpc_async_finish(dnp, name, namelen, ctx, req, &xid, &fh, &nvattr);
6691 nfsmout_if(!npp || error);
6692
6693 if (*npp) {
6694 np = *npp;
6695 if (fh.fh_len != np->n_fhsize) {
6696 u_char *oldbuf = (np->n_fhsize > NFS_SMALLFH) ? np->n_fhp : NULL;
6697 if (fh.fh_len > NFS_SMALLFH) {
6698 MALLOC_ZONE(np->n_fhp, u_char *, fh.fh_len, M_NFSBIGFH, M_WAITOK);
6699 if (!np->n_fhp) {
6700 np->n_fhp = oldbuf;
6701 error = ENOMEM;
6702 goto nfsmout;
6703 }
6704 } else {
6705 np->n_fhp = &np->n_fh[0];
6706 }
6707 if (oldbuf) {
6708 FREE_ZONE(oldbuf, np->n_fhsize, M_NFSBIGFH);
6709 }
6710 }
6711 bcopy(fh.fh_data, np->n_fhp, fh.fh_len);
6712 np->n_fhsize = fh.fh_len;
6713 nfs_node_lock_force(np);
6714 error = nfs_loadattrcache(np, &nvattr, &xid, 0);
6715 nfs_node_unlock(np);
6716 nfsmout_if(error);
6717 newnp = np;
6718 } else if (NFS_CMPFH(dnp, fh.fh_data, fh.fh_len)) {
6719 nfs_node_lock_force(dnp);
6720 if (dnp->n_xid <= xid) {
6721 error = nfs_loadattrcache(dnp, &nvattr, &xid, 0);
6722 }
6723 nfs_node_unlock(dnp);
6724 nfsmout_if(error);
6725 newnp = dnp;
6726 } else {
6727 struct componentname cn, *cnp = &cn;
6728 bzero(cnp, sizeof(*cnp));
6729 cnp->cn_nameptr = name;
6730 cnp->cn_namelen = namelen;
6731 error = nfs_nget(NFSTOMP(dnp), dnp, cnp, fh.fh_data, fh.fh_len,
6732 &nvattr, &xid, rq.r_auth, NG_MAKEENTRY, &np);
6733 nfsmout_if(error);
6734 newnp = np;
6735 }
6736
6737 nfsmout:
6738 if (npp && !*npp && !error) {
6739 *npp = newnp;
6740 }
6741 NVATTR_CLEANUP(&nvattr);
6742 return error;
6743 }
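/*
 * Usage sketch for the npp contract documented above (hypothetical
 * caller, not part of the build):
 */
#if 0
static int
lookitup_usage(nfsnode_t dnp, char *name, int namelen, vfs_context_t ctx, nfsnode_t existing)
{
	nfsnode_t np = NULL;
	int error;

	/* npp == NULL: existence probe only, no node created or updated */
	error = nfs_lookitup(dnp, name, namelen, ctx, NULL);

	/* *npp == NULL: allocate a fresh nfsnode with attributes handled */
	if (!error) {
		error = nfs_lookitup(dnp, name, namelen, ctx, &np);
	}

	/* *npp != NULL: refresh the existing node's file handle */
	if (!error) {
		error = nfs_lookitup(dnp, name, namelen, ctx, &existing);
	}
	return error;
}
#endif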
6744
6745 /*
6746 * set up and initialize a "._" (AppleDouble sidecar) lookup structure
6747 * used for performing async "._" file lookups.
6748 */
6749 void
6750 nfs_dulookup_init(struct nfs_dulookup *dulp, nfsnode_t dnp, const char *name, int namelen, vfs_context_t ctx)
6751 {
6752 int error, du_namelen;
6753 vnode_t du_vp;
6754 struct nfsmount *nmp = NFSTONMP(dnp);
6755
6756 /* check for ._ file in name cache */
6757 dulp->du_flags = 0;
6758 bzero(&dulp->du_cn, sizeof(dulp->du_cn));
6759 du_namelen = namelen + 2;
6760 if (!nmp || NMFLAG(nmp, NONEGNAMECACHE)) {
6761 return;
6762 }
6763 if ((namelen >= 2) && (name[0] == '.') && (name[1] == '_')) {
6764 return;
6765 }
6766 if (du_namelen >= (int)sizeof(dulp->du_smallname)) {
6767 MALLOC(dulp->du_cn.cn_nameptr, char *, du_namelen + 1, M_TEMP, M_WAITOK);
6768 } else {
6769 dulp->du_cn.cn_nameptr = dulp->du_smallname;
6770 }
6771 if (!dulp->du_cn.cn_nameptr) {
6772 return;
6773 }
6774 dulp->du_cn.cn_namelen = du_namelen;
6775 snprintf(dulp->du_cn.cn_nameptr, du_namelen + 1, "._%s", name);
6776 dulp->du_cn.cn_nameptr[du_namelen] = '\0';
6777 dulp->du_cn.cn_nameiop = LOOKUP;
6778 dulp->du_cn.cn_flags = MAKEENTRY;
6779
6780 error = cache_lookup(NFSTOV(dnp), &du_vp, &dulp->du_cn);
6781 if (error == -1) {
6782 vnode_put(du_vp);
6783 } else if (!error) {
6784 nmp = NFSTONMP(dnp);
6785 if (nmp && (nmp->nm_vers > NFS_VER2) && NMFLAG(nmp, RDIRPLUS)) {
6786 /* if rdirplus, try dir buf cache lookup */
6787 nfsnode_t du_np = NULL;
6788 if (!nfs_dir_buf_cache_lookup(dnp, &du_np, &dulp->du_cn, ctx, 0) && du_np) {
6789 /* dir buf cache hit */
6790 du_vp = NFSTOV(du_np);
6791 vnode_put(du_vp);
6792 error = -1;
6793 }
6794 }
6795 if (!error) {
6796 dulp->du_flags |= NFS_DULOOKUP_DOIT;
6797 }
6798 }
6799 }
6800
6801 /*
6802 * start an async "._" file lookup request
6803 */
6804 void
6805 nfs_dulookup_start(struct nfs_dulookup *dulp, nfsnode_t dnp, vfs_context_t ctx)
6806 {
6807 struct nfsmount *nmp = NFSTONMP(dnp);
6808 struct nfsreq *req = &dulp->du_req;
6809
6810 if (!nmp || !(dulp->du_flags & NFS_DULOOKUP_DOIT) || (dulp->du_flags & NFS_DULOOKUP_INPROG)) {
6811 return;
6812 }
6813 if (!nmp->nm_funcs->nf_lookup_rpc_async(dnp, dulp->du_cn.cn_nameptr,
6814 dulp->du_cn.cn_namelen, ctx, &req)) {
6815 dulp->du_flags |= NFS_DULOOKUP_INPROG;
6816 }
6817 }
6818
6819 /*
6820 * finish an async "._" file lookup request and clean up the structure
6821 */
6822 void
6823 nfs_dulookup_finish(struct nfs_dulookup *dulp, nfsnode_t dnp, vfs_context_t ctx)
6824 {
6825 struct nfsmount *nmp = NFSTONMP(dnp);
6826 int error;
6827 nfsnode_t du_np;
6828 u_int64_t xid;
6829 fhandle_t fh;
6830 struct nfs_vattr nvattr;
6831
6832 if (!nmp || !(dulp->du_flags & NFS_DULOOKUP_INPROG)) {
6833 goto out;
6834 }
6835
6836 NVATTR_INIT(&nvattr);
6837 error = nmp->nm_funcs->nf_lookup_rpc_async_finish(dnp, dulp->du_cn.cn_nameptr,
6838 dulp->du_cn.cn_namelen, ctx, &dulp->du_req, &xid, &fh, &nvattr);
6839 dulp->du_flags &= ~NFS_DULOOKUP_INPROG;
6840 if (error == ENOENT) {
6841 /* add a negative entry in the name cache */
6842 nfs_node_lock_force(dnp);
6843 cache_enter(NFSTOV(dnp), NULL, &dulp->du_cn);
6844 dnp->n_flag |= NNEGNCENTRIES;
6845 nfs_node_unlock(dnp);
6846 } else if (!error) {
6847 error = nfs_nget(NFSTOMP(dnp), dnp, &dulp->du_cn, fh.fh_data, fh.fh_len,
6848 &nvattr, &xid, dulp->du_req.r_auth, NG_MAKEENTRY, &du_np);
6849 if (!error) {
6850 nfs_node_unlock(du_np);
6851 vnode_put(NFSTOV(du_np));
6852 }
6853 }
6854 NVATTR_CLEANUP(&nvattr);
6855 out:
6856 if (dulp->du_flags & NFS_DULOOKUP_INPROG) {
6857 nfs_request_async_cancel(&dulp->du_req);
6858 }
6859 if (dulp->du_cn.cn_nameptr && (dulp->du_cn.cn_nameptr != dulp->du_smallname)) {
6860 FREE(dulp->du_cn.cn_nameptr, M_TEMP);
6861 }
6862 }
6863
6864
6865 /*
6866 * NFS Version 3 commit RPC
6867 */
6868 int
6869 nfs3_commit_rpc(
6870 nfsnode_t np,
6871 uint64_t offset,
6872 uint64_t count,
6873 kauth_cred_t cred,
6874 uint64_t wverf)
6875 {
6876 struct nfsmount *nmp;
6877 int error = 0, lockerror, status, wccpostattr = 0, nfsvers;
6878 struct timespec premtime = { .tv_sec = 0, .tv_nsec = 0 };
6879 u_int64_t xid, newwverf;
6880 uint32_t count32;
6881 struct nfsm_chain nmreq, nmrep;
6882
6883 nmp = NFSTONMP(np);
6884 FSDBG(521, np, offset, count, nmp ? nmp->nm_state : 0);
6885 if (nfs_mount_gone(nmp)) {
6886 return ENXIO;
6887 }
6888 if (!(nmp->nm_state & NFSSTA_HASWRITEVERF)) {
6889 return 0;
6890 }
6891 nfsvers = nmp->nm_vers;
6892
6893 if (count > UINT32_MAX) {
6894 count32 = 0; /* per RFC 1813, a zero count means commit from offset through end of file */
6895 } else {
6896 count32 = count;
6897 }
6898
6899 nfsm_chain_null(&nmreq);
6900 nfsm_chain_null(&nmrep);
6901
6902 nfsm_chain_build_alloc_init(error, &nmreq, NFSX_FH(NFS_VER3));
6903 nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
6904 nfsm_chain_add_64(error, &nmreq, offset);
6905 nfsm_chain_add_32(error, &nmreq, count32);
6906 nfsm_chain_build_done(error, &nmreq);
6907 nfsmout_if(error);
6908 error = nfs_request2(np, NULL, &nmreq, NFSPROC_COMMIT,
6909 current_thread(), cred, NULL, 0, &nmrep, &xid, &status);
6910 if ((lockerror = nfs_node_lock(np))) {
6911 error = lockerror;
6912 }
6913 /* can we do anything useful with the wcc info? */
6914 nfsm_chain_get_wcc_data(error, &nmrep, np, &premtime, &wccpostattr, &xid);
6915 if (!lockerror) {
6916 nfs_node_unlock(np);
6917 }
6918 if (!error) {
6919 error = status;
6920 }
6921 nfsm_chain_get_64(error, &nmrep, newwverf);
6922 nfsmout_if(error);
6923 lck_mtx_lock(&nmp->nm_lock);
6924 if (nmp->nm_verf != newwverf) {
6925 nmp->nm_verf = newwverf;
6926 }
6927 if (wverf != newwverf) {
6928 error = NFSERR_STALEWRITEVERF;
6929 }
6930 lck_mtx_unlock(&nmp->nm_lock);
6931 nfsmout:
6932 nfsm_chain_cleanup(&nmreq);
6933 nfsm_chain_cleanup(&nmrep);
6934 return error;
6935 }
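/*
 * Sketch of the caller's side of the write-verifier protocol above
 * (hypothetical fragment, names assumed; not part of the build): the
 * server returns an opaque verifier with every UNSTABLE write and with
 * the COMMIT reply; if it changed in between, the server may have
 * rebooted and discarded uncommitted data, so the writes must be
 * re-sent.
 */
#if 0
uint64_t wverf = nmp->nm_verf;	/* snapshot the verifier at write time */
/* ... issue UNSTABLE writes ... */
error = nfs3_commit_rpc(np, offset, count, cred, wverf);
if (error == NFSERR_STALEWRITEVERF) {
	/* unstable data may be lost: mark the buffers dirty and rewrite */
}
#endif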
6936
6937
6938 int
6939 nfs_vnop_blockmap(
6940 __unused struct vnop_blockmap_args /* {
6941 * struct vnodeop_desc *a_desc;
6942 * vnode_t a_vp;
6943 * off_t a_foffset;
6944 * size_t a_size;
6945 * daddr64_t *a_bpn;
6946 * size_t *a_run;
6947 * void *a_poff;
6948 * int a_flags;
6949 * } */*ap)
6950 {
6951 return ENOTSUP;
6952 }
6953
6954
6955 /*
6956 * fsync vnode op. Just call nfs_flush().
6957 */
6958 /* ARGSUSED */
6959 int
6960 nfs_vnop_fsync(
6961 struct vnop_fsync_args /* {
6962 * struct vnodeop_desc *a_desc;
6963 * vnode_t a_vp;
6964 * int a_waitfor;
6965 * vfs_context_t a_context;
6966 * } */*ap)
6967 {
6968 return nfs_flush(VTONFS(ap->a_vp), ap->a_waitfor, vfs_context_thread(ap->a_context), 0);
6969 }
6970
6971
6972 /*
6973 * Do an NFS pathconf RPC.
6974 */
6975 int
6976 nfs3_pathconf_rpc(
6977 nfsnode_t np,
6978 struct nfs_fsattr *nfsap,
6979 vfs_context_t ctx)
6980 {
6981 u_int64_t xid;
6982 int error = 0, lockerror, status, nfsvers;
6983 struct nfsm_chain nmreq, nmrep;
6984 struct nfsmount *nmp = NFSTONMP(np);
6985 uint32_t val = 0;
6986
6987 if (nfs_mount_gone(nmp)) {
6988 return ENXIO;
6989 }
6990 nfsvers = nmp->nm_vers;
6991
6992 nfsm_chain_null(&nmreq);
6993 nfsm_chain_null(&nmrep);
6994
6995 /* fetch pathconf info from server */
6996 nfsm_chain_build_alloc_init(error, &nmreq, NFSX_FH(NFS_VER3));
6997 nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
6998 nfsm_chain_build_done(error, &nmreq);
6999 nfsmout_if(error);
7000 error = nfs_request(np, NULL, &nmreq, NFSPROC_PATHCONF, ctx, NULL, &nmrep, &xid, &status);
7001 if ((lockerror = nfs_node_lock(np))) {
7002 error = lockerror;
7003 }
7004 nfsm_chain_postop_attr_update(error, &nmrep, np, &xid);
7005 if (!lockerror) {
7006 nfs_node_unlock(np);
7007 }
7008 if (!error) {
7009 error = status;
7010 }
7011 nfsm_chain_get_32(error, &nmrep, nfsap->nfsa_maxlink);
7012 nfsm_chain_get_32(error, &nmrep, nfsap->nfsa_maxname);
7013 nfsap->nfsa_flags &= ~(NFS_FSFLAG_NO_TRUNC | NFS_FSFLAG_CHOWN_RESTRICTED | NFS_FSFLAG_CASE_INSENSITIVE | NFS_FSFLAG_CASE_PRESERVING);
7014 nfsm_chain_get_32(error, &nmrep, val);
7015 if (val) {
7016 nfsap->nfsa_flags |= NFS_FSFLAG_NO_TRUNC;
7017 }
7018 nfsm_chain_get_32(error, &nmrep, val);
7019 if (val) {
7020 nfsap->nfsa_flags |= NFS_FSFLAG_CHOWN_RESTRICTED;
7021 }
7022 nfsm_chain_get_32(error, &nmrep, val);
7023 if (val) {
7024 nfsap->nfsa_flags |= NFS_FSFLAG_CASE_INSENSITIVE;
7025 }
7026 nfsm_chain_get_32(error, &nmrep, val);
7027 if (val) {
7028 nfsap->nfsa_flags |= NFS_FSFLAG_CASE_PRESERVING;
7029 }
7030 NFS_BITMAP_SET(nfsap->nfsa_bitmap, NFS_FATTR_MAXLINK);
7031 NFS_BITMAP_SET(nfsap->nfsa_bitmap, NFS_FATTR_MAXNAME);
7032 NFS_BITMAP_SET(nfsap->nfsa_bitmap, NFS_FATTR_NO_TRUNC);
7033 NFS_BITMAP_SET(nfsap->nfsa_bitmap, NFS_FATTR_CHOWN_RESTRICTED);
7034 NFS_BITMAP_SET(nfsap->nfsa_bitmap, NFS_FATTR_CASE_INSENSITIVE);
7035 NFS_BITMAP_SET(nfsap->nfsa_bitmap, NFS_FATTR_CASE_PRESERVING);
7036 nfsmout:
7037 nfsm_chain_cleanup(&nmreq);
7038 nfsm_chain_cleanup(&nmrep);
7039 return error;
7040 }
7041
7042 /* save pathconf info for NFSv3 mount */
7043 void
7044 nfs3_pathconf_cache(struct nfsmount *nmp, struct nfs_fsattr *nfsap)
7045 {
7046 nmp->nm_fsattr.nfsa_maxlink = nfsap->nfsa_maxlink;
7047 nmp->nm_fsattr.nfsa_maxname = nfsap->nfsa_maxname;
7048 nmp->nm_fsattr.nfsa_flags &= ~(NFS_FSFLAG_NO_TRUNC | NFS_FSFLAG_CHOWN_RESTRICTED | NFS_FSFLAG_CASE_INSENSITIVE | NFS_FSFLAG_CASE_PRESERVING);
7049 nmp->nm_fsattr.nfsa_flags |= nfsap->nfsa_flags & NFS_FSFLAG_NO_TRUNC;
7050 nmp->nm_fsattr.nfsa_flags |= nfsap->nfsa_flags & NFS_FSFLAG_CHOWN_RESTRICTED;
7051 nmp->nm_fsattr.nfsa_flags |= nfsap->nfsa_flags & NFS_FSFLAG_CASE_INSENSITIVE;
7052 nmp->nm_fsattr.nfsa_flags |= nfsap->nfsa_flags & NFS_FSFLAG_CASE_PRESERVING;
7053 NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_MAXLINK);
7054 NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_MAXNAME);
7055 NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_NO_TRUNC);
7056 NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_CHOWN_RESTRICTED);
7057 NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_CASE_INSENSITIVE);
7058 NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_CASE_PRESERVING);
7059 nmp->nm_state |= NFSSTA_GOTPATHCONF;
7060 }
7061
7062 /*
7063 * Return POSIX pathconf information applicable to nfs.
7064 *
7065 * The NFS V2 protocol doesn't support this, so just return EINVAL
7066 * for V2.
7067 */
7068 /* ARGSUSED */
7069 int
7070 nfs_vnop_pathconf(
7071 struct vnop_pathconf_args /* {
7072 * struct vnodeop_desc *a_desc;
7073 * vnode_t a_vp;
7074 * int a_name;
7075 * int32_t *a_retval;
7076 * vfs_context_t a_context;
7077 * } */*ap)
7078 {
7079 vnode_t vp = ap->a_vp;
7080 nfsnode_t np = VTONFS(vp);
7081 struct nfsmount *nmp;
7082 struct nfs_fsattr nfsa, *nfsap;
7083 int error = 0;
7084 uint64_t maxFileSize;
7085 uint nbits;
7086
7087 nmp = VTONMP(vp);
7088 if (nfs_mount_gone(nmp)) {
7089 return ENXIO;
7090 }
7091
7092 switch (ap->a_name) {
7093 case _PC_LINK_MAX:
7094 case _PC_NAME_MAX:
7095 case _PC_CHOWN_RESTRICTED:
7096 case _PC_NO_TRUNC:
7097 case _PC_CASE_SENSITIVE:
7098 case _PC_CASE_PRESERVING:
7099 break;
7100 case _PC_FILESIZEBITS:
7101 if (nmp->nm_vers == NFS_VER2) {
7102 *ap->a_retval = 32;
7103 return 0;
7104 }
7105 break;
7106 case _PC_XATTR_SIZE_BITS:
7107 /* Do we support xattrs natively? */
7108 if (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR) {
7109 break; /* Yes */
7110 }
7111 /* No... so just return an error */
7112 /* FALLTHROUGH */
7113 default:
7114 /* don't bother contacting the server if we know the answer */
7115 return EINVAL;
7116 }
7117
7118 if (nmp->nm_vers == NFS_VER2) {
7119 return EINVAL;
7120 }
7121
7122 lck_mtx_lock(&nmp->nm_lock);
7123 if (nmp->nm_vers == NFS_VER3) {
7124 if (!(nmp->nm_state & NFSSTA_GOTPATHCONF)) {
7125 /* no pathconf info cached */
7126 lck_mtx_unlock(&nmp->nm_lock);
7127 NFS_CLEAR_ATTRIBUTES(nfsa.nfsa_bitmap);
7128 error = nfs3_pathconf_rpc(np, &nfsa, ap->a_context);
7129 if (error) {
7130 return error;
7131 }
7132 nmp = VTONMP(vp);
7133 if (nfs_mount_gone(nmp)) {
7134 return ENXIO;
7135 }
7136 lck_mtx_lock(&nmp->nm_lock);
7137 if (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_HOMOGENEOUS) {
7138 /* all files have the same pathconf info, */
7139 /* so cache a copy of the results */
7140 nfs3_pathconf_cache(nmp, &nfsa);
7141 }
7142 nfsap = &nfsa;
7143 } else {
7144 nfsap = &nmp->nm_fsattr;
7145 }
7146 }
7147 #if CONFIG_NFS4
7148 else if (!(nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_HOMOGENEOUS)) {
7149 /* no pathconf info cached */
7150 lck_mtx_unlock(&nmp->nm_lock);
7151 NFS_CLEAR_ATTRIBUTES(nfsa.nfsa_bitmap);
7152 error = nfs4_pathconf_rpc(np, &nfsa, ap->a_context);
7153 if (error) {
7154 return error;
7155 }
7156 nmp = VTONMP(vp);
7157 if (nfs_mount_gone(nmp)) {
7158 return ENXIO;
7159 }
7160 lck_mtx_lock(&nmp->nm_lock);
7161 nfsap = &nfsa;
7162 }
7163 #endif
7164 else {
7165 nfsap = &nmp->nm_fsattr;
7166 }
7167 switch (ap->a_name) {
7168 case _PC_LINK_MAX:
7169 if (NFS_BITMAP_ISSET(nfsap->nfsa_bitmap, NFS_FATTR_MAXLINK)) {
7170 *ap->a_retval = nfsap->nfsa_maxlink;
7171 #if CONFIG_NFS4
7172 } else if ((nmp->nm_vers == NFS_VER4) && NFS_BITMAP_ISSET(np->n_vattr.nva_bitmap, NFS_FATTR_MAXLINK)) {
7173 *ap->a_retval = np->n_vattr.nva_maxlink;
7174 #endif
7175 } else {
7176 error = EINVAL;
7177 }
7178 break;
7179 case _PC_NAME_MAX:
7180 if (NFS_BITMAP_ISSET(nfsap->nfsa_bitmap, NFS_FATTR_MAXNAME)) {
7181 *ap->a_retval = nfsap->nfsa_maxname;
7182 } else {
7183 error = EINVAL;
7184 }
7185 break;
7186 case _PC_CHOWN_RESTRICTED:
7187 if (NFS_BITMAP_ISSET(nfsap->nfsa_bitmap, NFS_FATTR_CHOWN_RESTRICTED)) {
7188 *ap->a_retval = (nfsap->nfsa_flags & NFS_FSFLAG_CHOWN_RESTRICTED) ? 200112 /* _POSIX_CHOWN_RESTRICTED */ : 0;
7189 } else {
7190 error = EINVAL;
7191 }
7192 break;
7193 case _PC_NO_TRUNC:
7194 if (NFS_BITMAP_ISSET(nfsap->nfsa_bitmap, NFS_FATTR_NO_TRUNC)) {
7195 *ap->a_retval = (nfsap->nfsa_flags & NFS_FSFLAG_NO_TRUNC) ? 200112 /* _POSIX_NO_TRUNC */ : 0;
7196 } else {
7197 error = EINVAL;
7198 }
7199 break;
7200 case _PC_CASE_SENSITIVE:
7201 if (NFS_BITMAP_ISSET(nfsap->nfsa_bitmap, NFS_FATTR_CASE_INSENSITIVE)) {
7202 *ap->a_retval = (nfsap->nfsa_flags & NFS_FSFLAG_CASE_INSENSITIVE) ? 0 : 1;
7203 } else {
7204 error = EINVAL;
7205 }
7206 break;
7207 case _PC_CASE_PRESERVING:
7208 if (NFS_BITMAP_ISSET(nfsap->nfsa_bitmap, NFS_FATTR_CASE_PRESERVING)) {
7209 *ap->a_retval = (nfsap->nfsa_flags & NFS_FSFLAG_CASE_PRESERVING) ? 1 : 0;
7210 } else {
7211 error = EINVAL;
7212 }
7213 break;
7214 case _PC_XATTR_SIZE_BITS: /* same as file size bits if named attrs supported */
7215 case _PC_FILESIZEBITS:
7216 if (!NFS_BITMAP_ISSET(nfsap->nfsa_bitmap, NFS_FATTR_MAXFILESIZE)) {
7217 *ap->a_retval = 64;
7218 error = 0;
7219 break;
7220 }
7221 maxFileSize = nfsap->nfsa_maxfilesize;
7222 nbits = 1;
7223 if (maxFileSize & 0xffffffff00000000ULL) {
7224 nbits += 32;
7225 maxFileSize >>= 32;
7226 }
7227 if (maxFileSize & 0xffff0000) {
7228 nbits += 16;
7229 maxFileSize >>= 16;
7230 }
7231 if (maxFileSize & 0xff00) {
7232 nbits += 8;
7233 maxFileSize >>= 8;
7234 }
7235 if (maxFileSize & 0xf0) {
7236 nbits += 4;
7237 maxFileSize >>= 4;
7238 }
7239 if (maxFileSize & 0xc) {
7240 nbits += 2;
7241 maxFileSize >>= 2;
7242 }
7243 if (maxFileSize & 0x2) {
7244 nbits += 1;
7245 }
7246 *ap->a_retval = nbits;
7247 break;
7248 default:
7249 error = EINVAL;
7250 }
7251
7252 lck_mtx_unlock(&nmp->nm_lock);
7253
7254 return error;
7255 }
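/*
 * [Editor's sketch] The _PC_FILESIZEBITS case above counts the bits
 * needed to represent nfsa_maxfilesize with a branchy binary search
 * (32/16/8/4/2/1).  A naive shift loop gives the same answer; the
 * stand-alone check below demonstrates the equivalence and is not
 * kernel code.
 */
#if 0   /* illustrative sketch only */
#include <assert.h>
#include <stdint.h>

static unsigned
bits_binary_search(uint64_t v)
{
        unsigned nbits = 1;

        if (v & 0xffffffff00000000ULL) { nbits += 32; v >>= 32; }
        if (v & 0xffff0000)            { nbits += 16; v >>= 16; }
        if (v & 0xff00)                { nbits += 8;  v >>= 8; }
        if (v & 0xf0)                  { nbits += 4;  v >>= 4; }
        if (v & 0xc)                   { nbits += 2;  v >>= 2; }
        if (v & 0x2)                   { nbits += 1; }
        return nbits;
}

static unsigned
bits_naive(uint64_t v)
{
        unsigned nbits = 1;

        while (v >>= 1) {
                nbits++;
        }
        return nbits;
}

static void
bits_check(void)
{
        /* 2GB-1 needs 31 bits; a 1TB boundary needs 41 */
        assert(bits_binary_search(0x7fffffffULL) == 31);
        assert(bits_naive(0x7fffffffULL) == 31);
        assert(bits_binary_search(1ULL << 40) == 41);
        assert(bits_naive(1ULL << 40) == 41);
}
#endif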
7256
7257 /*
7258 * Read wrapper for special devices.
7259 */
7260 int
7261 nfsspec_vnop_read(
7262 struct vnop_read_args /* {
7263 * struct vnodeop_desc *a_desc;
7264 * vnode_t a_vp;
7265 * struct uio *a_uio;
7266 * int a_ioflag;
7267 * vfs_context_t a_context;
7268 * } */*ap)
7269 {
7270 nfsnode_t np = VTONFS(ap->a_vp);
7271 struct timespec now;
7272 int error;
7273
7274 /*
7275 * Set access flag.
7276 */
7277 if ((error = nfs_node_lock(np))) {
7278 return error;
7279 }
7280 np->n_flag |= NACC;
7281 nanotime(&now);
7282 np->n_atim.tv_sec = now.tv_sec;
7283 np->n_atim.tv_nsec = now.tv_nsec;
7284 nfs_node_unlock(np);
7285 return VOCALL(spec_vnodeop_p, VOFFSET(vnop_read), ap);
7286 }
7287
7288 /*
7289 * Write wrapper for special devices.
7290 */
7291 int
7292 nfsspec_vnop_write(
7293 struct vnop_write_args /* {
7294 * struct vnodeop_desc *a_desc;
7295 * vnode_t a_vp;
7296 * struct uio *a_uio;
7297 * int a_ioflag;
7298 * vfs_context_t a_context;
7299 * } */*ap)
7300 {
7301 nfsnode_t np = VTONFS(ap->a_vp);
7302 struct timespec now;
7303 int error;
7304
7305 /*
7306 * Set update flag.
7307 */
7308 if ((error = nfs_node_lock(np))) {
7309 return error;
7310 }
7311 np->n_flag |= NUPD;
7312 nanotime(&now);
7313 np->n_mtim.tv_sec = now.tv_sec;
7314 np->n_mtim.tv_nsec = now.tv_nsec;
7315 nfs_node_unlock(np);
7316 return VOCALL(spec_vnodeop_p, VOFFSET(vnop_write), ap);
7317 }
7318
7319 /*
7320 * Close wrapper for special devices.
7321 *
7322 * Update the times on the nfsnode then do device close.
7323 */
7324 int
7325 nfsspec_vnop_close(
7326 struct vnop_close_args /* {
7327 * struct vnodeop_desc *a_desc;
7328 * vnode_t a_vp;
7329 * int a_fflag;
7330 * vfs_context_t a_context;
7331 * } */*ap)
7332 {
7333 vnode_t vp = ap->a_vp;
7334 nfsnode_t np = VTONFS(vp);
7335 struct vnode_attr vattr;
7336 mount_t mp;
7337 int error;
7338
7339 if ((error = nfs_node_lock(np))) {
7340 return error;
7341 }
7342 if (np->n_flag & (NACC | NUPD)) {
7343 np->n_flag |= NCHG;
7344 if (!vnode_isinuse(vp, 0) && (mp = vnode_mount(vp)) && !vfs_isrdonly(mp)) {
7345 VATTR_INIT(&vattr);
7346 if (np->n_flag & NACC) {
7347 vattr.va_access_time = np->n_atim;
7348 VATTR_SET_ACTIVE(&vattr, va_access_time);
7349 }
7350 if (np->n_flag & NUPD) {
7351 vattr.va_modify_time = np->n_mtim;
7352 VATTR_SET_ACTIVE(&vattr, va_modify_time);
7353 }
7354 nfs_node_unlock(np);
7355 vnode_setattr(vp, &vattr, ap->a_context);
7356 } else {
7357 nfs_node_unlock(np);
7358 }
7359 } else {
7360 nfs_node_unlock(np);
7361 }
7362 return VOCALL(spec_vnodeop_p, VOFFSET(vnop_close), ap);
7363 }
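/*
 * [Editor's sketch] The device (and fifo) wrappers above defer
 * timestamp maintenance: read/write only set a flag (NACC/NUPD) and
 * record the time in the nfsnode, and close pushes whatever
 * accumulated out in a single setattr.  A minimal user-space model of
 * that pattern follows; all names here are the editor's, not the
 * kernel's.
 */
#if 0   /* illustrative sketch only */
#include <stdbool.h>
#include <time.h>

struct deferred_times {
        bool            acc_dirty;      /* NACC analogue */
        bool            upd_dirty;      /* NUPD analogue */
        struct timespec atime, mtime;
};

static void
note_read(struct deferred_times *dt)
{
        clock_gettime(CLOCK_REALTIME, &dt->atime);
        dt->acc_dirty = true;           /* cheap: no RPC per read */
}

static void
note_write(struct deferred_times *dt)
{
        clock_gettime(CLOCK_REALTIME, &dt->mtime);
        dt->upd_dirty = true;
}

/* On close, issue at most one "setattr" covering whatever is dirty. */
static bool
flush_on_close(struct deferred_times *dt)
{
        bool did_work = dt->acc_dirty || dt->upd_dirty;

        dt->acc_dirty = dt->upd_dirty = false;
        return did_work;
}
#endif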
7364
7365 #if FIFO
7366 extern vnop_t **fifo_vnodeop_p;
7367
7368 /*
7369 * Read wrapper for fifos.
7370 */
7371 int
7372 nfsfifo_vnop_read(
7373 struct vnop_read_args /* {
7374 * struct vnodeop_desc *a_desc;
7375 * vnode_t a_vp;
7376 * struct uio *a_uio;
7377 * int a_ioflag;
7378 * vfs_context_t a_context;
7379 * } */*ap)
7380 {
7381 nfsnode_t np = VTONFS(ap->a_vp);
7382 struct timespec now;
7383 int error;
7384
7385 /*
7386 * Set access flag.
7387 */
7388 if ((error = nfs_node_lock(np))) {
7389 return error;
7390 }
7391 np->n_flag |= NACC;
7392 nanotime(&now);
7393 np->n_atim.tv_sec = now.tv_sec;
7394 np->n_atim.tv_nsec = now.tv_nsec;
7395 nfs_node_unlock(np);
7396 return VOCALL(fifo_vnodeop_p, VOFFSET(vnop_read), ap);
7397 }
7398
7399 /*
7400 * Write wrapper for fifos.
7401 */
7402 int
7403 nfsfifo_vnop_write(
7404 struct vnop_write_args /* {
7405 * struct vnodeop_desc *a_desc;
7406 * vnode_t a_vp;
7407 * struct uio *a_uio;
7408 * int a_ioflag;
7409 * vfs_context_t a_context;
7410 * } */*ap)
7411 {
7412 nfsnode_t np = VTONFS(ap->a_vp);
7413 struct timespec now;
7414 int error;
7415
7416 /*
7417 * Set update flag.
7418 */
7419 if ((error = nfs_node_lock(np))) {
7420 return error;
7421 }
7422 np->n_flag |= NUPD;
7423 nanotime(&now);
7424 np->n_mtim.tv_sec = now.tv_sec;
7425 np->n_mtim.tv_nsec = now.tv_nsec;
7426 nfs_node_unlock(np);
7427 return VOCALL(fifo_vnodeop_p, VOFFSET(vnop_write), ap);
7428 }
7429
7430 /*
7431 * Close wrapper for fifos.
7432 *
7433 * Update the times on the nfsnode then do fifo close.
7434 */
7435 int
7436 nfsfifo_vnop_close(
7437 struct vnop_close_args /* {
7438 * struct vnodeop_desc *a_desc;
7439 * vnode_t a_vp;
7440 * int a_fflag;
7441 * vfs_context_t a_context;
7442 * } */*ap)
7443 {
7444 vnode_t vp = ap->a_vp;
7445 nfsnode_t np = VTONFS(vp);
7446 struct vnode_attr vattr;
7447 struct timespec now;
7448 mount_t mp;
7449 int error;
7450
7451 if ((error = nfs_node_lock(np))) {
7452 return error;
7453 }
7454 if (np->n_flag & (NACC | NUPD)) {
7455 nanotime(&now);
7456 if (np->n_flag & NACC) {
7457 np->n_atim.tv_sec = now.tv_sec;
7458 np->n_atim.tv_nsec = now.tv_nsec;
7459 }
7460 if (np->n_flag & NUPD) {
7461 np->n_mtim.tv_sec = now.tv_sec;
7462 np->n_mtim.tv_nsec = now.tv_nsec;
7463 }
7464 np->n_flag |= NCHG;
7465 if (!vnode_isinuse(vp, 1) && (mp = vnode_mount(vp)) && !vfs_isrdonly(mp)) {
7466 VATTR_INIT(&vattr);
7467 if (np->n_flag & NACC) {
7468 vattr.va_access_time = np->n_atim;
7469 VATTR_SET_ACTIVE(&vattr, va_access_time);
7470 }
7471 if (np->n_flag & NUPD) {
7472 vattr.va_modify_time = np->n_mtim;
7473 VATTR_SET_ACTIVE(&vattr, va_modify_time);
7474 }
7475 nfs_node_unlock(np);
7476 vnode_setattr(vp, &vattr, ap->a_context);
7477 } else {
7478 nfs_node_unlock(np);
7479 }
7480 } else {
7481 nfs_node_unlock(np);
7482 }
7483 return VOCALL(fifo_vnodeop_p, VOFFSET(vnop_close), ap);
7484 }
7485 #endif /* FIFO */
7486
7487 /*ARGSUSED*/
7488 int
7489 nfs_vnop_ioctl(
7490 struct vnop_ioctl_args /* {
7491 * struct vnodeop_desc *a_desc;
7492 * vnode_t a_vp;
7493 * u_int32_t a_command;
7494 * caddr_t a_data;
7495 * int a_fflag;
7496 * vfs_context_t a_context;
7497 * } */*ap)
7498 {
7499 vfs_context_t ctx = ap->a_context;
7500 vnode_t vp = ap->a_vp;
7501 struct nfsmount *mp = VTONMP(vp);
7502 int error = ENOTTY;
7503 #if CONFIG_NFS_GSS
7504 struct user_nfs_gss_principal gprinc = {};
7505 uint32_t len;
7506 #endif
7507
7508 if (mp == NULL) {
7509 return ENXIO;
7510 }
7511 switch (ap->a_command) {
7512 case F_FULLFSYNC:
7513 if (vnode_vfsisrdonly(vp)) {
7514 return EROFS;
7515 }
7516 error = nfs_flush(VTONFS(vp), MNT_WAIT, vfs_context_thread(ctx), 0);
7517 break;
7518 #if CONFIG_NFS_GSS
7519 case NFS_IOC_DESTROY_CRED:
7520 if (!auth_is_kerberized(mp->nm_auth)) {
7521 return ENOTSUP;
7522 }
7523 error = nfs_gss_clnt_ctx_remove(mp, vfs_context_ucred(ctx));
7524 break;
7525 case NFS_IOC_SET_CRED:
7526 case NFS_IOC_SET_CRED64:
7527 if (!auth_is_kerberized(mp->nm_auth)) {
7528 return ENOTSUP;
7529 }
7530 if ((ap->a_command == NFS_IOC_SET_CRED && vfs_context_is64bit(ctx)) ||
7531 (ap->a_command == NFS_IOC_SET_CRED64 && !vfs_context_is64bit(ctx))) {
7532 return EINVAL;
7533 }
7534 if (vfs_context_is64bit(ctx)) {
7535 gprinc = *(struct user_nfs_gss_principal *)ap->a_data;
7536 } else {
7537 struct nfs_gss_principal *tp;
7538 tp = (struct nfs_gss_principal *)ap->a_data;
7539 gprinc.princlen = tp->princlen;
7540 gprinc.nametype = tp->nametype;
7541 gprinc.principal = CAST_USER_ADDR_T(tp->principal);
7542 }
7543 NFS_DBG(NFS_FAC_GSS, 7, "Enter NFS_FSCTL_SET_CRED (64-bit=%d): principal length %d name type %d usr pointer 0x%llx\n", vfs_context_is64bit(ctx), gprinc.princlen, gprinc.nametype, (unsigned long long)gprinc.principal);
7544 if (gprinc.princlen > MAXPATHLEN) {
7545 return EINVAL;
7546 }
7547 uint8_t *p;
7548 MALLOC(p, uint8_t *, gprinc.princlen + 1, M_TEMP, M_WAITOK | M_ZERO);
7549 if (p == NULL) {
7550 return ENOMEM;
7551 }
7552 error = copyin(gprinc.principal, p, gprinc.princlen);
7553 if (error) {
7554 NFS_DBG(NFS_FAC_GSS, 7, "NFS_FSCTL_SET_CRED could not copy in princiapl data of len %d: %d\n",
7555 gprinc.princlen, error);
7556 FREE(p, M_TEMP);
7557 return error;
7558 }
7559 NFS_DBG(NFS_FAC_GSS, 7, "Seting credential to principal %s\n", p);
7560 error = nfs_gss_clnt_ctx_set_principal(mp, ctx, p, gprinc.princlen, gprinc.nametype);
7561 NFS_DBG(NFS_FAC_GSS, 7, "Seting credential to principal %s returned %d\n", p, error);
7562 FREE(p, M_TEMP);
7563 break;
7564 case NFS_IOC_GET_CRED:
7565 case NFS_IOC_GET_CRED64:
7566 if (!auth_is_kerberized(mp->nm_auth)) {
7567 return ENOTSUP;
7568 }
7569 if ((ap->a_command == NFS_IOC_GET_CRED && vfs_context_is64bit(ctx)) ||
7570 (ap->a_command == NFS_IOC_GET_CRED64 && !vfs_context_is64bit(ctx))) {
7571 return EINVAL;
7572 }
7573 error = nfs_gss_clnt_ctx_get_principal(mp, ctx, &gprinc);
7574 if (error) {
7575 break;
7576 }
7577 if (vfs_context_is64bit(ctx)) {
7578 struct user_nfs_gss_principal *upp = (struct user_nfs_gss_principal *)ap->a_data;
7579 len = upp->princlen;
7580 if (gprinc.princlen < len) {
7581 len = gprinc.princlen;
7582 }
7583 upp->princlen = gprinc.princlen;
7584 upp->nametype = gprinc.nametype;
7585 upp->flags = gprinc.flags;
7586 if (gprinc.principal) {
7587 error = copyout((void *)gprinc.principal, upp->principal, len);
7588 } else {
7589 upp->principal = USER_ADDR_NULL;
7590 }
7591 } else {
7592 struct nfs_gss_principal *u32pp = (struct nfs_gss_principal *)ap->a_data;
7593 len = u32pp->princlen;
7594 if (gprinc.princlen < len) {
7595 len = gprinc.princlen;
7596 }
7597 u32pp->princlen = gprinc.princlen;
7598 u32pp->nametype = gprinc.nametype;
7599 u32pp->flags = gprinc.flags;
7600 if (gprinc.principal) {
7601 error = copyout((void *)gprinc.principal, u32pp->principal, len);
7602 } else {
7603 u32pp->principal = (user32_addr_t)0;
7604 }
7605 }
7606 if (error) {
7607 NFS_DBG(NFS_FAC_GSS, 7, "NFS_FSCTL_GET_CRED could not copy out princiapl data of len %d: %d\n",
7608 gprinc.princlen, error);
7609 }
7610 if (gprinc.principal) {
7611 FREE(gprinc.principal, M_TEMP);
7612 }
7613 #endif /* CONFIG_NFS_GSS */
7614 }
7615
7616 return error;
7617 }
7618
7619 /*ARGSUSED*/
7620 int
7621 nfs_vnop_select(
7622 __unused struct vnop_select_args /* {
7623 * struct vnodeop_desc *a_desc;
7624 * vnode_t a_vp;
7625 * int a_which;
7626 * int a_fflags;
7627 * void *a_wql;
7628 * vfs_context_t a_context;
7629 * } */*ap)
7630 {
7631 /*
7632 * We were once bogusly using seltrue(), which returns 1. Is this right?
7633 */
7634 return 1;
7635 }
7636
7637 /*
7638 * vnode OP for pagein using UPL
7639 *
7640 * No buffer I/O, just RPCs straight into the mapped pages.
7641 */
7642 int
7643 nfs_vnop_pagein(
7644 struct vnop_pagein_args /* {
7645 * struct vnodeop_desc *a_desc;
7646 * vnode_t a_vp;
7647 * upl_t a_pl;
7648 * vm_offset_t a_pl_offset;
7649 * off_t a_f_offset;
7650 * size_t a_size;
7651 * int a_flags;
7652 * vfs_context_t a_context;
7653 * } */*ap)
7654 {
7655 vnode_t vp = ap->a_vp;
7656 upl_t pl = ap->a_pl;
7657 size_t size = ap->a_size;
7658 off_t f_offset = ap->a_f_offset;
7659 vm_offset_t pl_offset = ap->a_pl_offset;
7660 int flags = ap->a_flags;
7661 thread_t thd;
7662 kauth_cred_t cred;
7663 nfsnode_t np = VTONFS(vp);
7664 size_t nmrsize, iosize, txsize, rxsize, retsize;
7665 off_t txoffset;
7666 struct nfsmount *nmp;
7667 int error = 0;
7668 vm_offset_t ioaddr, rxaddr;
7669 uio_t uio;
7670 char uio_buf[UIO_SIZEOF(1)];
7671 int nofreeupl = flags & UPL_NOCOMMIT;
7672 upl_page_info_t *plinfo;
7673 #define MAXPAGINGREQS 16 /* max outstanding RPCs for pagein/pageout */
7674 struct nfsreq *req[MAXPAGINGREQS];
7675 int nextsend, nextwait;
7676 #if CONFIG_NFS4
7677 uint32_t stategenid = 0;
7678 #endif
7679 uint32_t restart = 0;
7680 kern_return_t kret;
7681
7682 FSDBG(322, np, f_offset, size, flags);
7683 if (pl == (upl_t)NULL) {
7684 panic("nfs_pagein: no upl");
7685 }
7686
7687 if (size <= 0) {
7688 printf("nfs_pagein: invalid size %ld", size);
7689 if (!nofreeupl) {
7690 (void) ubc_upl_abort_range(pl, pl_offset, size, 0);
7691 }
7692 return EINVAL;
7693 }
7694 if (f_offset < 0 || f_offset >= (off_t)np->n_size || (f_offset & PAGE_MASK_64)) {
7695 if (!nofreeupl) {
7696 ubc_upl_abort_range(pl, pl_offset, size,
7697 UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY);
7698 }
7699 return EINVAL;
7700 }
7701
7702 thd = vfs_context_thread(ap->a_context);
7703 cred = ubc_getcred(vp);
7704 if (!IS_VALID_CRED(cred)) {
7705 cred = vfs_context_ucred(ap->a_context);
7706 }
7707
7708 uio = uio_createwithbuffer(1, f_offset, UIO_SYSSPACE, UIO_READ,
7709 &uio_buf, sizeof(uio_buf));
7710
7711 nmp = VTONMP(vp);
7712 if (nfs_mount_gone(nmp)) {
7713 if (!nofreeupl) {
7714 ubc_upl_abort_range(pl, pl_offset, size,
7715 UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY);
7716 }
7717 return ENXIO;
7718 }
7719 nmrsize = nmp->nm_rsize;
7720
7721 plinfo = ubc_upl_pageinfo(pl);
7722 kret = ubc_upl_map(pl, &ioaddr);
7723 if (kret != KERN_SUCCESS) {
7724 panic("nfs_vnop_pagein: ubc_upl_map() failed with (%d)", kret);
7725 }
7726 ioaddr += pl_offset;
7727
7728 tryagain:
7729 #if CONFIG_NFS4
7730 if (nmp->nm_vers >= NFS_VER4) {
7731 stategenid = nmp->nm_stategenid;
7732 }
7733 #endif
7734 txsize = rxsize = size;
7735 txoffset = f_offset;
7736 rxaddr = ioaddr;
7737
7738 bzero(req, sizeof(req));
7739 nextsend = nextwait = 0;
7740 do {
7741 if (np->n_flag & NREVOKE) {
7742 error = EIO;
7743 break;
7744 }
7745 /* send requests while we need to and have available slots */
7746 while ((txsize > 0) && (req[nextsend] == NULL)) {
7747 iosize = MIN(nmrsize, txsize);
7748 if ((error = nmp->nm_funcs->nf_read_rpc_async(np, txoffset, iosize, thd, cred, NULL, &req[nextsend]))) {
7749 req[nextsend] = NULL;
7750 break;
7751 }
7752 txoffset += iosize;
7753 txsize -= iosize;
7754 nextsend = (nextsend + 1) % MAXPAGINGREQS;
7755 }
7756 /* wait while we need to and break out if more requests to send */
7757 while ((rxsize > 0) && req[nextwait]) {
7758 iosize = retsize = MIN(nmrsize, rxsize);
7759 uio_reset(uio, uio_offset(uio), UIO_SYSSPACE, UIO_READ);
7760 uio_addiov(uio, CAST_USER_ADDR_T(rxaddr), iosize);
7761 FSDBG(322, uio_offset(uio), uio_resid(uio), rxaddr, rxsize);
7762 #if UPL_DEBUG
7763 upl_ubc_alias_set(pl, (uintptr_t) current_thread(), (uintptr_t) 2);
7764 #endif /* UPL_DEBUG */
7765 OSAddAtomic64(1, &nfsstats.pageins);
7766 error = nmp->nm_funcs->nf_read_rpc_async_finish(np, req[nextwait], uio, &retsize, NULL);
7767 req[nextwait] = NULL;
7768 nextwait = (nextwait + 1) % MAXPAGINGREQS;
7769 #if CONFIG_NFS4
7770 if ((nmp->nm_vers >= NFS_VER4) && nfs_mount_state_error_should_restart(error)) {
7771 lck_mtx_lock(&nmp->nm_lock);
7772 if ((error != NFSERR_GRACE) && (stategenid == nmp->nm_stategenid)) {
7773 NP(np, "nfs_vnop_pagein: error %d, initiating recovery", error);
7774 nfs_need_recover(nmp, error);
7775 }
7776 lck_mtx_unlock(&nmp->nm_lock);
7777 restart++;
7778 goto cancel;
7779 }
7780 #endif
7781 if (error) {
7782 FSDBG(322, uio_offset(uio), uio_resid(uio), error, -1);
7783 break;
7784 }
7785 if (retsize < iosize) {
7786 /* Just zero fill the rest of the valid area. */
7787 int zcnt = iosize - retsize;
7788 bzero((char *)rxaddr + retsize, zcnt);
7789 FSDBG(324, uio_offset(uio), retsize, zcnt, rxaddr);
7790 uio_update(uio, zcnt);
7791 }
7792 rxaddr += iosize;
7793 rxsize -= iosize;
7794 if (txsize) {
7795 break;
7796 }
7797 }
7798 } while (!error && (txsize || rxsize));
7799
7800 restart = 0;
7801
7802 if (error) {
7803 #if CONFIG_NFS4
7804 cancel:
7805 #endif
7806 /* cancel any outstanding requests */
7807 while (req[nextwait]) {
7808 nfs_request_async_cancel(req[nextwait]);
7809 req[nextwait] = NULL;
7810 nextwait = (nextwait + 1) % MAXPAGINGREQS;
7811 }
7812 if (np->n_flag & NREVOKE) {
7813 error = EIO;
7814 } else if (restart) {
7815 if (restart <= nfs_mount_state_max_restarts(nmp)) { /* guard against no progress */
7816 if (error == NFSERR_GRACE) {
7817 tsleep(&nmp->nm_state, (PZERO - 1), "nfsgrace", 2 * hz);
7818 }
7819 if (!(error = nfs_mount_state_wait_for_recovery(nmp))) {
7820 goto tryagain;
7821 }
7822 } else {
7823 NP(np, "nfs_pagein: too many restarts, aborting");
7824 }
7825 }
7826 }
7827
7828 ubc_upl_unmap(pl);
7829
7830 if (!nofreeupl) {
7831 if (error) {
7832 ubc_upl_abort_range(pl, pl_offset, size,
7833 UPL_ABORT_ERROR |
7834 UPL_ABORT_FREE_ON_EMPTY);
7835 } else {
7836 ubc_upl_commit_range(pl, pl_offset, size,
7837 UPL_COMMIT_CLEAR_DIRTY |
7838 UPL_COMMIT_FREE_ON_EMPTY);
7839 }
7840 }
7841 return error;
7842 }
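/*
 * [Editor's sketch] nfs_vnop_pagein() keeps up to MAXPAGINGREQS read
 * RPCs in flight using a fixed array as a ring: nextsend fills free
 * slots, nextwait reaps completions in issue order, and the reap loop
 * breaks early whenever there is more to send so the pipe stays full.
 * A schematic of that pattern with stubbed-out send/reap hooks; all
 * names below are the editor's.
 */
#if 0   /* illustrative sketch only */
#define RING 16

static void
pipeline(long total, long chunk,
    int (*send)(long off, long len, void **tokp),
    int (*reap)(void *tok))
{
        void *slot[RING] = { 0 };
        int nextsend = 0, nextwait = 0, error = 0;
        long txleft = total, rxleft = total, txoff = 0;

        do {
                /* issue while there is work and a free slot */
                while (txleft > 0 && slot[nextsend] == NULL) {
                        long io = txleft < chunk ? txleft : chunk;
                        if ((error = send(txoff, io, &slot[nextsend]))) {
                                break;
                        }
                        txoff += io;
                        txleft -= io;
                        nextsend = (nextsend + 1) % RING;
                }
                /* reap completions in order; go refill if more to send */
                while (rxleft > 0 && slot[nextwait] != NULL) {
                        long io = rxleft < chunk ? rxleft : chunk;
                        error = reap(slot[nextwait]);
                        slot[nextwait] = NULL;
                        nextwait = (nextwait + 1) % RING;
                        if (error) {
                                break;
                        }
                        rxleft -= io;
                        if (txleft) {
                                break;  /* keep the pipe full */
                        }
                }
        } while (!error && (txleft || rxleft));
}
#endif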
7843
7844
7845 /*
7846 * the following are needed only by nfs_pageout to know how to handle errors
7847 * see nfs_pageout comments on explanation of actions.
7848 * the errors here are copied from errno.h and errors returned by servers
7849 * are expected to match the same numbers here. If not, our actions may
7850 * be erroneous.
7851 */
7852 char nfs_pageouterrorhandler(int);
7853 enum actiontype {NOACTION, DUMP, DUMPANDLOG, RETRY, SEVER};
7854 #define NFS_ELAST 88
7855 static u_char errorcount[NFS_ELAST + 1]; /* better be zeros when initialized */
7856 static const char errortooutcome[NFS_ELAST + 1] = {
7857 NOACTION,
7858 DUMP, /* EPERM 1 Operation not permitted */
7859 DUMP, /* ENOENT 2 No such file or directory */
7860 DUMPANDLOG, /* ESRCH 3 No such process */
7861 RETRY, /* EINTR 4 Interrupted system call */
7862 DUMP, /* EIO 5 Input/output error */
7863 DUMP, /* ENXIO 6 Device not configured */
7864 DUMPANDLOG, /* E2BIG 7 Argument list too long */
7865 DUMPANDLOG, /* ENOEXEC 8 Exec format error */
7866 DUMPANDLOG, /* EBADF 9 Bad file descriptor */
7867 DUMPANDLOG, /* ECHILD 10 No child processes */
7868 DUMPANDLOG, /* EDEADLK 11 Resource deadlock avoided - was EAGAIN */
7869 RETRY, /* ENOMEM 12 Cannot allocate memory */
7870 DUMP, /* EACCES 13 Permission denied */
7871 DUMPANDLOG, /* EFAULT 14 Bad address */
7872 DUMPANDLOG, /* ENOTBLK 15 POSIX - Block device required */
7873 RETRY, /* EBUSY 16 Device busy */
7874 DUMP, /* EEXIST 17 File exists */
7875 DUMP, /* EXDEV 18 Cross-device link */
7876 DUMP, /* ENODEV 19 Operation not supported by device */
7877 DUMP, /* ENOTDIR 20 Not a directory */
7878 DUMP, /* EISDIR 21 Is a directory */
7879 DUMP, /* EINVAL 22 Invalid argument */
7880 DUMPANDLOG, /* ENFILE 23 Too many open files in system */
7881 DUMPANDLOG, /* EMFILE 24 Too many open files */
7882 DUMPANDLOG, /* ENOTTY 25 Inappropriate ioctl for device */
7883 DUMPANDLOG, /* ETXTBSY 26 Text file busy - POSIX */
7884 DUMP, /* EFBIG 27 File too large */
7885 DUMP, /* ENOSPC 28 No space left on device */
7886 DUMPANDLOG, /* ESPIPE 29 Illegal seek */
7887 DUMP, /* EROFS 30 Read-only file system */
7888 DUMP, /* EMLINK 31 Too many links */
7889 RETRY, /* EPIPE 32 Broken pipe */
7890 /* math software */
7891 DUMPANDLOG, /* EDOM 33 Numerical argument out of domain */
7892 DUMPANDLOG, /* ERANGE 34 Result too large */
7893 RETRY, /* EAGAIN/EWOULDBLOCK 35 Resource temporarily unavailable */
7894 DUMPANDLOG, /* EINPROGRESS 36 Operation now in progress */
7895 DUMPANDLOG, /* EALREADY 37 Operation already in progress */
7896 /* ipc/network software -- argument errors */
7897 DUMPANDLOG, /* ENOTSOCK 38 Socket operation on non-socket */
7898 DUMPANDLOG, /* EDESTADDRREQ 39 Destination address required */
7899 DUMPANDLOG, /* EMSGSIZE 40 Message too long */
7900 DUMPANDLOG, /* EPROTOTYPE 41 Protocol wrong type for socket */
7901 DUMPANDLOG, /* ENOPROTOOPT 42 Protocol not available */
7902 DUMPANDLOG, /* EPROTONOSUPPORT 43 Protocol not supported */
7903 DUMPANDLOG, /* ESOCKTNOSUPPORT 44 Socket type not supported */
7904 DUMPANDLOG, /* ENOTSUP 45 Operation not supported */
7905 DUMPANDLOG, /* EPFNOSUPPORT 46 Protocol family not supported */
7906 DUMPANDLOG, /* EAFNOSUPPORT 47 Address family not supported by protocol family */
7907 DUMPANDLOG, /* EADDRINUSE 48 Address already in use */
7908 DUMPANDLOG, /* EADDRNOTAVAIL 49 Can't assign requested address */
7909 /* ipc/network software -- operational errors */
7910 RETRY, /* ENETDOWN 50 Network is down */
7911 RETRY, /* ENETUNREACH 51 Network is unreachable */
7912 RETRY, /* ENETRESET 52 Network dropped connection on reset */
7913 RETRY, /* ECONNABORTED 53 Software caused connection abort */
7914 RETRY, /* ECONNRESET 54 Connection reset by peer */
7915 RETRY, /* ENOBUFS 55 No buffer space available */
7916 RETRY, /* EISCONN 56 Socket is already connected */
7917 RETRY, /* ENOTCONN 57 Socket is not connected */
7918 RETRY, /* ESHUTDOWN 58 Can't send after socket shutdown */
7919 RETRY, /* ETOOMANYREFS 59 Too many references: can't splice */
7920 RETRY, /* ETIMEDOUT 60 Operation timed out */
7921 RETRY, /* ECONNREFUSED 61 Connection refused */
7922
7923 DUMPANDLOG, /* ELOOP 62 Too many levels of symbolic links */
7924 DUMP, /* ENAMETOOLONG 63 File name too long */
7925 RETRY, /* EHOSTDOWN 64 Host is down */
7926 RETRY, /* EHOSTUNREACH 65 No route to host */
7927 DUMP, /* ENOTEMPTY 66 Directory not empty */
7928 /* quotas & mush */
7929 DUMPANDLOG, /* EPROCLIM 67 Too many processes */
7930 DUMPANDLOG, /* EUSERS 68 Too many users */
7931 DUMPANDLOG, /* EDQUOT 69 Disc quota exceeded */
7932 /* Network File System */
7933 DUMP, /* ESTALE 70 Stale NFS file handle */
7934 DUMP, /* EREMOTE 71 Too many levels of remote in path */
7935 DUMPANDLOG, /* EBADRPC 72 RPC struct is bad */
7936 DUMPANDLOG, /* ERPCMISMATCH 73 RPC version wrong */
7937 DUMPANDLOG, /* EPROGUNAVAIL 74 RPC prog. not avail */
7938 DUMPANDLOG, /* EPROGMISMATCH 75 Program version wrong */
7939 DUMPANDLOG, /* EPROCUNAVAIL 76 Bad procedure for program */
7940
7941 DUMPANDLOG, /* ENOLCK 77 No locks available */
7942 DUMPANDLOG, /* ENOSYS 78 Function not implemented */
7943 DUMPANDLOG, /* EFTYPE 79 Inappropriate file type or format */
7944 DUMPANDLOG, /* EAUTH 80 Authentication error */
7945 DUMPANDLOG, /* ENEEDAUTH 81 Need authenticator */
7946 /* Intelligent device errors */
7947 DUMPANDLOG, /* EPWROFF 82 Device power is off */
7948 DUMPANDLOG, /* EDEVERR 83 Device error, e.g. paper out */
7949 DUMPANDLOG, /* EOVERFLOW 84 Value too large to be stored in data type */
7950 /* Program loading errors */
7951 DUMPANDLOG, /* EBADEXEC 85 Bad executable */
7952 DUMPANDLOG, /* EBADARCH 86 Bad CPU type in executable */
7953 DUMPANDLOG, /* ESHLIBVERS 87 Shared library version mismatch */
7954 DUMPANDLOG, /* EBADMACHO 88 Malformed Macho file */
7955 };
7956
7957 char
7958 nfs_pageouterrorhandler(int error)
7959 {
7960 if (error > NFS_ELAST) {
7961 return DUMP;
7962 } else {
7963 return errortooutcome[error];
7964 }
7965 }
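/*
 * [Editor's sketch] errortooutcome[] is a table-driven policy: index
 * by errno, get an action; anything above NFS_ELAST (e.g. raw NFSv3
 * errors) defaults to DUMP.  A toy caller showing how the actions
 * read; the wrapper function below is the editor's, not the kernel's.
 */
#if 0   /* illustrative sketch only */
static const char *
describe_action(int error)
{
        switch (nfs_pageouterrorhandler(error)) {
        case NOACTION:   return "no action";
        case DUMP:       return "dump pages, drop the data";
        case DUMPANDLOG: return "dump pages and log (unexpected)";
        case RETRY:      return "leave pages dirty; let VM retry";
        case SEVER:      return "sever (reserved, not implemented)";
        default:         return "?";
        }
}
#endif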
7966
7967
7968 /*
7969 * vnode OP for pageout using UPL
7970 *
7971 * No buffer I/O, just RPCs straight from the mapped pages.
7972 * File size changes are not permitted in pageout.
7973 */
7974 int
7975 nfs_vnop_pageout(
7976 struct vnop_pageout_args /* {
7977 * struct vnodeop_desc *a_desc;
7978 * vnode_t a_vp;
7979 * upl_t a_pl;
7980 * vm_offset_t a_pl_offset;
7981 * off_t a_f_offset;
7982 * size_t a_size;
7983 * int a_flags;
7984 * vfs_context_t a_context;
7985 * } */*ap)
7986 {
7987 vnode_t vp = ap->a_vp;
7988 upl_t pl = ap->a_pl;
7989 size_t size = ap->a_size;
7990 off_t f_offset = ap->a_f_offset;
7991 vm_offset_t pl_offset = ap->a_pl_offset;
7992 int flags = ap->a_flags;
7993 nfsnode_t np = VTONFS(vp);
7994 thread_t thd;
7995 kauth_cred_t cred;
7996 struct nfsbuf *bp;
7997 struct nfsmount *nmp = VTONMP(vp);
7998 daddr64_t lbn;
7999 int error = 0, iomode;
8000 off_t off, txoffset, rxoffset;
8001 vm_offset_t ioaddr, txaddr, rxaddr;
8002 uio_t auio;
8003 char uio_buf[UIO_SIZEOF(1)];
8004 int nofreeupl = flags & UPL_NOCOMMIT;
8005 size_t nmwsize, biosize, iosize, pgsize, txsize, rxsize, xsize, remsize;
8006 struct nfsreq *req[MAXPAGINGREQS];
8007 int nextsend, nextwait, wverfset, commit;
8008 uint64_t wverf, wverf2;
8009 #if CONFIG_NFS4
8010 uint32_t stategenid = 0;
8011 #endif
8012 uint32_t vrestart = 0, restart = 0, vrestarts = 0, restarts = 0;
8013 kern_return_t kret;
8014
8015 FSDBG(323, f_offset, size, pl, pl_offset);
8016
8017 if (pl == (upl_t)NULL) {
8018 panic("nfs_pageout: no upl");
8019 }
8020
8021 if (size <= 0) {
8022 printf("nfs_pageout: invalid size %ld", size);
8023 if (!nofreeupl) {
8024 ubc_upl_abort_range(pl, pl_offset, size, 0);
8025 }
8026 return EINVAL;
8027 }
8028
8029 if (!nmp) {
8030 if (!nofreeupl) {
8031 ubc_upl_abort(pl, UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY);
8032 }
8033 return ENXIO;
8034 }
8035 biosize = nmp->nm_biosize;
8036 nmwsize = nmp->nm_wsize;
8037
8038 nfs_data_lock_noupdate(np, NFS_DATA_LOCK_SHARED);
8039
8040 /*
8041 * Check to see whether the buffer is incore.
8042 * If incore and not busy, invalidate it from the cache.
8043 */
8044 for (iosize = 0; iosize < size; iosize += xsize) {
8045 off = f_offset + iosize;
8046 /* need to make sure we do things on block boundaries */
8047 xsize = biosize - (off % biosize);
8048 if (off + xsize > f_offset + size) {
8049 xsize = f_offset + size - off;
8050 }
8051 lbn = (daddr64_t)(off / biosize);
8052 lck_mtx_lock(nfs_buf_mutex);
8053 if ((bp = nfs_buf_incore(np, lbn))) {
8054 FSDBG(323, off, bp, bp->nb_lflags, bp->nb_flags);
8055 if (nfs_buf_acquire(bp, NBAC_NOWAIT, 0, 0)) {
8056 lck_mtx_unlock(nfs_buf_mutex);
8057 nfs_data_unlock_noupdate(np);
8058 /* no panic. just tell vm we are busy */
8059 if (!nofreeupl) {
8060 ubc_upl_abort_range(pl, pl_offset, size, 0);
8061 }
8062 return EBUSY;
8063 }
8064 if (bp->nb_dirtyend > 0) {
8065 /*
8066 * if there's a dirty range in the buffer, check
8067 * to see if it extends beyond the pageout region
8068 *
8069 * if the dirty region lies completely within the
8070 * pageout region, we just invalidate the buffer
8071 * because it's all being written out now anyway.
8072 *
8073 * if any of the dirty region lies outside the
8074 * pageout region, we'll try to clip the dirty
8075 * region to eliminate the portion that's being
8076 * paged out. If that's not possible, because
8077 * the dirty region extends before and after the
8078 * pageout region, then we'll just return EBUSY.
8079 */
8080 off_t boff, start, end;
8081 boff = NBOFF(bp);
8082 start = off;
8083 end = off + xsize;
8084 /* clip end to EOF */
8085 if (end > (off_t)np->n_size) {
8086 end = np->n_size;
8087 }
8088 start -= boff;
8089 end -= boff;
8090 if ((bp->nb_dirtyoff < start) &&
8091 (bp->nb_dirtyend > end)) {
8092 /*
8093 * not gonna be able to clip the dirty region
8094 *
8095 * But before returning the bad news, move the
8096 * buffer to the start of the delwri list and
8097 * give the list a push to try to flush the
8098 * buffer out.
8099 */
8100 FSDBG(323, np, bp, 0xd00deebc, EBUSY);
8101 nfs_buf_remfree(bp);
8102 TAILQ_INSERT_HEAD(&nfsbufdelwri, bp, nb_free);
8103 nfsbufdelwricnt++;
8104 nfs_buf_drop(bp);
8105 nfs_buf_delwri_push(1);
8106 lck_mtx_unlock(nfs_buf_mutex);
8107 nfs_data_unlock_noupdate(np);
8108 if (!nofreeupl) {
8109 ubc_upl_abort_range(pl, pl_offset, size, 0);
8110 }
8111 return EBUSY;
8112 }
8113 if ((bp->nb_dirtyoff < start) ||
8114 (bp->nb_dirtyend > end)) {
8115 /* clip dirty region, if necessary */
8116 if (bp->nb_dirtyoff < start) {
8117 bp->nb_dirtyend = min(bp->nb_dirtyend, start);
8118 }
8119 if (bp->nb_dirtyend > end) {
8120 bp->nb_dirtyoff = max(bp->nb_dirtyoff, end);
8121 }
8122 FSDBG(323, bp, bp->nb_dirtyoff, bp->nb_dirtyend, 0xd00dee00);
8123 /* we're leaving this block dirty */
8124 nfs_buf_drop(bp);
8125 lck_mtx_unlock(nfs_buf_mutex);
8126 continue;
8127 }
8128 }
8129 nfs_buf_remfree(bp);
8130 lck_mtx_unlock(nfs_buf_mutex);
8131 SET(bp->nb_flags, NB_INVAL);
8132 nfs_node_lock_force(np);
8133 if (ISSET(bp->nb_flags, NB_NEEDCOMMIT)) {
8134 CLR(bp->nb_flags, NB_NEEDCOMMIT);
8135 np->n_needcommitcnt--;
8136 CHECK_NEEDCOMMITCNT(np);
8137 }
8138 nfs_node_unlock(np);
8139 nfs_buf_release(bp, 1);
8140 } else {
8141 lck_mtx_unlock(nfs_buf_mutex);
8142 }
8143 }
8144
8145 thd = vfs_context_thread(ap->a_context);
8146 cred = ubc_getcred(vp);
8147 if (!IS_VALID_CRED(cred)) {
8148 cred = vfs_context_ucred(ap->a_context);
8149 }
8150
8151 nfs_node_lock_force(np);
8152 if (np->n_flag & NWRITEERR) {
8153 error = np->n_error;
8154 nfs_node_unlock(np);
8155 nfs_data_unlock_noupdate(np);
8156 if (!nofreeupl) {
8157 ubc_upl_abort_range(pl, pl_offset, size,
8158 UPL_ABORT_FREE_ON_EMPTY);
8159 }
8160 return error;
8161 }
8162 nfs_node_unlock(np);
8163
8164 if (f_offset < 0 || f_offset >= (off_t)np->n_size ||
8165 f_offset & PAGE_MASK_64 || size & PAGE_MASK_64) {
8166 nfs_data_unlock_noupdate(np);
8167 if (!nofreeupl) {
8168 ubc_upl_abort_range(pl, pl_offset, size,
8169 UPL_ABORT_FREE_ON_EMPTY);
8170 }
8171 return EINVAL;
8172 }
8173
8174 kret = ubc_upl_map(pl, &ioaddr);
8175 if (kret != KERN_SUCCESS) {
8176 panic("nfs_vnop_pageout: ubc_upl_map() failed with (%d)", kret);
8177 }
8178 ioaddr += pl_offset;
8179
8180 if ((u_quad_t)f_offset + size > np->n_size) {
8181 xsize = np->n_size - f_offset;
8182 } else {
8183 xsize = size;
8184 }
8185
8186 pgsize = round_page_64(xsize);
8187 if ((size > pgsize) && !nofreeupl) {
8188 ubc_upl_abort_range(pl, pl_offset + pgsize, size - pgsize,
8189 UPL_ABORT_FREE_ON_EMPTY);
8190 }
8191
8192 /*
8193 * check for partial page and clear the
8194 * contents past end of the file before
8195 * releasing it in the VM page cache
8196 */
8197 if ((u_quad_t)f_offset < np->n_size && (u_quad_t)f_offset + size > np->n_size) {
8198 size_t io = np->n_size - f_offset;
8199 bzero((caddr_t)(ioaddr + io), size - io);
8200 FSDBG(321, np->n_size, f_offset, f_offset + io, size - io);
8201 }
8202 nfs_data_unlock_noupdate(np);
8203
8204 auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_WRITE,
8205 &uio_buf, sizeof(uio_buf));
8206
8207 tryagain:
8208 #if CONFIG_NFS4
8209 if (nmp->nm_vers >= NFS_VER4) {
8210 stategenid = nmp->nm_stategenid;
8211 }
8212 #endif
8213 wverf = wverf2 = wverfset = 0;
8214 txsize = rxsize = xsize;
8215 txoffset = rxoffset = f_offset;
8216 txaddr = rxaddr = ioaddr;
8217 commit = NFS_WRITE_FILESYNC;
8218
8219 bzero(req, sizeof(req));
8220 nextsend = nextwait = 0;
8221 do {
8222 if (np->n_flag & NREVOKE) {
8223 error = EIO;
8224 break;
8225 }
8226 /* send requests while we need to and have available slots */
8227 while ((txsize > 0) && (req[nextsend] == NULL)) {
8228 iosize = MIN(nmwsize, txsize);
8229 uio_reset(auio, txoffset, UIO_SYSSPACE, UIO_WRITE);
8230 uio_addiov(auio, CAST_USER_ADDR_T(txaddr), iosize);
8231 FSDBG(323, uio_offset(auio), iosize, txaddr, txsize);
8232 OSAddAtomic64(1, &nfsstats.pageouts);
8233 nfs_node_lock_force(np);
8234 np->n_numoutput++;
8235 nfs_node_unlock(np);
8236 vnode_startwrite(vp);
8237 iomode = NFS_WRITE_UNSTABLE;
8238 if ((error = nmp->nm_funcs->nf_write_rpc_async(np, auio, iosize, thd, cred, iomode, NULL, &req[nextsend]))) {
8239 req[nextsend] = NULL;
8240 vnode_writedone(vp);
8241 nfs_node_lock_force(np);
8242 np->n_numoutput--;
8243 nfs_node_unlock(np);
8244 break;
8245 }
8246 txaddr += iosize;
8247 txoffset += iosize;
8248 txsize -= iosize;
8249 nextsend = (nextsend + 1) % MAXPAGINGREQS;
8250 }
8251 /* wait while we need to and break out if more requests to send */
8252 while ((rxsize > 0) && req[nextwait]) {
8253 iosize = remsize = MIN(nmwsize, rxsize);
8254 error = nmp->nm_funcs->nf_write_rpc_async_finish(np, req[nextwait], &iomode, &iosize, &wverf2);
8255 req[nextwait] = NULL;
8256 nextwait = (nextwait + 1) % MAXPAGINGREQS;
8257 vnode_writedone(vp);
8258 nfs_node_lock_force(np);
8259 np->n_numoutput--;
8260 nfs_node_unlock(np);
8261 #if CONFIG_NFS4
8262 if ((nmp->nm_vers >= NFS_VER4) && nfs_mount_state_error_should_restart(error)) {
8263 lck_mtx_lock(&nmp->nm_lock);
8264 if ((error != NFSERR_GRACE) && (stategenid == nmp->nm_stategenid)) {
8265 NP(np, "nfs_vnop_pageout: error %d, initiating recovery", error);
8266 nfs_need_recover(nmp, error);
8267 }
8268 lck_mtx_unlock(&nmp->nm_lock);
8269 restart = 1;
8270 goto cancel;
8271 }
8272 #endif
8273 if (error) {
8274 FSDBG(323, rxoffset, rxsize, error, -1);
8275 break;
8276 }
8277 if (!wverfset) {
8278 wverf = wverf2;
8279 wverfset = 1;
8280 } else if (wverf != wverf2) {
8281 /* verifier changed, so we need to restart all the writes */
8282 vrestart = 1;
8283 goto cancel;
8284 }
8285 /* Retain the lowest commitment level returned. */
8286 if (iomode < commit) {
8287 commit = iomode;
8288 }
8289 rxaddr += iosize;
8290 rxoffset += iosize;
8291 rxsize -= iosize;
8292 remsize -= iosize;
8293 if (remsize > 0) {
8294 /* need to try sending the remainder */
8295 iosize = remsize;
8296 uio_reset(auio, rxoffset, UIO_SYSSPACE, UIO_WRITE);
8297 uio_addiov(auio, CAST_USER_ADDR_T(rxaddr), remsize);
8298 iomode = NFS_WRITE_UNSTABLE;
8299 error = nfs_write_rpc2(np, auio, thd, cred, &iomode, &wverf2);
8300 #if CONFIG_NFS4
8301 if ((nmp->nm_vers >= NFS_VER4) && nfs_mount_state_error_should_restart(error)) {
8302 NP(np, "nfs_vnop_pageout: restart: error %d", error);
8303 lck_mtx_lock(&nmp->nm_lock);
8304 if ((error != NFSERR_GRACE) && (stategenid == nmp->nm_stategenid)) {
8305 NP(np, "nfs_vnop_pageout: error %d, initiating recovery", error);
8306 nfs_need_recover(nmp, error);
8307 }
8308 lck_mtx_unlock(&nmp->nm_lock);
8309 restart = 1;
8310 goto cancel;
8311 }
8312 #endif
8313 if (error) {
8314 FSDBG(323, rxoffset, rxsize, error, -1);
8315 break;
8316 }
8317 if (wverf != wverf2) {
8318 /* verifier changed, so we need to restart all the writes */
8319 vrestart = 1;
8320 goto cancel;
8321 }
8322 if (iomode < commit) {
8323 commit = iomode;
8324 }
8325 rxaddr += iosize;
8326 rxoffset += iosize;
8327 rxsize -= iosize;
8328 }
8329 if (txsize) {
8330 break;
8331 }
8332 }
8333 } while (!error && (txsize || rxsize));
8334
8335 vrestart = 0;
8336
8337 if (!error && (commit != NFS_WRITE_FILESYNC)) {
8338 error = nmp->nm_funcs->nf_commit_rpc(np, f_offset, xsize, cred, wverf);
8339 if (error == NFSERR_STALEWRITEVERF) {
8340 vrestart = 1;
8341 error = EIO;
8342 }
8343 }
8344
8345 if (error) {
8346 cancel:
8347 /* cancel any outstanding requests */
8348 while (req[nextwait]) {
8349 nfs_request_async_cancel(req[nextwait]);
8350 req[nextwait] = NULL;
8351 nextwait = (nextwait + 1) % MAXPAGINGREQS;
8352 vnode_writedone(vp);
8353 nfs_node_lock_force(np);
8354 np->n_numoutput--;
8355 nfs_node_unlock(np);
8356 }
8357 if (np->n_flag & NREVOKE) {
8358 error = EIO;
8359 } else {
8360 if (vrestart) {
8361 if (++vrestarts <= 100) { /* guard against no progress */
8362 goto tryagain;
8363 }
8364 NP(np, "nfs_pageout: too many restarts, aborting");
8365 FSDBG(323, f_offset, xsize, ERESTART, -1);
8366 }
8367 if (restart) {
8368 if (restarts <= nfs_mount_state_max_restarts(nmp)) { /* guard against no progress */
8369 if (error == NFSERR_GRACE) {
8370 tsleep(&nmp->nm_state, (PZERO - 1), "nfsgrace", 2 * hz);
8371 }
8372 if (!(error = nfs_mount_state_wait_for_recovery(nmp))) {
8373 goto tryagain;
8374 }
8375 } else {
8376 NP(np, "nfs_pageout: too many restarts, aborting");
8377 FSDBG(323, f_offset, xsize, ERESTART, -1);
8378 }
8379 }
8380 }
8381 }
8382
8383 ubc_upl_unmap(pl);
8384
8385 /*
8386 * We've had several different solutions on what to do when the pageout
8387 * gets an error. If we don't handle it and return an error to the
8388 * caller, vm, it will retry. This can end in endless looping
8389 * between vm and here doing retries of the same page. Doing a dump
8390 * back to vm will get it out of vm's knowledge and we lose whatever
8391 * data existed. This is risky, but in some cases necessary. For
8392 * example, the initial fix here was to do that for ESTALE. In that case
8393 * the server is telling us that the file is no longer the same. We
8394 * would not want to keep paging out to that. We also saw some 151
8395 * errors from an Auspex server, and NFSv3 can return errors higher than
8396 * ELAST. Those, along with known NFS server errors, we will "dump" from
8397 * vm. Errors we don't expect to occur, we dump and log for further
8398 * analysis. Errors that could be transient, such as networking ones,
8399 * we let vm "retry". Lastly, errors that we retry but that could storm
8400 * the network get "retrywithsleep". "sever" will be used in the future
8401 * to dump all pages of an object for cases like ESTALE.
8402 * All this is the basis for the actions returned and first guesses on
8403 * error handling. Tweaking is expected as more statistics are gathered.
8404 * Note, in the long run we may need another, more robust solution:
8405 * some kind of persistent store for when vm can neither dump nor keep
8406 * retrying, but that would be a larger architectural change.
8407 */
8408 if (!nofreeupl) { /* otherwise stacked file system has to handle this */
8409 if (error) {
8410 int abortflags = 0;
8411 char action = nfs_pageouterrorhandler(error);
8412
8413 switch (action) {
8414 case DUMP:
8415 abortflags = UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY;
8416 break;
8417 case DUMPANDLOG:
8418 abortflags = UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY;
8419 if (error <= NFS_ELAST) {
8420 if ((errorcount[error] % 100) == 0) {
8421 NP(np, "nfs_pageout: unexpected error %d. dumping vm page", error);
8422 }
8423 errorcount[error]++;
8424 }
8425 break;
8426 case RETRY:
8427 abortflags = UPL_ABORT_FREE_ON_EMPTY;
8428 break;
8429 case SEVER: /* not implemented */
8430 default:
8431 NP(np, "nfs_pageout: action %d not expected", action);
8432 break;
8433 }
8434
8435 ubc_upl_abort_range(pl, pl_offset, pgsize, abortflags);
8436 /* return error in all cases above */
8437 } else {
8438 ubc_upl_commit_range(pl, pl_offset, pgsize,
8439 UPL_COMMIT_CLEAR_DIRTY |
8440 UPL_COMMIT_FREE_ON_EMPTY);
8441 }
8442 }
8443 return error;
8444 }
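/*
 * [Editor's sketch] Why pageout tracks wverf/wverf2: NFSv3+ UNSTABLE
 * writes are only durable after a COMMIT that returns the same write
 * verifier the WRITE replies carried.  If the verifier changes
 * mid-stream, the server lost its write cache (e.g. it rebooted), and
 * every uncommitted write must be re-sent -- hence the vrestart path
 * above.  A schematic of the verifier check; the struct and function
 * below are the editor's.
 */
#if 0   /* illustrative sketch only */
#include <stdint.h>

struct write_stream {
        uint64_t wverf;
        int      wverfset;
};

/*
 * Returns 0 if the reply's verifier is consistent, 1 if all
 * uncommitted writes must be restarted from scratch.
 */
static int
check_verifier(struct write_stream *ws, uint64_t wverf2)
{
        if (!ws->wverfset) {
                ws->wverf = wverf2;     /* first reply sets the baseline */
                ws->wverfset = 1;
                return 0;
        }
        return ws->wverf != wverf2;     /* changed => server restarted */
}
#endif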
8445
8446 /* Blktooff derives file offset given a logical block number */
8447 int
8448 nfs_vnop_blktooff(
8449 struct vnop_blktooff_args /* {
8450 * struct vnodeop_desc *a_desc;
8451 * vnode_t a_vp;
8452 * daddr64_t a_lblkno;
8453 * off_t *a_offset;
8454 * } */*ap)
8455 {
8456 int biosize;
8457 vnode_t vp = ap->a_vp;
8458 struct nfsmount *nmp = VTONMP(vp);
8459
8460 if (nfs_mount_gone(nmp)) {
8461 return ENXIO;
8462 }
8463 biosize = nmp->nm_biosize;
8464
8465 *ap->a_offset = (off_t)(ap->a_lblkno * biosize);
8466
8467 return 0;
8468 }
8469
8470 int
8471 nfs_vnop_offtoblk(
8472 struct vnop_offtoblk_args /* {
8473 * struct vnodeop_desc *a_desc;
8474 * vnode_t a_vp;
8475 * off_t a_offset;
8476 * daddr64_t *a_lblkno;
8477 * } */*ap)
8478 {
8479 int biosize;
8480 vnode_t vp = ap->a_vp;
8481 struct nfsmount *nmp = VTONMP(vp);
8482
8483 if (nfs_mount_gone(nmp)) {
8484 return ENXIO;
8485 }
8486 biosize = nmp->nm_biosize;
8487
8488 *ap->a_lblkno = (daddr64_t)(ap->a_offset / biosize);
8489
8490 return 0;
8491 }
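/*
 * [Editor's sketch] blktooff/offtoblk above are simple inverses over
 * the mount's biosize: block -> byte offset multiplies, byte offset ->
 * block divides (truncating), so every offset within a block maps back
 * to the same lblkno.  A round-trip check with an example biosize of
 * 32768 (a value chosen for illustration only):
 */
#if 0   /* illustrative sketch only */
#include <assert.h>
#include <stdint.h>

static void
blk_round_trip(void)
{
        const int biosize = 32768;
        const int64_t lblkno = 3;
        const int64_t offset = lblkno * biosize;

        assert(offset == 98304);                            /* blktooff */
        assert(offset / biosize == lblkno);                 /* offtoblk */
        assert((offset + biosize - 1) / biosize == lblkno); /* same block */
}
#endif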
8492
8493 /*
8494 * vnode change monitoring
8495 */
8496 int
8497 nfs_vnop_monitor(
8498 struct vnop_monitor_args /* {
8499 * struct vnodeop_desc *a_desc;
8500 * vnode_t a_vp;
8501 * uint32_t a_events;
8502 * uint32_t a_flags;
8503 * void *a_handle;
8504 * vfs_context_t a_context;
8505 * } */*ap)
8506 {
8507 nfsnode_t np = VTONFS(ap->a_vp);
8508 struct nfsmount *nmp = VTONMP(ap->a_vp);
8509 int error = 0;
8510
8511 if (nfs_mount_gone(nmp)) {
8512 return ENXIO;
8513 }
8514
8515 /* make sure that the vnode's monitoring status is up to date */
8516 lck_mtx_lock(&nmp->nm_lock);
8517 if (vnode_ismonitored(ap->a_vp)) {
8518 /* This vnode is currently being monitored, make sure we're tracking it. */
8519 if (np->n_monlink.le_next == NFSNOLIST) {
8520 LIST_INSERT_HEAD(&nmp->nm_monlist, np, n_monlink);
8521 nfs_mount_sock_thread_wake(nmp);
8522 }
8523 } else {
8524 /* This vnode is no longer being monitored, make sure we're not tracking it. */
8525 /* Wait for any in-progress getattr to complete first. */
8526 while (np->n_mflag & NMMONSCANINPROG) {
8527 struct timespec ts = { .tv_sec = 1, .tv_nsec = 0 };
8528 np->n_mflag |= NMMONSCANWANT;
8529 msleep(&np->n_mflag, &nmp->nm_lock, PZERO - 1, "nfswaitmonscan", &ts);
8530 }
8531 if (np->n_monlink.le_next != NFSNOLIST) {
8532 LIST_REMOVE(np, n_monlink);
8533 np->n_monlink.le_next = NFSNOLIST;
8534 }
8535 }
8536 lck_mtx_unlock(&nmp->nm_lock);
8537
8538 return error;
8539 }
8540
8541 /*
8542 * Send a vnode notification for the given events.
8543 */
8544 void
8545 nfs_vnode_notify(nfsnode_t np, uint32_t events)
8546 {
8547 struct nfsmount *nmp = NFSTONMP(np);
8548 struct nfs_vattr nvattr;
8549 struct vnode_attr vattr, *vap = NULL;
8550 struct timeval now;
8551
8552 microuptime(&now);
8553 if ((np->n_evtstamp == now.tv_sec) || !nmp) {
8554 /* delay sending this notify */
8555 np->n_events |= events;
8556 return;
8557 }
8558 events |= np->n_events;
8559 np->n_events = 0;
8560 np->n_evtstamp = now.tv_sec;
8561
8562 vfs_get_notify_attributes(&vattr);
8563 if (!nfs_getattrcache(np, &nvattr, 0)) {
8564 vap = &vattr;
8565 VATTR_INIT(vap);
8566
8567 VATTR_RETURN(vap, va_fsid, vfs_statfs(nmp->nm_mountp)->f_fsid.val[0]);
8568 VATTR_RETURN(vap, va_fileid, nvattr.nva_fileid);
8569 VATTR_RETURN(vap, va_mode, nvattr.nva_mode);
8570 VATTR_RETURN(vap, va_uid, nvattr.nva_uid);
8571 VATTR_RETURN(vap, va_gid, nvattr.nva_gid);
8572 VATTR_RETURN(vap, va_nlink, nvattr.nva_nlink);
8573 }
8574 vnode_notify(NFSTOV(np), events, vap);
8575 }
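/*
 * [Editor's sketch] nfs_vnode_notify() rate-limits to one notification
 * per node per second: if the last event went out during the current
 * second (n_evtstamp), new event bits are just OR-ed into n_events and
 * delivered with the next notify.  A minimal model of that coalescing;
 * the names below are the editor's.
 */
#if 0   /* illustrative sketch only */
#include <stdint.h>
#include <time.h>

struct coalescer {
        time_t   last_stamp;            /* n_evtstamp analogue */
        uint32_t pending;               /* n_events analogue */
};

/* Returns the event mask to deliver now, or 0 if coalesced. */
static uint32_t
coalesce(struct coalescer *c, uint32_t events, time_t now)
{
        if (c->last_stamp == now) {
                c->pending |= events;   /* too soon: batch for later */
                return 0;
        }
        events |= c->pending;           /* deliver batched + new */
        c->pending = 0;
        c->last_stamp = now;
        return events;
}
#endif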
8576