/*
 * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California. All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)nfs_vnops.c	8.16 (Berkeley) 5/27/95
 * FreeBSD-Id: nfs_vnops.c,v 1.72 1997/11/07 09:20:48 phk Exp $
 */


/*
 * vnode op calls for Sun NFS version 2 and 3
 */
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/mount_internal.h>
#include <sys/malloc.h>
#include <sys/kpi_mbuf.h>
#include <sys/conf.h>
#include <sys/vnode_internal.h>
#include <sys/dirent.h>
#include <sys/fcntl.h>
#include <sys/lockf.h>
#include <sys/ubc_internal.h>
#include <sys/attr.h>
#include <sys/signalvar.h>
#include <sys/uio_internal.h>

#include <vfs/vfs_support.h>

#include <sys/vm.h>

#include <sys/time.h>
#include <kern/clock.h>
#include <libkern/OSAtomic.h>

#include <miscfs/fifofs/fifo.h>
#include <miscfs/specfs/specdev.h>

#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
#include <nfs/nfsnode.h>
#include <nfs/nfs_gss.h>
#include <nfs/nfsmount.h>
#include <nfs/nfs_lock.h>
#include <nfs/xdr_subs.h>
#include <nfs/nfsm_subs.h>

#include <net/if.h>
#include <netinet/in.h>
#include <netinet/in_var.h>

#include <vm/vm_kern.h>
#include <vm/vm_pageout.h>

#include <kern/task.h>
#include <kern/sched_prim.h>

/*
 * NFS vnode ops
 */
int	nfs_vnop_lookup(struct vnop_lookup_args *);
int	nfsspec_vnop_read(struct vnop_read_args *);
int	nfsspec_vnop_write(struct vnop_write_args *);
int	nfsspec_vnop_close(struct vnop_close_args *);
#if FIFO
int	nfsfifo_vnop_read(struct vnop_read_args *);
int	nfsfifo_vnop_write(struct vnop_write_args *);
int	nfsfifo_vnop_close(struct vnop_close_args *);
#endif
int	nfs_vnop_ioctl(struct vnop_ioctl_args *);
int	nfs_vnop_select(struct vnop_select_args *);
int	nfs_vnop_setattr(struct vnop_setattr_args *);
int	nfs_vnop_fsync(struct vnop_fsync_args *);
int	nfs_vnop_rename(struct vnop_rename_args *);
int	nfs_vnop_readdir(struct vnop_readdir_args *);
int	nfs_vnop_readlink(struct vnop_readlink_args *);
int	nfs_vnop_pathconf(struct vnop_pathconf_args *);
int	nfs_vnop_pagein(struct vnop_pagein_args *);
int	nfs_vnop_pageout(struct vnop_pageout_args *);
int	nfs_vnop_blktooff(struct vnop_blktooff_args *);
int	nfs_vnop_offtoblk(struct vnop_offtoblk_args *);
int	nfs_vnop_blockmap(struct vnop_blockmap_args *);
int	nfs_vnop_monitor(struct vnop_monitor_args *);

int	nfs3_vnop_create(struct vnop_create_args *);
int	nfs3_vnop_mknod(struct vnop_mknod_args *);
int	nfs3_vnop_getattr(struct vnop_getattr_args *);
int	nfs3_vnop_link(struct vnop_link_args *);
int	nfs3_vnop_mkdir(struct vnop_mkdir_args *);
int	nfs3_vnop_rmdir(struct vnop_rmdir_args *);
int	nfs3_vnop_symlink(struct vnop_symlink_args *);

vnop_t **nfsv2_vnodeop_p;
static struct vnodeopv_entry_desc nfsv2_vnodeop_entries[] = {
	{ &vnop_default_desc, (vnop_t *)vn_default_error },
	{ &vnop_lookup_desc, (vnop_t *)nfs_vnop_lookup },	/* lookup */
	{ &vnop_create_desc, (vnop_t *)nfs3_vnop_create },	/* create */
	{ &vnop_mknod_desc, (vnop_t *)nfs3_vnop_mknod },	/* mknod */
	{ &vnop_open_desc, (vnop_t *)nfs_vnop_open },		/* open */
	{ &vnop_close_desc, (vnop_t *)nfs_vnop_close },		/* close */
	{ &vnop_access_desc, (vnop_t *)nfs_vnop_access },	/* access */
	{ &vnop_getattr_desc, (vnop_t *)nfs3_vnop_getattr },	/* getattr */
	{ &vnop_setattr_desc, (vnop_t *)nfs_vnop_setattr },	/* setattr */
	{ &vnop_read_desc, (vnop_t *)nfs_vnop_read },		/* read */
	{ &vnop_write_desc, (vnop_t *)nfs_vnop_write },		/* write */
	{ &vnop_ioctl_desc, (vnop_t *)nfs_vnop_ioctl },		/* ioctl */
	{ &vnop_select_desc, (vnop_t *)nfs_vnop_select },	/* select */
	{ &vnop_revoke_desc, (vnop_t *)nfs_vnop_revoke },	/* revoke */
	{ &vnop_mmap_desc, (vnop_t *)nfs_vnop_mmap },		/* mmap */
	{ &vnop_mnomap_desc, (vnop_t *)nfs_vnop_mnomap },	/* mnomap */
	{ &vnop_fsync_desc, (vnop_t *)nfs_vnop_fsync },		/* fsync */
	{ &vnop_remove_desc, (vnop_t *)nfs_vnop_remove },	/* remove */
	{ &vnop_link_desc, (vnop_t *)nfs3_vnop_link },		/* link */
	{ &vnop_rename_desc, (vnop_t *)nfs_vnop_rename },	/* rename */
	{ &vnop_mkdir_desc, (vnop_t *)nfs3_vnop_mkdir },	/* mkdir */
	{ &vnop_rmdir_desc, (vnop_t *)nfs3_vnop_rmdir },	/* rmdir */
	{ &vnop_symlink_desc, (vnop_t *)nfs3_vnop_symlink },	/* symlink */
	{ &vnop_readdir_desc, (vnop_t *)nfs_vnop_readdir },	/* readdir */
	{ &vnop_readlink_desc, (vnop_t *)nfs_vnop_readlink },	/* readlink */
	{ &vnop_inactive_desc, (vnop_t *)nfs_vnop_inactive },	/* inactive */
	{ &vnop_reclaim_desc, (vnop_t *)nfs_vnop_reclaim },	/* reclaim */
	{ &vnop_strategy_desc, (vnop_t *)err_strategy },	/* strategy */
	{ &vnop_pathconf_desc, (vnop_t *)nfs_vnop_pathconf },	/* pathconf */
	{ &vnop_advlock_desc, (vnop_t *)nfs_vnop_advlock },	/* advlock */
	{ &vnop_bwrite_desc, (vnop_t *)err_bwrite },		/* bwrite */
	{ &vnop_pagein_desc, (vnop_t *)nfs_vnop_pagein },	/* Pagein */
	{ &vnop_pageout_desc, (vnop_t *)nfs_vnop_pageout },	/* Pageout */
	{ &vnop_copyfile_desc, (vnop_t *)err_copyfile },	/* Copyfile */
	{ &vnop_blktooff_desc, (vnop_t *)nfs_vnop_blktooff },	/* blktooff */
	{ &vnop_offtoblk_desc, (vnop_t *)nfs_vnop_offtoblk },	/* offtoblk */
	{ &vnop_blockmap_desc, (vnop_t *)nfs_vnop_blockmap },	/* blockmap */
	{ &vnop_monitor_desc, (vnop_t *)nfs_vnop_monitor },	/* monitor */
	{ NULL, NULL }
};
struct vnodeopv_desc nfsv2_vnodeop_opv_desc =
	{ &nfsv2_vnodeop_p, nfsv2_vnodeop_entries };

vnop_t **nfsv4_vnodeop_p;
static struct vnodeopv_entry_desc nfsv4_vnodeop_entries[] = {
	{ &vnop_default_desc, (vnop_t *)vn_default_error },
	{ &vnop_lookup_desc, (vnop_t *)nfs_vnop_lookup },	/* lookup */
	{ &vnop_create_desc, (vnop_t *)nfs4_vnop_create },	/* create */
	{ &vnop_mknod_desc, (vnop_t *)nfs4_vnop_mknod },	/* mknod */
	{ &vnop_open_desc, (vnop_t *)nfs_vnop_open },		/* open */
	{ &vnop_close_desc, (vnop_t *)nfs_vnop_close },		/* close */
	{ &vnop_access_desc, (vnop_t *)nfs_vnop_access },	/* access */
	{ &vnop_getattr_desc, (vnop_t *)nfs4_vnop_getattr },	/* getattr */
	{ &vnop_setattr_desc, (vnop_t *)nfs_vnop_setattr },	/* setattr */
	{ &vnop_read_desc, (vnop_t *)nfs_vnop_read },		/* read */
	{ &vnop_write_desc, (vnop_t *)nfs_vnop_write },		/* write */
	{ &vnop_ioctl_desc, (vnop_t *)nfs_vnop_ioctl },		/* ioctl */
	{ &vnop_select_desc, (vnop_t *)nfs_vnop_select },	/* select */
	{ &vnop_revoke_desc, (vnop_t *)nfs_vnop_revoke },	/* revoke */
	{ &vnop_mmap_desc, (vnop_t *)nfs_vnop_mmap },		/* mmap */
	{ &vnop_mnomap_desc, (vnop_t *)nfs_vnop_mnomap },	/* mnomap */
	{ &vnop_fsync_desc, (vnop_t *)nfs_vnop_fsync },		/* fsync */
	{ &vnop_remove_desc, (vnop_t *)nfs_vnop_remove },	/* remove */
	{ &vnop_link_desc, (vnop_t *)nfs4_vnop_link },		/* link */
	{ &vnop_rename_desc, (vnop_t *)nfs_vnop_rename },	/* rename */
	{ &vnop_mkdir_desc, (vnop_t *)nfs4_vnop_mkdir },	/* mkdir */
	{ &vnop_rmdir_desc, (vnop_t *)nfs4_vnop_rmdir },	/* rmdir */
	{ &vnop_symlink_desc, (vnop_t *)nfs4_vnop_symlink },	/* symlink */
	{ &vnop_readdir_desc, (vnop_t *)nfs_vnop_readdir },	/* readdir */
	{ &vnop_readlink_desc, (vnop_t *)nfs_vnop_readlink },	/* readlink */
	{ &vnop_inactive_desc, (vnop_t *)nfs_vnop_inactive },	/* inactive */
	{ &vnop_reclaim_desc, (vnop_t *)nfs_vnop_reclaim },	/* reclaim */
	{ &vnop_strategy_desc, (vnop_t *)err_strategy },	/* strategy */
	{ &vnop_pathconf_desc, (vnop_t *)nfs_vnop_pathconf },	/* pathconf */
	{ &vnop_advlock_desc, (vnop_t *)nfs_vnop_advlock },	/* advlock */
	{ &vnop_bwrite_desc, (vnop_t *)err_bwrite },		/* bwrite */
	{ &vnop_pagein_desc, (vnop_t *)nfs_vnop_pagein },	/* Pagein */
	{ &vnop_pageout_desc, (vnop_t *)nfs_vnop_pageout },	/* Pageout */
	{ &vnop_copyfile_desc, (vnop_t *)err_copyfile },	/* Copyfile */
	{ &vnop_blktooff_desc, (vnop_t *)nfs_vnop_blktooff },	/* blktooff */
	{ &vnop_offtoblk_desc, (vnop_t *)nfs_vnop_offtoblk },	/* offtoblk */
	{ &vnop_blockmap_desc, (vnop_t *)nfs_vnop_blockmap },	/* blockmap */
	{ &vnop_getxattr_desc, (vnop_t *)nfs4_vnop_getxattr },	/* getxattr */
	{ &vnop_setxattr_desc, (vnop_t *)nfs4_vnop_setxattr },	/* setxattr */
	{ &vnop_removexattr_desc, (vnop_t *)nfs4_vnop_removexattr },/* removexattr */
	{ &vnop_listxattr_desc, (vnop_t *)nfs4_vnop_listxattr },/* listxattr */
#if NAMEDSTREAMS
	{ &vnop_getnamedstream_desc, (vnop_t *)nfs4_vnop_getnamedstream },	/* getnamedstream */
	{ &vnop_makenamedstream_desc, (vnop_t *)nfs4_vnop_makenamedstream },	/* makenamedstream */
	{ &vnop_removenamedstream_desc, (vnop_t *)nfs4_vnop_removenamedstream },/* removenamedstream */
#endif
	{ &vnop_monitor_desc, (vnop_t *)nfs_vnop_monitor },	/* monitor */
	{ NULL, NULL }
};
struct vnodeopv_desc nfsv4_vnodeop_opv_desc =
	{ &nfsv4_vnodeop_p, nfsv4_vnodeop_entries };

/*
 * Special device vnode ops
 */
vnop_t **spec_nfsv2nodeop_p;
static struct vnodeopv_entry_desc spec_nfsv2nodeop_entries[] = {
	{ &vnop_default_desc, (vnop_t *)vn_default_error },
	{ &vnop_lookup_desc, (vnop_t *)spec_lookup },		/* lookup */
	{ &vnop_create_desc, (vnop_t *)spec_create },		/* create */
	{ &vnop_mknod_desc, (vnop_t *)spec_mknod },		/* mknod */
	{ &vnop_open_desc, (vnop_t *)spec_open },		/* open */
	{ &vnop_close_desc, (vnop_t *)nfsspec_vnop_close },	/* close */
	{ &vnop_getattr_desc, (vnop_t *)nfs3_vnop_getattr },	/* getattr */
	{ &vnop_setattr_desc, (vnop_t *)nfs_vnop_setattr },	/* setattr */
	{ &vnop_read_desc, (vnop_t *)nfsspec_vnop_read },	/* read */
	{ &vnop_write_desc, (vnop_t *)nfsspec_vnop_write },	/* write */
	{ &vnop_ioctl_desc, (vnop_t *)spec_ioctl },		/* ioctl */
	{ &vnop_select_desc, (vnop_t *)spec_select },		/* select */
	{ &vnop_revoke_desc, (vnop_t *)spec_revoke },		/* revoke */
	{ &vnop_mmap_desc, (vnop_t *)spec_mmap },		/* mmap */
	{ &vnop_fsync_desc, (vnop_t *)nfs_vnop_fsync },		/* fsync */
	{ &vnop_remove_desc, (vnop_t *)spec_remove },		/* remove */
	{ &vnop_link_desc, (vnop_t *)spec_link },		/* link */
	{ &vnop_rename_desc, (vnop_t *)spec_rename },		/* rename */
	{ &vnop_mkdir_desc, (vnop_t *)spec_mkdir },		/* mkdir */
	{ &vnop_rmdir_desc, (vnop_t *)spec_rmdir },		/* rmdir */
	{ &vnop_symlink_desc, (vnop_t *)spec_symlink },		/* symlink */
	{ &vnop_readdir_desc, (vnop_t *)spec_readdir },		/* readdir */
	{ &vnop_readlink_desc, (vnop_t *)spec_readlink },	/* readlink */
	{ &vnop_inactive_desc, (vnop_t *)nfs_vnop_inactive },	/* inactive */
	{ &vnop_reclaim_desc, (vnop_t *)nfs_vnop_reclaim },	/* reclaim */
	{ &vnop_strategy_desc, (vnop_t *)spec_strategy },	/* strategy */
	{ &vnop_pathconf_desc, (vnop_t *)spec_pathconf },	/* pathconf */
	{ &vnop_advlock_desc, (vnop_t *)spec_advlock },		/* advlock */
	{ &vnop_bwrite_desc, (vnop_t *)vn_bwrite },		/* bwrite */
	{ &vnop_pagein_desc, (vnop_t *)nfs_vnop_pagein },	/* Pagein */
	{ &vnop_pageout_desc, (vnop_t *)nfs_vnop_pageout },	/* Pageout */
	{ &vnop_blktooff_desc, (vnop_t *)nfs_vnop_blktooff },	/* blktooff */
	{ &vnop_offtoblk_desc, (vnop_t *)nfs_vnop_offtoblk },	/* offtoblk */
	{ &vnop_blockmap_desc, (vnop_t *)nfs_vnop_blockmap },	/* blockmap */
	{ &vnop_monitor_desc, (vnop_t *)nfs_vnop_monitor },	/* monitor */
	{ NULL, NULL }
};
struct vnodeopv_desc spec_nfsv2nodeop_opv_desc =
	{ &spec_nfsv2nodeop_p, spec_nfsv2nodeop_entries };
vnop_t **spec_nfsv4nodeop_p;
static struct vnodeopv_entry_desc spec_nfsv4nodeop_entries[] = {
	{ &vnop_default_desc, (vnop_t *)vn_default_error },
	{ &vnop_lookup_desc, (vnop_t *)spec_lookup },		/* lookup */
	{ &vnop_create_desc, (vnop_t *)spec_create },		/* create */
	{ &vnop_mknod_desc, (vnop_t *)spec_mknod },		/* mknod */
	{ &vnop_open_desc, (vnop_t *)spec_open },		/* open */
	{ &vnop_close_desc, (vnop_t *)nfsspec_vnop_close },	/* close */
	{ &vnop_getattr_desc, (vnop_t *)nfs4_vnop_getattr },	/* getattr */
	{ &vnop_setattr_desc, (vnop_t *)nfs_vnop_setattr },	/* setattr */
	{ &vnop_read_desc, (vnop_t *)nfsspec_vnop_read },	/* read */
	{ &vnop_write_desc, (vnop_t *)nfsspec_vnop_write },	/* write */
	{ &vnop_ioctl_desc, (vnop_t *)spec_ioctl },		/* ioctl */
	{ &vnop_select_desc, (vnop_t *)spec_select },		/* select */
	{ &vnop_revoke_desc, (vnop_t *)spec_revoke },		/* revoke */
	{ &vnop_mmap_desc, (vnop_t *)spec_mmap },		/* mmap */
	{ &vnop_fsync_desc, (vnop_t *)nfs_vnop_fsync },		/* fsync */
	{ &vnop_remove_desc, (vnop_t *)spec_remove },		/* remove */
	{ &vnop_link_desc, (vnop_t *)spec_link },		/* link */
	{ &vnop_rename_desc, (vnop_t *)spec_rename },		/* rename */
	{ &vnop_mkdir_desc, (vnop_t *)spec_mkdir },		/* mkdir */
	{ &vnop_rmdir_desc, (vnop_t *)spec_rmdir },		/* rmdir */
	{ &vnop_symlink_desc, (vnop_t *)spec_symlink },		/* symlink */
	{ &vnop_readdir_desc, (vnop_t *)spec_readdir },		/* readdir */
	{ &vnop_readlink_desc, (vnop_t *)spec_readlink },	/* readlink */
	{ &vnop_inactive_desc, (vnop_t *)nfs_vnop_inactive },	/* inactive */
	{ &vnop_reclaim_desc, (vnop_t *)nfs_vnop_reclaim },	/* reclaim */
	{ &vnop_strategy_desc, (vnop_t *)spec_strategy },	/* strategy */
	{ &vnop_pathconf_desc, (vnop_t *)spec_pathconf },	/* pathconf */
	{ &vnop_advlock_desc, (vnop_t *)spec_advlock },		/* advlock */
	{ &vnop_bwrite_desc, (vnop_t *)vn_bwrite },		/* bwrite */
	{ &vnop_pagein_desc, (vnop_t *)nfs_vnop_pagein },	/* Pagein */
	{ &vnop_pageout_desc, (vnop_t *)nfs_vnop_pageout },	/* Pageout */
	{ &vnop_blktooff_desc, (vnop_t *)nfs_vnop_blktooff },	/* blktooff */
	{ &vnop_offtoblk_desc, (vnop_t *)nfs_vnop_offtoblk },	/* offtoblk */
	{ &vnop_blockmap_desc, (vnop_t *)nfs_vnop_blockmap },	/* blockmap */
	{ &vnop_getxattr_desc, (vnop_t *)nfs4_vnop_getxattr },	/* getxattr */
	{ &vnop_setxattr_desc, (vnop_t *)nfs4_vnop_setxattr },	/* setxattr */
	{ &vnop_removexattr_desc, (vnop_t *)nfs4_vnop_removexattr },/* removexattr */
	{ &vnop_listxattr_desc, (vnop_t *)nfs4_vnop_listxattr },/* listxattr */
#if NAMEDSTREAMS
	{ &vnop_getnamedstream_desc, (vnop_t *)nfs4_vnop_getnamedstream },	/* getnamedstream */
	{ &vnop_makenamedstream_desc, (vnop_t *)nfs4_vnop_makenamedstream },	/* makenamedstream */
	{ &vnop_removenamedstream_desc, (vnop_t *)nfs4_vnop_removenamedstream },/* removenamedstream */
#endif
	{ &vnop_monitor_desc, (vnop_t *)nfs_vnop_monitor },	/* monitor */
	{ NULL, NULL }
};
struct vnodeopv_desc spec_nfsv4nodeop_opv_desc =
	{ &spec_nfsv4nodeop_p, spec_nfsv4nodeop_entries };

#if FIFO
vnop_t **fifo_nfsv2nodeop_p;
static struct vnodeopv_entry_desc fifo_nfsv2nodeop_entries[] = {
	{ &vnop_default_desc, (vnop_t *)vn_default_error },
	{ &vnop_lookup_desc, (vnop_t *)fifo_lookup },		/* lookup */
	{ &vnop_create_desc, (vnop_t *)fifo_create },		/* create */
	{ &vnop_mknod_desc, (vnop_t *)fifo_mknod },		/* mknod */
	{ &vnop_open_desc, (vnop_t *)fifo_open },		/* open */
	{ &vnop_close_desc, (vnop_t *)nfsfifo_vnop_close },	/* close */
	{ &vnop_getattr_desc, (vnop_t *)nfs3_vnop_getattr },	/* getattr */
	{ &vnop_setattr_desc, (vnop_t *)nfs_vnop_setattr },	/* setattr */
	{ &vnop_read_desc, (vnop_t *)nfsfifo_vnop_read },	/* read */
	{ &vnop_write_desc, (vnop_t *)nfsfifo_vnop_write },	/* write */
	{ &vnop_ioctl_desc, (vnop_t *)fifo_ioctl },		/* ioctl */
	{ &vnop_select_desc, (vnop_t *)fifo_select },		/* select */
	{ &vnop_revoke_desc, (vnop_t *)fifo_revoke },		/* revoke */
	{ &vnop_mmap_desc, (vnop_t *)fifo_mmap },		/* mmap */
	{ &vnop_fsync_desc, (vnop_t *)nfs_vnop_fsync },		/* fsync */
	{ &vnop_remove_desc, (vnop_t *)fifo_remove },		/* remove */
	{ &vnop_link_desc, (vnop_t *)fifo_link },		/* link */
	{ &vnop_rename_desc, (vnop_t *)fifo_rename },		/* rename */
	{ &vnop_mkdir_desc, (vnop_t *)fifo_mkdir },		/* mkdir */
	{ &vnop_rmdir_desc, (vnop_t *)fifo_rmdir },		/* rmdir */
	{ &vnop_symlink_desc, (vnop_t *)fifo_symlink },		/* symlink */
	{ &vnop_readdir_desc, (vnop_t *)fifo_readdir },		/* readdir */
	{ &vnop_readlink_desc, (vnop_t *)fifo_readlink },	/* readlink */
	{ &vnop_inactive_desc, (vnop_t *)nfs_vnop_inactive },	/* inactive */
	{ &vnop_reclaim_desc, (vnop_t *)nfs_vnop_reclaim },	/* reclaim */
	{ &vnop_strategy_desc, (vnop_t *)fifo_strategy },	/* strategy */
	{ &vnop_pathconf_desc, (vnop_t *)fifo_pathconf },	/* pathconf */
	{ &vnop_advlock_desc, (vnop_t *)fifo_advlock },		/* advlock */
	{ &vnop_bwrite_desc, (vnop_t *)vn_bwrite },		/* bwrite */
	{ &vnop_pagein_desc, (vnop_t *)nfs_vnop_pagein },	/* Pagein */
	{ &vnop_pageout_desc, (vnop_t *)nfs_vnop_pageout },	/* Pageout */
	{ &vnop_blktooff_desc, (vnop_t *)nfs_vnop_blktooff },	/* blktooff */
	{ &vnop_offtoblk_desc, (vnop_t *)nfs_vnop_offtoblk },	/* offtoblk */
	{ &vnop_blockmap_desc, (vnop_t *)nfs_vnop_blockmap },	/* blockmap */
	{ &vnop_monitor_desc, (vnop_t *)nfs_vnop_monitor },	/* monitor */
	{ NULL, NULL }
};
struct vnodeopv_desc fifo_nfsv2nodeop_opv_desc =
	{ &fifo_nfsv2nodeop_p, fifo_nfsv2nodeop_entries };

vnop_t **fifo_nfsv4nodeop_p;
static struct vnodeopv_entry_desc fifo_nfsv4nodeop_entries[] = {
	{ &vnop_default_desc, (vnop_t *)vn_default_error },
	{ &vnop_lookup_desc, (vnop_t *)fifo_lookup },		/* lookup */
	{ &vnop_create_desc, (vnop_t *)fifo_create },		/* create */
	{ &vnop_mknod_desc, (vnop_t *)fifo_mknod },		/* mknod */
	{ &vnop_open_desc, (vnop_t *)fifo_open },		/* open */
	{ &vnop_close_desc, (vnop_t *)nfsfifo_vnop_close },	/* close */
	{ &vnop_getattr_desc, (vnop_t *)nfs4_vnop_getattr },	/* getattr */
	{ &vnop_setattr_desc, (vnop_t *)nfs_vnop_setattr },	/* setattr */
	{ &vnop_read_desc, (vnop_t *)nfsfifo_vnop_read },	/* read */
	{ &vnop_write_desc, (vnop_t *)nfsfifo_vnop_write },	/* write */
	{ &vnop_ioctl_desc, (vnop_t *)fifo_ioctl },		/* ioctl */
	{ &vnop_select_desc, (vnop_t *)fifo_select },		/* select */
	{ &vnop_revoke_desc, (vnop_t *)fifo_revoke },		/* revoke */
	{ &vnop_mmap_desc, (vnop_t *)fifo_mmap },		/* mmap */
	{ &vnop_fsync_desc, (vnop_t *)nfs_vnop_fsync },		/* fsync */
	{ &vnop_remove_desc, (vnop_t *)fifo_remove },		/* remove */
	{ &vnop_link_desc, (vnop_t *)fifo_link },		/* link */
	{ &vnop_rename_desc, (vnop_t *)fifo_rename },		/* rename */
	{ &vnop_mkdir_desc, (vnop_t *)fifo_mkdir },		/* mkdir */
	{ &vnop_rmdir_desc, (vnop_t *)fifo_rmdir },		/* rmdir */
	{ &vnop_symlink_desc, (vnop_t *)fifo_symlink },		/* symlink */
	{ &vnop_readdir_desc, (vnop_t *)fifo_readdir },		/* readdir */
	{ &vnop_readlink_desc, (vnop_t *)fifo_readlink },	/* readlink */
	{ &vnop_inactive_desc, (vnop_t *)nfs_vnop_inactive },	/* inactive */
	{ &vnop_reclaim_desc, (vnop_t *)nfs_vnop_reclaim },	/* reclaim */
	{ &vnop_strategy_desc, (vnop_t *)fifo_strategy },	/* strategy */
	{ &vnop_pathconf_desc, (vnop_t *)fifo_pathconf },	/* pathconf */
	{ &vnop_advlock_desc, (vnop_t *)fifo_advlock },		/* advlock */
	{ &vnop_bwrite_desc, (vnop_t *)vn_bwrite },		/* bwrite */
	{ &vnop_pagein_desc, (vnop_t *)nfs_vnop_pagein },	/* Pagein */
	{ &vnop_pageout_desc, (vnop_t *)nfs_vnop_pageout },	/* Pageout */
	{ &vnop_blktooff_desc, (vnop_t *)nfs_vnop_blktooff },	/* blktooff */
	{ &vnop_offtoblk_desc, (vnop_t *)nfs_vnop_offtoblk },	/* offtoblk */
	{ &vnop_blockmap_desc, (vnop_t *)nfs_vnop_blockmap },	/* blockmap */
	{ &vnop_getxattr_desc, (vnop_t *)nfs4_vnop_getxattr },	/* getxattr */
	{ &vnop_setxattr_desc, (vnop_t *)nfs4_vnop_setxattr },	/* setxattr */
	{ &vnop_removexattr_desc, (vnop_t *)nfs4_vnop_removexattr },/* removexattr */
	{ &vnop_listxattr_desc, (vnop_t *)nfs4_vnop_listxattr },/* listxattr */
#if NAMEDSTREAMS
	{ &vnop_getnamedstream_desc, (vnop_t *)nfs4_vnop_getnamedstream },	/* getnamedstream */
	{ &vnop_makenamedstream_desc, (vnop_t *)nfs4_vnop_makenamedstream },	/* makenamedstream */
	{ &vnop_removenamedstream_desc, (vnop_t *)nfs4_vnop_removenamedstream },/* removenamedstream */
#endif
	{ &vnop_monitor_desc, (vnop_t *)nfs_vnop_monitor },	/* monitor */
	{ NULL, NULL }
};
struct vnodeopv_desc fifo_nfsv4nodeop_opv_desc =
	{ &fifo_nfsv4nodeop_p, fifo_nfsv4nodeop_entries };
#endif /* FIFO */


int	nfs_sillyrename(nfsnode_t,nfsnode_t,struct componentname *,vfs_context_t);

/*
 * Find the slot in the access cache for this UID.
 * If adding and no existing slot is found, reuse slots in FIFO order.
 * The index of the next slot to use is kept in the last entry of the n_access array.
 */
int
nfs_node_access_slot(nfsnode_t np, uid_t uid, int add)
{
	int slot;

	for (slot=0; slot < NFS_ACCESS_CACHE_SIZE; slot++)
		if (np->n_accessuid[slot] == uid)
			break;
	if (slot == NFS_ACCESS_CACHE_SIZE) {
		if (!add)
			return (-1);
		slot = np->n_access[NFS_ACCESS_CACHE_SIZE];
		np->n_access[NFS_ACCESS_CACHE_SIZE] = (slot + 1) % NFS_ACCESS_CACHE_SIZE;
	}
	return (slot);
}

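/*
 * For illustration only (assuming, purely for the example, a cache size
 * of 3 rather than whatever NFS_ACCESS_CACHE_SIZE actually is): with uids
 * 100, 200, 300 cached in slots 0-2 and the next-slot index at 0, a call
 * nfs_node_access_slot(np, 400, 1) finds no match, hands back slot 0 for
 * reuse, and advances the next-slot index to 1, so the following miss
 * would recycle slot 1.
 */

/*
 * Perform an NFSv3 ACCESS RPC for this node, then record the rights the
 * server returned in the node's access cache slot for the caller's uid.
 */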
int
nfs3_access_rpc(nfsnode_t np, u_int32_t *access, vfs_context_t ctx)
{
	int error = 0, lockerror = ENOENT, status, slot;
	uint32_t access_result = 0;
	u_int64_t xid;
	struct nfsm_chain nmreq, nmrep;
	struct timeval now;
	uid_t uid;

	nfsm_chain_null(&nmreq);
	nfsm_chain_null(&nmrep);

	nfsm_chain_build_alloc_init(error, &nmreq, NFSX_FH(NFS_VER3) + NFSX_UNSIGNED);
	nfsm_chain_add_fh(error, &nmreq, NFS_VER3, np->n_fhp, np->n_fhsize);
	nfsm_chain_add_32(error, &nmreq, *access);
	nfsm_chain_build_done(error, &nmreq);
	nfsmout_if(error);
	error = nfs_request(np, NULL, &nmreq, NFSPROC_ACCESS, ctx, NULL, &nmrep, &xid, &status);
	if ((lockerror = nfs_node_lock(np)))
		error = lockerror;
	nfsm_chain_postop_attr_update(error, &nmrep, np, &xid);
	if (!error)
		error = status;
	nfsm_chain_get_32(error, &nmrep, access_result);
	nfsmout_if(error);

	uid = kauth_cred_getuid(vfs_context_ucred(ctx));
	slot = nfs_node_access_slot(np, uid, 1);
	np->n_accessuid[slot] = uid;
	microuptime(&now);
	np->n_accessstamp[slot] = now.tv_sec;
	np->n_access[slot] = access_result;

	/*
	 * If we asked for DELETE but didn't get it, the server
	 * may simply not support returning that bit (possible
	 * on UNIX systems). So, we'll assume that it is OK,
	 * and just let any subsequent delete action fail if it
	 * really isn't deletable.
	 */
	if ((*access & NFS_ACCESS_DELETE) &&
	    !(np->n_access[slot] & NFS_ACCESS_DELETE))
		np->n_access[slot] |= NFS_ACCESS_DELETE;
	/* ".zfs" subdirectories may erroneously give a denied answer for add/remove */
	if (nfs_access_dotzfs && (np->n_flag & NISDOTZFSCHILD))
		np->n_access[slot] |= (NFS_ACCESS_MODIFY|NFS_ACCESS_EXTEND|NFS_ACCESS_DELETE);
	/* pass back the access returned with this request */
	*access = np->n_access[slot];
nfsmout:
	if (!lockerror)
		nfs_node_unlock(np);
	nfsm_chain_cleanup(&nmreq);
	nfsm_chain_cleanup(&nmrep);
	return (error);
}

/*
 * NFS access vnode op.
 * For NFS version 2, just return ok. File accesses may fail later.
 * For NFS version 3+, use the access RPC to check accessibility. If file
 * permissions are changed on the server, accesses might still fail later.
 */
int
nfs_vnop_access(
	struct vnop_access_args /* {
		struct vnodeop_desc *a_desc;
		vnode_t a_vp;
		int a_action;
		vfs_context_t a_context;
	} */ *ap)
{
	vfs_context_t ctx = ap->a_context;
	vnode_t vp = ap->a_vp;
	int error = 0, slot, dorpc;
	u_int32_t access, waccess;
	nfsnode_t np = VTONFS(vp);
	struct nfsmount *nmp;
	int nfsvers;
	struct timeval now;
	uid_t uid;

	nmp = VTONMP(vp);
	if (!nmp)
		return (ENXIO);
	nfsvers = nmp->nm_vers;

	if (nfsvers == NFS_VER2) {
		if ((ap->a_action & KAUTH_VNODE_WRITE_RIGHTS) &&
		    vfs_isrdonly(vnode_mount(vp)))
			return (EROFS);
		return (0);
	}

	/*
	 * For NFS v3, do an access rpc, otherwise you are stuck emulating
	 * ufs_access() locally using the vattr. This may not be correct,
	 * since the server may apply other access criteria such as
	 * client uid-->server uid mapping that we do not know about, but
	 * this is better than just returning anything that is lying about
	 * in the cache.
	 */

	/*
	 * Convert KAUTH primitives to NFS access rights.
	 */
	access = 0;
	if (vnode_isdir(vp)) {
		/* directory */
		if (ap->a_action &
		    (KAUTH_VNODE_LIST_DIRECTORY |
		    KAUTH_VNODE_READ_EXTATTRIBUTES))
			access |= NFS_ACCESS_READ;
		if (ap->a_action & KAUTH_VNODE_SEARCH)
			access |= NFS_ACCESS_LOOKUP;
		if (ap->a_action &
		    (KAUTH_VNODE_ADD_FILE |
		    KAUTH_VNODE_ADD_SUBDIRECTORY))
			access |= NFS_ACCESS_MODIFY | NFS_ACCESS_EXTEND;
		if (ap->a_action & KAUTH_VNODE_DELETE_CHILD)
			access |= NFS_ACCESS_MODIFY;
	} else {
		/* file */
		if (ap->a_action &
		    (KAUTH_VNODE_READ_DATA |
		    KAUTH_VNODE_READ_EXTATTRIBUTES))
			access |= NFS_ACCESS_READ;
		if (ap->a_action & KAUTH_VNODE_WRITE_DATA)
			access |= NFS_ACCESS_MODIFY | NFS_ACCESS_EXTEND;
		if (ap->a_action & KAUTH_VNODE_APPEND_DATA)
			access |= NFS_ACCESS_EXTEND;
		if (ap->a_action & KAUTH_VNODE_EXECUTE)
			access |= NFS_ACCESS_EXECUTE;
	}
	/* common */
	if (ap->a_action & KAUTH_VNODE_DELETE)
		access |= NFS_ACCESS_DELETE;
	if (ap->a_action &
	    (KAUTH_VNODE_WRITE_ATTRIBUTES |
	    KAUTH_VNODE_WRITE_EXTATTRIBUTES |
	    KAUTH_VNODE_WRITE_SECURITY))
		access |= NFS_ACCESS_MODIFY;
	/* XXX this is pretty dubious */
	if (ap->a_action & KAUTH_VNODE_CHANGE_OWNER)
		access |= NFS_ACCESS_MODIFY;

	/* if caching, always ask for every right */
	if (nfs_access_cache_timeout > 0) {
		waccess = NFS_ACCESS_READ | NFS_ACCESS_MODIFY |
			NFS_ACCESS_EXTEND | NFS_ACCESS_EXECUTE |
			NFS_ACCESS_DELETE | NFS_ACCESS_LOOKUP;
	} else {
		waccess = access;
	}

	if ((error = nfs_node_lock(np)))
		return (error);

	/*
	 * Does our cached result allow us to give a definite yes to
	 * this request?
	 */
	uid = kauth_cred_getuid(vfs_context_ucred(ctx));
	slot = nfs_node_access_slot(np, uid, 0);
	dorpc = 1;
	if (access == 0) {
		/* not asking for any rights understood by NFS, so don't bother doing an RPC */
		/* OSAddAtomic(1, &nfsstats.accesscache_hits); */
		dorpc = 0;
		waccess = 0;
	} else if (NACCESSVALID(np, slot)) {
		microuptime(&now);
		if ((now.tv_sec < (np->n_accessstamp[slot] + nfs_access_cache_timeout)) &&
		    ((np->n_access[slot] & access) == access)) {
			/* OSAddAtomic(1, &nfsstats.accesscache_hits); */
			dorpc = 0;
			waccess = np->n_access[slot];
		}
	}
	nfs_node_unlock(np);
	if (dorpc) {
		/* Either a no, or a don't know. Go to the wire. */
		/* OSAddAtomic(1, &nfsstats.accesscache_misses); */
		error = nmp->nm_funcs->nf_access_rpc(np, &waccess, ctx);
	}
	if (!error && ((waccess & access) != access))
		error = EACCES;

	return (error);
}


/*
 * NFS open vnode op
 *
 * Perform various update/invalidation checks and then add the
 * open to the node. Regular files will have an open file structure
 * on the node and, for NFSv4, perform an OPEN request on the server.
 */
int
nfs_vnop_open(
	struct vnop_open_args /* {
		struct vnodeop_desc *a_desc;
		vnode_t a_vp;
		int a_mode;
		vfs_context_t a_context;
	} */ *ap)
{
	vfs_context_t ctx = ap->a_context;
	vnode_t vp = ap->a_vp;
	nfsnode_t np = VTONFS(vp);
	struct nfsmount *nmp;
	int error, accessMode, denyMode, opened = 0;
	struct nfs_open_owner *noop = NULL;
	struct nfs_open_file *nofp = NULL;
	enum vtype vtype;

	if (!(ap->a_mode & (FREAD|FWRITE)))
		return (EINVAL);

	nmp = VTONMP(vp);
	if (!nmp)
		return (ENXIO);
	if (np->n_flag & NREVOKE)
		return (EIO);

	vtype = vnode_vtype(vp);
	if ((vtype != VREG) && (vtype != VDIR) && (vtype != VLNK))
		return (EACCES);

	/* First, check if we need to update/invalidate */
	if (ISSET(np->n_flag, NUPDATESIZE))
		nfs_data_update_size(np, 0);
	if ((error = nfs_node_lock(np)))
		return (error);
	if (np->n_flag & NNEEDINVALIDATE) {
		np->n_flag &= ~NNEEDINVALIDATE;
		if (vtype == VDIR)
			nfs_invaldir(np);
		nfs_node_unlock(np);
		nfs_vinvalbuf(vp, V_SAVE|V_IGNORE_WRITEERR, ctx, 1);
		if ((error = nfs_node_lock(np)))
			return (error);
	}
	if (vtype == VREG)
		np->n_lastrahead = -1;
	if (np->n_flag & NMODIFIED) {
		if (vtype == VDIR)
			nfs_invaldir(np);
		nfs_node_unlock(np);
		if ((error = nfs_vinvalbuf(vp, V_SAVE|V_IGNORE_WRITEERR, ctx, 1)))
			return (error);
	} else {
		nfs_node_unlock(np);
	}

	/* nfs_getattr() will check changed and purge caches */
	if ((error = nfs_getattr(np, NULL, ctx, NGA_UNCACHED)))
		return (error);

	if (vtype != VREG) {
		/* Just mark that it was opened */
		lck_mtx_lock(&np->n_openlock);
		np->n_openrefcnt++;
		lck_mtx_unlock(&np->n_openlock);
		return (0);
	}

	/* mode contains some combination of: FREAD, FWRITE, O_SHLOCK, O_EXLOCK */
	accessMode = 0;
	if (ap->a_mode & FREAD)
		accessMode |= NFS_OPEN_SHARE_ACCESS_READ;
	if (ap->a_mode & FWRITE)
		accessMode |= NFS_OPEN_SHARE_ACCESS_WRITE;
	if (ap->a_mode & O_EXLOCK)
		denyMode = NFS_OPEN_SHARE_DENY_BOTH;
	else if (ap->a_mode & O_SHLOCK)
		denyMode = NFS_OPEN_SHARE_DENY_WRITE;
	else
		denyMode = NFS_OPEN_SHARE_DENY_NONE;
	// XXX don't do deny modes just yet (and never do it for !v4)
	denyMode = NFS_OPEN_SHARE_DENY_NONE;

	noop = nfs_open_owner_find(nmp, vfs_context_ucred(ctx), 1);
	if (!noop)
		return (ENOMEM);

restart:
	error = nfs_mount_state_in_use_start(nmp, vfs_context_thread(ctx));
	if (error) {
		nfs_open_owner_rele(noop);
		return (error);
	}
	if (np->n_flag & NREVOKE) {
		error = EIO;
		nfs_mount_state_in_use_end(nmp, 0);
		nfs_open_owner_rele(noop);
		return (error);
	}

	error = nfs_open_file_find(np, noop, &nofp, accessMode, denyMode, 1);
	if (!error && (nofp->nof_flags & NFS_OPEN_FILE_LOST)) {
		NP(np, "nfs_vnop_open: LOST %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
		error = EIO;
	}
	if (!error && (nofp->nof_flags & NFS_OPEN_FILE_REOPEN)) {
		nfs_mount_state_in_use_end(nmp, 0);
		error = nfs4_reopen(nofp, vfs_context_thread(ctx));
		nofp = NULL;
		if (!error)
			goto restart;
	}
	if (!error)
		error = nfs_open_file_set_busy(nofp, vfs_context_thread(ctx));
	if (error) {
		nofp = NULL;
		goto out;
	}

	if (nmp->nm_vers < NFS_VER4) {
		/*
		 * NFS v2/v3 opens are always allowed - so just add it.
		 */
		nfs_open_file_add_open(nofp, accessMode, denyMode, 0);
		goto out;
	}

	/*
	 * If we just created the file and the modes match, then we simply use
	 * the open performed in the create. Otherwise, send the request.
	 */
	if ((nofp->nof_flags & NFS_OPEN_FILE_CREATE) &&
	    (nofp->nof_creator == current_thread()) &&
	    (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH) &&
	    (denyMode == NFS_OPEN_SHARE_DENY_NONE)) {
		nofp->nof_flags &= ~NFS_OPEN_FILE_CREATE;
		nofp->nof_creator = NULL;
	} else {
		if (!opened)
			error = nfs4_open(np, nofp, accessMode, denyMode, ctx);
		if ((error == EACCES) && (nofp->nof_flags & NFS_OPEN_FILE_CREATE) &&
		    (nofp->nof_creator == current_thread())) {
			/*
			 * Ugh. This can happen if we just created the file with read-only
			 * perms and we're trying to open it for real with different modes
			 * (e.g. write-only or with a deny mode) and the server decides to
			 * not allow the second open because of the read-only perms.
			 * The best we can do is to just use the create's open.
			 * We may have access we don't need or we may not have a requested
			 * deny mode. We may log complaints later, but we'll try to avoid it.
			 */
			if (denyMode != NFS_OPEN_SHARE_DENY_NONE)
				NP(np, "nfs_vnop_open: deny mode foregone on create, %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
			nofp->nof_creator = NULL;
			error = 0;
		}
		if (error)
			goto out;
		opened = 1;
		/*
		 * If we had just created the file, we already had it open.
		 * If the actual open mode is less than what we grabbed at
		 * create time, then we'll downgrade the open here.
		 */
		if ((nofp->nof_flags & NFS_OPEN_FILE_CREATE) &&
		    (nofp->nof_creator == current_thread())) {
			error = nfs_close(np, nofp, NFS_OPEN_SHARE_ACCESS_BOTH, NFS_OPEN_SHARE_DENY_NONE, ctx);
			if (error)
				NP(np, "nfs_vnop_open: create close error %d, %d", error, kauth_cred_getuid(nofp->nof_owner->noo_cred));
			if (!nfs_mount_state_error_should_restart(error)) {
				error = 0;
				nofp->nof_flags &= ~NFS_OPEN_FILE_CREATE;
			}
		}
	}

out:
	if (nofp)
		nfs_open_file_clear_busy(nofp);
	if (nfs_mount_state_in_use_end(nmp, error)) {
		nofp = NULL;
		goto restart;
	}
	if (error)
		NP(np, "nfs_vnop_open: error %d, %d", error, kauth_cred_getuid(noop->noo_cred));
	if (noop)
		nfs_open_owner_rele(noop);
	if (!error && vtype == VREG && (ap->a_mode & FWRITE)) {
		lck_mtx_lock(&nmp->nm_lock);
		nmp->nm_state &= ~NFSSTA_SQUISHY;
		nmp->nm_curdeadtimeout = nmp->nm_deadtimeout;
		if (nmp->nm_curdeadtimeout <= 0)
			nmp->nm_deadto_start = 0;
		nmp->nm_writers++;
		lck_mtx_unlock(&nmp->nm_lock);
	}

	return (error);
}

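/*
 * Tally all of the write opens (write-only, read-write, delegated, and
 * their deny-mode variants) currently held on the node's open files.
 */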
static uint32_t
nfs_no_of_open_file_writers(nfsnode_t np)
{
	uint32_t writers = 0;
	struct nfs_open_file *nofp;

	TAILQ_FOREACH(nofp, &np->n_opens, nof_link) {
		writers += nofp->nof_w + nofp->nof_rw + nofp->nof_w_dw + nofp->nof_rw_dw +
			nofp->nof_w_drw + nofp->nof_rw_drw + nofp->nof_d_w_dw +
			nofp->nof_d_rw_dw + nofp->nof_d_w_drw + nofp->nof_d_rw_drw +
			nofp->nof_d_w + nofp->nof_d_rw;
	}

	return (writers);
}

/*
 * NFS close vnode op
 *
 * What an NFS client should do upon close after writing is a debatable issue.
 * Most NFS clients push delayed writes to the server upon close, basically for
 * two reasons:
 * 1 - So that any write errors may be reported back to the client process
 *     doing the close system call. By far the two most likely errors are
 *     NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure.
 * 2 - To put a worst case upper bound on cache inconsistency between
 *     multiple clients for the file.
 * There is also a consistency problem for Version 2 of the protocol w.r.t.
 * not being able to tell if other clients are writing a file concurrently,
 * since there is no way of knowing if the changed modify time in the reply
 * is only due to the write for this client.
 * (NFS Version 3 provides weak cache consistency data in the reply that
 * should be sufficient to detect and handle this case.)
 *
 * The current code does the following:
 * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers
 * for NFS Version 3 - flush dirty buffers to the server but don't invalidate them.
 * for NFS Version 4 - basically the same as NFSv3
 */
int
nfs_vnop_close(
	struct vnop_close_args /* {
		struct vnodeop_desc *a_desc;
		vnode_t a_vp;
		int a_fflag;
		vfs_context_t a_context;
	} */ *ap)
{
	vfs_context_t ctx = ap->a_context;
	vnode_t vp = ap->a_vp;
	nfsnode_t np = VTONFS(vp);
	struct nfsmount *nmp;
	int error = 0, error1, nfsvers;
	int fflag = ap->a_fflag;
	enum vtype vtype;
	int accessMode, denyMode;
	struct nfs_open_owner *noop = NULL;
	struct nfs_open_file *nofp = NULL;

	nmp = VTONMP(vp);
	if (!nmp)
		return (ENXIO);
	nfsvers = nmp->nm_vers;
	vtype = vnode_vtype(vp);

	/* First, check if we need to update/flush/invalidate */
	if (ISSET(np->n_flag, NUPDATESIZE))
		nfs_data_update_size(np, 0);
	nfs_node_lock_force(np);
	if (np->n_flag & NNEEDINVALIDATE) {
		np->n_flag &= ~NNEEDINVALIDATE;
		nfs_node_unlock(np);
		nfs_vinvalbuf(vp, V_SAVE|V_IGNORE_WRITEERR, ctx, 1);
		nfs_node_lock_force(np);
	}
	if ((vtype == VREG) && (np->n_flag & NMODIFIED) && (fflag & FWRITE)) {
		/* we're closing an open for write and the file is modified, so flush it */
		nfs_node_unlock(np);
		if (nfsvers != NFS_VER2)
			error = nfs_flush(np, MNT_WAIT, vfs_context_thread(ctx), 0);
		else
			error = nfs_vinvalbuf(vp, V_SAVE, ctx, 1);
		nfs_node_lock_force(np);
		NATTRINVALIDATE(np);
	}
	if (np->n_flag & NWRITEERR) {
		np->n_flag &= ~NWRITEERR;
		error = np->n_error;
	}
	nfs_node_unlock(np);

	if (vtype != VREG) {
		/* Just mark that it was closed */
		lck_mtx_lock(&np->n_openlock);
		if (np->n_openrefcnt == 0) {
			if (fflag & (FREAD|FWRITE)) {
				NP(np, "nfs_vnop_close: open reference underrun");
				error = EINVAL;
			}
		} else if (fflag & (FREAD|FWRITE)) {
			np->n_openrefcnt--;
		} else {
			/* No FREAD/FWRITE set - probably the final close */
			np->n_openrefcnt = 0;
		}
		lck_mtx_unlock(&np->n_openlock);
		return (error);
	}
	error1 = error;

	/* fflag should contain some combination of: FREAD, FWRITE, FHASLOCK */
	accessMode = 0;
	if (fflag & FREAD)
		accessMode |= NFS_OPEN_SHARE_ACCESS_READ;
	if (fflag & FWRITE)
		accessMode |= NFS_OPEN_SHARE_ACCESS_WRITE;
	// XXX It would be nice if we still had the O_EXLOCK/O_SHLOCK flags that were on the open
	// if (fflag & O_EXLOCK)
	//	denyMode = NFS_OPEN_SHARE_DENY_BOTH;
	// else if (fflag & O_SHLOCK)
	//	denyMode = NFS_OPEN_SHARE_DENY_WRITE;
	// else
	//	denyMode = NFS_OPEN_SHARE_DENY_NONE;
#if 0 // Not yet
	if (fflag & FHASLOCK) {
		/* XXX assume FHASLOCK is for the deny mode and not flock */
		/* FHASLOCK flock will be unlocked in the close path, but the flag is not cleared. */
		if (nofp->nof_deny & NFS_OPEN_SHARE_DENY_READ)
			denyMode = NFS_OPEN_SHARE_DENY_BOTH;
		else if (nofp->nof_deny & NFS_OPEN_SHARE_DENY_WRITE)
			denyMode = NFS_OPEN_SHARE_DENY_WRITE;
		else
			denyMode = NFS_OPEN_SHARE_DENY_NONE;
	} else {
		denyMode = NFS_OPEN_SHARE_DENY_NONE;
	}
#else
	// XXX don't do deny modes just yet (and never do it for !v4)
	denyMode = NFS_OPEN_SHARE_DENY_NONE;
#endif

	if (!accessMode) {
		/*
		 * No mode given to close?
		 * Guess this is the final close.
		 * We should unlock all locks and close all opens.
		 */
		uint32_t writers;
		mount_t mp = vnode_mount(vp);
		int force = (!mp || (mp->mnt_kern_flag & MNTK_FRCUNMOUNT));

		writers = nfs_no_of_open_file_writers(np);
		nfs_release_open_state_for_node(np, force);
		if (writers) {
			lck_mtx_lock(&nmp->nm_lock);
			if (writers > nmp->nm_writers) {
				NP(np, "nfs_vnop_close: number of write opens for mount underrun. Node has %d"
				   " opens for write. Mount has total of %d opens for write\n",
				   writers, nmp->nm_writers);
				nmp->nm_writers = 0;
			} else {
				nmp->nm_writers -= writers;
			}
			lck_mtx_unlock(&nmp->nm_lock);
		}

		return (error);
	} else if (fflag & FWRITE) {
		lck_mtx_lock(&nmp->nm_lock);
		if (nmp->nm_writers == 0) {
			NP(np, "nfs_vnop_close: removing open writer from mount, but mount has no files open for writing");
		} else {
			nmp->nm_writers--;
		}
		lck_mtx_unlock(&nmp->nm_lock);
	}


	noop = nfs_open_owner_find(nmp, vfs_context_ucred(ctx), 0);
	if (!noop) {
		// printf("nfs_vnop_close: can't get open owner!\n");
		return (EIO);
	}

restart:
	error = nfs_mount_state_in_use_start(nmp, NULL);
	if (error) {
		nfs_open_owner_rele(noop);
		return (error);
	}

	error = nfs_open_file_find(np, noop, &nofp, 0, 0, 0);
	if (!error && (nofp->nof_flags & NFS_OPEN_FILE_REOPEN)) {
		nfs_mount_state_in_use_end(nmp, 0);
		error = nfs4_reopen(nofp, NULL);
		nofp = NULL;
		if (!error)
			goto restart;
	}
	if (error) {
		NP(np, "nfs_vnop_close: no open file for owner, error %d, %d", error, kauth_cred_getuid(noop->noo_cred));
		error = EBADF;
		goto out;
	}
	error = nfs_open_file_set_busy(nofp, NULL);
	if (error) {
		nofp = NULL;
		goto out;
	}

	error = nfs_close(np, nofp, accessMode, denyMode, ctx);
	if (error)
		NP(np, "nfs_vnop_close: close error %d, %d", error, kauth_cred_getuid(noop->noo_cred));

out:
	if (nofp)
		nfs_open_file_clear_busy(nofp);
	if (nfs_mount_state_in_use_end(nmp, error)) {
		nofp = NULL;
		goto restart;
	}
	if (!error)
		error = error1;
	if (error)
		NP(np, "nfs_vnop_close: error %d, %d", error, kauth_cred_getuid(noop->noo_cred));
	if (noop)
		nfs_open_owner_rele(noop);
	return (error);
}

/*
 * nfs_close(): common function that does all the heavy lifting of file closure
 *
 * Takes an open file structure and a set of access/deny modes and figures out how
 * to update the open file structure (and the state on the server) appropriately.
 */
int
nfs_close(
	nfsnode_t np,
	struct nfs_open_file *nofp,
	uint32_t accessMode,
	uint32_t denyMode,
	vfs_context_t ctx)
{
	struct nfs_lock_owner *nlop;
	int error = 0, changed = 0, delegated = 0, closed = 0, downgrade = 0;
	uint32_t newAccessMode, newDenyMode;

	/* warn if modes don't match current state */
	if (((accessMode & nofp->nof_access) != accessMode) || ((denyMode & nofp->nof_deny) != denyMode))
		NP(np, "nfs_close: mode mismatch %d %d, current %d %d, %d",
		   accessMode, denyMode, nofp->nof_access, nofp->nof_deny,
		   kauth_cred_getuid(nofp->nof_owner->noo_cred));

	/*
	 * If we're closing a write-only open, we may not have a write-only count
	 * if we also grabbed read access. So, check the read-write count.
	 */
	if (denyMode == NFS_OPEN_SHARE_DENY_NONE) {
		if ((accessMode == NFS_OPEN_SHARE_ACCESS_WRITE) &&
		    (nofp->nof_w == 0) && (nofp->nof_d_w == 0) &&
		    (nofp->nof_rw || nofp->nof_d_rw))
			accessMode = NFS_OPEN_SHARE_ACCESS_BOTH;
	} else if (denyMode == NFS_OPEN_SHARE_DENY_WRITE) {
		if ((accessMode == NFS_OPEN_SHARE_ACCESS_WRITE) &&
		    (nofp->nof_w_dw == 0) && (nofp->nof_d_w_dw == 0) &&
		    (nofp->nof_rw_dw || nofp->nof_d_rw_dw))
			accessMode = NFS_OPEN_SHARE_ACCESS_BOTH;
	} else { /* NFS_OPEN_SHARE_DENY_BOTH */
		if ((accessMode == NFS_OPEN_SHARE_ACCESS_WRITE) &&
		    (nofp->nof_w_drw == 0) && (nofp->nof_d_w_drw == 0) &&
		    (nofp->nof_rw_drw || nofp->nof_d_rw_drw))
			accessMode = NFS_OPEN_SHARE_ACCESS_BOTH;
	}

	nfs_open_file_remove_open_find(nofp, accessMode, denyMode, &newAccessMode, &newDenyMode, &delegated);
	if ((newAccessMode != nofp->nof_access) || (newDenyMode != nofp->nof_deny))
		changed = 1;
	else
		changed = 0;

	if (NFSTONMP(np)->nm_vers < NFS_VER4) /* NFS v2/v3 closes simply need to remove the open. */
		goto v3close;

	if ((newAccessMode == 0) || (nofp->nof_opencnt == 1)) {
		/*
		 * No more access after this close, so clean up and close it.
		 * Don't send a close RPC if we're closing a delegated open.
		 */
		nfs_wait_bufs(np);
		closed = 1;
		if (!delegated && !(nofp->nof_flags & NFS_OPEN_FILE_LOST))
			error = nfs4_close_rpc(np, nofp, vfs_context_thread(ctx), vfs_context_ucred(ctx), 0);
		if (error == NFSERR_LOCKS_HELD) {
			/*
			 * Hmm... the server says we have locks we need to release first
			 * Find the lock owner and try to unlock everything.
			 */
			nlop = nfs_lock_owner_find(np, vfs_context_proc(ctx), 0);
			if (nlop) {
				nfs4_unlock_rpc(np, nlop, F_WRLCK, 0, UINT64_MAX,
					0, vfs_context_thread(ctx), vfs_context_ucred(ctx));
				nfs_lock_owner_rele(nlop);
			}
			error = nfs4_close_rpc(np, nofp, vfs_context_thread(ctx), vfs_context_ucred(ctx), 0);
		}
	} else if (changed) {
		/*
		 * File is still open but with less access, so downgrade the open.
		 * Don't send a downgrade RPC if we're closing a delegated open.
		 */
		if (!delegated && !(nofp->nof_flags & NFS_OPEN_FILE_LOST)) {
			downgrade = 1;
			/*
			 * If we have delegated opens, we should probably claim them before sending
			 * the downgrade because the server may not know the open we are downgrading to.
			 */
			if (nofp->nof_d_rw_drw || nofp->nof_d_w_drw || nofp->nof_d_r_drw ||
			    nofp->nof_d_rw_dw || nofp->nof_d_w_dw || nofp->nof_d_r_dw ||
			    nofp->nof_d_rw || nofp->nof_d_w || nofp->nof_d_r)
				nfs4_claim_delegated_state_for_open_file(nofp, 0);
			/* need to remove the open before sending the downgrade */
			nfs_open_file_remove_open(nofp, accessMode, denyMode);
			error = nfs4_open_downgrade_rpc(np, nofp, ctx);
			if (error) /* Hmm.. that didn't work. Add the open back in. */
				nfs_open_file_add_open(nofp, accessMode, denyMode, delegated);
		}
	}

	if (error) {
		NP(np, "nfs_close: error %d, %d", error, kauth_cred_getuid(nofp->nof_owner->noo_cred));
		return (error);
	}

v3close:
	if (!downgrade)
		nfs_open_file_remove_open(nofp, accessMode, denyMode);

	if (closed) {
		lck_mtx_lock(&nofp->nof_lock);
		if (nofp->nof_r || nofp->nof_d_r || nofp->nof_w || nofp->nof_d_w || nofp->nof_d_rw ||
		    (nofp->nof_rw && !((nofp->nof_flags & NFS_OPEN_FILE_CREATE) && !nofp->nof_creator && (nofp->nof_rw == 1))) ||
		    nofp->nof_r_dw || nofp->nof_d_r_dw || nofp->nof_w_dw || nofp->nof_d_w_dw ||
		    nofp->nof_rw_dw || nofp->nof_d_rw_dw || nofp->nof_r_drw || nofp->nof_d_r_drw ||
		    nofp->nof_w_drw || nofp->nof_d_w_drw || nofp->nof_rw_drw || nofp->nof_d_rw_drw)
			NP(np, "nfs_close: unexpected count: %u.%u %u.%u %u.%u dw %u.%u %u.%u %u.%u drw %u.%u %u.%u %u.%u flags 0x%x, %d",
			   nofp->nof_r, nofp->nof_d_r, nofp->nof_w, nofp->nof_d_w,
			   nofp->nof_rw, nofp->nof_d_rw, nofp->nof_r_dw, nofp->nof_d_r_dw,
			   nofp->nof_w_dw, nofp->nof_d_w_dw, nofp->nof_rw_dw, nofp->nof_d_rw_dw,
			   nofp->nof_r_drw, nofp->nof_d_r_drw, nofp->nof_w_drw, nofp->nof_d_w_drw,
			   nofp->nof_rw_drw, nofp->nof_d_rw_drw, nofp->nof_flags,
			   kauth_cred_getuid(nofp->nof_owner->noo_cred));
		/* clear out all open info, just to be safe */
		nofp->nof_access = nofp->nof_deny = 0;
		nofp->nof_mmap_access = nofp->nof_mmap_deny = 0;
		nofp->nof_r = nofp->nof_d_r = 0;
		nofp->nof_w = nofp->nof_d_w = 0;
		nofp->nof_rw = nofp->nof_d_rw = 0;
		nofp->nof_r_dw = nofp->nof_d_r_dw = 0;
		nofp->nof_w_dw = nofp->nof_d_w_dw = 0;
		nofp->nof_rw_dw = nofp->nof_d_rw_dw = 0;
		nofp->nof_r_drw = nofp->nof_d_r_drw = 0;
		nofp->nof_w_drw = nofp->nof_d_w_drw = 0;
		nofp->nof_rw_drw = nofp->nof_d_rw_drw = 0;
		nofp->nof_flags &= ~NFS_OPEN_FILE_CREATE;
		lck_mtx_unlock(&nofp->nof_lock);
		/* XXX we may potentially want to clean up idle/unused open file structures */
	}
	if (nofp->nof_flags & NFS_OPEN_FILE_LOST) {
		error = EIO;
		NP(np, "nfs_close: LOST%s, %d", !nofp->nof_opencnt ? " (last)" : "",
		   kauth_cred_getuid(nofp->nof_owner->noo_cred));
	}

	return (error);
}


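/*
 * Fetch the attributes for the given file handle via a GETATTR RPC and
 * parse the reply into *nvap. Vnode monitor requests are marked R_RECOVER
 * so they are handled as soft requests.
 */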
int
nfs3_getattr_rpc(
	nfsnode_t np,
	mount_t mp,
	u_char *fhp,
	size_t fhsize,
	int flags,
	vfs_context_t ctx,
	struct nfs_vattr *nvap,
	u_int64_t *xidp)
{
	struct nfsmount *nmp = mp ? VFSTONFS(mp) : NFSTONMP(np);
	int error = 0, status, nfsvers, rpcflags = 0;
	struct nfsm_chain nmreq, nmrep;

	if (!nmp)
		return (ENXIO);
	nfsvers = nmp->nm_vers;

	if (flags & NGA_MONITOR) /* vnode monitor requests should be soft */
		rpcflags = R_RECOVER;

	nfsm_chain_null(&nmreq);
	nfsm_chain_null(&nmrep);

	nfsm_chain_build_alloc_init(error, &nmreq, NFSX_FH(nfsvers));
	if (nfsvers != NFS_VER2)
		nfsm_chain_add_32(error, &nmreq, fhsize);
	nfsm_chain_add_opaque(error, &nmreq, fhp, fhsize);
	nfsm_chain_build_done(error, &nmreq);
	nfsmout_if(error);
	error = nfs_request2(np, mp, &nmreq, NFSPROC_GETATTR,
			vfs_context_thread(ctx), vfs_context_ucred(ctx),
			NULL, rpcflags, &nmrep, xidp, &status);
	if (!error)
		error = status;
	nfsmout_if(error);
	error = nfs_parsefattr(&nmrep, nfsvers, nvap);
nfsmout:
	nfsm_chain_cleanup(&nmreq);
	nfsm_chain_cleanup(&nmrep);
	return (error);
}

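/*
 * Get the attributes for a node, honoring the attribute cache when the
 * caller allows it and coordinating with any GETATTR already in progress
 * on the node so we don't flood the server with duplicate requests.
 */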
1296 int
1297 nfs_getattr(nfsnode_t np, struct nfs_vattr *nvap, vfs_context_t ctx, int flags)
1298 {
1299 struct nfsmount *nmp;
1300 int error = 0, nfsvers, inprogset = 0, wanted = 0, avoidfloods;
1301 struct nfs_vattr nvattr;
1302 struct timespec ts = { 2, 0 };
1303 u_int64_t xid;
1304
1305 FSDBG_TOP(513, np->n_size, np, np->n_vattr.nva_size, np->n_flag);
1306
1307 if (!(nmp = NFSTONMP(np)))
1308 return (ENXIO);
1309 nfsvers = nmp->nm_vers;
1310
1311 if (!nvap)
1312 nvap = &nvattr;
1313 NVATTR_INIT(nvap);
1314
1315 /* Update local times for special files. */
1316 if (np->n_flag & (NACC | NUPD)) {
1317 nfs_node_lock_force(np);
1318 np->n_flag |= NCHG;
1319 nfs_node_unlock(np);
1320 }
1321 /* Update size, if necessary */
1322 if (ISSET(np->n_flag, NUPDATESIZE))
1323 nfs_data_update_size(np, 0);
1324
1325 error = nfs_node_lock(np);
1326 nfsmout_if(error);
1327 if (!(flags & (NGA_UNCACHED|NGA_MONITOR)) || ((nfsvers >= NFS_VER4) && (np->n_openflags & N_DELEG_MASK))) {
1328 /*
1329 * Use the cache or wait for any getattr in progress if:
1330 * - it's a cached request, or
1331 * - we have a delegation
1332 */
1333 while (1) {
1334 error = nfs_getattrcache(np, nvap, flags);
1335 if (!error || (error != ENOENT)) {
1336 nfs_node_unlock(np);
1337 goto nfsmout;
1338 }
1339 error = 0;
1340 if (!ISSET(np->n_flag, NGETATTRINPROG))
1341 break;
1342 if (flags & NGA_MONITOR) {
1343 /* no need to wait if a request is pending */
1344 error = EINPROGRESS;
1345 nfs_node_unlock(np);
1346 goto nfsmout;
1347 }
1348 SET(np->n_flag, NGETATTRWANT);
1349 msleep(np, &np->n_lock, PZERO-1, "nfsgetattrwant", &ts);
1350 if ((error = nfs_sigintr(NFSTONMP(np), NULL, vfs_context_thread(ctx), 0))) {
1351 nfs_node_unlock(np);
1352 goto nfsmout;
1353 }
1354 }
1355 SET(np->n_flag, NGETATTRINPROG);
1356 inprogset = 1;
1357 } else if (!ISSET(np->n_flag, NGETATTRINPROG)) {
1358 SET(np->n_flag, NGETATTRINPROG);
1359 inprogset = 1;
1360 } else if (flags & NGA_MONITOR) {
1361 /* no need to make a request if one is pending */
1362 error = EINPROGRESS;
1363 }
1364 nfs_node_unlock(np);
1365
1366 nmp = NFSTONMP(np);
1367 if (!nmp)
1368 error = ENXIO;
1369 if (error)
1370 goto nfsmout;
1371
1372 /*
1373 * We might want to try to get both the attributes and access info by
1374 * making an ACCESS call and seeing if it returns updated attributes.
1375 * But don't bother if we aren't caching access info or if the
1376 * attributes returned wouldn't be cached.
1377 */
1378 if (!(flags & NGA_ACL) && (nfsvers != NFS_VER2) && nfs_access_for_getattr && (nfs_access_cache_timeout > 0)) {
1379 if (nfs_attrcachetimeout(np) > 0) {
1380 /* OSAddAtomic(1, &nfsstats.accesscache_misses); */
1381 u_int32_t access = NFS_ACCESS_ALL;
1382 error = nmp->nm_funcs->nf_access_rpc(np, &access, ctx);
1383 if (error)
1384 goto nfsmout;
1385 nfs_node_lock_force(np);
1386 error = nfs_getattrcache(np, nvap, flags);
1387 nfs_node_unlock(np);
1388 if (!error || (error != ENOENT))
1389 goto nfsmout;
1390 /* Well, that didn't work... just do a getattr... */
1391 error = 0;
1392 }
1393 }
1394
1395 avoidfloods = 0;
1396 tryagain:
1397 error = nmp->nm_funcs->nf_getattr_rpc(np, NULL, np->n_fhp, np->n_fhsize, flags, ctx, nvap, &xid);
1398 if (!error) {
1399 nfs_node_lock_force(np);
1400 error = nfs_loadattrcache(np, nvap, &xid, 0);
1401 nfs_node_unlock(np);
1402 }
1403 nfsmout_if(error);
1404 if (!xid) { /* out-of-order rpc - attributes were dropped */
1405 FSDBG(513, -1, np, np->n_xid >> 32, np->n_xid);
1406 if (avoidfloods++ < 20)
1407 goto tryagain;
1408 /* avoidfloods>1 is bizarre. at 20 pull the plug */
1409 /* just return the last attributes we got */
1410 }
1411 nfsmout:
1412 nfs_node_lock_force(np);
1413 if (inprogset) {
1414 wanted = ISSET(np->n_flag, NGETATTRWANT);
1415 CLR(np->n_flag, (NGETATTRINPROG | NGETATTRWANT));
1416 }
1417 if (!error) {
1418 /* check if the node changed on us */
1419 vnode_t vp = NFSTOV(np);
1420 enum vtype vtype = vnode_vtype(vp);
1421 if ((vtype == VDIR) && NFS_CHANGED_NC(nfsvers, np, nvap)) {
1422 FSDBG(513, -1, np, 0, np);
1423 np->n_flag &= ~NNEGNCENTRIES;
1424 cache_purge(vp);
1425 np->n_ncgen++;
1426 NFS_CHANGED_UPDATE_NC(nfsvers, np, nvap);
1427 }
1428 if (NFS_CHANGED(nfsvers, np, nvap)) {
1429 FSDBG(513, -1, np, -1, np);
1430 if (vtype == VDIR)
1431 nfs_invaldir(np);
1432 nfs_node_unlock(np);
1433 if (wanted)
1434 wakeup(np);
1435 error = nfs_vinvalbuf(vp, V_SAVE, ctx, 1);
1436 FSDBG(513, -1, np, -2, error);
1437 if (!error) {
1438 nfs_node_lock_force(np);
1439 NFS_CHANGED_UPDATE(nfsvers, np, nvap);
1440 nfs_node_unlock(np);
1441 }
1442 } else {
1443 nfs_node_unlock(np);
1444 if (wanted)
1445 wakeup(np);
1446 }
1447 } else {
1448 nfs_node_unlock(np);
1449 if (wanted)
1450 wakeup(np);
1451 }
1452
1453 if (nvap == &nvattr) {
1454 NVATTR_CLEANUP(nvap);
1455 } else if (!(flags & NGA_ACL)) {
1456 /* make sure we don't return an ACL if it wasn't asked for */
1457 NFS_BITMAP_CLR(nvap->nva_bitmap, NFS_FATTR_ACL);
1458 if (nvap->nva_acl) {
1459 kauth_acl_free(nvap->nva_acl);
1460 nvap->nva_acl = NULL;
1461 }
1462 }
1463 FSDBG_BOT(513, np->n_size, error, np->n_vattr.nva_size, np->n_flag);
1464 return (error);
1465 }
1466
1467 /*
1468 * NFS getattr call from vfs.
1469 */
1470 int
1471 nfs3_vnop_getattr(
1472 struct vnop_getattr_args /* {
1473 struct vnodeop_desc *a_desc;
1474 vnode_t a_vp;
1475 struct vnode_attr *a_vap;
1476 vfs_context_t a_context;
1477 } */ *ap)
1478 {
1479 int error;
1480 struct nfs_vattr nva;
1481 struct vnode_attr *vap = ap->a_vap;
1482 dev_t rdev;
1483
1484 error = nfs_getattr(VTONFS(ap->a_vp), &nva, ap->a_context, NGA_CACHED);
1485 if (error)
1486 return (error);
1487
1488 /* copy nva to *a_vap */
1489 VATTR_RETURN(vap, va_type, nva.nva_type);
1490 VATTR_RETURN(vap, va_mode, nva.nva_mode);
1491 rdev = makedev(nva.nva_rawdev.specdata1, nva.nva_rawdev.specdata2);
1492 VATTR_RETURN(vap, va_rdev, rdev);
1493 VATTR_RETURN(vap, va_uid, nva.nva_uid);
1494 VATTR_RETURN(vap, va_gid, nva.nva_gid);
1495 VATTR_RETURN(vap, va_nlink, nva.nva_nlink);
1496 VATTR_RETURN(vap, va_fileid, nva.nva_fileid);
1497 VATTR_RETURN(vap, va_data_size, nva.nva_size);
1498 VATTR_RETURN(vap, va_data_alloc, nva.nva_bytes);
1499 VATTR_RETURN(vap, va_iosize, nfs_iosize);
1500 vap->va_access_time.tv_sec = nva.nva_timesec[NFSTIME_ACCESS];
1501 vap->va_access_time.tv_nsec = nva.nva_timensec[NFSTIME_ACCESS];
1502 VATTR_SET_SUPPORTED(vap, va_access_time);
1503 vap->va_modify_time.tv_sec = nva.nva_timesec[NFSTIME_MODIFY];
1504 vap->va_modify_time.tv_nsec = nva.nva_timensec[NFSTIME_MODIFY];
1505 VATTR_SET_SUPPORTED(vap, va_modify_time);
1506 vap->va_change_time.tv_sec = nva.nva_timesec[NFSTIME_CHANGE];
1507 vap->va_change_time.tv_nsec = nva.nva_timensec[NFSTIME_CHANGE];
1508 VATTR_SET_SUPPORTED(vap, va_change_time);
1509
1510 // VATTR_RETURN(vap, va_encoding, 0xffff /* kTextEncodingUnknown */);
1511 return (error);
1512 }
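/*
 * Usage sketch (hypothetical caller, not part of this file): the VFS
 * dispatches vnode_getattr() to the vnop above, and only attributes
 * marked via VATTR_RETURN()/VATTR_SET_SUPPORTED() are valid on return:
 *
 *	struct vnode_attr va;
 *	VATTR_INIT(&va);
 *	VATTR_WANTED(&va, va_data_size);
 *	error = vnode_getattr(vp, &va, ctx);
 *	if (!error && VATTR_IS_SUPPORTED(&va, va_data_size))
 *		... use va.va_data_size ...
 */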
1513
1514 /*
1515 * NFS setattr call.
1516 */
1517 int
1518 nfs_vnop_setattr(
1519 struct vnop_setattr_args /* {
1520 struct vnodeop_desc *a_desc;
1521 vnode_t a_vp;
1522 struct vnode_attr *a_vap;
1523 vfs_context_t a_context;
1524 } */ *ap)
1525 {
1526 vfs_context_t ctx = ap->a_context;
1527 vnode_t vp = ap->a_vp;
1528 nfsnode_t np = VTONFS(vp);
1529 struct nfsmount *nmp;
1530 struct vnode_attr *vap = ap->a_vap;
1531 int error = 0;
1532 int biosize, nfsvers, namedattrs;
1533 u_quad_t origsize, vapsize;
1534 struct nfs_dulookup dul;
1535 nfsnode_t dnp = NULL;
1536 vnode_t dvp = NULL;
1537 const char *vname = NULL;
1538 struct nfs_open_owner *noop = NULL;
1539 struct nfs_open_file *nofp = NULL;
1540
1541 nmp = VTONMP(vp);
1542 if (!nmp)
1543 return (ENXIO);
1544 nfsvers = nmp->nm_vers;
1545 namedattrs = (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR);
1546 biosize = nmp->nm_biosize;
1547
1548 /* Disallow write attempts if the filesystem is mounted read-only. */
1549 if (vnode_vfsisrdonly(vp))
1550 return (EROFS);
1551
1552 origsize = np->n_size;
1553 if (VATTR_IS_ACTIVE(vap, va_data_size)) {
1554 switch (vnode_vtype(vp)) {
1555 case VDIR:
1556 return (EISDIR);
1557 case VCHR:
1558 case VBLK:
1559 case VSOCK:
1560 case VFIFO:
1561 if (!VATTR_IS_ACTIVE(vap, va_modify_time) &&
1562 !VATTR_IS_ACTIVE(vap, va_access_time) &&
1563 !VATTR_IS_ACTIVE(vap, va_mode) &&
1564 !VATTR_IS_ACTIVE(vap, va_uid) &&
1565 !VATTR_IS_ACTIVE(vap, va_gid)) {
1566 return (0);
1567 }
1568 VATTR_CLEAR_ACTIVE(vap, va_data_size);
1569 break;
1570 default:
1571 /*
1572 * Disallow write attempts if the filesystem is
1573 * mounted read-only.
1574 */
1575 if (vnode_vfsisrdonly(vp))
1576 return (EROFS);
1577 FSDBG_TOP(512, np->n_size, vap->va_data_size,
1578 np->n_vattr.nva_size, np->n_flag);
1579 /* clear NNEEDINVALIDATE, if set */
1580 if ((error = nfs_node_lock(np)))
1581 return (error);
1582 if (np->n_flag & NNEEDINVALIDATE)
1583 np->n_flag &= ~NNEEDINVALIDATE;
1584 nfs_node_unlock(np);
1585 /* flush everything */
1586 error = nfs_vinvalbuf(vp, (vap->va_data_size ? V_SAVE : 0), ctx, 1);
1587 if (error) {
1588 NP(np, "nfs_setattr: nfs_vinvalbuf %d", error);
1589 FSDBG_BOT(512, np->n_size, vap->va_data_size, np->n_vattr.nva_size, -1);
1590 return (error);
1591 }
1592 if (nfsvers >= NFS_VER4) {
1593 /* setting file size requires having the file open for write access */
1594 if (np->n_flag & NREVOKE)
1595 return (EIO);
1596 noop = nfs_open_owner_find(nmp, vfs_context_ucred(ctx), 1);
1597 if (!noop)
1598 return (ENOMEM);
1599 restart:
1600 error = nfs_mount_state_in_use_start(nmp, vfs_context_thread(ctx));
1601 if (error)
1602 return (error);
1603 if (np->n_flag & NREVOKE) {
1604 nfs_mount_state_in_use_end(nmp, 0);
1605 return (EIO);
1606 }
1607 error = nfs_open_file_find(np, noop, &nofp, 0, 0, 1);
1608 if (!error && (nofp->nof_flags & NFS_OPEN_FILE_LOST))
1609 error = EIO;
1610 if (!error && (nofp->nof_flags & NFS_OPEN_FILE_REOPEN)) {
1611 nfs_mount_state_in_use_end(nmp, 0);
1612 error = nfs4_reopen(nofp, vfs_context_thread(ctx));
1613 nofp = NULL;
1614 if (!error)
1615 goto restart;
1616 }
1617 if (!error)
1618 error = nfs_open_file_set_busy(nofp, vfs_context_thread(ctx));
1619 if (error) {
1620 nfs_open_owner_rele(noop);
1621 return (error);
1622 }
1623 if (!(nofp->nof_access & NFS_OPEN_SHARE_ACCESS_WRITE)) {
1624 /* we don't have the file open for write access, so open it */
1625 error = nfs4_open(np, nofp, NFS_OPEN_SHARE_ACCESS_WRITE, NFS_OPEN_SHARE_DENY_NONE, ctx);
1626 if (!error)
1627 nofp->nof_flags |= NFS_OPEN_FILE_SETATTR;
1628 if (nfs_mount_state_error_should_restart(error)) {
1629 nfs_open_file_clear_busy(nofp);
1630 nofp = NULL;
1631 if (nfs_mount_state_in_use_end(nmp, error))
1632 goto restart;
1633 }
1634 }
1635 }
1636 nfs_data_lock(np, NFS_DATA_LOCK_EXCLUSIVE);
1637 if (np->n_size > vap->va_data_size) { /* shrinking? */
1638 daddr64_t obn, bn;
1639 int neweofoff, mustwrite;
1640 struct nfsbuf *bp;
1641
1642 obn = (np->n_size - 1) / biosize;
1643 bn = vap->va_data_size / biosize;
1644 for ( ; obn >= bn; obn--) {
1645 if (!nfs_buf_is_incore(np, obn))
1646 continue;
1647 error = nfs_buf_get(np, obn, biosize, NULL, NBLK_READ, &bp);
1648 if (error)
1649 continue;
1650 if (obn != bn) {
1651 FSDBG(512, bp, bp->nb_flags, 0, obn);
1652 SET(bp->nb_flags, NB_INVAL);
1653 nfs_buf_release(bp, 1);
1654 continue;
1655 }
1656 mustwrite = 0;
1657 neweofoff = vap->va_data_size - NBOFF(bp);
1658 /* check for any dirty data before the new EOF */
1659 if ((bp->nb_dirtyend > 0) && (bp->nb_dirtyoff < neweofoff)) {
1660 /* clip dirty range to EOF */
1661 if (bp->nb_dirtyend > neweofoff) {
1662 bp->nb_dirtyend = neweofoff;
1663 if (bp->nb_dirtyoff >= bp->nb_dirtyend)
1664 bp->nb_dirtyoff = bp->nb_dirtyend = 0;
1665 }
1666 if ((bp->nb_dirtyend > 0) && (bp->nb_dirtyoff < neweofoff))
1667 mustwrite++;
1668 }
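/*
 * Keep only the dirty-page bits at or below the new EOF.
 * E.g. with PAGE_SIZE 4096 and neweofoff 6000:
 * round_page_32(6000)/PAGE_SIZE = 8192/4096 = 2, so the
 * mask is (1 << 2) - 1 = 0x3 and pages 0 and 1 keep their
 * dirty bits while all later pages are cleared.
 */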
1669 bp->nb_dirty &= (1 << round_page_32(neweofoff)/PAGE_SIZE) - 1;
1670 if (bp->nb_dirty)
1671 mustwrite++;
1672 if (!mustwrite) {
1673 FSDBG(512, bp, bp->nb_flags, 0, obn);
1674 SET(bp->nb_flags, NB_INVAL);
1675 nfs_buf_release(bp, 1);
1676 continue;
1677 }
1678 /* gotta write out dirty data before invalidating */
1679 /* (NB_STABLE indicates that data writes should be FILESYNC) */
1680 /* (NB_NOCACHE indicates buffer should be discarded) */
1681 CLR(bp->nb_flags, (NB_DONE | NB_ERROR | NB_INVAL | NB_ASYNC | NB_READ));
1682 SET(bp->nb_flags, NB_STABLE | NB_NOCACHE);
1683 if (!IS_VALID_CRED(bp->nb_wcred)) {
1684 kauth_cred_t cred = vfs_context_ucred(ctx);
1685 kauth_cred_ref(cred);
1686 bp->nb_wcred = cred;
1687 }
1688 error = nfs_buf_write(bp);
1689 // Note: bp has been released
1690 if (error) {
1691 FSDBG(512, bp, 0xd00dee, 0xbad, error);
1692 nfs_node_lock_force(np);
1693 np->n_error = error;
1694 np->n_flag |= NWRITEERR;
1695 /*
1696 * There was a write error and we need to
1697 * invalidate attrs and flush buffers in
1698 * order to sync up with the server.
1699 * (if this write was extending the file,
1700 * we may no longer know the correct size)
1701 */
1702 NATTRINVALIDATE(np);
1703 nfs_node_unlock(np);
1704 nfs_data_unlock(np);
1705 nfs_vinvalbuf(vp, V_SAVE|V_IGNORE_WRITEERR, ctx, 1);
1706 nfs_data_lock(np, NFS_DATA_LOCK_EXCLUSIVE);
1707 error = 0;
1708 }
1709 }
1710 }
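/*
 * Worked example of the shrink loop above, assuming a biosize of
 * 32768: truncating a 100000-byte file to 50000 bytes gives
 * obn = (100000-1)/32768 = 3 and bn = 50000/32768 = 1, so blocks
 * 3 and 2 are invalidated outright, and block 1 (which straddles
 * the new EOF) has its dirty range clipped and is written out
 * first if it still holds dirty data.
 */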
1711 if (vap->va_data_size != np->n_size)
1712 ubc_setsize(vp, (off_t)vap->va_data_size); /* XXX error? */
1713 origsize = np->n_size;
1714 np->n_size = np->n_vattr.nva_size = vap->va_data_size;
1715 nfs_node_lock_force(np);
1716 CLR(np->n_flag, NUPDATESIZE);
1717 nfs_node_unlock(np);
1718 FSDBG(512, np, np->n_size, np->n_vattr.nva_size, 0xf00d0001);
1719 }
1720 } else if (VATTR_IS_ACTIVE(vap, va_modify_time) ||
1721 VATTR_IS_ACTIVE(vap, va_access_time) ||
1722 (vap->va_vaflags & VA_UTIMES_NULL)) {
1723 if ((error = nfs_node_lock(np)))
1724 return (error);
1725 if ((np->n_flag & NMODIFIED) && (vnode_vtype(vp) == VREG)) {
1726 nfs_node_unlock(np);
1727 error = nfs_vinvalbuf(vp, V_SAVE, ctx, 1);
1728 if (error == EINTR)
1729 return (error);
1730 } else {
1731 nfs_node_unlock(np);
1732 }
1733 }
1734 if ((VATTR_IS_ACTIVE(vap, va_mode) || VATTR_IS_ACTIVE(vap, va_uid) || VATTR_IS_ACTIVE(vap, va_gid) ||
1735 VATTR_IS_ACTIVE(vap, va_acl) || VATTR_IS_ACTIVE(vap, va_uuuid) || VATTR_IS_ACTIVE(vap, va_guuid)) &&
1736 !(error = nfs_node_lock(np))) {
1737 NACCESSINVALIDATE(np);
1738 nfs_node_unlock(np);
1739 if (!namedattrs) {
1740 dvp = vnode_getparent(vp);
1741 vname = vnode_getname(vp);
1742 dnp = (dvp && vname) ? VTONFS(dvp) : NULL;
1743 if (dnp) {
1744 error = nfs_node_set_busy(dnp, vfs_context_thread(ctx));
1745 if (error) {
1746 dnp = NULL;
1747 error = 0;
1748 }
1749 }
1750 if (dnp) {
1751 nfs_dulookup_init(&dul, dnp, vname, strlen(vname), ctx);
1752 nfs_dulookup_start(&dul, dnp, ctx);
1753 }
1754 }
1755 }
1756
1757 if (!error)
1758 error = nmp->nm_funcs->nf_setattr_rpc(np, vap, ctx);
1759
1760 if (VATTR_IS_ACTIVE(vap, va_mode) || VATTR_IS_ACTIVE(vap, va_uid) || VATTR_IS_ACTIVE(vap, va_gid) ||
1761 VATTR_IS_ACTIVE(vap, va_acl) || VATTR_IS_ACTIVE(vap, va_uuuid) || VATTR_IS_ACTIVE(vap, va_guuid)) {
1762 if (!namedattrs) {
1763 if (dnp) {
1764 nfs_dulookup_finish(&dul, dnp, ctx);
1765 nfs_node_clear_busy(dnp);
1766 }
1767 if (dvp != NULLVP)
1768 vnode_put(dvp);
1769 if (vname != NULL)
1770 vnode_putname(vname);
1771 }
1772 }
1773
1774 FSDBG_BOT(512, np->n_size, vap->va_data_size, np->n_vattr.nva_size, error);
1775 if (VATTR_IS_ACTIVE(vap, va_data_size)) {
1776 if (error && (origsize != np->n_size) &&
1777 ((nfsvers < NFS_VER4) || !nfs_mount_state_error_should_restart(error))) {
1778 /* make every effort to resync file size w/ server... */
1779 /* (don't bother if we'll be restarting the operation) */
1780 int err; /* preserve "error" for return */
1781 np->n_size = np->n_vattr.nva_size = origsize;
1782 nfs_node_lock_force(np);
1783 CLR(np->n_flag, NUPDATESIZE);
1784 nfs_node_unlock(np);
1785 FSDBG(512, np, np->n_size, np->n_vattr.nva_size, 0xf00d0002);
1786 ubc_setsize(vp, (off_t)np->n_size); /* XXX check error */
1787 vapsize = vap->va_data_size;
1788 vap->va_data_size = origsize;
1789 err = nmp->nm_funcs->nf_setattr_rpc(np, vap, ctx);
1790 if (err)
1791 NP(np, "nfs_vnop_setattr: nfs%d_setattr_rpc %d %d", nfsvers, error, err);
1792 vap->va_data_size = vapsize;
1793 }
1794 nfs_node_lock_force(np);
1795 /*
1796 * The size was just set. If the size is already marked for update, don't
1797 * trust the newsize (it may have been set while the setattr was in progress).
1798 * Clear the update flag and make sure we fetch new attributes so we are sure
1799 * we have the latest size.
1800 */
1801 if (ISSET(np->n_flag, NUPDATESIZE)) {
1802 CLR(np->n_flag, NUPDATESIZE);
1803 NATTRINVALIDATE(np);
1804 nfs_node_unlock(np);
1805 nfs_getattr(np, NULL, ctx, NGA_UNCACHED);
1806 } else {
1807 nfs_node_unlock(np);
1808 }
1809 nfs_data_unlock(np);
1810 if (nfsvers >= NFS_VER4) {
1811 if (nofp) {
1812 /* don't close our setattr open if we'll be restarting... */
1813 if (!nfs_mount_state_error_should_restart(error) &&
1814 (nofp->nof_flags & NFS_OPEN_FILE_SETATTR)) {
1815 int err = nfs_close(np, nofp, NFS_OPEN_SHARE_ACCESS_WRITE, NFS_OPEN_SHARE_DENY_NONE, ctx);
1816 if (err)
1817 NP(np, "nfs_vnop_setattr: close error: %d", err);
1818 nofp->nof_flags &= ~NFS_OPEN_FILE_SETATTR;
1819 }
1820 nfs_open_file_clear_busy(nofp);
1821 nofp = NULL;
1822 }
1823 if (nfs_mount_state_in_use_end(nmp, error))
1824 goto restart;
1825 nfs_open_owner_rele(noop);
1826 }
1827 }
1828 return (error);
1829 }
1830
1831 /*
1832 * Do an NFS setattr RPC.
1833 */
1834 int
1835 nfs3_setattr_rpc(
1836 nfsnode_t np,
1837 struct vnode_attr *vap,
1838 vfs_context_t ctx)
1839 {
1840 struct nfsmount *nmp = NFSTONMP(np);
1841 int error = 0, lockerror = ENOENT, status, wccpostattr = 0, nfsvers;
1842 u_int64_t xid, nextxid;
1843 struct nfsm_chain nmreq, nmrep;
1844
1845 if (!nmp)
1846 return (ENXIO);
1847 nfsvers = nmp->nm_vers;
1848
1849 VATTR_SET_SUPPORTED(vap, va_mode);
1850 VATTR_SET_SUPPORTED(vap, va_uid);
1851 VATTR_SET_SUPPORTED(vap, va_gid);
1852 VATTR_SET_SUPPORTED(vap, va_data_size);
1853 VATTR_SET_SUPPORTED(vap, va_access_time);
1854 VATTR_SET_SUPPORTED(vap, va_modify_time);
1855
1856 if (VATTR_IS_ACTIVE(vap, va_flags)) {
1857 if (vap->va_flags) { /* we don't support setting flags */
1858 if (vap->va_active & ~VNODE_ATTR_va_flags)
1859 return (EINVAL); /* return EINVAL if other attributes also set */
1860 else
1861 return (ENOTSUP); /* return ENOTSUP for chflags(2) */
1862 }
1863 /* no flags set, so we'll just ignore it */
1864 if (!(vap->va_active & ~VNODE_ATTR_va_flags))
1865 return (0); /* no (other) attributes to set, so nothing to do */
1866 }
1867
1868 nfsm_chain_null(&nmreq);
1869 nfsm_chain_null(&nmrep);
1870
1871 nfsm_chain_build_alloc_init(error, &nmreq,
1872 NFSX_FH(nfsvers) + NFSX_SATTR(nfsvers));
1873 nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
1874 if (nfsvers == NFS_VER3) {
1875 if (VATTR_IS_ACTIVE(vap, va_mode)) {
1876 nfsm_chain_add_32(error, &nmreq, TRUE);
1877 nfsm_chain_add_32(error, &nmreq, vap->va_mode);
1878 } else {
1879 nfsm_chain_add_32(error, &nmreq, FALSE);
1880 }
1881 if (VATTR_IS_ACTIVE(vap, va_uid)) {
1882 nfsm_chain_add_32(error, &nmreq, TRUE);
1883 nfsm_chain_add_32(error, &nmreq, vap->va_uid);
1884 } else {
1885 nfsm_chain_add_32(error, &nmreq, FALSE);
1886 }
1887 if (VATTR_IS_ACTIVE(vap, va_gid)) {
1888 nfsm_chain_add_32(error, &nmreq, TRUE);
1889 nfsm_chain_add_32(error, &nmreq, vap->va_gid);
1890 } else {
1891 nfsm_chain_add_32(error, &nmreq, FALSE);
1892 }
1893 if (VATTR_IS_ACTIVE(vap, va_data_size)) {
1894 nfsm_chain_add_32(error, &nmreq, TRUE);
1895 nfsm_chain_add_64(error, &nmreq, vap->va_data_size);
1896 } else {
1897 nfsm_chain_add_32(error, &nmreq, FALSE);
1898 }
1899 if (vap->va_vaflags & VA_UTIMES_NULL) {
1900 nfsm_chain_add_32(error, &nmreq, NFS_TIME_SET_TO_SERVER);
1901 nfsm_chain_add_32(error, &nmreq, NFS_TIME_SET_TO_SERVER);
1902 } else {
1903 if (VATTR_IS_ACTIVE(vap, va_access_time)) {
1904 nfsm_chain_add_32(error, &nmreq, NFS_TIME_SET_TO_CLIENT);
1905 nfsm_chain_add_32(error, &nmreq, vap->va_access_time.tv_sec);
1906 nfsm_chain_add_32(error, &nmreq, vap->va_access_time.tv_nsec);
1907 } else {
1908 nfsm_chain_add_32(error, &nmreq, NFS_TIME_DONT_CHANGE);
1909 }
1910 if (VATTR_IS_ACTIVE(vap, va_modify_time)) {
1911 nfsm_chain_add_32(error, &nmreq, NFS_TIME_SET_TO_CLIENT);
1912 nfsm_chain_add_32(error, &nmreq, vap->va_modify_time.tv_sec);
1913 nfsm_chain_add_32(error, &nmreq, vap->va_modify_time.tv_nsec);
1914 } else {
1915 nfsm_chain_add_32(error, &nmreq, NFS_TIME_DONT_CHANGE);
1916 }
1917 }
1918 nfsm_chain_add_32(error, &nmreq, FALSE);
1919 } else {
1920 nfsm_chain_add_32(error, &nmreq, VATTR_IS_ACTIVE(vap, va_mode) ?
1921 vtonfsv2_mode(vnode_vtype(NFSTOV(np)), vap->va_mode) : -1);
1922 nfsm_chain_add_32(error, &nmreq, VATTR_IS_ACTIVE(vap, va_uid) ?
1923 vap->va_uid : (uint32_t)-1);
1924 nfsm_chain_add_32(error, &nmreq, VATTR_IS_ACTIVE(vap, va_gid) ?
1925 vap->va_gid : (uint32_t)-1);
1926 nfsm_chain_add_32(error, &nmreq, VATTR_IS_ACTIVE(vap, va_data_size) ?
1927 vap->va_data_size : (uint32_t)-1);
1928 if (VATTR_IS_ACTIVE(vap, va_access_time)) {
1929 nfsm_chain_add_32(error, &nmreq, vap->va_access_time.tv_sec);
1930 nfsm_chain_add_32(error, &nmreq, (vap->va_access_time.tv_nsec != -1) ?
1931 ((uint32_t)vap->va_access_time.tv_nsec / 1000) : 0xffffffff);
1932 } else {
1933 nfsm_chain_add_32(error, &nmreq, -1);
1934 nfsm_chain_add_32(error, &nmreq, -1);
1935 }
1936 if (VATTR_IS_ACTIVE(vap, va_modify_time)) {
1937 nfsm_chain_add_32(error, &nmreq, vap->va_modify_time.tv_sec);
1938 nfsm_chain_add_32(error, &nmreq, (vap->va_modify_time.tv_nsec != -1) ?
1939 ((uint32_t)vap->va_modify_time.tv_nsec / 1000) : 0xffffffff);
1940 } else {
1941 nfsm_chain_add_32(error, &nmreq, -1);
1942 nfsm_chain_add_32(error, &nmreq, -1);
1943 }
1944 }
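/*
 * Wire-format note on the request built above: NFSv3 encodes each
 * sattr3 field as a discriminated union -- a boolean followed by the
 * value only when TRUE -- with the times using a three-way switch
 * (DONT_CHANGE / SET_TO_SERVER / SET_TO_CLIENT).  NFSv2 sends every
 * field unconditionally and uses all-ones (-1 / 0xffffffff) to mean
 * "don't change".
 */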
1945 nfsm_chain_build_done(error, &nmreq);
1946 nfsmout_if(error);
1947 error = nfs_request(np, NULL, &nmreq, NFSPROC_SETATTR, ctx, NULL, &nmrep, &xid, &status);
1948 if ((lockerror = nfs_node_lock(np)))
1949 error = lockerror;
1950 if (nfsvers == NFS_VER3) {
1951 struct timespec premtime = { 0, 0 };
1952 nfsm_chain_get_wcc_data(error, &nmrep, np, &premtime, &wccpostattr, &xid);
1953 nfsmout_if(error);
1954 /* if file hadn't changed, update cached mtime */
1955 if (nfstimespeccmp(&np->n_mtime, &premtime, ==))
1956 NFS_CHANGED_UPDATE(nfsvers, np, &np->n_vattr);
1957 /* if directory hadn't changed, update namecache mtime */
1958 if ((vnode_vtype(NFSTOV(np)) == VDIR) &&
1959 nfstimespeccmp(&np->n_ncmtime, &premtime, ==))
1960 NFS_CHANGED_UPDATE_NC(nfsvers, np, &np->n_vattr);
1961 if (!wccpostattr)
1962 NATTRINVALIDATE(np);
1963 error = status;
1964 } else {
1965 if (!error)
1966 error = status;
1967 nfsm_chain_loadattr(error, &nmrep, np, nfsvers, &xid);
1968 }
1969 /*
1970 * We just changed the attributes and we want to make sure that we
1971 * see the latest attributes. Get the next XID. If it's not the
1972 * next XID after the SETATTR XID, then it's possible that another
1973 * RPC was in flight at the same time and it might put stale attributes
1974 * in the cache. In that case, we invalidate the attributes and set
1975 * the attribute cache XID to guarantee that newer attributes will
1976 * get loaded next.
1977 */
1978 nextxid = 0;
1979 nfs_get_xid(&nextxid);
1980 if (nextxid != (xid + 1)) {
1981 np->n_xid = nextxid;
1982 NATTRINVALIDATE(np);
1983 }
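/*
 * Example: if our SETATTR used xid 1000 and nfs_get_xid() hands back
 * 1001, no other RPC was started in the meantime and the cached
 * attributes are the ones we just loaded.  If it hands back 1005,
 * four other RPCs were in flight, any of which could deliver
 * pre-SETATTR attributes, so we bump n_xid past them and invalidate.
 */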
1984 nfsmout:
1985 if (!lockerror)
1986 nfs_node_unlock(np);
1987 nfsm_chain_cleanup(&nmreq);
1988 nfsm_chain_cleanup(&nmrep);
1989 return (error);
1990 }
1991
1992 /*
1993 * NFS lookup call, one step at a time...
1994 * First look in cache
1995 * If not found, unlock the directory nfsnode and do the RPC
1996 */
1997 int
1998 nfs_vnop_lookup(
1999 struct vnop_lookup_args /* {
2000 struct vnodeop_desc *a_desc;
2001 vnode_t a_dvp;
2002 vnode_t *a_vpp;
2003 struct componentname *a_cnp;
2004 vfs_context_t a_context;
2005 } */ *ap)
2006 {
2007 vfs_context_t ctx = ap->a_context;
2008 struct componentname *cnp = ap->a_cnp;
2009 vnode_t dvp = ap->a_dvp;
2010 vnode_t *vpp = ap->a_vpp;
2011 int flags = cnp->cn_flags;
2012 vnode_t newvp;
2013 nfsnode_t dnp, np;
2014 struct nfsmount *nmp;
2015 mount_t mp;
2016 int nfsvers, error, busyerror = ENOENT, isdot, isdotdot, negnamecache;
2017 u_int64_t xid;
2018 struct nfs_vattr nvattr;
2019 int ngflags;
2020 struct vnop_access_args naa;
2021 fhandle_t fh;
2022 struct nfsreq rq, *req = &rq;
2023
2024 *vpp = NULLVP;
2025
2026 dnp = VTONFS(dvp);
2027 NVATTR_INIT(&nvattr);
2028
2029 mp = vnode_mount(dvp);
2030 nmp = VFSTONFS(mp);
2031 if (!nmp) {
2032 error = ENXIO;
2033 goto error_return;
2034 }
2035 nfsvers = nmp->nm_vers;
2036 negnamecache = !NMFLAG(nmp, NONEGNAMECACHE);
2037
2038 if ((error = busyerror = nfs_node_set_busy(dnp, vfs_context_thread(ctx))))
2039 goto error_return;
2040 /* nfs_getattr() will check changed and purge caches */
2041 if ((error = nfs_getattr(dnp, NULL, ctx, NGA_CACHED)))
2042 goto error_return;
2043
2044 error = cache_lookup(dvp, vpp, cnp);
2045 switch (error) {
2046 case ENOENT:
2047 /* negative cache entry */
2048 goto error_return;
2049 case 0:
2050 /* cache miss */
2051 if ((nfsvers > NFS_VER2) && NMFLAG(nmp, RDIRPLUS)) {
2052 /* if rdirplus, try dir buf cache lookup */
2053 error = nfs_dir_buf_cache_lookup(dnp, &np, cnp, ctx, 0);
2054 if (!error && np) {
2055 /* dir buf cache hit */
2056 *vpp = NFSTOV(np);
2057 error = -1;
2058 }
2059 }
2060 if (error != -1) /* cache miss */
2061 break;
2062 /* FALLTHROUGH */
2063 case -1:
2064 /* cache hit, not really an error */
2065 OSAddAtomic64(1, &nfsstats.lookupcache_hits);
2066
2067 nfs_node_clear_busy(dnp);
2068 busyerror = ENOENT;
2069
2070 /* check for directory access */
2071 naa.a_desc = &vnop_access_desc;
2072 naa.a_vp = dvp;
2073 naa.a_action = KAUTH_VNODE_SEARCH;
2074 naa.a_context = ctx;
2075
2076 /* compute actual success/failure based on accessibility */
2077 error = nfs_vnop_access(&naa);
2078 /* FALLTHROUGH */
2079 default:
2080 /* unexpected error from cache_lookup */
2081 goto error_return;
2082 }
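/*
 * For reference, the cache_lookup() results handled above:
 *	ENOENT	cached negative entry -- fail the lookup immediately
 *	0	miss -- probe the readdirplus buffer cache if enabled,
 *		otherwise fall through to the LOOKUP RPC
 *	-1	hit -- *vpp holds a referenced vnode; all that remains
 *		is to verify search access on the directory
 */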
2083
2084 /* skip lookup, if we know who we are: "." or ".." */
2085 isdot = isdotdot = 0;
2086 if (cnp->cn_nameptr[0] == '.') {
2087 if (cnp->cn_namelen == 1)
2088 isdot = 1;
2089 if ((cnp->cn_namelen == 2) && (cnp->cn_nameptr[1] == '.'))
2090 isdotdot = 1;
2091 }
2092 if (isdotdot || isdot) {
2093 fh.fh_len = 0;
2094 goto found;
2095 }
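/* (fh.fh_len == 0 tells the found: code below that no file handle
 * was fetched: "." resolves to dvp itself and ".." to its parent) */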
2096 if ((nfsvers >= NFS_VER4) && (dnp->n_vattr.nva_flags & NFS_FFLAG_TRIGGER)) {
2097 /* we should never be looking things up in a trigger directory, return nothing */
2098 error = ENOENT;
2099 goto error_return;
2100 }
2101
2102 /* do we know this name is too long? */
2103 nmp = VTONMP(dvp);
2104 if (!nmp) {
2105 error = ENXIO;
2106 goto error_return;
2107 }
2108 if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_MAXNAME) &&
2109 (cnp->cn_namelen > (int)nmp->nm_fsattr.nfsa_maxname)) {
2110 error = ENAMETOOLONG;
2111 goto error_return;
2112 }
2113
2114 error = 0;
2115 newvp = NULLVP;
2116
2117 OSAddAtomic64(1, &nfsstats.lookupcache_misses);
2118
2119 error = nmp->nm_funcs->nf_lookup_rpc_async(dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx, &req);
2120 nfsmout_if(error);
2121 error = nmp->nm_funcs->nf_lookup_rpc_async_finish(dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx, req, &xid, &fh, &nvattr);
2122 nfsmout_if(error);
2123
2124 /* is the file handle the same as this directory's file handle? */
2125 isdot = NFS_CMPFH(dnp, fh.fh_data, fh.fh_len);
2126
2127 found:
2128 if (flags & ISLASTCN) {
2129 switch (cnp->cn_nameiop) {
2130 case DELETE:
2131 cnp->cn_flags &= ~MAKEENTRY;
2132 break;
2133 case RENAME:
2134 cnp->cn_flags &= ~MAKEENTRY;
2135 if (isdot) {
2136 error = EISDIR;
2137 goto error_return;
2138 }
2139 break;
2140 }
2141 }
2142
2143 if (isdotdot) {
2144 newvp = vnode_getparent(dvp);
2145 if (!newvp) {
2146 error = ENOENT;
2147 goto error_return;
2148 }
2149 } else if (isdot) {
2150 error = vnode_get(dvp);
2151 if (error)
2152 goto error_return;
2153 newvp = dvp;
2154 nfs_node_lock_force(dnp);
2155 if (fh.fh_len && (dnp->n_xid <= xid))
2156 nfs_loadattrcache(dnp, &nvattr, &xid, 0);
2157 nfs_node_unlock(dnp);
2158 } else {
2159 ngflags = (cnp->cn_flags & MAKEENTRY) ? NG_MAKEENTRY : 0;
2160 error = nfs_nget(mp, dnp, cnp, fh.fh_data, fh.fh_len, &nvattr, &xid, rq.r_auth, ngflags, &np);
2161 if (error)
2162 goto error_return;
2163 newvp = NFSTOV(np);
2164 nfs_node_unlock(np);
2165 }
2166 *vpp = newvp;
2167
2168 nfsmout:
2169 if (error) {
2170 if (((cnp->cn_nameiop == CREATE) || (cnp->cn_nameiop == RENAME)) &&
2171 (flags & ISLASTCN) && (error == ENOENT)) {
2172 if (vnode_mount(dvp) && vnode_vfsisrdonly(dvp))
2173 error = EROFS;
2174 else
2175 error = EJUSTRETURN;
2176 }
2177 }
2178 if ((error == ENOENT) && (cnp->cn_flags & MAKEENTRY) &&
2179 (cnp->cn_nameiop != CREATE) && negnamecache) {
2180 /* add a negative entry in the name cache */
2181 nfs_node_lock_force(dnp);
2182 cache_enter(dvp, NULL, cnp);
2183 dnp->n_flag |= NNEGNCENTRIES;
2184 nfs_node_unlock(dnp);
2185 }
2186 error_return:
2187 NVATTR_CLEANUP(&nvattr);
2188 if (!busyerror)
2189 nfs_node_clear_busy(dnp);
2190 if (error && *vpp) {
2191 vnode_put(*vpp);
2192 *vpp = NULLVP;
2193 }
2194 return (error);
2195 }
2196
2197 /*
2198 * NFS readlink call
2199 */
2200 int
2201 nfs_vnop_readlink(
2202 struct vnop_readlink_args /* {
2203 struct vnodeop_desc *a_desc;
2204 vnode_t a_vp;
2205 struct uio *a_uio;
2206 vfs_context_t a_context;
2207 } */ *ap)
2208 {
2209 vfs_context_t ctx = ap->a_context;
2210 nfsnode_t np = VTONFS(ap->a_vp);
2211 struct nfsmount *nmp;
2212 int error = 0, nfsvers;
2213 uint32_t buflen;
2214 uio_t uio = ap->a_uio;
2215 struct nfsbuf *bp = NULL;
2216
2217 if (vnode_vtype(ap->a_vp) != VLNK)
2218 return (EPERM);
2219
2220 if (uio_resid(uio) == 0)
2221 return (0);
2222 if (uio_offset(uio) < 0)
2223 return (EINVAL);
2224
2225 nmp = VTONMP(ap->a_vp);
2226 if (!nmp)
2227 return (ENXIO);
2228 nfsvers = nmp->nm_vers;
2229
2230 /* nfs_getattr() will check changed and purge caches */
2231 if ((error = nfs_getattr(np, NULL, ctx, NGA_CACHED))) {
2232 FSDBG(531, np, 0xd1e0001, 0, error);
2233 return (error);
2234 }
2235
2236 OSAddAtomic64(1, &nfsstats.biocache_readlinks);
2237 error = nfs_buf_get(np, 0, NFS_MAXPATHLEN, vfs_context_thread(ctx), NBLK_READ, &bp);
2238 if (error) {
2239 FSDBG(531, np, 0xd1e0002, 0, error);
2240 return (error);
2241 }
2242 if (!ISSET(bp->nb_flags, NB_CACHE)) {
2243 OSAddAtomic64(1, &nfsstats.readlink_bios);
2244 buflen = bp->nb_bufsize;
2245 error = nmp->nm_funcs->nf_readlink_rpc(np, bp->nb_data, &buflen, ctx);
2246 if (error) {
2247 SET(bp->nb_flags, NB_ERROR);
2248 bp->nb_error = error;
2249 } else {
2250 bp->nb_validoff = 0;
2251 bp->nb_validend = buflen;
2252 }
2253 }
2254 if (!error && (bp->nb_validend > 0))
2255 error = uiomove(bp->nb_data, bp->nb_validend, uio);
2256 FSDBG(531, np, bp->nb_validend, 0, error);
2257 nfs_buf_release(bp, 1);
2258 return (error);
2259 }
2260
2261 /*
2262 * Do a readlink RPC.
2263 */
2264 int
2265 nfs3_readlink_rpc(nfsnode_t np, char *buf, uint32_t *buflenp, vfs_context_t ctx)
2266 {
2267 struct nfsmount *nmp;
2268 int error = 0, lockerror = ENOENT, nfsvers, status;
2269 uint32_t len;
2270 u_int64_t xid;
2271 struct nfsm_chain nmreq, nmrep;
2272
2273 nmp = NFSTONMP(np);
2274 if (!nmp)
2275 return (ENXIO);
2276 nfsvers = nmp->nm_vers;
2277 nfsm_chain_null(&nmreq);
2278 nfsm_chain_null(&nmrep);
2279
2280 nfsm_chain_build_alloc_init(error, &nmreq, NFSX_FH(nfsvers));
2281 nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
2282 nfsm_chain_build_done(error, &nmreq);
2283 nfsmout_if(error);
2284 error = nfs_request(np, NULL, &nmreq, NFSPROC_READLINK, ctx, NULL, &nmrep, &xid, &status);
2285 if ((lockerror = nfs_node_lock(np)))
2286 error = lockerror;
2287 if (nfsvers == NFS_VER3)
2288 nfsm_chain_postop_attr_update(error, &nmrep, np, &xid);
2289 if (!error)
2290 error = status;
2291 nfsm_chain_get_32(error, &nmrep, len);
2292 nfsmout_if(error);
2293 if ((nfsvers == NFS_VER2) && (len > *buflenp)) {
2294 error = EBADRPC;
2295 goto nfsmout;
2296 }
2297 if (len >= *buflenp) {
2298 if (np->n_size && (np->n_size < *buflenp))
2299 len = np->n_size;
2300 else
2301 len = *buflenp - 1;
2302 }
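/*
 * Clamp example: with *buflenp == 1024 and a reply claiming a
 * 4096-byte target, len becomes 1023 (or np->n_size, if the node
 * already knows the link is shorter), so nfsm_chain_get_opaque()
 * below can never overrun the caller's buffer.
 */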
2303 nfsm_chain_get_opaque(error, &nmrep, len, buf);
2304 if (!error)
2305 *buflenp = len;
2306 nfsmout:
2307 if (!lockerror)
2308 nfs_node_unlock(np);
2309 nfsm_chain_cleanup(&nmreq);
2310 nfsm_chain_cleanup(&nmrep);
2311 return (error);
2312 }
2313
2314 /*
2315 * NFS read RPC call
2316 * Ditto above
2317 */
2318 int
2319 nfs_read_rpc(nfsnode_t np, uio_t uio, vfs_context_t ctx)
2320 {
2321 struct nfsmount *nmp;
2322 int error = 0, nfsvers, eof = 0;
2323 size_t nmrsize, len, retlen;
2324 user_ssize_t tsiz;
2325 off_t txoffset;
2326 struct nfsreq rq, *req = &rq;
2327 uint32_t stategenid = 0, restart = 0;
2328
2329 FSDBG_TOP(536, np, uio_offset(uio), uio_resid(uio), 0);
2330 nmp = NFSTONMP(np);
2331 if (!nmp)
2332 return (ENXIO);
2333 nfsvers = nmp->nm_vers;
2334 nmrsize = nmp->nm_rsize;
2335
2336 txoffset = uio_offset(uio);
2337 tsiz = uio_resid(uio);
2338 if ((nfsvers == NFS_VER2) && ((uint64_t)(txoffset + tsiz) > 0xffffffffULL)) {
2339 FSDBG_BOT(536, np, uio_offset(uio), uio_resid(uio), EFBIG);
2340 return (EFBIG);
2341 }
2342
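/*
 * Chunking example for the loop below, assuming nm_rsize == 32768:
 * a 100000-byte read is issued as four READ RPCs of 32768, 32768,
 * 32768 and 1696 bytes.  A short v2 reply (retlen < len) or the v3
 * eof flag terminates the loop early.
 */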
2343 while (tsiz > 0) {
2344 len = retlen = (tsiz > (user_ssize_t)nmrsize) ? nmrsize : (size_t)tsiz;
2345 FSDBG(536, np, txoffset, len, 0);
2346 if (np->n_flag & NREVOKE) {
2347 error = EIO;
2348 break;
2349 }
2350 if (nmp->nm_vers >= NFS_VER4)
2351 stategenid = nmp->nm_stategenid;
2352 error = nmp->nm_funcs->nf_read_rpc_async(np, txoffset, len,
2353 vfs_context_thread(ctx), vfs_context_ucred(ctx), NULL, &req);
2354 if (!error)
2355 error = nmp->nm_funcs->nf_read_rpc_async_finish(np, req, uio, &retlen, &eof);
2356 if ((nmp->nm_vers >= NFS_VER4) && nfs_mount_state_error_should_restart(error) &&
2357 (++restart <= nfs_mount_state_max_restarts(nmp))) { /* guard against no progress */
2358 lck_mtx_lock(&nmp->nm_lock);
2359 if ((error != NFSERR_GRACE) && (stategenid == nmp->nm_stategenid)) {
2360 NP(np, "nfs_read_rpc: error %d, initiating recovery", error);
2361 nfs_need_recover(nmp, error);
2362 }
2363 lck_mtx_unlock(&nmp->nm_lock);
2364 if (np->n_flag & NREVOKE) {
2365 error = EIO;
2366 } else {
2367 if (error == NFSERR_GRACE)
2368 tsleep(&nmp->nm_state, (PZERO-1), "nfsgrace", 2*hz);
2369 if (!(error = nfs_mount_state_wait_for_recovery(nmp)))
2370 continue;
2371 }
2372 }
2373 if (error)
2374 break;
2375 txoffset += retlen;
2376 tsiz -= retlen;
2377 if (nfsvers != NFS_VER2) {
2378 if (eof || (retlen == 0))
2379 tsiz = 0;
2380 } else if (retlen < len)
2381 tsiz = 0;
2382 }
2383
2384 FSDBG_BOT(536, np, eof, uio_resid(uio), error);
2385 return (error);
2386 }
2387
2388 int
2389 nfs3_read_rpc_async(
2390 nfsnode_t np,
2391 off_t offset,
2392 size_t len,
2393 thread_t thd,
2394 kauth_cred_t cred,
2395 struct nfsreq_cbinfo *cb,
2396 struct nfsreq **reqp)
2397 {
2398 struct nfsmount *nmp;
2399 int error = 0, nfsvers;
2400 struct nfsm_chain nmreq;
2401
2402 nmp = NFSTONMP(np);
2403 if (!nmp)
2404 return (ENXIO);
2405 nfsvers = nmp->nm_vers;
2406
2407 nfsm_chain_null(&nmreq);
2408 nfsm_chain_build_alloc_init(error, &nmreq, NFSX_FH(nfsvers) + 3 * NFSX_UNSIGNED);
2409 nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
2410 if (nfsvers == NFS_VER3) {
2411 nfsm_chain_add_64(error, &nmreq, offset);
2412 nfsm_chain_add_32(error, &nmreq, len);
2413 } else {
2414 nfsm_chain_add_32(error, &nmreq, offset);
2415 nfsm_chain_add_32(error, &nmreq, len);
2416 nfsm_chain_add_32(error, &nmreq, 0);
2417 }
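/*
 * (The v2 READ arguments are offset/count/totalcount per RFC 1094;
 * the trailing zero is the unused totalcount field.  v3 widens the
 * offset to 64 bits and drops totalcount.)
 */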
2418 nfsm_chain_build_done(error, &nmreq);
2419 nfsmout_if(error);
2420 error = nfs_request_async(np, NULL, &nmreq, NFSPROC_READ, thd, cred, NULL, 0, cb, reqp);
2421 nfsmout:
2422 nfsm_chain_cleanup(&nmreq);
2423 return (error);
2424 }
2425
2426 int
2427 nfs3_read_rpc_async_finish(
2428 nfsnode_t np,
2429 struct nfsreq *req,
2430 uio_t uio,
2431 size_t *lenp,
2432 int *eofp)
2433 {
2434 int error = 0, lockerror, nfsvers, status, eof = 0;
2435 size_t retlen = 0;
2436 uint64_t xid;
2437 struct nfsmount *nmp;
2438 struct nfsm_chain nmrep;
2439
2440 nmp = NFSTONMP(np);
2441 if (!nmp) {
2442 nfs_request_async_cancel(req);
2443 return (ENXIO);
2444 }
2445 nfsvers = nmp->nm_vers;
2446
2447 nfsm_chain_null(&nmrep);
2448
2449 error = nfs_request_async_finish(req, &nmrep, &xid, &status);
2450 if (error == EINPROGRESS) /* async request restarted */
2451 return (error);
2452
2453 if ((lockerror = nfs_node_lock(np)))
2454 error = lockerror;
2455 if (nfsvers == NFS_VER3)
2456 nfsm_chain_postop_attr_update(error, &nmrep, np, &xid);
2457 if (!error)
2458 error = status;
2459 if (nfsvers == NFS_VER3) {
2460 nfsm_chain_adv(error, &nmrep, NFSX_UNSIGNED);
2461 nfsm_chain_get_32(error, &nmrep, eof);
2462 } else {
2463 nfsm_chain_loadattr(error, &nmrep, np, nfsvers, &xid);
2464 }
2465 if (!lockerror)
2466 nfs_node_unlock(np);
2467 nfsm_chain_get_32(error, &nmrep, retlen);
2468 if ((nfsvers == NFS_VER2) && (retlen > *lenp))
2469 error = EBADRPC;
2470 nfsmout_if(error);
2471 error = nfsm_chain_get_uio(&nmrep, MIN(retlen, *lenp), uio);
2472 if (eofp) {
2473 if (nfsvers == NFS_VER3) {
2474 if (!eof && !retlen)
2475 eof = 1;
2476 } else if (retlen < *lenp) {
2477 eof = 1;
2478 }
2479 *eofp = eof;
2480 }
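/*
 * Note on the eof inference above: v2 has no eof flag on the wire,
 * so a reply shorter than requested is treated as end-of-file.  v3
 * reports eof explicitly, and a zero-length non-eof reply is
 * promoted to eof so callers always make forward progress.
 */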
2481 *lenp = MIN(retlen, *lenp);
2482 nfsmout:
2483 nfsm_chain_cleanup(&nmrep);
2484 return (error);
2485 }
2486
2487 /*
2488 * NFS write call
2489 */
2490 int
2491 nfs_vnop_write(
2492 struct vnop_write_args /* {
2493 struct vnodeop_desc *a_desc;
2494 vnode_t a_vp;
2495 struct uio *a_uio;
2496 int a_ioflag;
2497 vfs_context_t a_context;
2498 } */ *ap)
2499 {
2500 vfs_context_t ctx = ap->a_context;
2501 uio_t uio = ap->a_uio;
2502 vnode_t vp = ap->a_vp;
2503 nfsnode_t np = VTONFS(vp);
2504 int ioflag = ap->a_ioflag;
2505 struct nfsbuf *bp;
2506 struct nfsmount *nmp = VTONMP(vp);
2507 daddr64_t lbn;
2508 int biosize;
2509 int n, on, error = 0;
2510 off_t boff, start, end;
2511 uio_t auio;
2512 char auio_buf [ UIO_SIZEOF(1) ];
2513 thread_t thd;
2514 kauth_cred_t cred;
2515
2516 FSDBG_TOP(515, np, uio_offset(uio), uio_resid(uio), ioflag);
2517
2518 if (vnode_vtype(vp) != VREG) {
2519 FSDBG_BOT(515, np, uio_offset(uio), uio_resid(uio), EIO);
2520 return (EIO);
2521 }
2522
2523 thd = vfs_context_thread(ctx);
2524 cred = vfs_context_ucred(ctx);
2525
2526 nfs_data_lock(np, NFS_DATA_LOCK_SHARED);
2527
2528 if ((error = nfs_node_lock(np))) {
2529 nfs_data_unlock(np);
2530 FSDBG_BOT(515, np, uio_offset(uio), uio_resid(uio), error);
2531 return (error);
2532 }
2533 np->n_wrbusy++;
2534
2535 if (np->n_flag & NWRITEERR) {
2536 error = np->n_error;
2537 np->n_flag &= ~NWRITEERR;
2538 }
2539 if (np->n_flag & NNEEDINVALIDATE) {
2540 np->n_flag &= ~NNEEDINVALIDATE;
2541 nfs_node_unlock(np);
2542 nfs_data_unlock(np);
2543 nfs_vinvalbuf(vp, V_SAVE|V_IGNORE_WRITEERR, ctx, 1);
2544 nfs_data_lock(np, NFS_DATA_LOCK_SHARED);
2545 } else {
2546 nfs_node_unlock(np);
2547 }
2548 if (error)
2549 goto out;
2550
2551 biosize = nmp->nm_biosize;
2552
2553 if (ioflag & (IO_APPEND | IO_SYNC)) {
2554 nfs_node_lock_force(np);
2555 if (np->n_flag & NMODIFIED) {
2556 NATTRINVALIDATE(np);
2557 nfs_node_unlock(np);
2558 nfs_data_unlock(np);
2559 error = nfs_vinvalbuf(vp, V_SAVE, ctx, 1);
2560 nfs_data_lock(np, NFS_DATA_LOCK_SHARED);
2561 if (error) {
2562 FSDBG(515, np, uio_offset(uio), 0x10bad01, error);
2563 goto out;
2564 }
2565 } else {
2566 nfs_node_unlock(np);
2567 }
2568 if (ioflag & IO_APPEND) {
2569 nfs_data_unlock(np);
2570 /* nfs_getattr() will check changed and purge caches */
2571 error = nfs_getattr(np, NULL, ctx, NGA_UNCACHED);
2572 /* we'll be extending the file, so take the data lock exclusive */
2573 nfs_data_lock(np, NFS_DATA_LOCK_EXCLUSIVE);
2574 if (error) {
2575 FSDBG(515, np, uio_offset(uio), 0x10bad02, error);
2576 goto out;
2577 }
2578 uio_setoffset(uio, np->n_size);
2579 }
2580 }
2581 if (uio_offset(uio) < 0) {
2582 error = EINVAL;
2583 FSDBG_BOT(515, np, uio_offset(uio), 0xbad0ff, error);
2584 goto out;
2585 }
2586 if (uio_resid(uio) == 0)
2587 goto out;
2588
2589 if (((uio_offset(uio) + uio_resid(uio)) > (off_t)np->n_size) && !(ioflag & IO_APPEND)) {
2590 /* it looks like we'll be extending the file, so take the data lock exclusive */
2591 nfs_data_unlock(np);
2592 nfs_data_lock(np, NFS_DATA_LOCK_EXCLUSIVE);
2593 }
2594
2595 do {
2596 OSAddAtomic64(1, &nfsstats.biocache_writes);
2597 lbn = uio_offset(uio) / biosize;
2598 on = uio_offset(uio) % biosize;
2599 n = biosize - on;
2600 if (uio_resid(uio) < n)
2601 n = uio_resid(uio);
2602 again:
2603 /*
2604 * Get a cache block for writing. The range to be written is
2605 * (on..on+n) within the block. We ensure that the block
2606 * either has no dirty region or that the given range is
2607 * contiguous with the existing dirty region.
2608 */
2609 error = nfs_buf_get(np, lbn, biosize, thd, NBLK_WRITE, &bp);
2610 if (error)
2611 goto out;
2612 /* map the block because we know we're going to write to it */
2613 NFS_BUF_MAP(bp);
2614
2615 if (ioflag & IO_NOCACHE)
2616 SET(bp->nb_flags, NB_NOCACHE);
2617
2618 if (!IS_VALID_CRED(bp->nb_wcred)) {
2619 kauth_cred_ref(cred);
2620 bp->nb_wcred = cred;
2621 }
2622
2623 /*
2624 * If there's already a dirty range AND dirty pages in this block we
2625 * need to send a commit AND write the dirty pages before continuing.
2626 *
2627 * If there's already a dirty range OR dirty pages in this block
2628 * and the new write range is not contiguous with the existing range,
2629 * then force the buffer to be written out now.
2630 * (We used to just extend the dirty range to cover the valid,
2631 * but unwritten, data in between also. But writing ranges
2632 * of data that weren't actually written by an application
2633 * risks overwriting some other client's data with stale data
2634 * that's just masquerading as new written data.)
2635 */
2636 if (bp->nb_dirtyend > 0) {
2637 if (on > bp->nb_dirtyend || (on + n) < bp->nb_dirtyoff || bp->nb_dirty) {
2638 FSDBG(515, np, uio_offset(uio), bp, 0xd15c001);
2639 /* write/commit buffer "synchronously" */
2640 /* (NB_STABLE indicates that data writes should be FILESYNC) */
2641 CLR(bp->nb_flags, (NB_DONE | NB_ERROR | NB_INVAL));
2642 SET(bp->nb_flags, (NB_ASYNC | NB_STABLE));
2643 error = nfs_buf_write(bp);
2644 if (error)
2645 goto out;
2646 goto again;
2647 }
2648 } else if (bp->nb_dirty) {
2649 int firstpg, lastpg;
2650 u_int32_t pagemask;
2651 /* calculate write range pagemask */
2652 firstpg = on/PAGE_SIZE;
2653 lastpg = (on+n-1)/PAGE_SIZE;
2654 pagemask = ((1 << (lastpg+1)) - 1) & ~((1 << firstpg) - 1);
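/*
 * E.g. a write with on = 5000, n = 3000 and 4K pages:
 * firstpg = 5000/4096 = 1, lastpg = 7999/4096 = 1, so
 * pagemask = ((1<<2)-1) & ~((1<<1)-1) = 0x2 -- only page 1
 * is covered by this write.
 */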
2655 /* check if there are dirty pages outside the write range */
2656 if (bp->nb_dirty & ~pagemask) {
2657 FSDBG(515, np, uio_offset(uio), bp, 0xd15c002);
2658 /* write/commit buffer "synchronously" */
2659 /* (NB_STABLE indicates that data writes should be FILESYNC) */
2660 CLR(bp->nb_flags, (NB_DONE | NB_ERROR | NB_INVAL));
2661 SET(bp->nb_flags, (NB_ASYNC | NB_STABLE));
2662 error = nfs_buf_write(bp);
2663 if (error)
2664 goto out;
2665 goto again;
2666 }
2667 /* if the first or last pages are already dirty */
2668 /* make sure that the dirty range encompasses those pages */
2669 if (NBPGDIRTY(bp,firstpg) || NBPGDIRTY(bp,lastpg)) {
2670 FSDBG(515, np, uio_offset(uio), bp, 0xd15c003);
2671 bp->nb_dirtyoff = min(on, firstpg * PAGE_SIZE);
2672 if (NBPGDIRTY(bp,lastpg)) {
2673 bp->nb_dirtyend = (lastpg+1) * PAGE_SIZE;
2674 /* clip to EOF */
2675 if (NBOFF(bp) + bp->nb_dirtyend > (off_t)np->n_size) {
2676 bp->nb_dirtyend = np->n_size - NBOFF(bp);
2677 if (bp->nb_dirtyoff >= bp->nb_dirtyend)
2678 bp->nb_dirtyoff = bp->nb_dirtyend = 0;
2679 }
2680 } else
2681 bp->nb_dirtyend = on+n;
2682 }
2683 }
2684
2685 /*
2686 * Are we extending the size of the file with this write?
2687 * If so, update file size now that we have the block.
2688 * If there was a partial buf at the old eof, validate
2689 * and zero the new bytes.
2690 */
2691 if ((uio_offset(uio) + n) > (off_t)np->n_size) {
2692 struct nfsbuf *eofbp = NULL;
2693 daddr64_t eofbn = np->n_size / biosize;
2694 int eofoff = np->n_size % biosize;
2695 int neweofoff = (uio_offset(uio) + n) % biosize;
2696
2697 FSDBG(515, 0xb1ffa000, uio_offset(uio) + n, eofoff, neweofoff);
2698
2699 if (eofoff && (eofbn < lbn) &&
2700 ((error = nfs_buf_get(np, eofbn, biosize, thd, NBLK_WRITE|NBLK_ONLYVALID, &eofbp))))
2701 goto out;
2702
2703 /* if we're extending within the same last block */
2704 /* and the block is flagged as being cached... */
2705 if ((lbn == eofbn) && ISSET(bp->nb_flags, NB_CACHE)) {
2706 /* ...check that all pages in buffer are valid */
2707 int endpg = ((neweofoff ? neweofoff : biosize) - 1)/PAGE_SIZE;
2708 u_int32_t pagemask;
2709 /* pagemask only has to extend to last page being written to */
2710 pagemask = (1 << (endpg+1)) - 1;
2711 FSDBG(515, 0xb1ffa001, bp->nb_valid, pagemask, 0);
2712 if ((bp->nb_valid & pagemask) != pagemask) {
2713 /* zerofill any hole */
2714 if (on > bp->nb_validend) {
2715 int i;
2716 for (i=bp->nb_validend/PAGE_SIZE; i <= (on - 1)/PAGE_SIZE; i++)
2717 NBPGVALID_SET(bp, i);
2718 NFS_BUF_MAP(bp);
2719 FSDBG(516, bp, bp->nb_validend, on - bp->nb_validend, 0xf01e);
2720 bzero((char *)bp->nb_data + bp->nb_validend,
2721 on - bp->nb_validend);
2722 }
2723 /* zerofill any trailing data in the last page */
2724 if (neweofoff) {
2725 NFS_BUF_MAP(bp);
2726 FSDBG(516, bp, neweofoff, PAGE_SIZE - (neweofoff & PAGE_MASK), 0xe0f);
2727 bzero((char *)bp->nb_data + neweofoff,
2728 PAGE_SIZE - (neweofoff & PAGE_MASK));
2729 }
2730 }
2731 }
2732 np->n_size = uio_offset(uio) + n;
2733 nfs_node_lock_force(np);
2734 CLR(np->n_flag, NUPDATESIZE);
2735 np->n_flag |= NMODIFIED;
2736 nfs_node_unlock(np);
2737 FSDBG(516, np, np->n_size, np->n_vattr.nva_size, 0xf00d0001);
2738 ubc_setsize(vp, (off_t)np->n_size); /* XXX errors */
2739 if (eofbp) {
2740 /*
2741 * We may need to zero any previously invalid data
2742 * after the old EOF in the previous EOF buffer.
2743 *
2744 * For the old last page, don't zero bytes if there
2745 * are invalid bytes in that page (i.e. the page isn't
2746 * currently valid).
2747 * For pages after the old last page, zero them and
2748 * mark them as valid.
2749 */
2750 char *d;
2751 int i;
2752 if (ioflag & IO_NOCACHE)
2753 SET(eofbp->nb_flags, NB_NOCACHE);
2754 NFS_BUF_MAP(eofbp);
2755 FSDBG(516, eofbp, eofoff, biosize - eofoff, 0xe0fff01e);
2756 d = eofbp->nb_data;
2757 i = eofoff/PAGE_SIZE;
2758 while (eofoff < biosize) {
2759 int poff = eofoff & PAGE_MASK;
2760 if (!poff || NBPGVALID(eofbp,i)) {
2761 bzero(d + eofoff, PAGE_SIZE - poff);
2762 NBPGVALID_SET(eofbp, i);
2763 }
2764 if (bp->nb_validend == eofoff)
2765 bp->nb_validend += PAGE_SIZE - poff;
2766 eofoff += PAGE_SIZE - poff;
2767 i++;
2768 }
2769 nfs_buf_release(eofbp, 1);
2770 }
2771 }
2772 /*
2773 * If dirtyend exceeds file size, chop it down. This should
2774 * not occur unless there is a race.
2775 */
2776 if (NBOFF(bp) + bp->nb_dirtyend > (off_t)np->n_size) {
2777 bp->nb_dirtyend = np->n_size - NBOFF(bp);
2778 if (bp->nb_dirtyoff >= bp->nb_dirtyend)
2779 bp->nb_dirtyoff = bp->nb_dirtyend = 0;
2780 }
2781 /*
2782 * UBC doesn't handle partial pages, so we need to make sure
2783 * that any pages left in the page cache are completely valid.
2784 *
2785 * Writes that are smaller than a block are delayed if they
2786 * don't extend to the end of the block.
2787 *
2788 * If the block isn't (completely) cached, we may need to read
2789 * in some parts of pages that aren't covered by the write.
2790 * If the write offset (on) isn't page aligned, we'll need to
2791 * read the start of the first page being written to. Likewise,
2792 * if the offset of the end of the write (on+n) isn't page aligned,
2793 * we'll need to read the end of the last page being written to.
2794 *
2795 * Notes:
2796 * We don't want to read anything we're just going to write over.
2797 * We don't want to read anything we're just going to drop when the
2798 * I/O is complete (i.e. don't do reads for NOCACHE requests).
2799 * We don't want to issue multiple I/Os if we don't have to
2800 * (because they're synchronous rpcs).
2801 * We don't want to read anything we already have modified in the
2802 * page cache.
2803 */
2804 if (!ISSET(bp->nb_flags, NB_CACHE) && (n < biosize)) {
2805 int firstpg, lastpg, dirtypg;
2806 int firstpgoff, lastpgoff;
2807 start = end = -1;
2808 firstpg = on/PAGE_SIZE;
2809 firstpgoff = on & PAGE_MASK;
2810 lastpg = (on+n-1)/PAGE_SIZE;
2811 lastpgoff = (on+n) & PAGE_MASK;
2812 if (firstpgoff && !NBPGVALID(bp,firstpg)) {
2813 /* need to read start of first page */
2814 start = firstpg * PAGE_SIZE;
2815 end = start + firstpgoff;
2816 }
2817 if (lastpgoff && !NBPGVALID(bp,lastpg)) {
2818 /* need to read end of last page */
2819 if (start < 0)
2820 start = (lastpg * PAGE_SIZE) + lastpgoff;
2821 end = (lastpg + 1) * PAGE_SIZE;
2822 }
2823 if (ISSET(bp->nb_flags, NB_NOCACHE)) {
2824 /*
2825 * For nocache writes, if there is any partial page at the
2826 * start or end of the write range, then we do the write
2827 * synchronously to make sure that we can drop the data
2828 * from the cache as soon as the WRITE finishes. Normally,
2829 * we would do an unstable write and not drop the data until
2830 * it was committed. But doing that here would risk allowing
2831 * invalid data to be read from the cache between the WRITE
2832 * and the COMMIT.
2833 * (NB_STABLE indicates that data writes should be FILESYNC)
2834 */
2835 if (end > start)
2836 SET(bp->nb_flags, NB_STABLE);
2837 goto skipread;
2838 }
2839 if (end > start) {
2840 /* need to read the data in range: start...end-1 */
2841
2842 /* first, check for dirty pages in between */
2843 /* if there are, we'll have to do two reads because */
2844 /* we don't want to overwrite the dirty pages. */
2845 for (dirtypg=start/PAGE_SIZE; dirtypg <= (end-1)/PAGE_SIZE; dirtypg++)
2846 if (NBPGDIRTY(bp,dirtypg))
2847 break;
2848
2849 /* if start is at beginning of page, try */
2850 /* to get any preceding pages as well. */
2851 if (!(start & PAGE_MASK)) {
2852 /* stop at next dirty/valid page or start of block */
2853 for (; start > 0; start-=PAGE_SIZE)
2854 if (NBPGVALID(bp,((start-1)/PAGE_SIZE)))
2855 break;
2856 }
2857
2858 NFS_BUF_MAP(bp);
2859 /* setup uio for read(s) */
2860 boff = NBOFF(bp);
2861 auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ,
2862 &auio_buf, sizeof(auio_buf));
2863
2864 if (dirtypg <= (end-1)/PAGE_SIZE) {
2865 /* there's a dirty page in the way, so just do two reads */
2866 /* we'll read the preceding data here */
2867 uio_reset(auio, boff + start, UIO_SYSSPACE, UIO_READ);
2868 uio_addiov(auio, CAST_USER_ADDR_T(bp->nb_data + start), on - start);
2869 error = nfs_read_rpc(np, auio, ctx);
2870 if (error) {
2871 /* couldn't read the data, so treat buffer as synchronous NOCACHE */
2872 SET(bp->nb_flags, (NB_NOCACHE|NB_STABLE));
2873 goto skipread;
2874 }
2875 if (uio_resid(auio) > 0) {
2876 FSDBG(516, bp, (caddr_t)uio_curriovbase(auio) - bp->nb_data, uio_resid(auio), 0xd00dee01);
2877 bzero(CAST_DOWN(caddr_t, uio_curriovbase(auio)), uio_resid(auio));
2878 }
2879 if (!error) {
2880 /* update validoff/validend if necessary */
2881 if ((bp->nb_validoff < 0) || (bp->nb_validoff > start))
2882 bp->nb_validoff = start;
2883 if ((bp->nb_validend < 0) || (bp->nb_validend < on))
2884 bp->nb_validend = on;
2885 if ((off_t)np->n_size > boff + bp->nb_validend)
2886 bp->nb_validend = min(np->n_size - (boff + start), biosize);
2887 /* validate any pages before the write offset */
2888 for (; start < on/PAGE_SIZE; start+=PAGE_SIZE)
2889 NBPGVALID_SET(bp, start/PAGE_SIZE);
2890 }
2891 /* adjust start to read any trailing data */
2892 start = on+n;
2893 }
2894
2895 /* if end is at end of page, try to */
2896 /* get any following pages as well. */
2897 if (!(end & PAGE_MASK)) {
2898 /* stop at next valid page or end of block */
2899 for (; end < biosize; end+=PAGE_SIZE)
2900 if (NBPGVALID(bp,end/PAGE_SIZE))
2901 break;
2902 }
2903
2904 if (((boff+start) >= (off_t)np->n_size) ||
2905 ((start >= on) && ((boff + on + n) >= (off_t)np->n_size))) {
2906 /*
2907 * Either this entire read is beyond the current EOF
2908 * or the range that we won't be modifying (on+n...end)
2909 * is all beyond the current EOF.
2910 * No need to make a trip across the network to
2911 * read nothing. So, just zero the buffer instead.
2912 */
2913 FSDBG(516, bp, start, end - start, 0xd00dee00);
2914 bzero(bp->nb_data + start, end - start);
2915 error = 0;
2916 } else {
2917 /* now we'll read the (rest of the) data */
2918 uio_reset(auio, boff + start, UIO_SYSSPACE, UIO_READ);
2919 uio_addiov(auio, CAST_USER_ADDR_T(bp->nb_data + start), end - start);
2920 error = nfs_read_rpc(np, auio, ctx);
2921 if (error) {
2922 /* couldn't read the data, so treat buffer as synchronous NOCACHE */
2923 SET(bp->nb_flags, (NB_NOCACHE|NB_STABLE));
2924 goto skipread;
2925 }
2926 if (uio_resid(auio) > 0) {
2927 FSDBG(516, bp, (caddr_t)uio_curriovbase(auio) - bp->nb_data, uio_resid(auio), 0xd00dee02);
2928 bzero(CAST_DOWN(caddr_t, uio_curriovbase(auio)), uio_resid(auio));
2929 }
2930 }
2931 if (!error) {
2932 /* update validoff/validend if necessary */
2933 if ((bp->nb_validoff < 0) || (bp->nb_validoff > start))
2934 bp->nb_validoff = start;
2935 if ((bp->nb_validend < 0) || (bp->nb_validend < end))
2936 bp->nb_validend = end;
2937 if ((off_t)np->n_size > boff + bp->nb_validend)
2938 bp->nb_validend = min(np->n_size - (boff + start), biosize);
2939 /* validate any pages before the write offset's page */
2940 for (; start < (off_t)trunc_page_32(on); start+=PAGE_SIZE)
2941 NBPGVALID_SET(bp, start/PAGE_SIZE);
2942 /* validate any pages after the range of pages being written to */
2943 for (; (end - 1) > (off_t)round_page_32(on+n-1); end-=PAGE_SIZE)
2944 NBPGVALID_SET(bp, (end-1)/PAGE_SIZE);
2945 }
2946 /* Note: pages being written to will be validated when written */
2947 }
2948 }
2949 skipread:
2950
2951 if (ISSET(bp->nb_flags, NB_ERROR)) {
2952 error = bp->nb_error;
2953 nfs_buf_release(bp, 1);
2954 goto out;
2955 }
2956
2957 nfs_node_lock_force(np);
2958 np->n_flag |= NMODIFIED;
2959 nfs_node_unlock(np);
2960
2961 NFS_BUF_MAP(bp);
2962 error = uiomove((char *)bp->nb_data + on, n, uio);
2963 if (error) {
2964 SET(bp->nb_flags, NB_ERROR);
2965 nfs_buf_release(bp, 1);
2966 goto out;
2967 }
2968
2969 /* validate any pages written to */
2970 start = on & ~PAGE_MASK;
2971 for (; start < on+n; start += PAGE_SIZE) {
2972 NBPGVALID_SET(bp, start/PAGE_SIZE);
2973 /*
2974 * This may seem a little weird, but we don't actually set the
2975 * dirty bits for writes. This is because we keep the dirty range
2976 * in the nb_dirtyoff/nb_dirtyend fields. Also, particularly for
2977 * delayed writes, when we give the pages back to the VM we don't
2978 * want to keep them marked dirty, because when we later write the
2979 * buffer we won't be able to tell which pages were written dirty
2980 * and which pages were mmapped and dirtied.
2981 */
2982 }
2983 if (bp->nb_dirtyend > 0) {
2984 bp->nb_dirtyoff = min(on, bp->nb_dirtyoff);
2985 bp->nb_dirtyend = max((on + n), bp->nb_dirtyend);
2986 } else {
2987 bp->nb_dirtyoff = on;
2988 bp->nb_dirtyend = on + n;
2989 }
2990 if (bp->nb_validend <= 0 || bp->nb_validend < bp->nb_dirtyoff ||
2991 bp->nb_validoff > bp->nb_dirtyend) {
2992 bp->nb_validoff = bp->nb_dirtyoff;
2993 bp->nb_validend = bp->nb_dirtyend;
2994 } else {
2995 bp->nb_validoff = min(bp->nb_validoff, bp->nb_dirtyoff);
2996 bp->nb_validend = max(bp->nb_validend, bp->nb_dirtyend);
2997 }
2998 if (!ISSET(bp->nb_flags, NB_CACHE))
2999 nfs_buf_normalize_valid_range(np, bp);
3000
3001 /*
3002 * Since this block is being modified, it must be written
3003 * again and not just committed.
3004 */
3005 if (ISSET(bp->nb_flags, NB_NEEDCOMMIT)) {
3006 nfs_node_lock_force(np);
3007 if (ISSET(bp->nb_flags, NB_NEEDCOMMIT)) {
3008 np->n_needcommitcnt--;
3009 CHECK_NEEDCOMMITCNT(np);
3010 }
3011 CLR(bp->nb_flags, NB_NEEDCOMMIT);
3012 nfs_node_unlock(np);
3013 }
3014
3015 if (ioflag & IO_SYNC) {
3016 error = nfs_buf_write(bp);
3017 if (error)
3018 goto out;
3019 } else if (((n + on) == biosize) || (ioflag & IO_APPEND) ||
3020 (ioflag & IO_NOCACHE) || ISSET(bp->nb_flags, NB_NOCACHE)) {
3021 SET(bp->nb_flags, NB_ASYNC);
3022 error = nfs_buf_write(bp);
3023 if (error)
3024 goto out;
3025 } else {
3026 /* If the block wasn't already delayed: charge for the write */
3027 if (!ISSET(bp->nb_flags, NB_DELWRI)) {
3028 proc_t p = vfs_context_proc(ctx);
3029 if (p && p->p_stats)
3030 OSIncrementAtomicLong(&p->p_stats->p_ru.ru_oublock);
3031 }
3032 nfs_buf_write_delayed(bp);
3033 }
3034 if (np->n_needcommitcnt >= NFS_A_LOT_OF_NEEDCOMMITS)
3035 nfs_flushcommits(np, 1);
3036
3037 } while (uio_resid(uio) > 0 && n > 0);
3038
3039 out:
3040 nfs_node_lock_force(np);
3041 np->n_wrbusy--;
3042 nfs_node_unlock(np);
3043 nfs_data_unlock(np);
3044 FSDBG_BOT(515, np, uio_offset(uio), uio_resid(uio), error);
3045 return (error);
3046 }
3047
3048
3049 /*
3050 * NFS write RPC call
3051 */
3052 int
3053 nfs_write_rpc(
3054 nfsnode_t np,
3055 uio_t uio,
3056 vfs_context_t ctx,
3057 int *iomodep,
3058 uint64_t *wverfp)
3059 {
3060 return nfs_write_rpc2(np, uio, vfs_context_thread(ctx), vfs_context_ucred(ctx), iomodep, wverfp);
3061 }
3062
3063 int
3064 nfs_write_rpc2(
3065 nfsnode_t np,
3066 uio_t uio,
3067 thread_t thd,
3068 kauth_cred_t cred,
3069 int *iomodep,
3070 uint64_t *wverfp)
3071 {
3072 struct nfsmount *nmp;
3073 int error = 0, nfsvers;
3074 int backup, wverfset, commit, committed;
3075 uint64_t wverf = 0, wverf2;
3076 size_t nmwsize, totalsize, tsiz, len, rlen;
3077 struct nfsreq rq, *req = &rq;
3078 uint32_t stategenid = 0, vrestart = 0, restart = 0;
3079
3080 #if DIAGNOSTIC
3081 /* XXX limitation based on need to back up uio on short write */
3082 if (uio_iovcnt(uio) != 1)
3083 panic("nfs3_write_rpc: iovcnt > 1");
3084 #endif
3085 FSDBG_TOP(537, np, uio_offset(uio), uio_resid(uio), *iomodep);
3086 nmp = NFSTONMP(np);
3087 if (!nmp)
3088 return (ENXIO);
3089 nfsvers = nmp->nm_vers;
3090 nmwsize = nmp->nm_wsize;
3091
3092 wverfset = 0;
3093 committed = NFS_WRITE_FILESYNC;
3094
3095 totalsize = tsiz = uio_resid(uio);
3096 if ((nfsvers == NFS_VER2) && ((uint64_t)(uio_offset(uio) + tsiz) > 0xffffffffULL)) {
3097 FSDBG_BOT(537, np, uio_offset(uio), uio_resid(uio), EFBIG);
3098 return (EFBIG);
3099 }
3100
3101 while (tsiz > 0) {
3102 len = (tsiz > nmwsize) ? nmwsize : tsiz;
3103 FSDBG(537, np, uio_offset(uio), len, 0);
3104 if (np->n_flag & NREVOKE) {
3105 error = EIO;
3106 break;
3107 }
3108 if (nmp->nm_vers >= NFS_VER4)
3109 stategenid = nmp->nm_stategenid;
3110 error = nmp->nm_funcs->nf_write_rpc_async(np, uio, len, thd, cred, *iomodep, NULL, &req);
3111 if (!error)
3112 error = nmp->nm_funcs->nf_write_rpc_async_finish(np, req, &commit, &rlen, &wverf2);
3113 nmp = NFSTONMP(np);
3114 if (!nmp)
3115 error = ENXIO;
3116 if ((nmp->nm_vers >= NFS_VER4) && nfs_mount_state_error_should_restart(error) &&
3117 (++restart <= nfs_mount_state_max_restarts(nmp))) { /* guard against no progress */
3118 lck_mtx_lock(&nmp->nm_lock);
3119 if ((error != NFSERR_GRACE) && (stategenid == nmp->nm_stategenid)) {
3120 NP(np, "nfs_write_rpc: error %d, initiating recovery", error);
3121 nfs_need_recover(nmp, error);
3122 }
3123 lck_mtx_unlock(&nmp->nm_lock);
3124 if (np->n_flag & NREVOKE) {
3125 error = EIO;
3126 } else {
3127 if (error == NFSERR_GRACE)
3128 tsleep(&nmp->nm_state, (PZERO-1), "nfsgrace", 2*hz);
3129 if (!(error = nfs_mount_state_wait_for_recovery(nmp)))
3130 continue;
3131 }
3132 }
3133 if (error)
3134 break;
3135 if (nfsvers == NFS_VER2) {
3136 tsiz -= len;
3137 continue;
3138 }
3139
3140 /* check for a short write */
3141 if (rlen < len) {
3142 backup = len - rlen;
3143 uio_pushback(uio, backup);
3144 len = rlen;
3145 }
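/*
 * Short-write example: if we sent 32768 bytes and the server
 * accepted rlen = 20480, uio_pushback() rewinds the uio by
 * 12288 bytes so the next iteration re-sends the unwritten
 * tail from the correct offset.
 */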
3146
3147 /* return lowest commit level returned */
3148 if (commit < committed)
3149 committed = commit;
3150
3151 tsiz -= len;
3152
3153 /* check write verifier */
3154 if (!wverfset) {
3155 wverf = wverf2;
3156 wverfset = 1;
3157 } else if (wverf != wverf2) {
3158 /* verifier changed, so we need to restart all the writes */
3159 if (++vrestart > 100) {
3160 /* give up after too many restarts */
3161 error = EIO;
3162 break;
3163 }
3164 backup = totalsize - tsiz;
3165 uio_pushback(uio, backup);
3166 committed = NFS_WRITE_FILESYNC;
3167 wverfset = 0;
3168 tsiz = totalsize;
3169 }
3170 }
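/*
 * Note on the verifier handling above: unstable WRITEs are only as
 * durable as the server instance that acknowledged them, so when the
 * verifier changes mid-transfer (server reboot) every byte written by
 * this call so far is pushed back onto the uio and re-sent from the
 * beginning.
 */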
3171 if (wverfset && wverfp)
3172 *wverfp = wverf;
3173 *iomodep = committed;
3174 if (error)
3175 uio_setresid(uio, tsiz);
3176 FSDBG_BOT(537, np, committed, uio_resid(uio), error);
3177 return (error);
3178 }
3179
3180 int
3181 nfs3_write_rpc_async(
3182 nfsnode_t np,
3183 uio_t uio,
3184 size_t len,
3185 thread_t thd,
3186 kauth_cred_t cred,
3187 int iomode,
3188 struct nfsreq_cbinfo *cb,
3189 struct nfsreq **reqp)
3190 {
3191 struct nfsmount *nmp;
3192 mount_t mp;
3193 int error = 0, nfsvers;
3194 struct nfsm_chain nmreq;
3195
3196 nmp = NFSTONMP(np);
3197 if (!nmp)
3198 return (ENXIO);
3199 nfsvers = nmp->nm_vers;
3200
3201 /* for async mounts, don't bother sending sync write requests */
3202 if ((iomode != NFS_WRITE_UNSTABLE) && nfs_allow_async &&
3203 ((mp = NFSTOMP(np))) && (vfs_flags(mp) & MNT_ASYNC))
3204 iomode = NFS_WRITE_UNSTABLE;
3205
3206 nfsm_chain_null(&nmreq);
3207 nfsm_chain_build_alloc_init(error, &nmreq,
3208 NFSX_FH(nfsvers) + 5 * NFSX_UNSIGNED + nfsm_rndup(len));
3209 nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
3210 if (nfsvers == NFS_VER3) {
3211 nfsm_chain_add_64(error, &nmreq, uio_offset(uio));
3212 nfsm_chain_add_32(error, &nmreq, len);
3213 nfsm_chain_add_32(error, &nmreq, iomode);
3214 } else {
3215 nfsm_chain_add_32(error, &nmreq, 0);
3216 nfsm_chain_add_32(error, &nmreq, uio_offset(uio));
3217 nfsm_chain_add_32(error, &nmreq, 0);
3218 }
3219 nfsm_chain_add_32(error, &nmreq, len);
3220 nfsmout_if(error);
3221 error = nfsm_chain_add_uio(&nmreq, uio, len);
3222 nfsm_chain_build_done(error, &nmreq);
3223 nfsmout_if(error);
3224 error = nfs_request_async(np, NULL, &nmreq, NFSPROC_WRITE, thd, cred, NULL, 0, cb, reqp);
3225 nfsmout:
3226 nfsm_chain_cleanup(&nmreq);
3227 return (error);
3228 }
3229
3230 int
3231 nfs3_write_rpc_async_finish(
3232 nfsnode_t np,
3233 struct nfsreq *req,
3234 int *iomodep,
3235 size_t *rlenp,
3236 uint64_t *wverfp)
3237 {
3238 struct nfsmount *nmp;
3239 int error = 0, lockerror = ENOENT, nfsvers, status;
3240 int updatemtime = 0, wccpostattr = 0, rlen, committed = NFS_WRITE_FILESYNC;
3241 u_int64_t xid, wverf;
3242 mount_t mp;
3243 struct nfsm_chain nmrep;
3244
3245 nmp = NFSTONMP(np);
3246 if (!nmp) {
3247 nfs_request_async_cancel(req);
3248 return (ENXIO);
3249 }
3250 nfsvers = nmp->nm_vers;
3251
3252 nfsm_chain_null(&nmrep);
3253
3254 error = nfs_request_async_finish(req, &nmrep, &xid, &status);
3255 if (error == EINPROGRESS) /* async request restarted */
3256 return (error);
3257 nmp = NFSTONMP(np);
3258 if (!nmp)
3259 error = ENXIO;
3260 if (!error && (lockerror = nfs_node_lock(np)))
3261 error = lockerror;
3262 if (nfsvers == NFS_VER3) {
3263 struct timespec premtime = { 0, 0 };
3264 nfsm_chain_get_wcc_data(error, &nmrep, np, &premtime, &wccpostattr, &xid);
3265 if (nfstimespeccmp(&np->n_mtime, &premtime, ==))
3266 updatemtime = 1;
3267 if (!error)
3268 error = status;
3269 nfsm_chain_get_32(error, &nmrep, rlen);
3270 nfsmout_if(error);
3271 *rlenp = rlen;
3272 if (rlen <= 0)
3273 error = NFSERR_IO;
3274 nfsm_chain_get_32(error, &nmrep, committed);
3275 nfsm_chain_get_64(error, &nmrep, wverf);
3276 nfsmout_if(error);
3277 if (wverfp)
3278 *wverfp = wverf;
3279 lck_mtx_lock(&nmp->nm_lock);
3280 if (!(nmp->nm_state & NFSSTA_HASWRITEVERF)) {
3281 nmp->nm_verf = wverf;
3282 nmp->nm_state |= NFSSTA_HASWRITEVERF;
3283 } else if (nmp->nm_verf != wverf) {
3284 nmp->nm_verf = wverf;
3285 }
3286 lck_mtx_unlock(&nmp->nm_lock);
3287 } else {
3288 if (!error)
3289 error = status;
3290 nfsm_chain_loadattr(error, &nmrep, np, nfsvers, &xid);
3291 nfsmout_if(error);
3292 }
3293 if (updatemtime)
3294 NFS_CHANGED_UPDATE(nfsvers, np, &np->n_vattr);
3295 nfsmout:
3296 if (!lockerror)
3297 nfs_node_unlock(np);
3298 nfsm_chain_cleanup(&nmrep);
3299 if ((committed != NFS_WRITE_FILESYNC) && nfs_allow_async &&
3300 ((mp = NFSTOMP(np))) && (vfs_flags(mp) & MNT_ASYNC))
3301 committed = NFS_WRITE_FILESYNC;
3302 *iomodep = committed;
3303 return (error);
3304 }
3305
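/*
 * Editor's note (assumption, per RFC 1813 semantics): the v3 stable_how
 * levels order as UNSTABLE(0) < DATASYNC(1) < FILESYNC(2), which is why
 * nfs3_write_rpc() keeps the *lowest* commit level seen across a transfer --
 * a single UNSTABLE reply means a COMMIT is still required.  The MNT_ASYNC
 * upgrade just above reports FILESYNC so that callers skip that COMMIT.
 */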
3306 /*
3307 * NFS mknod vnode op
3308 *
3309 * For NFS v2 this is a kludge: v2 has no MKNOD procedure, so we issue a CREATE
3310 * RPC with the IFMT bits of the mode carrying the file type and the size field carrying rdev.
3311 */
3312 int
3313 nfs3_vnop_mknod(
3314 struct vnop_mknod_args /* {
3315 struct vnodeop_desc *a_desc;
3316 vnode_t a_dvp;
3317 vnode_t *a_vpp;
3318 struct componentname *a_cnp;
3319 struct vnode_attr *a_vap;
3320 vfs_context_t a_context;
3321 } */ *ap)
3322 {
3323 vnode_t dvp = ap->a_dvp;
3324 vnode_t *vpp = ap->a_vpp;
3325 struct componentname *cnp = ap->a_cnp;
3326 struct vnode_attr *vap = ap->a_vap;
3327 vfs_context_t ctx = ap->a_context;
3328 vnode_t newvp = NULL;
3329 nfsnode_t np = NULL;
3330 struct nfsmount *nmp;
3331 nfsnode_t dnp = VTONFS(dvp);
3332 struct nfs_vattr nvattr;
3333 fhandle_t fh;
3334 int error = 0, lockerror = ENOENT, busyerror = ENOENT, status, wccpostattr = 0;
3335 struct timespec premtime = { 0, 0 };
3336 u_int32_t rdev;
3337 u_int64_t xid, dxid;
3338 int nfsvers, gotuid, gotgid;
3339 struct nfsm_chain nmreq, nmrep;
3340 struct nfsreq rq, *req = &rq;
3341
3342 nmp = VTONMP(dvp);
3343 if (!nmp)
3344 return (ENXIO);
3345 nfsvers = nmp->nm_vers;
3346
3347 if (!VATTR_IS_ACTIVE(vap, va_type))
3348 return (EINVAL);
3349 if (vap->va_type == VCHR || vap->va_type == VBLK) {
3350 if (!VATTR_IS_ACTIVE(vap, va_rdev))
3351 return (EINVAL);
3352 rdev = vap->va_rdev;
3353 } else if (vap->va_type == VFIFO || vap->va_type == VSOCK)
3354 rdev = 0xffffffff;
3355 else {
3356 return (ENOTSUP);
3357 }
3358 if ((nfsvers == NFS_VER2) && (cnp->cn_namelen > NFS_MAXNAMLEN))
3359 return (ENAMETOOLONG);
3360
3361 nfs_avoid_needless_id_setting_on_create(dnp, vap, ctx);
3362
3363 VATTR_SET_SUPPORTED(vap, va_mode);
3364 VATTR_SET_SUPPORTED(vap, va_uid);
3365 VATTR_SET_SUPPORTED(vap, va_gid);
3366 VATTR_SET_SUPPORTED(vap, va_data_size);
3367 VATTR_SET_SUPPORTED(vap, va_access_time);
3368 VATTR_SET_SUPPORTED(vap, va_modify_time);
3369 gotuid = VATTR_IS_ACTIVE(vap, va_uid);
3370 gotgid = VATTR_IS_ACTIVE(vap, va_gid);
3371
3372 nfsm_chain_null(&nmreq);
3373 nfsm_chain_null(&nmrep);
3374
3375 nfsm_chain_build_alloc_init(error, &nmreq,
3376 NFSX_FH(nfsvers) + 4 * NFSX_UNSIGNED +
3377 nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(nfsvers));
3378 nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
3379 nfsm_chain_add_name(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen, nmp);
3380 if (nfsvers == NFS_VER3) {
3381 nfsm_chain_add_32(error, &nmreq, vtonfs_type(vap->va_type, nfsvers));
3382 nfsm_chain_add_v3sattr(error, &nmreq, vap);
3383 if (vap->va_type == VCHR || vap->va_type == VBLK) {
3384 nfsm_chain_add_32(error, &nmreq, major(vap->va_rdev));
3385 nfsm_chain_add_32(error, &nmreq, minor(vap->va_rdev));
3386 }
3387 } else {
3388 nfsm_chain_add_v2sattr(error, &nmreq, vap, rdev);
3389 }
3390 nfsm_chain_build_done(error, &nmreq);
3391 if (!error)
3392 error = busyerror = nfs_node_set_busy(dnp, vfs_context_thread(ctx));
3393 nfsmout_if(error);
3394
3395 error = nfs_request_async(dnp, NULL, &nmreq, NFSPROC_MKNOD,
3396 vfs_context_thread(ctx), vfs_context_ucred(ctx), NULL, 0, NULL, &req);
3397 if (!error)
3398 error = nfs_request_async_finish(req, &nmrep, &xid, &status);
3399
3400 if ((lockerror = nfs_node_lock(dnp)))
3401 error = lockerror;
3402 /* XXX no EEXIST kludge here? */
3403 dxid = xid;
3404 if (!error && !status) {
3405 if (dnp->n_flag & NNEGNCENTRIES) {
3406 dnp->n_flag &= ~NNEGNCENTRIES;
3407 cache_purge_negatives(dvp);
3408 }
3409 error = nfsm_chain_get_fh_attr(&nmrep, dnp, ctx, nfsvers, &xid, &fh, &nvattr);
3410 }
3411 if (nfsvers == NFS_VER3)
3412 nfsm_chain_get_wcc_data(error, &nmrep, dnp, &premtime, &wccpostattr, &dxid);
3413 if (!error)
3414 error = status;
3415 nfsmout:
3416 nfsm_chain_cleanup(&nmreq);
3417 nfsm_chain_cleanup(&nmrep);
3418
3419 if (!lockerror) {
3420 dnp->n_flag |= NMODIFIED;
3421 /* if directory hadn't changed, update namecache mtime */
3422 if (nfstimespeccmp(&dnp->n_ncmtime, &premtime, ==))
3423 NFS_CHANGED_UPDATE_NC(nfsvers, dnp, &dnp->n_vattr);
3424 nfs_node_unlock(dnp);
3425 /* nfs_getattr() will check changed and purge caches */
3426 nfs_getattr(dnp, NULL, ctx, wccpostattr ? NGA_CACHED : NGA_UNCACHED);
3427 }
3428
3429 if (!error && fh.fh_len)
3430 error = nfs_nget(NFSTOMP(dnp), dnp, cnp, fh.fh_data, fh.fh_len, &nvattr, &xid, rq.r_auth, NG_MAKEENTRY, &np);
3431 if (!error && !np)
3432 error = nfs_lookitup(dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx, &np);
3433 if (!error && np)
3434 newvp = NFSTOV(np);
3435 if (!busyerror)
3436 nfs_node_clear_busy(dnp);
3437
3438 if (!error && (gotuid || gotgid) &&
3439 (!newvp || nfs_getattrcache(np, &nvattr, 0) ||
3440 (gotuid && (nvattr.nva_uid != vap->va_uid)) ||
3441 (gotgid && (nvattr.nva_gid != vap->va_gid)))) {
3442 /* clear ID bits if server didn't use them (or we can't tell) */
3443 VATTR_CLEAR_SUPPORTED(vap, va_uid);
3444 VATTR_CLEAR_SUPPORTED(vap, va_gid);
3445 }
3446 if (error) {
3447 if (newvp) {
3448 nfs_node_unlock(np);
3449 vnode_put(newvp);
3450 }
3451 } else {
3452 *vpp = newvp;
3453 nfs_node_unlock(np);
3454 }
3455 return (error);
3456 }
3457
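/*
 * Editor's sketch (illustrative; vtype_to_ifmt is hypothetical): the v2
 * "kludge" described above has no MKNOD procedure to call, so the node type
 * and device number ride inside an ordinary CREATE sattr -- the IFMT bits of
 * the mode carry the type and the size field carries rdev, roughly:
 */
#if 0	/* illustrative only */
static void
v2_mknod_sattr(struct vnode_attr *vap, uint32_t *modep, uint32_t *sizep)
{
	*modep = (vap->va_mode & 07777) | vtype_to_ifmt(vap->va_type);
	*sizep = (vap->va_type == VCHR || vap->va_type == VBLK) ?
	    (uint32_t)vap->va_rdev : 0xffffffff;	/* rdev rides in "size" */
}
#endif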
3458 static uint32_t create_verf;
3459 /*
3460 * NFS file create call
3461 */
3462 int
3463 nfs3_vnop_create(
3464 struct vnop_create_args /* {
3465 struct vnodeop_desc *a_desc;
3466 vnode_t a_dvp;
3467 vnode_t *a_vpp;
3468 struct componentname *a_cnp;
3469 struct vnode_attr *a_vap;
3470 vfs_context_t a_context;
3471 } */ *ap)
3472 {
3473 vfs_context_t ctx = ap->a_context;
3474 vnode_t dvp = ap->a_dvp;
3475 struct vnode_attr *vap = ap->a_vap;
3476 struct componentname *cnp = ap->a_cnp;
3477 struct nfs_vattr nvattr;
3478 fhandle_t fh;
3479 nfsnode_t np = NULL;
3480 struct nfsmount *nmp;
3481 nfsnode_t dnp = VTONFS(dvp);
3482 vnode_t newvp = NULL;
3483 int error = 0, lockerror = ENOENT, busyerror = ENOENT, status, wccpostattr = 0, fmode = 0;
3484 struct timespec premtime = { 0, 0 };
3485 int nfsvers, gotuid, gotgid;
3486 u_int64_t xid, dxid;
3487 uint32_t val;
3488 struct nfsm_chain nmreq, nmrep;
3489 struct nfsreq rq, *req = &rq;
3490 struct nfs_dulookup dul;
3491
3492 nmp = VTONMP(dvp);
3493 if (!nmp)
3494 return (ENXIO);
3495 nfsvers = nmp->nm_vers;
3496
3497 if ((nfsvers == NFS_VER2) && (cnp->cn_namelen > NFS_MAXNAMLEN))
3498 return (ENAMETOOLONG);
3499
3500 nfs_avoid_needless_id_setting_on_create(dnp, vap, ctx);
3501
3502 VATTR_SET_SUPPORTED(vap, va_mode);
3503 VATTR_SET_SUPPORTED(vap, va_uid);
3504 VATTR_SET_SUPPORTED(vap, va_gid);
3505 VATTR_SET_SUPPORTED(vap, va_data_size);
3506 VATTR_SET_SUPPORTED(vap, va_access_time);
3507 VATTR_SET_SUPPORTED(vap, va_modify_time);
3508 gotuid = VATTR_IS_ACTIVE(vap, va_uid);
3509 gotgid = VATTR_IS_ACTIVE(vap, va_gid);
3510
3511 if (vap->va_vaflags & VA_EXCLUSIVE) {
3512 fmode |= O_EXCL;
3513 if (!VATTR_IS_ACTIVE(vap, va_access_time) || !VATTR_IS_ACTIVE(vap, va_modify_time))
3514 vap->va_vaflags |= VA_UTIMES_NULL;
3515 }
3516
3517 again:
3518 error = busyerror = nfs_node_set_busy(dnp, vfs_context_thread(ctx));
3519 nfs_dulookup_init(&dul, dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx);
3520
3521 nfsm_chain_null(&nmreq);
3522 nfsm_chain_null(&nmrep);
3523
3524 nfsm_chain_build_alloc_init(error, &nmreq,
3525 NFSX_FH(nfsvers) + 2 * NFSX_UNSIGNED +
3526 nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(nfsvers));
3527 nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
3528 nfsm_chain_add_name(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen, nmp);
3529 if (nfsvers == NFS_VER3) {
3530 if (fmode & O_EXCL) {
3531 nfsm_chain_add_32(error, &nmreq, NFS_CREATE_EXCLUSIVE);
3532 lck_rw_lock_shared(in_ifaddr_rwlock);
3533 if (!TAILQ_EMPTY(&in_ifaddrhead))
3534 val = IA_SIN(in_ifaddrhead.tqh_first)->sin_addr.s_addr;
3535 else
3536 val = create_verf;
3537 lck_rw_done(in_ifaddr_rwlock);
3538 nfsm_chain_add_32(error, &nmreq, val);
3539 ++create_verf;
3540 nfsm_chain_add_32(error, &nmreq, create_verf);
3541 } else {
3542 nfsm_chain_add_32(error, &nmreq, NFS_CREATE_UNCHECKED);
3543 nfsm_chain_add_v3sattr(error, &nmreq, vap);
3544 }
3545 } else {
3546 nfsm_chain_add_v2sattr(error, &nmreq, vap, 0);
3547 }
3548 nfsm_chain_build_done(error, &nmreq);
3549 nfsmout_if(error);
3550
3551 error = nfs_request_async(dnp, NULL, &nmreq, NFSPROC_CREATE,
3552 vfs_context_thread(ctx), vfs_context_ucred(ctx), NULL, 0, NULL, &req);
3553 if (!error) {
3554 nfs_dulookup_start(&dul, dnp, ctx);
3555 error = nfs_request_async_finish(req, &nmrep, &xid, &status);
3556 }
3557
3558 if ((lockerror = nfs_node_lock(dnp)))
3559 error = lockerror;
3560 dxid = xid;
3561 if (!error && !status) {
3562 if (dnp->n_flag & NNEGNCENTRIES) {
3563 dnp->n_flag &= ~NNEGNCENTRIES;
3564 cache_purge_negatives(dvp);
3565 }
3566 error = nfsm_chain_get_fh_attr(&nmrep, dnp, ctx, nfsvers, &xid, &fh, &nvattr);
3567 }
3568 if (nfsvers == NFS_VER3)
3569 nfsm_chain_get_wcc_data(error, &nmrep, dnp, &premtime, &wccpostattr, &dxid);
3570 if (!error)
3571 error = status;
3572 nfsmout:
3573 nfsm_chain_cleanup(&nmreq);
3574 nfsm_chain_cleanup(&nmrep);
3575
3576 if (!lockerror) {
3577 dnp->n_flag |= NMODIFIED;
3578 /* if directory hadn't changed, update namecache mtime */
3579 if (nfstimespeccmp(&dnp->n_ncmtime, &premtime, ==))
3580 NFS_CHANGED_UPDATE_NC(nfsvers, dnp, &dnp->n_vattr);
3581 nfs_node_unlock(dnp);
3582 /* nfs_getattr() will check changed and purge caches */
3583 nfs_getattr(dnp, NULL, ctx, wccpostattr ? NGA_CACHED : NGA_UNCACHED);
3584 }
3585
3586 if (!error && fh.fh_len)
3587 error = nfs_nget(NFSTOMP(dnp), dnp, cnp, fh.fh_data, fh.fh_len, &nvattr, &xid, rq.r_auth, NG_MAKEENTRY, &np);
3588 if (!error && !np)
3589 error = nfs_lookitup(dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx, &np);
3590 if (!error && np)
3591 newvp = NFSTOV(np);
3592
3593 nfs_dulookup_finish(&dul, dnp, ctx);
3594 if (!busyerror)
3595 nfs_node_clear_busy(dnp);
3596
3597 if (error) {
3598 if ((nfsvers == NFS_VER3) && (fmode & O_EXCL) && (error == NFSERR_NOTSUPP)) {
3599 fmode &= ~O_EXCL;
3600 goto again;
3601 }
3602 if (newvp) {
3603 nfs_node_unlock(np);
3604 vnode_put(newvp);
3605 }
3606 } else if ((nfsvers == NFS_VER3) && (fmode & O_EXCL)) {
3607 nfs_node_unlock(np);
3608 error = nfs3_setattr_rpc(np, vap, ctx);
3609 if (error && (gotuid || gotgid)) {
3610 /* it's possible the server didn't like our attempt to set IDs. */
3611 /* so, let's try it again without those */
3612 VATTR_CLEAR_ACTIVE(vap, va_uid);
3613 VATTR_CLEAR_ACTIVE(vap, va_gid);
3614 error = nfs3_setattr_rpc(np, vap, ctx);
3615 }
3616 if (error)
3617 vnode_put(newvp);
3618 else
3619 nfs_node_lock_force(np);
3620 }
3621 if (!error)
3622 *ap->a_vpp = newvp;
3623 if (!error && (gotuid || gotgid) &&
3624 (!newvp || nfs_getattrcache(np, &nvattr, 0) ||
3625 (gotuid && (nvattr.nva_uid != vap->va_uid)) ||
3626 (gotgid && (nvattr.nva_gid != vap->va_gid)))) {
3627 /* clear ID bits if server didn't use them (or we can't tell) */
3628 VATTR_CLEAR_SUPPORTED(vap, va_uid);
3629 VATTR_CLEAR_SUPPORTED(vap, va_gid);
3630 }
3631 if (!error)
3632 nfs_node_unlock(np);
3633 return (error);
3634 }
3635
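/*
 * Editor's note: the O_EXCL path in nfs3_vnop_create() builds the 8-byte v3
 * exclusive-create verifier from the primary interface address plus the
 * bumped create_verf counter, so a retransmitted CREATE is recognized by the
 * server while distinct clients are unlikely to collide.  A sketch of the
 * packing (hypothetical helper):
 */
#if 0	/* illustrative only */
static uint64_t
exclusive_create_verifier(uint32_t ipaddr, uint32_t *counterp)
{
	/* high 32 bits: local address; low 32 bits: per-boot counter */
	return (((uint64_t)ipaddr << 32) | ++(*counterp));
}
#endif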
3636 /*
3637 * NFS file remove call
3638 * To bring NFS semantics closer to UFS semantics, a file whose vnode is
3639 * still in use by other processes is renamed instead of removed, and the
3640 * renamed file is then removed on the last close.
3641 * - If vnode_isinuse()
3642 * If a rename is not already in the works
3643 * call nfs_sillyrename() to set it up
3644 * else
3645 * do the remove RPC
3646 */
3647 int
3648 nfs_vnop_remove(
3649 struct vnop_remove_args /* {
3650 struct vnodeop_desc *a_desc;
3651 vnode_t a_dvp;
3652 vnode_t a_vp;
3653 struct componentname *a_cnp;
3654 int a_flags;
3655 vfs_context_t a_context;
3656 } */ *ap)
3657 {
3658 vfs_context_t ctx = ap->a_context;
3659 vnode_t vp = ap->a_vp;
3660 vnode_t dvp = ap->a_dvp;
3661 struct componentname *cnp = ap->a_cnp;
3662 nfsnode_t dnp = VTONFS(dvp);
3663 nfsnode_t np = VTONFS(vp);
3664 int error = 0, nfsvers, namedattrs, inuse, gotattr = 0, flushed = 0, setsize = 0;
3665 struct nfs_vattr nvattr;
3666 struct nfsmount *nmp;
3667 struct nfs_dulookup dul;
3668
3669 /* XXX prevent removing a sillyrenamed file? */
3670
3671 nmp = NFSTONMP(dnp);
3672 if (!nmp)
3673 return (ENXIO);
3674 nfsvers = nmp->nm_vers;
3675 namedattrs = (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR);
3676
3677 again_relock:
3678 error = nfs_node_set_busy2(dnp, np, vfs_context_thread(ctx));
3679 if (error)
3680 return (error);
3681
3682 /* lock the node while we remove the file */
3683 lck_mtx_lock(nfs_node_hash_mutex);
3684 while (np->n_hflag & NHLOCKED) {
3685 np->n_hflag |= NHLOCKWANT;
3686 msleep(np, nfs_node_hash_mutex, PINOD, "nfs_remove", NULL);
3687 }
3688 np->n_hflag |= NHLOCKED;
3689 lck_mtx_unlock(nfs_node_hash_mutex);
3690
3691 if (!namedattrs)
3692 nfs_dulookup_init(&dul, dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx);
3693 again:
3694 inuse = vnode_isinuse(vp, 0);
3695 if ((ap->a_flags & VNODE_REMOVE_NODELETEBUSY) && inuse) {
3696 /* Caller requested Carbon delete semantics, but file is busy */
3697 error = EBUSY;
3698 goto out;
3699 }
3700 if (inuse && !gotattr) {
3701 if (nfs_getattr(np, &nvattr, ctx, NGA_CACHED))
3702 nvattr.nva_nlink = 1;
3703 gotattr = 1;
3704 goto again;
3705 }
3706 if (!inuse || (np->n_sillyrename && (nvattr.nva_nlink > 1))) {
3707
3708 if (!inuse && !flushed) { /* flush all the buffers first */
3709 /* unlock the node */
3710 lck_mtx_lock(nfs_node_hash_mutex);
3711 np->n_hflag &= ~NHLOCKED;
3712 if (np->n_hflag & NHLOCKWANT) {
3713 np->n_hflag &= ~NHLOCKWANT;
3714 wakeup(np);
3715 }
3716 lck_mtx_unlock(nfs_node_hash_mutex);
3717 nfs_node_clear_busy2(dnp, np);
3718 error = nfs_vinvalbuf(vp, V_SAVE, ctx, 1);
3719 FSDBG(260, np, np->n_size, np->n_vattr.nva_size, 0xf00d0011);
3720 flushed = 1;
3721 if (error == EINTR) {
3722 nfs_node_lock_force(np);
3723 NATTRINVALIDATE(np);
3724 nfs_node_unlock(np);
3725 return (error);
3726 }
3727 if (!namedattrs)
3728 nfs_dulookup_finish(&dul, dnp, ctx);
3729 goto again_relock;
3730 }
3731
3732 if ((nmp->nm_vers >= NFS_VER4) && (np->n_openflags & N_DELEG_MASK))
3733 nfs4_delegation_return(np, 0, vfs_context_thread(ctx), vfs_context_ucred(ctx));
3734
3735 /*
3736 * Purge the name cache so that the chance of a lookup for
3737 * the name succeeding while the remove is in progress is
3738 * minimized.
3739 */
3740 nfs_name_cache_purge(dnp, np, cnp, ctx);
3741
3742 if (!namedattrs)
3743 nfs_dulookup_start(&dul, dnp, ctx);
3744
3745 /* Do the rpc */
3746 error = nmp->nm_funcs->nf_remove_rpc(dnp, cnp->cn_nameptr, cnp->cn_namelen,
3747 vfs_context_thread(ctx), vfs_context_ucred(ctx));
3748
3749 /*
3750 * Kludge City: if the first reply to the remove RPC is lost,
3751 * the reply to the retransmitted request will be ENOENT,
3752 * since the file was in fact removed.
3753 * Therefore, we cheat and return success.
3754 */
3755 if (error == ENOENT)
3756 error = 0;
3757
3758 if (!error && !inuse && !np->n_sillyrename) {
3759 /*
3760 * removal succeeded, it's not in use, and not silly renamed so
3761 * remove nfsnode from hash now so we can't accidentally find it
3762 * again if another object gets created with the same filehandle
3763 * before this vnode gets reclaimed
3764 */
3765 lck_mtx_lock(nfs_node_hash_mutex);
3766 if (np->n_hflag & NHHASHED) {
3767 LIST_REMOVE(np, n_hash);
3768 np->n_hflag &= ~NHHASHED;
3769 FSDBG(266, 0, np, np->n_flag, 0xb1eb1e);
3770 }
3771 lck_mtx_unlock(nfs_node_hash_mutex);
3772 /* clear flags now: won't get nfs_vnop_inactive for recycled vnode */
3773 /* clear all flags other than these */
3774 nfs_node_lock_force(np);
3775 np->n_flag &= (NMODIFIED);
3776 NATTRINVALIDATE(np);
3777 nfs_node_unlock(np);
3778 vnode_recycle(vp);
3779 setsize = 1;
3780 } else {
3781 nfs_node_lock_force(np);
3782 NATTRINVALIDATE(np);
3783 nfs_node_unlock(np);
3784 }
3785 } else if (!np->n_sillyrename) {
3786 if (!namedattrs)
3787 nfs_dulookup_start(&dul, dnp, ctx);
3788 error = nfs_sillyrename(dnp, np, cnp, ctx);
3789 nfs_node_lock_force(np);
3790 NATTRINVALIDATE(np);
3791 nfs_node_unlock(np);
3792 } else {
3793 nfs_node_lock_force(np);
3794 NATTRINVALIDATE(np);
3795 nfs_node_unlock(np);
3796 if (!namedattrs)
3797 nfs_dulookup_start(&dul, dnp, ctx);
3798 }
3799
3800 /* nfs_getattr() will check changed and purge caches */
3801 nfs_getattr(dnp, NULL, ctx, NGA_CACHED);
3802 if (!namedattrs)
3803 nfs_dulookup_finish(&dul, dnp, ctx);
3804 out:
3805 /* unlock the node */
3806 lck_mtx_lock(nfs_node_hash_mutex);
3807 np->n_hflag &= ~NHLOCKED;
3808 if (np->n_hflag & NHLOCKWANT) {
3809 np->n_hflag &= ~NHLOCKWANT;
3810 wakeup(np);
3811 }
3812 lck_mtx_unlock(nfs_node_hash_mutex);
3813 nfs_node_clear_busy2(dnp, np);
3814 if (setsize)
3815 ubc_setsize(vp, 0);
3816 return (error);
3817 }
3818
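/*
 * Editor's note: nfs_vnop_remove() serializes on the nfsnode with a
 * hand-rolled flag lock under the hash mutex -- NHLOCKED marks the node
 * busy, NHLOCKWANT records a waiter, and msleep()/wakeup() do the blocking.
 * The generic shape of the pattern (as used above and in rename):
 */
#if 0	/* illustrative only */
	lck_mtx_lock(nfs_node_hash_mutex);
	while (np->n_hflag & NHLOCKED) {	/* wait out the current holder */
		np->n_hflag |= NHLOCKWANT;
		msleep(np, nfs_node_hash_mutex, PINOD, "nfswait", NULL);
	}
	np->n_hflag |= NHLOCKED;		/* acquire */
	lck_mtx_unlock(nfs_node_hash_mutex);

	/* ... critical section ... */

	lck_mtx_lock(nfs_node_hash_mutex);
	np->n_hflag &= ~NHLOCKED;		/* release and wake any waiter */
	if (np->n_hflag & NHLOCKWANT) {
		np->n_hflag &= ~NHLOCKWANT;
		wakeup(np);
	}
	lck_mtx_unlock(nfs_node_hash_mutex);
#endif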
3819 /*
3820 * NFS silly-renamed file removal function called from nfs_vnop_inactive
3821 */
3822 int
3823 nfs_removeit(struct nfs_sillyrename *nsp)
3824 {
3825 struct nfsmount *nmp = NFSTONMP(nsp->nsr_dnp);
3826 if (!nmp)
3827 return (ENXIO);
3828 return nmp->nm_funcs->nf_remove_rpc(nsp->nsr_dnp, nsp->nsr_name, nsp->nsr_namlen, NULL, nsp->nsr_cred);
3829 }
3830
3831 /*
3832 * NFS remove rpc, called from nfs_remove() and nfs_removeit().
3833 */
3834 int
3835 nfs3_remove_rpc(
3836 nfsnode_t dnp,
3837 char *name,
3838 int namelen,
3839 thread_t thd,
3840 kauth_cred_t cred)
3841 {
3842 int error = 0, lockerror = ENOENT, status, wccpostattr = 0;
3843 struct timespec premtime = { 0, 0 };
3844 struct nfsmount *nmp;
3845 int nfsvers;
3846 u_int64_t xid;
3847 struct nfsm_chain nmreq, nmrep;
3848
3849 nmp = NFSTONMP(dnp);
3850 if (!nmp)
3851 return (ENXIO);
3852 nfsvers = nmp->nm_vers;
3853 if ((nfsvers == NFS_VER2) && (namelen > NFS_MAXNAMLEN))
3854 return (ENAMETOOLONG);
3855
3856 nfsm_chain_null(&nmreq);
3857 nfsm_chain_null(&nmrep);
3858
3859 nfsm_chain_build_alloc_init(error, &nmreq,
3860 NFSX_FH(nfsvers) + NFSX_UNSIGNED + nfsm_rndup(namelen));
3861 nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
3862 nfsm_chain_add_name(error, &nmreq, name, namelen, nmp);
3863 nfsm_chain_build_done(error, &nmreq);
3864 nfsmout_if(error);
3865
3866 error = nfs_request2(dnp, NULL, &nmreq, NFSPROC_REMOVE, thd, cred, NULL, 0, &nmrep, &xid, &status);
3867
3868 if ((lockerror = nfs_node_lock(dnp)))
3869 error = lockerror;
3870 if (nfsvers == NFS_VER3)
3871 nfsm_chain_get_wcc_data(error, &nmrep, dnp, &premtime, &wccpostattr, &xid);
3872 nfsmout_if(error);
3873 dnp->n_flag |= NMODIFIED;
3874 /* if directory hadn't changed, update namecache mtime */
3875 if (nfstimespeccmp(&dnp->n_ncmtime, &premtime, ==))
3876 NFS_CHANGED_UPDATE_NC(nfsvers, dnp, &dnp->n_vattr);
3877 if (!wccpostattr)
3878 NATTRINVALIDATE(dnp);
3879 if (!error)
3880 error = status;
3881 nfsmout:
3882 if (!lockerror)
3883 nfs_node_unlock(dnp);
3884 nfsm_chain_cleanup(&nmreq);
3885 nfsm_chain_cleanup(&nmrep);
3886 return (error);
3887 }
3888
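/*
 * Editor's note: the premtime/wccpostattr handling above is the NFSv3 weak
 * cache consistency (wcc_data) pattern used by every directory-mutating
 * operation in this file: if the server's pre-op mtime matches our cached
 * n_ncmtime, no one else changed the directory, so the name cache stays
 * valid and only its mtime is rolled forward; if post-op attributes are
 * missing, the cached attributes are invalidated instead.
 */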
3889 /*
3890 * NFS file rename call
3891 */
3892 int
3893 nfs_vnop_rename(
3894 struct vnop_rename_args /* {
3895 struct vnodeop_desc *a_desc;
3896 vnode_t a_fdvp;
3897 vnode_t a_fvp;
3898 struct componentname *a_fcnp;
3899 vnode_t a_tdvp;
3900 vnode_t a_tvp;
3901 struct componentname *a_tcnp;
3902 vfs_context_t a_context;
3903 } */ *ap)
3904 {
3905 vfs_context_t ctx = ap->a_context;
3906 vnode_t fdvp = ap->a_fdvp;
3907 vnode_t fvp = ap->a_fvp;
3908 vnode_t tdvp = ap->a_tdvp;
3909 vnode_t tvp = ap->a_tvp;
3910 nfsnode_t fdnp, fnp, tdnp, tnp;
3911 struct componentname *tcnp = ap->a_tcnp;
3912 struct componentname *fcnp = ap->a_fcnp;
3913 int error, nfsvers, inuse=0, tvprecycle=0, locked=0;
3914 mount_t fmp, tdmp, tmp;
3915 struct nfs_vattr nvattr;
3916 struct nfsmount *nmp;
3917
3918 fdnp = VTONFS(fdvp);
3919 fnp = VTONFS(fvp);
3920 tdnp = VTONFS(tdvp);
3921 tnp = tvp ? VTONFS(tvp) : NULL;
3922
3923 nmp = NFSTONMP(fdnp);
3924 if (!nmp)
3925 return (ENXIO);
3926 nfsvers = nmp->nm_vers;
3927
3928 error = nfs_node_set_busy4(fdnp, fnp, tdnp, tnp, vfs_context_thread(ctx));
3929 if (error)
3930 return (error);
3931
3932 if (tvp && (tvp != fvp)) {
3933 /* lock the node while we rename over the existing file */
3934 lck_mtx_lock(nfs_node_hash_mutex);
3935 while (tnp->n_hflag & NHLOCKED) {
3936 tnp->n_hflag |= NHLOCKWANT;
3937 msleep(tnp, nfs_node_hash_mutex, PINOD, "nfs_rename", NULL);
3938 }
3939 tnp->n_hflag |= NHLOCKED;
3940 lck_mtx_unlock(nfs_node_hash_mutex);
3941 locked = 1;
3942 }
3943
3944 /* Check for cross-device rename */
3945 fmp = vnode_mount(fvp);
3946 tmp = tvp ? vnode_mount(tvp) : NULL;
3947 tdmp = vnode_mount(tdvp);
3948 if ((fmp != tdmp) || (tvp && (fmp != tmp))) {
3949 error = EXDEV;
3950 goto out;
3951 }
3952
3953 /* XXX prevent renaming from/over a sillyrenamed file? */
3954
3955 /*
3956 * If the tvp exists and is in use, sillyrename it before doing the
3957 * rename of the new file over it.
3958 * XXX Can't sillyrename a directory.
3959 * Don't sillyrename if source and target are same vnode (hard
3960 * links or case-variants)
3961 */
3962 if (tvp && (tvp != fvp))
3963 inuse = vnode_isinuse(tvp, 0);
3964 if (inuse && !tnp->n_sillyrename && (vnode_vtype(tvp) != VDIR)) {
3965 error = nfs_sillyrename(tdnp, tnp, tcnp, ctx);
3966 if (error) {
3967 /* sillyrename failed. Instead of pressing on, return error */
3968 goto out; /* should not be ENOENT. */
3969 } else {
3970 /* sillyrename succeeded. */
3971 tvp = NULL;
3972 }
3973 } else if (tvp && (nmp->nm_vers >= NFS_VER4) && (tnp->n_openflags & N_DELEG_MASK)) {
3974 nfs4_delegation_return(tnp, 0, vfs_context_thread(ctx), vfs_context_ucred(ctx));
3975 }
3976
3977 error = nmp->nm_funcs->nf_rename_rpc(fdnp, fcnp->cn_nameptr, fcnp->cn_namelen,
3978 tdnp, tcnp->cn_nameptr, tcnp->cn_namelen, ctx);
3979
3980 /*
3981 * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry.
3982 */
3983 if (error == ENOENT)
3984 error = 0;
3985
3986 if (tvp && (tvp != fvp) && !tnp->n_sillyrename) {
3987 nfs_node_lock_force(tnp);
3988 tvprecycle = (!error && !vnode_isinuse(tvp, 0) &&
3989 (nfs_getattrcache(tnp, &nvattr, 0) || (nvattr.nva_nlink == 1)));
3990 nfs_node_unlock(tnp);
3991 lck_mtx_lock(nfs_node_hash_mutex);
3992 if (tvprecycle && (tnp->n_hflag & NHHASHED)) {
3993 /*
3994 * remove nfsnode from hash now so we can't accidentally find it
3995 * again if another object gets created with the same filehandle
3996 * before this vnode gets reclaimed
3997 */
3998 LIST_REMOVE(tnp, n_hash);
3999 tnp->n_hflag &= ~NHHASHED;
4000 FSDBG(266, 0, tnp, tnp->n_flag, 0xb1eb1e);
4001 }
4002 lck_mtx_unlock(nfs_node_hash_mutex);
4003 }
4004
4005 /* purge the old name cache entries and enter the new one */
4006 nfs_name_cache_purge(fdnp, fnp, fcnp, ctx);
4007 if (tvp) {
4008 nfs_name_cache_purge(tdnp, tnp, tcnp, ctx);
4009 if (tvprecycle) {
4010 /* clear flags now: won't get nfs_vnop_inactive for recycled vnode */
4011 /* clear all flags other than these */
4012 nfs_node_lock_force(tnp);
4013 tnp->n_flag &= (NMODIFIED);
4014 nfs_node_unlock(tnp);
4015 vnode_recycle(tvp);
4016 }
4017 }
4018 if (!error) {
4019 nfs_node_lock_force(tdnp);
4020 if (tdnp->n_flag & NNEGNCENTRIES) {
4021 tdnp->n_flag &= ~NNEGNCENTRIES;
4022 cache_purge_negatives(tdvp);
4023 }
4024 nfs_node_unlock(tdnp);
4025 nfs_node_lock_force(fnp);
4026 cache_enter(tdvp, fvp, tcnp);
4027 if (tdvp != fdvp) { /* update parent pointer */
4028 if (fnp->n_parent && !vnode_get(fnp->n_parent)) {
4029 /* remove ref from old parent */
4030 vnode_rele(fnp->n_parent);
4031 vnode_put(fnp->n_parent);
4032 }
4033 fnp->n_parent = tdvp;
4034 if (tdvp && !vnode_get(tdvp)) {
4035 /* add ref to new parent */
4036 vnode_ref(tdvp);
4037 vnode_put(tdvp);
4038 } else {
4039 fnp->n_parent = NULL;
4040 }
4041 }
4042 nfs_node_unlock(fnp);
4043 }
4044 out:
4045 /* nfs_getattr() will check changed and purge caches */
4046 nfs_getattr(fdnp, NULL, ctx, NGA_CACHED);
4047 nfs_getattr(tdnp, NULL, ctx, NGA_CACHED);
4048 if (locked) {
4049 /* unlock node */
4050 lck_mtx_lock(nfs_node_hash_mutex);
4051 tnp->n_hflag &= ~NHLOCKED;
4052 if (tnp->n_hflag & NHLOCKWANT) {
4053 tnp->n_hflag &= ~NHLOCKWANT;
4054 wakeup(tnp);
4055 }
4056 lck_mtx_unlock(nfs_node_hash_mutex);
4057 }
4058 nfs_node_clear_busy4(fdnp, fnp, tdnp, tnp);
4059 return (error);
4060 }
4061
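/*
 * Editor's sketch (hypothetical helper): the n_parent update above follows
 * the usual VFS reference discipline -- take a short-term iocount with
 * vnode_get() before touching a vnode, hold the long-term reference with
 * vnode_ref()/vnode_rele(), then drop the iocount with vnode_put():
 */
#if 0	/* illustrative only */
static void
swap_parent_ref(vnode_t *parentp, vnode_t newparent)
{
	vnode_t old = *parentp;

	if (old && !vnode_get(old)) {		/* iocount pins the old vnode */
		vnode_rele(old);		/* drop long-term reference */
		vnode_put(old);			/* drop iocount */
	}
	*parentp = newparent;
	if (newparent && !vnode_get(newparent)) {
		vnode_ref(newparent);		/* take new long-term reference */
		vnode_put(newparent);
	} else {
		*parentp = NULL;		/* couldn't pin it: no parent */
	}
}
#endif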
4062 /*
4063 * Do an NFS rename rpc. Called from nfs_vnop_rename() and nfs_sillyrename().
4064 */
4065 int
4066 nfs3_rename_rpc(
4067 nfsnode_t fdnp,
4068 char *fnameptr,
4069 int fnamelen,
4070 nfsnode_t tdnp,
4071 char *tnameptr,
4072 int tnamelen,
4073 vfs_context_t ctx)
4074 {
4075 int error = 0, lockerror = ENOENT, status, fwccpostattr = 0, twccpostattr = 0;
4076 struct timespec fpremtime = { 0, 0 }, tpremtime = { 0, 0 };
4077 struct nfsmount *nmp;
4078 int nfsvers;
4079 u_int64_t xid, txid;
4080 struct nfsm_chain nmreq, nmrep;
4081
4082 nmp = NFSTONMP(fdnp);
4083 if (!nmp)
4084 return (ENXIO);
4085 nfsvers = nmp->nm_vers;
4086 if ((nfsvers == NFS_VER2) &&
4087 ((fnamelen > NFS_MAXNAMLEN) || (tnamelen > NFS_MAXNAMLEN)))
4088 return (ENAMETOOLONG);
4089
4090 nfsm_chain_null(&nmreq);
4091 nfsm_chain_null(&nmrep);
4092
4093 nfsm_chain_build_alloc_init(error, &nmreq,
4094 (NFSX_FH(nfsvers) + NFSX_UNSIGNED) * 2 +
4095 nfsm_rndup(fnamelen) + nfsm_rndup(tnamelen));
4096 nfsm_chain_add_fh(error, &nmreq, nfsvers, fdnp->n_fhp, fdnp->n_fhsize);
4097 nfsm_chain_add_name(error, &nmreq, fnameptr, fnamelen, nmp);
4098 nfsm_chain_add_fh(error, &nmreq, nfsvers, tdnp->n_fhp, tdnp->n_fhsize);
4099 nfsm_chain_add_name(error, &nmreq, tnameptr, tnamelen, nmp);
4100 nfsm_chain_build_done(error, &nmreq);
4101 nfsmout_if(error);
4102
4103 error = nfs_request(fdnp, NULL, &nmreq, NFSPROC_RENAME, ctx, NULL, &nmrep, &xid, &status);
4104
4105 if ((lockerror = nfs_node_lock2(fdnp, tdnp)))
4106 error = lockerror;
4107 if (nfsvers == NFS_VER3) {
4108 txid = xid;
4109 nfsm_chain_get_wcc_data(error, &nmrep, fdnp, &fpremtime, &fwccpostattr, &xid);
4110 nfsm_chain_get_wcc_data(error, &nmrep, tdnp, &tpremtime, &twccpostattr, &txid);
4111 }
4112 if (!error)
4113 error = status;
4114 nfsmout:
4115 nfsm_chain_cleanup(&nmreq);
4116 nfsm_chain_cleanup(&nmrep);
4117 if (!lockerror) {
4118 fdnp->n_flag |= NMODIFIED;
4119 /* if directory hadn't changed, update namecache mtime */
4120 if (nfstimespeccmp(&fdnp->n_ncmtime, &fpremtime, ==))
4121 NFS_CHANGED_UPDATE_NC(nfsvers, fdnp, &fdnp->n_vattr);
4122 if (!fwccpostattr)
4123 NATTRINVALIDATE(fdnp);
4124 tdnp->n_flag |= NMODIFIED;
4125 /* if directory hadn't changed, update namecache mtime */
4126 if (nfstimespeccmp(&tdnp->n_ncmtime, &tpremtime, ==))
4127 NFS_CHANGED_UPDATE_NC(nfsvers, tdnp, &tdnp->n_vattr);
4128 if (!twccpostattr)
4129 NATTRINVALIDATE(tdnp);
4130 nfs_node_unlock2(fdnp, tdnp);
4131 }
4132 return (error);
4133 }
4134
4135 /*
4136 * NFS hard link create call
4137 */
4138 int
4139 nfs3_vnop_link(
4140 struct vnop_link_args /* {
4141 struct vnodeop_desc *a_desc;
4142 vnode_t a_vp;
4143 vnode_t a_tdvp;
4144 struct componentname *a_cnp;
4145 vfs_context_t a_context;
4146 } */ *ap)
4147 {
4148 vfs_context_t ctx = ap->a_context;
4149 vnode_t vp = ap->a_vp;
4150 vnode_t tdvp = ap->a_tdvp;
4151 struct componentname *cnp = ap->a_cnp;
4152 int error = 0, lockerror = ENOENT, status, wccpostattr = 0, attrflag = 0;
4153 struct timespec premtime = { 0, 0 };
4154 struct nfsmount *nmp;
4155 nfsnode_t np = VTONFS(vp);
4156 nfsnode_t tdnp = VTONFS(tdvp);
4157 int nfsvers;
4158 u_int64_t xid, txid;
4159 struct nfsm_chain nmreq, nmrep;
4160
4161 if (vnode_mount(vp) != vnode_mount(tdvp))
4162 return (EXDEV);
4163
4164 nmp = VTONMP(vp);
4165 if (!nmp)
4166 return (ENXIO);
4167 nfsvers = nmp->nm_vers;
4168 if ((nfsvers == NFS_VER2) && (cnp->cn_namelen > NFS_MAXNAMLEN))
4169 return (ENAMETOOLONG);
4170
4171 /*
4172 * Push all writes to the server, so that the attribute cache
4173 * doesn't get "out of sync" with the server.
4174 * XXX There should be a better way!
4175 */
4176 nfs_flush(np, MNT_WAIT, vfs_context_thread(ctx), V_IGNORE_WRITEERR);
4177
4178 error = nfs_node_set_busy2(tdnp, np, vfs_context_thread(ctx));
4179 if (error)
4180 return (error);
4181
4182 nfsm_chain_null(&nmreq);
4183 nfsm_chain_null(&nmrep);
4184
4185 nfsm_chain_build_alloc_init(error, &nmreq,
4186 NFSX_FH(nfsvers)*2 + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen));
4187 nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
4188 nfsm_chain_add_fh(error, &nmreq, nfsvers, tdnp->n_fhp, tdnp->n_fhsize);
4189 nfsm_chain_add_name(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen, nmp);
4190 nfsm_chain_build_done(error, &nmreq);
4191 nfsmout_if(error);
4192 error = nfs_request(np, NULL, &nmreq, NFSPROC_LINK, ctx, NULL, &nmrep, &xid, &status);
4193
4194 if ((lockerror = nfs_node_lock2(tdnp, np))) {
4195 error = lockerror;
4196 goto nfsmout;
4197 }
4198 if (nfsvers == NFS_VER3) {
4199 txid = xid;
4200 nfsm_chain_postop_attr_update_flag(error, &nmrep, np, attrflag, &xid);
4201 nfsm_chain_get_wcc_data(error, &nmrep, tdnp, &premtime, &wccpostattr, &txid);
4202 }
4203 if (!error)
4204 error = status;
4205 nfsmout:
4206 nfsm_chain_cleanup(&nmreq);
4207 nfsm_chain_cleanup(&nmrep);
4208 if (!lockerror) {
4209 if (!attrflag)
4210 NATTRINVALIDATE(np);
4211 tdnp->n_flag |= NMODIFIED;
4212 /* if directory hadn't changed, update namecache mtime */
4213 if (nfstimespeccmp(&tdnp->n_ncmtime, &premtime, ==))
4214 NFS_CHANGED_UPDATE_NC(nfsvers, tdnp, &tdnp->n_vattr);
4215 if (!wccpostattr)
4216 NATTRINVALIDATE(tdnp);
4217 if (!error && (tdnp->n_flag & NNEGNCENTRIES)) {
4218 tdnp->n_flag &= ~NNEGNCENTRIES;
4219 cache_purge_negatives(tdvp);
4220 }
4221 nfs_node_unlock2(tdnp, np);
4222 }
4223 nfs_node_clear_busy2(tdnp, np);
4224 /*
4225 * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
4226 */
4227 if (error == EEXIST)
4228 error = 0;
4229 return (error);
4230 }
4231
4232 /*
4233 * NFS symbolic link create call
4234 */
4235 int
4236 nfs3_vnop_symlink(
4237 struct vnop_symlink_args /* {
4238 struct vnodeop_desc *a_desc;
4239 vnode_t a_dvp;
4240 vnode_t *a_vpp;
4241 struct componentname *a_cnp;
4242 struct vnode_attr *a_vap;
4243 char *a_target;
4244 vfs_context_t a_context;
4245 } */ *ap)
4246 {
4247 vfs_context_t ctx = ap->a_context;
4248 vnode_t dvp = ap->a_dvp;
4249 struct vnode_attr *vap = ap->a_vap;
4250 struct componentname *cnp = ap->a_cnp;
4251 struct nfs_vattr nvattr;
4252 fhandle_t fh;
4253 int slen, error = 0, lockerror = ENOENT, busyerror = ENOENT, status, wccpostattr = 0;
4254 struct timespec premtime = { 0, 0 };
4255 vnode_t newvp = NULL;
4256 int nfsvers, gotuid, gotgid;
4257 u_int64_t xid, dxid;
4258 nfsnode_t np = NULL;
4259 nfsnode_t dnp = VTONFS(dvp);
4260 struct nfsmount *nmp;
4261 struct nfsm_chain nmreq, nmrep;
4262 struct nfsreq rq, *req = &rq;
4263 struct nfs_dulookup dul;
4264
4265 nmp = VTONMP(dvp);
4266 if (!nmp)
4267 return (ENXIO);
4268 nfsvers = nmp->nm_vers;
4269
4270 slen = strlen(ap->a_target);
4271 if ((nfsvers == NFS_VER2) &&
4272 ((cnp->cn_namelen > NFS_MAXNAMLEN) || (slen > NFS_MAXPATHLEN)))
4273 return (ENAMETOOLONG);
4274
4275 nfs_avoid_needless_id_setting_on_create(dnp, vap, ctx);
4276
4277 VATTR_SET_SUPPORTED(vap, va_mode);
4278 VATTR_SET_SUPPORTED(vap, va_uid);
4279 VATTR_SET_SUPPORTED(vap, va_gid);
4280 VATTR_SET_SUPPORTED(vap, va_data_size);
4281 VATTR_SET_SUPPORTED(vap, va_access_time);
4282 VATTR_SET_SUPPORTED(vap, va_modify_time);
4283 gotuid = VATTR_IS_ACTIVE(vap, va_uid);
4284 gotgid = VATTR_IS_ACTIVE(vap, va_gid);
4285
4286 error = busyerror = nfs_node_set_busy(dnp, vfs_context_thread(ctx));
4287 nfs_dulookup_init(&dul, dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx);
4288
4289 nfsm_chain_null(&nmreq);
4290 nfsm_chain_null(&nmrep);
4291
4292 nfsm_chain_build_alloc_init(error, &nmreq,
4293 NFSX_FH(nfsvers) + 2 * NFSX_UNSIGNED +
4294 nfsm_rndup(cnp->cn_namelen) + nfsm_rndup(slen) + NFSX_SATTR(nfsvers));
4295 nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
4296 nfsm_chain_add_name(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen, nmp);
4297 if (nfsvers == NFS_VER3)
4298 nfsm_chain_add_v3sattr(error, &nmreq, vap);
4299 nfsm_chain_add_name(error, &nmreq, ap->a_target, slen, nmp);
4300 if (nfsvers == NFS_VER2)
4301 nfsm_chain_add_v2sattr(error, &nmreq, vap, -1);
4302 nfsm_chain_build_done(error, &nmreq);
4303 nfsmout_if(error);
4304
4305 error = nfs_request_async(dnp, NULL, &nmreq, NFSPROC_SYMLINK,
4306 vfs_context_thread(ctx), vfs_context_ucred(ctx), NULL, 0, NULL, &req);
4307 if (!error) {
4308 nfs_dulookup_start(&dul, dnp, ctx);
4309 error = nfs_request_async_finish(req, &nmrep, &xid, &status);
4310 }
4311
4312 if ((lockerror = nfs_node_lock(dnp)))
4313 error = lockerror;
4314 dxid = xid;
4315 if (!error && !status) {
4316 if (dnp->n_flag & NNEGNCENTRIES) {
4317 dnp->n_flag &= ~NNEGNCENTRIES;
4318 cache_purge_negatives(dvp);
4319 }
4320 if (nfsvers == NFS_VER3)
4321 error = nfsm_chain_get_fh_attr(&nmrep, dnp, ctx, nfsvers, &xid, &fh, &nvattr);
4322 else
4323 fh.fh_len = 0;
4324 }
4325 if (nfsvers == NFS_VER3)
4326 nfsm_chain_get_wcc_data(error, &nmrep, dnp, &premtime, &wccpostattr, &dxid);
4327 if (!error)
4328 error = status;
4329 nfsmout:
4330 nfsm_chain_cleanup(&nmreq);
4331 nfsm_chain_cleanup(&nmrep);
4332
4333 if (!lockerror) {
4334 dnp->n_flag |= NMODIFIED;
4335 /* if directory hadn't changed, update namecache mtime */
4336 if (nfstimespeccmp(&dnp->n_ncmtime, &premtime, ==))
4337 NFS_CHANGED_UPDATE_NC(nfsvers, dnp, &dnp->n_vattr);
4338 nfs_node_unlock(dnp);
4339 /* nfs_getattr() will check changed and purge caches */
4340 nfs_getattr(dnp, NULL, ctx, wccpostattr ? NGA_CACHED : NGA_UNCACHED);
4341 }
4342
4343 if (!error && fh.fh_len)
4344 error = nfs_nget(NFSTOMP(dnp), dnp, cnp, fh.fh_data, fh.fh_len, &nvattr, &xid, rq.r_auth, NG_MAKEENTRY, &np);
4345 if (!error && np)
4346 newvp = NFSTOV(np);
4347
4348 nfs_dulookup_finish(&dul, dnp, ctx);
4349
4350 /*
4351 * Kludge: Map EEXIST => 0, assuming that it is a reply to a retry,
4352 * if we can succeed in looking up the symlink.
4353 */
4354 if ((error == EEXIST) || (!error && !newvp)) {
4355 if (newvp) {
4356 nfs_node_unlock(np);
4357 vnode_put(newvp);
4358 newvp = NULL;
4359 }
4360 error = nfs_lookitup(dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx, &np);
4361 if (!error) {
4362 newvp = NFSTOV(np);
4363 if (vnode_vtype(newvp) != VLNK)
4364 error = EEXIST;
4365 }
4366 }
4367 if (!busyerror)
4368 nfs_node_clear_busy(dnp);
4369 if (!error && (gotuid || gotgid) &&
4370 (!newvp || nfs_getattrcache(np, &nvattr, 0) ||
4371 (gotuid && (nvattr.nva_uid != vap->va_uid)) ||
4372 (gotgid && (nvattr.nva_gid != vap->va_gid)))) {
4373 /* clear ID bits if server didn't use them (or we can't tell) */
4374 VATTR_CLEAR_SUPPORTED(vap, va_uid);
4375 VATTR_CLEAR_SUPPORTED(vap, va_gid);
4376 }
4377 if (error) {
4378 if (newvp) {
4379 nfs_node_unlock(np);
4380 vnode_put(newvp);
4381 }
4382 } else {
4383 nfs_node_unlock(np);
4384 *ap->a_vpp = newvp;
4385 }
4386 return (error);
4387 }
4388
4389 /*
4390 * NFS make dir call
4391 */
4392 int
4393 nfs3_vnop_mkdir(
4394 struct vnop_mkdir_args /* {
4395 struct vnodeop_desc *a_desc;
4396 vnode_t a_dvp;
4397 vnode_t *a_vpp;
4398 struct componentname *a_cnp;
4399 struct vnode_attr *a_vap;
4400 vfs_context_t a_context;
4401 } */ *ap)
4402 {
4403 vfs_context_t ctx = ap->a_context;
4404 vnode_t dvp = ap->a_dvp;
4405 struct vnode_attr *vap = ap->a_vap;
4406 struct componentname *cnp = ap->a_cnp;
4407 struct nfs_vattr nvattr;
4408 nfsnode_t np = NULL;
4409 struct nfsmount *nmp;
4410 nfsnode_t dnp = VTONFS(dvp);
4411 vnode_t newvp = NULL;
4412 int error = 0, lockerror = ENOENT, busyerror = ENOENT, status, wccpostattr = 0;
4413 struct timespec premtime = { 0, 0 };
4414 int nfsvers, gotuid, gotgid;
4415 u_int64_t xid, dxid;
4416 fhandle_t fh;
4417 struct nfsm_chain nmreq, nmrep;
4418 struct nfsreq rq, *req = &rq;
4419 struct nfs_dulookup dul;
4420
4421 nmp = VTONMP(dvp);
4422 if (!nmp)
4423 return (ENXIO);
4424 nfsvers = nmp->nm_vers;
4425 if ((nfsvers == NFS_VER2) && (cnp->cn_namelen > NFS_MAXNAMLEN))
4426 return (ENAMETOOLONG);
4427
4428 nfs_avoid_needless_id_setting_on_create(dnp, vap, ctx);
4429
4430 VATTR_SET_SUPPORTED(vap, va_mode);
4431 VATTR_SET_SUPPORTED(vap, va_uid);
4432 VATTR_SET_SUPPORTED(vap, va_gid);
4433 VATTR_SET_SUPPORTED(vap, va_data_size);
4434 VATTR_SET_SUPPORTED(vap, va_access_time);
4435 VATTR_SET_SUPPORTED(vap, va_modify_time);
4436 gotuid = VATTR_IS_ACTIVE(vap, va_uid);
4437 gotgid = VATTR_IS_ACTIVE(vap, va_gid);
4438
4439 error = busyerror = nfs_node_set_busy(dnp, vfs_context_thread(ctx));
4440 nfs_dulookup_init(&dul, dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx);
4441
4442 nfsm_chain_null(&nmreq);
4443 nfsm_chain_null(&nmrep);
4444
4445 nfsm_chain_build_alloc_init(error, &nmreq,
4446 NFSX_FH(nfsvers) + NFSX_UNSIGNED +
4447 nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(nfsvers));
4448 nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
4449 nfsm_chain_add_name(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen, nmp);
4450 if (nfsvers == NFS_VER3)
4451 nfsm_chain_add_v3sattr(error, &nmreq, vap);
4452 else
4453 nfsm_chain_add_v2sattr(error, &nmreq, vap, -1);
4454 nfsm_chain_build_done(error, &nmreq);
4455 nfsmout_if(error);
4456
4457 error = nfs_request_async(dnp, NULL, &nmreq, NFSPROC_MKDIR,
4458 vfs_context_thread(ctx), vfs_context_ucred(ctx), NULL, 0, NULL, &req);
4459 if (!error) {
4460 nfs_dulookup_start(&dul, dnp, ctx);
4461 error = nfs_request_async_finish(req, &nmrep, &xid, &status);
4462 }
4463
4464 if ((lockerror = nfs_node_lock(dnp)))
4465 error = lockerror;
4466 dxid = xid;
4467 if (!error && !status) {
4468 if (dnp->n_flag & NNEGNCENTRIES) {
4469 dnp->n_flag &= ~NNEGNCENTRIES;
4470 cache_purge_negatives(dvp);
4471 }
4472 error = nfsm_chain_get_fh_attr(&nmrep, dnp, ctx, nfsvers, &xid, &fh, &nvattr);
4473 }
4474 if (nfsvers == NFS_VER3)
4475 nfsm_chain_get_wcc_data(error, &nmrep, dnp, &premtime, &wccpostattr, &dxid);
4476 if (!error)
4477 error = status;
4478 nfsmout:
4479 nfsm_chain_cleanup(&nmreq);
4480 nfsm_chain_cleanup(&nmrep);
4481
4482 if (!lockerror) {
4483 dnp->n_flag |= NMODIFIED;
4484 /* if directory hadn't changed, update namecache mtime */
4485 if (nfstimespeccmp(&dnp->n_ncmtime, &premtime, ==))
4486 NFS_CHANGED_UPDATE_NC(nfsvers, dnp, &dnp->n_vattr);
4487 nfs_node_unlock(dnp);
4488 /* nfs_getattr() will check changed and purge caches */
4489 nfs_getattr(dnp, NULL, ctx, wccpostattr ? NGA_CACHED : NGA_UNCACHED);
4490 }
4491
4492 if (!error && fh.fh_len)
4493 error = nfs_nget(NFSTOMP(dnp), dnp, cnp, fh.fh_data, fh.fh_len, &nvattr, &xid, rq.r_auth, NG_MAKEENTRY, &np);
4494 if (!error && np)
4495 newvp = NFSTOV(np);
4496
4497 nfs_dulookup_finish(&dul, dnp, ctx);
4498
4499 /*
4500 * Kludge: Map EEXIST => 0, assuming that it is a reply to a retry,
4501 * if we can succeed in looking up the directory.
4502 */
4503 if ((error == EEXIST) || (!error && !newvp)) {
4504 if (newvp) {
4505 nfs_node_unlock(np);
4506 vnode_put(newvp);
4507 newvp = NULL;
4508 }
4509 error = nfs_lookitup(dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx, &np);
4510 if (!error) {
4511 newvp = NFSTOV(np);
4512 if (vnode_vtype(newvp) != VDIR)
4513 error = EEXIST;
4514 }
4515 }
4516 if (!busyerror)
4517 nfs_node_clear_busy(dnp);
4518 if (!error && (gotuid || gotgid) &&
4519 (!newvp || nfs_getattrcache(np, &nvattr, 0) ||
4520 (gotuid && (nvattr.nva_uid != vap->va_uid)) ||
4521 (gotgid && (nvattr.nva_gid != vap->va_gid)))) {
4522 /* clear ID bits if server didn't use them (or we can't tell) */
4523 VATTR_CLEAR_SUPPORTED(vap, va_uid);
4524 VATTR_CLEAR_SUPPORTED(vap, va_gid);
4525 }
4526 if (error) {
4527 if (newvp) {
4528 nfs_node_unlock(np);
4529 vnode_put(newvp);
4530 }
4531 } else {
4532 nfs_node_unlock(np);
4533 *ap->a_vpp = newvp;
4534 }
4535 return (error);
4536 }
4537
4538 /*
4539 * NFS remove directory call
4540 */
4541 int
4542 nfs3_vnop_rmdir(
4543 struct vnop_rmdir_args /* {
4544 struct vnodeop_desc *a_desc;
4545 vnode_t a_dvp;
4546 vnode_t a_vp;
4547 struct componentname *a_cnp;
4548 vfs_context_t a_context;
4549 } */ *ap)
4550 {
4551 vfs_context_t ctx = ap->a_context;
4552 vnode_t vp = ap->a_vp;
4553 vnode_t dvp = ap->a_dvp;
4554 struct componentname *cnp = ap->a_cnp;
4555 int error = 0, lockerror = ENOENT, status, wccpostattr = 0;
4556 struct timespec premtime = { 0, 0 };
4557 struct nfsmount *nmp;
4558 nfsnode_t np = VTONFS(vp);
4559 nfsnode_t dnp = VTONFS(dvp);
4560 int nfsvers;
4561 u_int64_t xid;
4562 struct nfsm_chain nmreq, nmrep;
4563 struct nfsreq rq, *req = &rq;
4564 struct nfs_dulookup dul;
4565
4566 nmp = VTONMP(vp);
4567 if (!nmp)
4568 return (ENXIO);
4569 nfsvers = nmp->nm_vers;
4570 if ((nfsvers == NFS_VER2) && (cnp->cn_namelen > NFS_MAXNAMLEN))
4571 return (ENAMETOOLONG);
4572
4573 if ((error = nfs_node_set_busy2(dnp, np, vfs_context_thread(ctx))))
4574 return (error);
4575
4576 nfs_dulookup_init(&dul, dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx);
4577
4578 nfsm_chain_null(&nmreq);
4579 nfsm_chain_null(&nmrep);
4580
4581 nfsm_chain_build_alloc_init(error, &nmreq,
4582 NFSX_FH(nfsvers) + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen));
4583 nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
4584 nfsm_chain_add_name(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen, nmp);
4585 nfsm_chain_build_done(error, &nmreq);
4586 nfsmout_if(error);
4587
4588 error = nfs_request_async(dnp, NULL, &nmreq, NFSPROC_RMDIR,
4589 vfs_context_thread(ctx), vfs_context_ucred(ctx), NULL, 0, NULL, &req);
4590 if (!error) {
4591 nfs_dulookup_start(&dul, dnp, ctx);
4592 error = nfs_request_async_finish(req, &nmrep, &xid, &status);
4593 }
4594
4595 if ((lockerror = nfs_node_lock(dnp)))
4596 error = lockerror;
4597 if (nfsvers == NFS_VER3)
4598 nfsm_chain_get_wcc_data(error, &nmrep, dnp, &premtime, &wccpostattr, &xid);
4599 if (!error)
4600 error = status;
4601 nfsmout:
4602 nfsm_chain_cleanup(&nmreq);
4603 nfsm_chain_cleanup(&nmrep);
4604
4605 if (!lockerror) {
4606 dnp->n_flag |= NMODIFIED;
4607 /* if directory hadn't changed, update namecache mtime */
4608 if (nfstimespeccmp(&dnp->n_ncmtime, &premtime, ==))
4609 NFS_CHANGED_UPDATE_NC(nfsvers, dnp, &dnp->n_vattr);
4610 nfs_node_unlock(dnp);
4611 nfs_name_cache_purge(dnp, np, cnp, ctx);
4612 /* nfs_getattr() will check changed and purge caches */
4613 nfs_getattr(dnp, NULL, ctx, wccpostattr ? NGA_CACHED : NGA_UNCACHED);
4614 }
4615 nfs_dulookup_finish(&dul, dnp, ctx);
4616 nfs_node_clear_busy2(dnp, np);
4617
4618 /*
4619 * Kludge: Map ENOENT => 0, assuming that it is a reply to a retry.
4620 */
4621 if (error == ENOENT)
4622 error = 0;
4623 if (!error) {
4624 /*
4625 * remove nfsnode from hash now so we can't accidentally find it
4626 * again if another object gets created with the same filehandle
4627 * before this vnode gets reclaimed
4628 */
4629 lck_mtx_lock(nfs_node_hash_mutex);
4630 if (np->n_hflag & NHHASHED) {
4631 LIST_REMOVE(np, n_hash);
4632 np->n_hflag &= ~NHHASHED;
4633 FSDBG(266, 0, np, np->n_flag, 0xb1eb1e);
4634 }
4635 lck_mtx_unlock(nfs_node_hash_mutex);
4636 }
4637 return (error);
4638 }
4639
4640 /*
4641 * NFS readdir call
4642 *
4643 * The incoming "offset" is a directory cookie indicating the point in the
4644 * directory from which entries should be read. A zero cookie means start at
4645 * the beginning of the directory. Any other cookie will be a cookie
4646 * returned from the server.
4647 *
4648 * Using that cookie, determine which buffer (and where in that buffer)
4649 * to start returning entries from. Buffer logical block numbers are
4650 * the cookies they start at. If a buffer is found that is not full,
4651 * call into the bio/RPC code to fill it. The RPC code will probably
4652 * fill several buffers (dropping the first, requiring a re-get).
4653 *
4654 * When done copying entries to the buffer, set the offset to the current
4655 * entry's cookie and enter that cookie in the cookie cache.
4656 *
4657 * Note: because the getdirentries(2) API returns a long-typed offset,
4658 * the incoming offset is a potentially truncated cookie (ptc).
4659 * The cookie matching code is aware of this and will fall back to
4660 * matching only 32 bits of the cookie.
4661 */
4662 int
4663 nfs_vnop_readdir(
4664 struct vnop_readdir_args /* {
4665 struct vnodeop_desc *a_desc;
4666 vnode_t a_vp;
4667 struct uio *a_uio;
4668 int a_flags;
4669 int *a_eofflag;
4670 int *a_numdirent;
4671 vfs_context_t a_context;
4672 } */ *ap)
4673 {
4674 vfs_context_t ctx = ap->a_context;
4675 vnode_t dvp = ap->a_vp;
4676 nfsnode_t dnp = VTONFS(dvp);
4677 struct nfsmount *nmp;
4678 uio_t uio = ap->a_uio;
4679 int error, nfsvers, extended, numdirent, bigcookies, ptc, done;
4680 uint16_t i, iptc, rlen, nlen;
4681 uint64_t cookie, nextcookie, lbn = 0;
4682 struct nfsbuf *bp = NULL;
4683 struct nfs_dir_buf_header *ndbhp;
4684 struct direntry *dp, *dpptc;
4685 struct dirent dent;
4686 char *cp = NULL;
4687 thread_t thd;
4688
4689 nmp = VTONMP(dvp);
4690 if (!nmp)
4691 return (ENXIO);
4692 nfsvers = nmp->nm_vers;
4693 bigcookies = (nmp->nm_state & NFSSTA_BIGCOOKIES);
4694 extended = (ap->a_flags & VNODE_READDIR_EXTENDED);
4695
4696 if (vnode_vtype(dvp) != VDIR)
4697 return (EPERM);
4698
4699 if (ap->a_eofflag)
4700 *ap->a_eofflag = 0;
4701
4702 if (uio_resid(uio) == 0)
4703 return (0);
4704
4705 if ((nfsvers >= NFS_VER4) && (dnp->n_vattr.nva_flags & NFS_FFLAG_TRIGGER)) {
4706 /* trigger directories should never be read, return nothing */
4707 return (0);
4708 }
4709
4710 thd = vfs_context_thread(ctx);
4711 numdirent = done = 0;
4712 nextcookie = uio_offset(uio);
4713 ptc = bigcookies && NFS_DIR_COOKIE_POTENTIALLY_TRUNCATED(nextcookie);
4714
4715 if ((error = nfs_node_lock(dnp)))
4716 goto out;
4717
4718 if (dnp->n_flag & NNEEDINVALIDATE) {
4719 dnp->n_flag &= ~NNEEDINVALIDATE;
4720 nfs_invaldir(dnp);
4721 nfs_node_unlock(dnp);
4722 error = nfs_vinvalbuf(dvp, 0, ctx, 1);
4723 if (!error)
4724 error = nfs_node_lock(dnp);
4725 if (error)
4726 goto out;
4727 }
4728
4729 /*
4730 * check for need to invalidate when (re)starting at beginning
4731 */
4732 if (!nextcookie) {
4733 if (dnp->n_flag & NMODIFIED) {
4734 nfs_invaldir(dnp);
4735 nfs_node_unlock(dnp);
4736 if ((error = nfs_vinvalbuf(dvp, 0, ctx, 1)))
4737 goto out;
4738 } else {
4739 nfs_node_unlock(dnp);
4740 }
4741 /* nfs_getattr() will check changed and purge caches */
4742 if ((error = nfs_getattr(dnp, NULL, ctx, NGA_UNCACHED)))
4743 goto out;
4744 } else {
4745 nfs_node_unlock(dnp);
4746 }
4747
4748 error = nfs_dir_cookie_to_lbn(dnp, nextcookie, &ptc, &lbn);
4749 if (error) {
4750 if (error < 0) { /* just hit EOF cookie */
4751 done = 1;
4752 error = 0;
4753 }
4754 if (ap->a_eofflag)
4755 *ap->a_eofflag = 1;
4756 }
4757
4758 while (!error && !done) {
4759 OSAddAtomic64(1, &nfsstats.biocache_readdirs);
4760 cookie = nextcookie;
4761 getbuffer:
4762 error = nfs_buf_get(dnp, lbn, NFS_DIRBLKSIZ, thd, NBLK_READ, &bp);
4763 if (error)
4764 goto out;
4765 ndbhp = (struct nfs_dir_buf_header*)bp->nb_data;
4766 if (!ISSET(bp->nb_flags, NB_CACHE) || !ISSET(ndbhp->ndbh_flags, NDB_FULL)) {
4767 if (!ISSET(bp->nb_flags, NB_CACHE)) { /* initialize the buffer */
4768 ndbhp->ndbh_flags = 0;
4769 ndbhp->ndbh_count = 0;
4770 ndbhp->ndbh_entry_end = sizeof(*ndbhp);
4771 ndbhp->ndbh_ncgen = dnp->n_ncgen;
4772 }
4773 error = nfs_buf_readdir(bp, ctx);
4774 if (error == NFSERR_DIRBUFDROPPED)
4775 goto getbuffer;
4776 if (error)
4777 nfs_buf_release(bp, 1);
4778 if (error && (error != ENXIO) && (error != ETIMEDOUT) && (error != EINTR) && (error != ERESTART)) {
4779 if (!nfs_node_lock(dnp)) {
4780 nfs_invaldir(dnp);
4781 nfs_node_unlock(dnp);
4782 }
4783 nfs_vinvalbuf(dvp, 0, ctx, 1);
4784 if (error == NFSERR_BAD_COOKIE)
4785 error = ENOENT;
4786 }
4787 if (error)
4788 goto out;
4789 }
4790
4791 /* find next entry to return */
4792 dp = NFS_DIR_BUF_FIRST_DIRENTRY(bp);
4793 i = 0;
4794 if ((lbn != cookie) && !(ptc && NFS_DIR_COOKIE_SAME32(lbn, cookie))) {
4795 dpptc = NULL;
4796 iptc = 0;
4797 for (; (i < ndbhp->ndbh_count) && (cookie != dp->d_seekoff); i++) {
4798 if (ptc && !dpptc && NFS_DIR_COOKIE_SAME32(cookie, dp->d_seekoff)) {
4799 iptc = i;
4800 dpptc = dp;
4801 }
4802 nextcookie = dp->d_seekoff;
4803 dp = NFS_DIRENTRY_NEXT(dp);
4804 }
4805 if ((i == ndbhp->ndbh_count) && dpptc) {
4806 i = iptc;
4807 dp = dpptc;
4808 }
4809 if (i < ndbhp->ndbh_count) {
4810 nextcookie = dp->d_seekoff;
4811 dp = NFS_DIRENTRY_NEXT(dp);
4812 i++;
4813 }
4814 }
4815 ptc = 0; /* only have to deal with ptc on first cookie */
4816
4817 /* return as many entries as we can */
4818 for (; i < ndbhp->ndbh_count; i++) {
4819 if (extended) {
4820 rlen = dp->d_reclen;
4821 cp = (char*)dp;
4822 } else {
4823 if (!cp) {
4824 cp = (char*)&dent;
4825 bzero(cp, sizeof(dent));
4826 }
4827 if (dp->d_namlen > (sizeof(dent.d_name) - 1))
4828 nlen = sizeof(dent.d_name) - 1;
4829 else
4830 nlen = dp->d_namlen;
4831 rlen = NFS_DIRENT_LEN(nlen);
4832 dent.d_reclen = rlen;
4833 dent.d_ino = dp->d_ino;
4834 dent.d_type = dp->d_type;
4835 dent.d_namlen = nlen;
4836 strlcpy(dent.d_name, dp->d_name, nlen + 1);
4837 }
4838 /* check that the record fits */
4839 if (rlen > uio_resid(uio)) {
4840 done = 1;
4841 break;
4842 }
4843 if ((error = uiomove(cp, rlen, uio)))
4844 break;
4845 numdirent++;
4846 nextcookie = dp->d_seekoff;
4847 dp = NFS_DIRENTRY_NEXT(dp);
4848 }
4849
4850 if (i == ndbhp->ndbh_count) {
4851 /* hit end of buffer, move to next buffer */
4852 lbn = nextcookie;
4853 /* if we also hit EOF, we're done */
4854 if (ISSET(ndbhp->ndbh_flags, NDB_EOF)) {
4855 done = 1;
4856 if (ap->a_eofflag)
4857 *ap->a_eofflag = 1;
4858 }
4859 }
4860 if (!error)
4861 uio_setoffset(uio, nextcookie);
4862 if (!error && !done && (nextcookie == cookie)) {
4863 printf("nfs readdir cookie didn't change 0x%llx, %d/%d\n", cookie, i, ndbhp->ndbh_count);
4864 error = EIO;
4865 }
4866 nfs_buf_release(bp, 1);
4867 }
4868
4869 if (!error)
4870 nfs_dir_cookie_cache(dnp, nextcookie, lbn);
4871
4872 if (ap->a_numdirent)
4873 *ap->a_numdirent = numdirent;
4874 out:
4875 return (error);
4876 }
4877
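/*
 * Editor's note: the "potentially truncated cookie" (ptc) fallback in
 * nfs_vnop_readdir() only ever compares the low 32 bits of a cookie, since
 * getdirentries(2) may have squeezed a 64-bit server cookie through a long.
 * Presumably (assumption; the macro is defined elsewhere)
 * NFS_DIR_COOKIE_SAME32 is morally:
 */
#if 0	/* illustrative only */
#define DIR_COOKIE_SAME32(a, b) \
	(((a) & 0xffffffffULL) == ((b) & 0xffffffffULL))
#endif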
4878
4879 /*
4880 * Invalidate cached directory information, except for the actual directory
4881 * blocks (which are invalidated separately).
4882 */
4883 void
4884 nfs_invaldir(nfsnode_t dnp)
4885 {
4886 if (vnode_vtype(NFSTOV(dnp)) != VDIR)
4887 return;
4888 dnp->n_eofcookie = 0;
4889 dnp->n_cookieverf = 0;
4890 if (!dnp->n_cookiecache)
4891 return;
4892 dnp->n_cookiecache->free = 0;
4893 dnp->n_cookiecache->mru = -1;
4894 memset(dnp->n_cookiecache->next, -1, NFSNUMCOOKIES);
4895 }
4896
4897 /*
4898 * calculate how much space is available for additional directory entries.
4899 */
4900 uint32_t
4901 nfs_dir_buf_freespace(struct nfsbuf *bp, int rdirplus)
4902 {
4903 struct nfs_dir_buf_header *ndbhp = (struct nfs_dir_buf_header*)bp->nb_data;
4904 uint32_t space;
4905
4906 if (!ndbhp)
4907 return (0);
4908 space = bp->nb_bufsize - ndbhp->ndbh_entry_end;
4909 if (rdirplus)
4910 space -= ndbhp->ndbh_count * sizeof(struct nfs_vattr);
4911 return (space);
4912 }
4913
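/*
 * Editor's note (inferred from the computation above): a readdirplus buffer
 * is filled from both ends -- direntry records grow up from the header while
 * one struct nfs_vattr per entry is, presumably, stacked down from the far
 * end of the buffer, so the free space is whatever remains between the two.
 */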
4914 /*
4915 * add/update a cookie->lbn entry in the directory cookie cache
4916 */
4917 void
4918 nfs_dir_cookie_cache(nfsnode_t dnp, uint64_t cookie, uint64_t lbn)
4919 {
4920 struct nfsdmap *ndcc;
4921 int8_t i, prev;
4922
4923 if (!cookie)
4924 return;
4925
4926 if (nfs_node_lock(dnp))
4927 return;
4928
4929 if (cookie == dnp->n_eofcookie) { /* EOF cookie */
4930 nfs_node_unlock(dnp);
4931 return;
4932 }
4933
4934 ndcc = dnp->n_cookiecache;
4935 if (!ndcc) {
4936 /* allocate the cookie cache structure */
4937 MALLOC_ZONE(dnp->n_cookiecache, struct nfsdmap *,
4938 sizeof(struct nfsdmap), M_NFSDIROFF, M_WAITOK);
4939 if (!dnp->n_cookiecache) {
4940 nfs_node_unlock(dnp);
4941 return;
4942 }
4943 ndcc = dnp->n_cookiecache;
4944 ndcc->free = 0;
4945 ndcc->mru = -1;
4946 memset(ndcc->next, -1, NFSNUMCOOKIES);
4947 }
4948
4949 /*
4950 * Search the list for this cookie.
4951 * Keep track of previous and last entries.
4952 */
4953 prev = -1;
4954 i = ndcc->mru;
4955 while ((i != -1) && (cookie != ndcc->cookies[i].key)) {
4956 if (ndcc->next[i] == -1) /* stop on last entry so we can reuse */
4957 break;
4958 prev = i;
4959 i = ndcc->next[i];
4960 }
4961 if ((i != -1) && (cookie == ndcc->cookies[i].key)) {
4962 /* found it, remove from list */
4963 if (prev != -1)
4964 ndcc->next[prev] = ndcc->next[i];
4965 else
4966 ndcc->mru = ndcc->next[i];
4967 } else {
4968 /* not found, use next free entry or reuse last entry */
4969 if (ndcc->free != NFSNUMCOOKIES)
4970 i = ndcc->free++;
4971 else
4972 ndcc->next[prev] = -1;
4973 ndcc->cookies[i].key = cookie;
4974 ndcc->cookies[i].lbn = lbn;
4975 }
4976 /* insert cookie at head of MRU list */
4977 ndcc->next[i] = ndcc->mru;
4978 ndcc->mru = i;
4979 nfs_node_unlock(dnp);
4980 }
4981
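/*
 * Editor's note: the cookie cache is a tiny intrusive MRU list -- cookies[]
 * holds key/lbn pairs, next[] chains slot indices (-1 terminates), and mru
 * is the head.  A hit is unlinked and reinserted at the head; a miss takes
 * the next free slot, or overwrites the tail once all NFSNUMCOOKIES slots
 * are in use.  For example, with slots in MRU order A->B->C, re-caching B
 * yields B->A->C, and caching a new D while full evicts the tail C, yielding
 * D->A->B.
 */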
4982 /*
4983 * Try to map the given directory cookie to a directory buffer (return lbn).
4984 * If we have a possibly truncated cookie (ptc), check for 32-bit matches too.
4985 */
4986 int
4987 nfs_dir_cookie_to_lbn(nfsnode_t dnp, uint64_t cookie, int *ptc, uint64_t *lbnp)
4988 {
4989 struct nfsdmap *ndcc = dnp->n_cookiecache;
4990 int8_t eofptc, found;
4991 int i, iptc;
4992 struct nfsmount *nmp;
4993 struct nfsbuf *bp, *lastbp;
4994 struct nfsbuflists blist;
4995 struct direntry *dp, *dpptc;
4996 struct nfs_dir_buf_header *ndbhp;
4997
4998 if (!cookie) { /* initial cookie */
4999 *lbnp = 0;
5000 *ptc = 0;
5001 return (0);
5002 }
5003
5004 if (nfs_node_lock(dnp))
5005 return (ENOENT);
5006
5007 if (cookie == dnp->n_eofcookie) { /* EOF cookie */
5008 nfs_node_unlock(dnp);
5009 OSAddAtomic64(1, &nfsstats.direofcache_hits);
5010 *ptc = 0;
5011 return (-1);
5012 }
5013 /* note if cookie is a 32-bit match with the EOF cookie */
5014 eofptc = *ptc ? NFS_DIR_COOKIE_SAME32(cookie, dnp->n_eofcookie) : 0;
5015 iptc = -1;
5016
5017 /* search the list for the cookie */
5018 for (i = ndcc ? ndcc->mru : -1; i >= 0; i = ndcc->next[i]) {
5019 if (ndcc->cookies[i].key == cookie) {
5020 /* found a match for this cookie */
5021 *lbnp = ndcc->cookies[i].lbn;
5022 nfs_node_unlock(dnp);
5023 OSAddAtomic64(1, &nfsstats.direofcache_hits);
5024 *ptc = 0;
5025 return (0);
5026 }
5027 /* check for 32-bit match */
5028 if (*ptc && (iptc == -1) && NFS_DIR_COOKIE_SAME32(ndcc->cookies[i].key, cookie))
5029 iptc = i;
5030 }
5031 /* exact match not found */
5032 if (eofptc) {
5033 /* but 32-bit match hit the EOF cookie */
5034 nfs_node_unlock(dnp);
5035 OSAddAtomic64(1, &nfsstats.direofcache_hits);
5036 return (-1);
5037 }
5038 if (iptc >= 0) {
5039 /* but 32-bit match got a hit */
5040 *lbnp = ndcc->cookies[iptc].lbn;
5041 nfs_node_unlock(dnp);
5042 OSAddAtomic64(1, &nfsstats.direofcache_hits);
5043 return (0);
5044 }
5045 nfs_node_unlock(dnp);
5046
5047 /*
5048 * No match found in the cookie cache... hmm...
5049 * Let's search the directory's buffers for the cookie.
5050 */
5051 nmp = NFSTONMP(dnp);
5052 if (!nmp)
5053 return (ENXIO);
5054 dpptc = NULL;
5055 found = 0;
5056
5057 lck_mtx_lock(nfs_buf_mutex);
5058 /*
5059 * Scan the list of buffers, keeping them in order.
5060 * Note that itercomplete inserts each of the remaining buffers
5061 * into the head of list (thus reversing the elements). So, we
5062 * make sure to iterate through all buffers, inserting them after
5063 * each other, to keep them in order.
5064 * Also note: the LIST_INSERT_AFTER(lastbp) is only safe because
5065 * we don't drop nfs_buf_mutex.
5066 */
5067 if (!nfs_buf_iterprepare(dnp, &blist, NBI_CLEAN)) {
5068 lastbp = NULL;
5069 while ((bp = LIST_FIRST(&blist))) {
5070 LIST_REMOVE(bp, nb_vnbufs);
5071 if (!lastbp)
5072 LIST_INSERT_HEAD(&dnp->n_cleanblkhd, bp, nb_vnbufs);
5073 else
5074 LIST_INSERT_AFTER(lastbp, bp, nb_vnbufs);
5075 lastbp = bp;
5076 if (found)
5077 continue;
5078 nfs_buf_refget(bp);
5079 if (nfs_buf_acquire(bp, NBAC_NOWAIT, 0, 0)) {
5080 /* just skip this buffer */
5081 nfs_buf_refrele(bp);
5082 continue;
5083 }
5084 nfs_buf_refrele(bp);
5085
5086 /* scan the buffer for the cookie */
5087 ndbhp = (struct nfs_dir_buf_header*)bp->nb_data;
5088 dp = NFS_DIR_BUF_FIRST_DIRENTRY(bp);
5089 dpptc = NULL;
5090 for (i=0; (i < ndbhp->ndbh_count) && (cookie != dp->d_seekoff); i++) {
5091 if (*ptc && !dpptc && NFS_DIR_COOKIE_SAME32(cookie, dp->d_seekoff)) {
5092 dpptc = dp;
5093 iptc = i;
5094 }
5095 dp = NFS_DIRENTRY_NEXT(dp);
5096 }
5097 if ((i == ndbhp->ndbh_count) && dpptc) {
5098 /* found only a PTC match */
5099 dp = dpptc;
5100 i = iptc;
5101 } else if (i < ndbhp->ndbh_count) {
5102 *ptc = 0;
5103 }
5104 if (i < (ndbhp->ndbh_count-1)) {
5105 /* next entry is *in* this buffer: return this block */
5106 *lbnp = bp->nb_lblkno;
5107 found = 1;
5108 } else if (i == (ndbhp->ndbh_count-1)) {
5109 /* next entry refers to *next* buffer: return next block */
5110 *lbnp = dp->d_seekoff;
5111 found = 1;
5112 }
5113 nfs_buf_drop(bp);
5114 }
5115 nfs_buf_itercomplete(dnp, &blist, NBI_CLEAN);
5116 }
5117 lck_mtx_unlock(nfs_buf_mutex);
5118 if (found) {
5119 OSAddAtomic64(1, &nfsstats.direofcache_hits);
5120 return (0);
5121 }
5122
5123 /* still not found... oh well, just start a new block */
5124 *lbnp = cookie;
5125 OSAddAtomic64(1, &nfsstats.direofcache_misses);
5126 return (0);
5127 }
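/*
 * Editorial note on "ptc" (possibly truncated cookie): a 64-bit NFSv3
 * cookie that has passed through a 32-bit interface may have lost its
 * upper 32 bits, so the lookup above falls back to comparing only the low
 * 32 bits. A plausible reading of NFS_DIR_COOKIE_SAME32() (an assumption;
 * see the macro's actual definition in the headers) is:
 *
 *   ((cookie1 & 0xffffffffULL) == (cookie2 & 0xffffffffULL))
 */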
5128
5129 /*
5130 * scan a directory buffer for the given name
5131 * Returns: ESRCH if not found, ENOENT if found invalid, 0 if found
5132 * Note: should only be called with RDIRPLUS directory buffers
5133 */
5134
5135 #define NDBS_PURGE 1
5136 #define NDBS_UPDATE 2
5137
5138 int
5139 nfs_dir_buf_search(
5140 struct nfsbuf *bp,
5141 struct componentname *cnp,
5142 fhandle_t *fhp,
5143 struct nfs_vattr *nvap,
5144 uint64_t *xidp,
5145 time_t *attrstampp,
5146 daddr64_t *nextlbnp,
5147 int flags)
5148 {
5149 struct direntry *dp;
5150 struct nfs_dir_buf_header *ndbhp;
5151 struct nfs_vattr *nvattrp;
5152 daddr64_t nextlbn = 0;
5153 int i, error = ESRCH, fhlen;
5154
5155 /* scan the buffer for the name */
5156 ndbhp = (struct nfs_dir_buf_header*)bp->nb_data;
5157 dp = NFS_DIR_BUF_FIRST_DIRENTRY(bp);
5158 for (i=0; i < ndbhp->ndbh_count; i++) {
5159 nextlbn = dp->d_seekoff;
5160 if ((cnp->cn_namelen == dp->d_namlen) && !strcmp(cnp->cn_nameptr, dp->d_name)) {
5161 fhlen = dp->d_name[dp->d_namlen+1];
5162 nvattrp = NFS_DIR_BUF_NVATTR(bp, i);
5163 if ((ndbhp->ndbh_ncgen != bp->nb_np->n_ncgen) || (fhlen == 0) ||
5164 (nvattrp->nva_type == VNON) || (nvattrp->nva_fileid == 0)) {
5165 /* entry is not valid */
5166 error = ENOENT;
5167 break;
5168 }
5169 if (flags == NDBS_PURGE) {
5170 dp->d_fileno = 0;
5171 bzero(nvattrp, sizeof(*nvattrp));
5172 error = ENOENT;
5173 break;
5174 }
5175 if (flags == NDBS_UPDATE) {
5176 /* update direntry's attrs if fh matches */
5177 if ((fhp->fh_len == fhlen) && !bcmp(&dp->d_name[dp->d_namlen+2], fhp->fh_data, fhlen)) {
5178 bcopy(nvap, nvattrp, sizeof(*nvap));
5179 dp->d_fileno = nvattrp->nva_fileid;
5180 nvattrp->nva_fileid = *xidp;
5181 *(time_t*)(&dp->d_name[dp->d_namlen+2+fhp->fh_len]) = *attrstampp;
5182 }
5183 error = 0;
5184 break;
5185 }
5186 /* copy out fh, attrs, attrstamp, and xid */
5187 fhp->fh_len = fhlen;
5188 bcopy(&dp->d_name[dp->d_namlen+2], fhp->fh_data, MIN(fhp->fh_len, (int)sizeof(fhp->fh_data))); /* MIN: never copy past fh_data */
5189 *attrstampp = *(time_t*)(&dp->d_name[dp->d_namlen+2+fhp->fh_len]);
5190 bcopy(nvattrp, nvap, sizeof(*nvap));
5191 *xidp = nvap->nva_fileid;
5192 nvap->nva_fileid = dp->d_fileno;
5193 error = 0;
5194 break;
5195 }
5196 dp = NFS_DIRENTRY_NEXT(dp);
5197 }
5198 if (nextlbnp)
5199 *nextlbnp = nextlbn;
5200 return (error);
5201 }
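/*
 * Editorial note: the offset arithmetic above (and in nfs3_readdir_rpc)
 * implies that RDIRPLUS buffers append extra data after each direntry name:
 *
 *   d_name[0 .. d_namlen-1]          the name itself
 *   d_name[d_namlen]                 '\0' terminator
 *   d_name[d_namlen+1]               file handle length byte
 *   d_name[d_namlen+2 ..]            file handle data (fh_len bytes)
 *   d_name[d_namlen+2+fh_len ..]     attribute timestamp (time_t)
 *
 * with the corresponding struct nfs_vattr stored at the tail of the buffer
 * (see nfs_dir_buf_freespace above).
 */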
5202
5203 /*
5204 * Look up a name in a directory's buffers.
5205 * Note: should only be called with RDIRPLUS directory buffers
5206 */
5207 int
5208 nfs_dir_buf_cache_lookup(nfsnode_t dnp, nfsnode_t *npp, struct componentname *cnp, vfs_context_t ctx, int purge)
5209 {
5210 nfsnode_t newnp;
5211 struct nfsmount *nmp;
5212 int error = 0, i, found = 0, count = 0;
5213 u_int64_t xid;
5214 struct nfs_vattr nvattr;
5215 fhandle_t fh;
5216 time_t attrstamp = 0;
5217 thread_t thd = vfs_context_thread(ctx);
5218 struct nfsbuf *bp, *lastbp, *foundbp;
5219 struct nfsbuflists blist;
5220 daddr64_t lbn, nextlbn;
5221 int dotunder = (cnp->cn_namelen > 2) && (cnp->cn_nameptr[0] == '.') && (cnp->cn_nameptr[1] == '_');
5222
5223 if (!(nmp = NFSTONMP(dnp)))
5224 return (ENXIO);
5225 if (!purge)
5226 *npp = NULL;
5227
5228 /* first check most recent buffer (and next one too) */
5229 lbn = dnp->n_lastdbl;
5230 for (i=0; i < 2; i++) {
5231 if ((error = nfs_buf_get(dnp, lbn, NFS_DIRBLKSIZ, thd, NBLK_READ|NBLK_ONLYVALID, &bp)))
5232 return (error);
5233 if (!bp)
5234 break;
5235 count++;
5236 error = nfs_dir_buf_search(bp, cnp, &fh, &nvattr, &xid, &attrstamp, &nextlbn, purge ? NDBS_PURGE : 0);
5237 nfs_buf_release(bp, 0);
5238 if (error == ESRCH) {
5239 error = 0;
5240 } else {
5241 found = 1;
5242 break;
5243 }
5244 lbn = nextlbn;
5245 }
5246
5247 lck_mtx_lock(nfs_buf_mutex);
5248 if (found) {
5249 dnp->n_lastdbl = lbn;
5250 goto done;
5251 }
5252
5253 /*
5254 * Scan the list of buffers, keeping them in order.
5255 * Note that itercomplete inserts each of the remaining buffers
5256 * into the head of list (thus reversing the elements). So, we
5257 * make sure to iterate through all buffers, inserting them after
5258 * each other, to keep them in order.
5259 * Also note: the LIST_INSERT_AFTER(lastbp) is only safe because
5260 * we don't drop nfs_buf_mutex.
5261 */
5262 if (!nfs_buf_iterprepare(dnp, &blist, NBI_CLEAN)) {
5263 lastbp = foundbp = NULL;
5264 while ((bp = LIST_FIRST(&blist))) {
5265 LIST_REMOVE(bp, nb_vnbufs);
5266 if (!lastbp)
5267 LIST_INSERT_HEAD(&dnp->n_cleanblkhd, bp, nb_vnbufs);
5268 else
5269 LIST_INSERT_AFTER(lastbp, bp, nb_vnbufs);
5270 lastbp = bp;
5271 if (error || found)
5272 continue;
5273 if (!purge && dotunder && (count > 100)) /* don't waste too much time looking for ._ files */
5274 continue;
5275 nfs_buf_refget(bp);
5276 lbn = bp->nb_lblkno;
5277 if (nfs_buf_acquire(bp, NBAC_NOWAIT, 0, 0)) {
5278 /* just skip this buffer */
5279 nfs_buf_refrele(bp);
5280 continue;
5281 }
5282 nfs_buf_refrele(bp);
5283 count++;
5284 error = nfs_dir_buf_search(bp, cnp, &fh, &nvattr, &xid, &attrstamp, NULL, purge ? NDBS_PURGE : 0);
5285 if (error == ESRCH) {
5286 error = 0;
5287 } else {
5288 found = 1;
5289 foundbp = bp;
5290 }
5291 nfs_buf_drop(bp);
5292 }
5293 if (found) {
5294 LIST_REMOVE(foundbp, nb_vnbufs);
5295 LIST_INSERT_HEAD(&dnp->n_cleanblkhd, foundbp, nb_vnbufs);
5296 dnp->n_lastdbl = foundbp->nb_lblkno;
5297 }
5298 nfs_buf_itercomplete(dnp, &blist, NBI_CLEAN);
5299 }
5300 done:
5301 lck_mtx_unlock(nfs_buf_mutex);
5302
5303 if (!error && found && !purge) {
5304 error = nfs_nget(NFSTOMP(dnp), dnp, cnp, fh.fh_data, fh.fh_len,
5305 &nvattr, &xid, dnp->n_auth, NG_MAKEENTRY, &newnp);
5306 if (error)
5307 return (error);
5308 newnp->n_attrstamp = attrstamp;
5309 *npp = newnp;
5310 nfs_node_unlock(newnp);
5311 /* check if the dir buffer's attrs are out of date */
5312 if (!nfs_getattr(newnp, &nvattr, ctx, NGA_CACHED) &&
5313 (newnp->n_attrstamp != attrstamp)) {
5314 /* they are, so update them */
5315 error = nfs_buf_get(dnp, lbn, NFS_DIRBLKSIZ, thd, NBLK_READ|NBLK_ONLYVALID, &bp);
5316 if (!error && bp) {
5317 attrstamp = newnp->n_attrstamp;
5318 xid = newnp->n_xid;
5319 nfs_dir_buf_search(bp, cnp, &fh, &nvattr, &xid, &attrstamp, NULL, NDBS_UPDATE);
5320 nfs_buf_release(bp, 0);
5321 }
5322 error = 0;
5323 }
5324 }
5325
5326 return (error);
5327 }
5328
5329 /*
5330 * Purge name cache entries for the given node.
5331 * For RDIRPLUS, also invalidate the entry in the directory's buffers.
5332 */
5333 void
5334 nfs_name_cache_purge(nfsnode_t dnp, nfsnode_t np, struct componentname *cnp, vfs_context_t ctx)
5335 {
5336 struct nfsmount *nmp = NFSTONMP(dnp);
5337
5338 cache_purge(NFSTOV(np));
5339 if (nmp && (nmp->nm_vers > NFS_VER2) && NMFLAG(nmp, RDIRPLUS))
5340 nfs_dir_buf_cache_lookup(dnp, NULL, cnp, ctx, 1);
5341 }
5342
5343 /*
5344 * NFS V3 readdir (plus) RPC.
5345 */
5346 int
5347 nfs3_readdir_rpc(nfsnode_t dnp, struct nfsbuf *bp, vfs_context_t ctx)
5348 {
5349 struct nfsmount *nmp;
5350 int error = 0, lockerror, nfsvers, rdirplus, bigcookies;
5351 int i, status, attrflag, fhflag, more_entries = 1, eof, bp_dropped = 0;
5352 uint32_t nmreaddirsize, nmrsize;
5353 uint32_t namlen, skiplen, fhlen, xlen, attrlen, reclen, space_free, space_needed;
5354 uint64_t cookie, lastcookie, xid, savedxid, fileno;
5355 struct nfsm_chain nmreq, nmrep, nmrepsave;
5356 fhandle_t fh;
5357 struct nfs_vattr *nvattrp;
5358 struct nfs_dir_buf_header *ndbhp;
5359 struct direntry *dp;
5360 char *padstart, padlen;
5361 struct timeval now;
5362
5363 nmp = NFSTONMP(dnp);
5364 if (!nmp)
5365 return (ENXIO);
5366 nfsvers = nmp->nm_vers;
5367 nmreaddirsize = nmp->nm_readdirsize;
5368 nmrsize = nmp->nm_rsize;
5369 bigcookies = nmp->nm_state & NFSSTA_BIGCOOKIES;
5370 noplus:
5371 rdirplus = ((nfsvers > NFS_VER2) && NMFLAG(nmp, RDIRPLUS)) ? 1 : 0;
5372
5373 if ((lockerror = nfs_node_lock(dnp)))
5374 return (lockerror);
5375
5376 /* determine cookie to use, and move dp to the right offset */
5377 ndbhp = (struct nfs_dir_buf_header*)bp->nb_data;
5378 dp = NFS_DIR_BUF_FIRST_DIRENTRY(bp);
5379 if (ndbhp->ndbh_count) {
5380 for (i=0; i < ndbhp->ndbh_count-1; i++)
5381 dp = NFS_DIRENTRY_NEXT(dp);
5382 cookie = dp->d_seekoff;
5383 dp = NFS_DIRENTRY_NEXT(dp);
5384 } else {
5385 cookie = bp->nb_lblkno;
5386 /* increment with every buffer read */
5387 OSAddAtomic64(1, &nfsstats.readdir_bios);
5388 }
5389 lastcookie = cookie;
5390
5391 /*
5392 * Loop around doing readdir(plus) RPCs of size nm_readdirsize until
5393 * the buffer is full (or we hit EOF). Then put the remainder of the
5394 * results in the next buffer(s).
5395 */
5396 nfsm_chain_null(&nmreq);
5397 nfsm_chain_null(&nmrep);
5398 while (nfs_dir_buf_freespace(bp, rdirplus) && !(ndbhp->ndbh_flags & NDB_FULL)) {
5399 nfsm_chain_build_alloc_init(error, &nmreq,
5400 NFSX_FH(nfsvers) + NFSX_READDIR(nfsvers) + NFSX_UNSIGNED);
5401 nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
5402 if (nfsvers == NFS_VER3) {
5403 /* opaque values don't need swapping, but as long */
5404 /* as we are consistent about it, it should be ok */
5405 nfsm_chain_add_64(error, &nmreq, cookie);
5406 nfsm_chain_add_64(error, &nmreq, dnp->n_cookieverf);
5407 } else {
5408 nfsm_chain_add_32(error, &nmreq, cookie);
5409 }
5410 nfsm_chain_add_32(error, &nmreq, nmreaddirsize);
5411 if (rdirplus)
5412 nfsm_chain_add_32(error, &nmreq, nmrsize);
5413 nfsm_chain_build_done(error, &nmreq);
5414 nfs_node_unlock(dnp);
5415 lockerror = ENOENT;
5416 nfsmout_if(error);
5417
5418 error = nfs_request(dnp, NULL, &nmreq,
5419 rdirplus ? NFSPROC_READDIRPLUS : NFSPROC_READDIR,
5420 ctx, NULL, &nmrep, &xid, &status);
5421
5422 if ((lockerror = nfs_node_lock(dnp)))
5423 error = lockerror;
5424
5425 savedxid = xid;
5426 if (nfsvers == NFS_VER3)
5427 nfsm_chain_postop_attr_update(error, &nmrep, dnp, &xid);
5428 if (!error)
5429 error = status;
5430 if (nfsvers == NFS_VER3)
5431 nfsm_chain_get_64(error, &nmrep, dnp->n_cookieverf);
5432 nfsm_chain_get_32(error, &nmrep, more_entries);
5433
5434 if (!lockerror) {
5435 nfs_node_unlock(dnp);
5436 lockerror = ENOENT;
5437 }
5438 if (error == NFSERR_NOTSUPP) {
5439 /* oops... it doesn't look like readdirplus is supported */
5440 lck_mtx_lock(&nmp->nm_lock);
5441 NFS_BITMAP_CLR(nmp->nm_flags, NFS_MFLAG_RDIRPLUS);
5442 lck_mtx_unlock(&nmp->nm_lock);
5443 goto noplus;
5444 }
5445 nfsmout_if(error);
5446
5447 if (rdirplus)
5448 microuptime(&now);
5449
5450 /* loop through the entries packing them into the buffer */
5451 while (more_entries) {
5452 if (nfsvers == NFS_VER3)
5453 nfsm_chain_get_64(error, &nmrep, fileno);
5454 else
5455 nfsm_chain_get_32(error, &nmrep, fileno);
5456 nfsm_chain_get_32(error, &nmrep, namlen);
5457 nfsmout_if(error);
5458 /* reject zero-length names; just truncate names that don't fit in direntry.d_name */
5459 if (namlen == 0) {
5460 error = EBADRPC;
5461 goto nfsmout;
5462 }
5463 if (namlen > (sizeof(dp->d_name)-1)) {
5464 skiplen = namlen - sizeof(dp->d_name) + 1;
5465 namlen = sizeof(dp->d_name) - 1;
5466 } else {
5467 skiplen = 0;
5468 }
5469 /* guess that fh size will be same as parent */
5470 fhlen = rdirplus ? (1 + dnp->n_fhsize) : 0;
5471 xlen = rdirplus ? (fhlen + sizeof(time_t)) : 0;
5472 attrlen = rdirplus ? sizeof(struct nfs_vattr) : 0;
5473 reclen = NFS_DIRENTRY_LEN(namlen + xlen);
5474 space_needed = reclen + attrlen;
5475 space_free = nfs_dir_buf_freespace(bp, rdirplus);
5476 if (space_needed > space_free) {
5477 /*
5478 * We still have entries to pack, but we've
5479 * run out of room in the current buffer.
5480 * So we need to move to the next buffer.
5481 * The block# for the next buffer is the
5482 * last cookie in the current buffer.
5483 */
5484 nextbuffer:
5485 ndbhp->ndbh_flags |= NDB_FULL;
5486 nfs_buf_release(bp, 0);
5487 bp_dropped = 1;
5488 bp = NULL;
5489 error = nfs_buf_get(dnp, lastcookie, NFS_DIRBLKSIZ, vfs_context_thread(ctx), NBLK_READ, &bp);
5490 nfsmout_if(error);
5491 /* initialize buffer */
5492 ndbhp = (struct nfs_dir_buf_header*)bp->nb_data;
5493 ndbhp->ndbh_flags = 0;
5494 ndbhp->ndbh_count = 0;
5495 ndbhp->ndbh_entry_end = sizeof(*ndbhp);
5496 ndbhp->ndbh_ncgen = dnp->n_ncgen;
5497 space_free = nfs_dir_buf_freespace(bp, rdirplus);
5498 dp = NFS_DIR_BUF_FIRST_DIRENTRY(bp);
5499 /* increment with every buffer read */
5500 OSAddAtomic64(1, &nfsstats.readdir_bios);
5501 }
5502 nmrepsave = nmrep;
5503 dp->d_fileno = fileno;
5504 dp->d_namlen = namlen;
5505 dp->d_reclen = reclen;
5506 dp->d_type = DT_UNKNOWN;
5507 nfsm_chain_get_opaque(error, &nmrep, namlen, dp->d_name);
5508 nfsmout_if(error);
5509 dp->d_name[namlen] = '\0';
5510 if (skiplen)
5511 nfsm_chain_adv(error, &nmrep,
5512 nfsm_rndup(namlen + skiplen) - nfsm_rndup(namlen));
5513 if (nfsvers == NFS_VER3)
5514 nfsm_chain_get_64(error, &nmrep, cookie);
5515 else
5516 nfsm_chain_get_32(error, &nmrep, cookie);
5517 nfsmout_if(error);
5518 dp->d_seekoff = cookie;
5519 if (!bigcookies && (cookie >> 32) && (nmp == NFSTONMP(dnp))) {
5520 /* we've got a big cookie, make sure flag is set */
5521 lck_mtx_lock(&nmp->nm_lock);
5522 nmp->nm_state |= NFSSTA_BIGCOOKIES;
5523 lck_mtx_unlock(&nmp->nm_lock);
5524 bigcookies = 1;
5525 }
5526 if (rdirplus) {
5527 nvattrp = NFS_DIR_BUF_NVATTR(bp, ndbhp->ndbh_count);
5528 /* check for attributes */
5529 nfsm_chain_get_32(error, &nmrep, attrflag);
5530 nfsmout_if(error);
5531 if (attrflag) {
5532 /* grab attributes */
5533 error = nfs_parsefattr(&nmrep, NFS_VER3, nvattrp);
5534 nfsmout_if(error);
5535 dp->d_type = IFTODT(VTTOIF(nvattrp->nva_type));
5536 /* fileid is already in d_fileno, so stash xid in attrs */
5537 nvattrp->nva_fileid = savedxid;
5538 } else {
5539 /* mark the attributes invalid */
5540 bzero(nvattrp, sizeof(struct nfs_vattr));
5541 }
5542 /* check for file handle */
5543 nfsm_chain_get_32(error, &nmrep, fhflag);
5544 nfsmout_if(error);
5545 if (fhflag) {
5546 nfsm_chain_get_fh(error, &nmrep, NFS_VER3, &fh);
5547 nfsmout_if(error);
5548 fhlen = fh.fh_len + 1;
5549 xlen = fhlen + sizeof(time_t);
5550 reclen = NFS_DIRENTRY_LEN(namlen + xlen);
5551 space_needed = reclen + attrlen;
5552 if (space_needed > space_free) {
5553 /* didn't actually have the room... move on to next buffer */
5554 nmrep = nmrepsave;
5555 goto nextbuffer;
5556 }
5557 /* pack the file handle into the record */
5558 dp->d_name[dp->d_namlen+1] = fh.fh_len;
5559 bcopy(fh.fh_data, &dp->d_name[dp->d_namlen+2], fh.fh_len);
5560 } else {
5561 /* mark the file handle invalid */
5562 fh.fh_len = 0;
5563 fhlen = fh.fh_len + 1;
5564 xlen = fhlen + sizeof(time_t);
5565 reclen = NFS_DIRENTRY_LEN(namlen + xlen);
5566 bzero(&dp->d_name[dp->d_namlen+1], fhlen);
5567 }
5568 *(time_t*)(&dp->d_name[dp->d_namlen+1+fhlen]) = now.tv_sec;
5569 dp->d_reclen = reclen;
5570 }
5571 padstart = dp->d_name + dp->d_namlen + 1 + xlen;
5572 ndbhp->ndbh_count++;
5573 lastcookie = cookie;
5574 /* advance to next direntry in buffer */
5575 dp = NFS_DIRENTRY_NEXT(dp);
5576 ndbhp->ndbh_entry_end = (char*)dp - bp->nb_data;
5577 /* zero out the pad bytes */
5578 padlen = (char*)dp - padstart;
5579 if (padlen > 0)
5580 bzero(padstart, padlen);
5581 /* check for more entries */
5582 nfsm_chain_get_32(error, &nmrep, more_entries);
5583 nfsmout_if(error);
5584 }
5585 /* Finally, get the eof boolean */
5586 nfsm_chain_get_32(error, &nmrep, eof);
5587 nfsmout_if(error);
5588 if (eof) {
5589 ndbhp->ndbh_flags |= (NDB_FULL|NDB_EOF);
5590 nfs_node_lock_force(dnp);
5591 dnp->n_eofcookie = lastcookie;
5592 nfs_node_unlock(dnp);
5593 } else {
5594 more_entries = 1;
5595 }
5596 if (bp_dropped) {
5597 nfs_buf_release(bp, 0);
5598 bp = NULL;
5599 break;
5600 }
5601 if ((lockerror = nfs_node_lock(dnp)))
5602 error = lockerror;
5603 nfsmout_if(error);
5604 nfsm_chain_cleanup(&nmrep);
5605 nfsm_chain_null(&nmreq);
5606 }
5607 nfsmout:
5608 if (bp_dropped && bp)
5609 nfs_buf_release(bp, 0);
5610 if (!lockerror)
5611 nfs_node_unlock(dnp);
5612 nfsm_chain_cleanup(&nmreq);
5613 nfsm_chain_cleanup(&nmrep);
5614 return (bp_dropped ? NFSERR_DIRBUFDROPPED : error);
5615 }
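/*
 * Editorial note: the NFSSTA_BIGCOOKIES flag set above records that this
 * server hands out directory cookies wider than 32 bits; that is what makes
 * the "possibly truncated cookie" (ptc) handling in nfs_dir_cookie_to_lbn()
 * necessary once such a cookie has passed through a 32-bit interface.
 */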
5616
5617 /*
5618 * Silly rename. To make the stateless NFS filesystem look a little
5619 * more like "ufs", a remove of an active vnode is translated to a rename
5620 * to a funny-looking filename that is removed by nfs_vnop_inactive on the
5621 * nfsnode. There is the potential for another process on a different client
5622 * to create the same funny name between when the lookitup() fails and the
5623 * rename() completes, but...
5624 */
5625
5626 /* format of "random" silly names - includes a number and pid */
5627 /* (note: shouldn't exceed size of nfs_sillyrename.nsr_name) */
5628 #define NFS_SILLYNAME_FORMAT ".nfs.%08x.%04x"
5629 /* starting from zero isn't silly enough */
5630 static uint32_t nfs_sillyrename_number = 0x20051025;
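/*
 * For example (illustrative only): with nfs_sillyrename_number still at its
 * initial value, a caller whose pid is 0x1a2b would generate the name
 * ".nfs.20051025.1a2b" (OSAddAtomic returns the pre-increment value).
 */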
5631
5632 int
5633 nfs_sillyrename(
5634 nfsnode_t dnp,
5635 nfsnode_t np,
5636 struct componentname *cnp,
5637 vfs_context_t ctx)
5638 {
5639 struct nfs_sillyrename *nsp;
5640 int error;
5641 short pid;
5642 kauth_cred_t cred;
5643 uint32_t num;
5644 struct nfsmount *nmp;
5645
5646 nmp = NFSTONMP(dnp);
5647 if (!nmp)
5648 return (ENXIO);
5649
5650 nfs_name_cache_purge(dnp, np, cnp, ctx);
5651
5652 MALLOC_ZONE(nsp, struct nfs_sillyrename *,
5653 sizeof (struct nfs_sillyrename), M_NFSREQ, M_WAITOK);
5654 if (!nsp)
5655 return (ENOMEM);
5656 cred = vfs_context_ucred(ctx);
5657 kauth_cred_ref(cred);
5658 nsp->nsr_cred = cred;
5659 nsp->nsr_dnp = dnp;
5660 error = vnode_ref(NFSTOV(dnp));
5661 if (error)
5662 goto bad_norele;
5663
5664 /* Fudge together a funny name */
5665 pid = vfs_context_pid(ctx);
5666 num = OSAddAtomic(1, &nfs_sillyrename_number);
5667 nsp->nsr_namlen = snprintf(nsp->nsr_name, sizeof(nsp->nsr_name),
5668 NFS_SILLYNAME_FORMAT, num, (pid & 0xffff));
5669 if (nsp->nsr_namlen >= (int)sizeof(nsp->nsr_name))
5670 nsp->nsr_namlen = sizeof(nsp->nsr_name) - 1;
5671
5672 /* Try lookitups until we get one that isn't there */
5673 while (nfs_lookitup(dnp, nsp->nsr_name, nsp->nsr_namlen, ctx, NULL) == 0) {
5674 num = OSAddAtomic(1, &nfs_sillyrename_number);
5675 nsp->nsr_namlen = snprintf(nsp->nsr_name, sizeof(nsp->nsr_name),
5676 NFS_SILLYNAME_FORMAT, num, (pid & 0xffff));
5677 if (nsp->nsr_namlen >= (int)sizeof(nsp->nsr_name))
5678 nsp->nsr_namlen = sizeof(nsp->nsr_name) - 1;
5679 }
5680
5681 /* now, do the rename */
5682 error = nmp->nm_funcs->nf_rename_rpc(dnp, cnp->cn_nameptr, cnp->cn_namelen,
5683 dnp, nsp->nsr_name, nsp->nsr_namlen, ctx);
5684
5685 /* Kludge: Map ENOENT => 0 assuming that it is a reply to a retry. */
5686 if (error == ENOENT)
5687 error = 0;
5688 if (!error) {
5689 nfs_node_lock_force(dnp);
5690 if (dnp->n_flag & NNEGNCENTRIES) {
5691 dnp->n_flag &= ~NNEGNCENTRIES;
5692 cache_purge_negatives(NFSTOV(dnp));
5693 }
5694 nfs_node_unlock(dnp);
5695 }
5696 FSDBG(267, dnp, np, num, error);
5697 if (error)
5698 goto bad;
5699 error = nfs_lookitup(dnp, nsp->nsr_name, nsp->nsr_namlen, ctx, &np);
5700 nfs_node_lock_force(np);
5701 np->n_sillyrename = nsp;
5702 nfs_node_unlock(np);
5703 return (0);
5704 bad:
5705 vnode_rele(NFSTOV(dnp));
5706 bad_norele:
5707 nsp->nsr_cred = NOCRED;
5708 kauth_cred_unref(&cred);
5709 FREE_ZONE(nsp, sizeof(*nsp), M_NFSREQ);
5710 return (error);
5711 }
5712
5713 int
5714 nfs3_lookup_rpc_async(
5715 nfsnode_t dnp,
5716 char *name,
5717 int namelen,
5718 vfs_context_t ctx,
5719 struct nfsreq **reqp)
5720 {
5721 struct nfsmount *nmp;
5722 struct nfsm_chain nmreq;
5723 int error = 0, nfsvers;
5724
5725 nmp = NFSTONMP(dnp);
5726 if (!nmp)
5727 return (ENXIO);
5728 nfsvers = nmp->nm_vers;
5729
5730 nfsm_chain_null(&nmreq);
5731
5732 nfsm_chain_build_alloc_init(error, &nmreq,
5733 NFSX_FH(nfsvers) + NFSX_UNSIGNED + nfsm_rndup(namelen));
5734 nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
5735 nfsm_chain_add_name(error, &nmreq, name, namelen, nmp);
5736 nfsm_chain_build_done(error, &nmreq);
5737 nfsmout_if(error);
5738 error = nfs_request_async(dnp, NULL, &nmreq, NFSPROC_LOOKUP,
5739 vfs_context_thread(ctx), vfs_context_ucred(ctx), NULL, 0, NULL, reqp);
5740 nfsmout:
5741 nfsm_chain_cleanup(&nmreq);
5742 return (error);
5743 }
5744
5745 int
5746 nfs3_lookup_rpc_async_finish(
5747 nfsnode_t dnp,
5748 __unused char *name,
5749 __unused int namelen,
5750 vfs_context_t ctx,
5751 struct nfsreq *req,
5752 u_int64_t *xidp,
5753 fhandle_t *fhp,
5754 struct nfs_vattr *nvap)
5755 {
5756 int error = 0, lockerror = ENOENT, status, nfsvers, attrflag;
5757 u_int64_t xid;
5758 struct nfsmount *nmp;
5759 struct nfsm_chain nmrep;
5760
5761 nmp = NFSTONMP(dnp);
5762 nfsvers = nmp->nm_vers;
5763
5764 nfsm_chain_null(&nmrep);
5765
5766 error = nfs_request_async_finish(req, &nmrep, xidp, &status);
5767
5768 if ((lockerror = nfs_node_lock(dnp)))
5769 error = lockerror;
5770 xid = *xidp;
5771 if (error || status) {
5772 if (nfsvers == NFS_VER3)
5773 nfsm_chain_postop_attr_update(error, &nmrep, dnp, &xid);
5774 if (!error)
5775 error = status;
5776 goto nfsmout;
5777 }
5778
5779 nfsmout_if(error || !fhp || !nvap);
5780
5781 /* get the file handle */
5782 nfsm_chain_get_fh(error, &nmrep, nfsvers, fhp);
5783
5784 /* get the attributes */
5785 if (nfsvers == NFS_VER3) {
5786 nfsm_chain_postop_attr_get(error, &nmrep, attrflag, nvap);
5787 nfsm_chain_postop_attr_update(error, &nmrep, dnp, &xid);
5788 if (!error && !attrflag)
5789 error = nfs3_getattr_rpc(NULL, NFSTOMP(dnp), fhp->fh_data, fhp->fh_len, 0, ctx, nvap, xidp);
5790 } else {
5791 error = nfs_parsefattr(&nmrep, nfsvers, nvap);
5792 }
5793 nfsmout:
5794 if (!lockerror)
5795 nfs_node_unlock(dnp);
5796 nfsm_chain_cleanup(&nmrep);
5797 return (error);
5798 }
5799
5800 /*
5801 * Look up a file name and optionally either update the file handle or
5802 * allocate an nfsnode, depending on the value of npp.
5803 * npp == NULL --> just do the lookup
5804 * *npp == NULL --> allocate a new nfsnode and make sure attributes are
5805 * handled too
5806 * *npp != NULL --> update the file handle in the vnode
5807 */
5808 int
5809 nfs_lookitup(
5810 nfsnode_t dnp,
5811 char *name,
5812 int namelen,
5813 vfs_context_t ctx,
5814 nfsnode_t *npp)
5815 {
5816 int error = 0;
5817 nfsnode_t np, newnp = NULL;
5818 u_int64_t xid;
5819 fhandle_t fh;
5820 struct nfsmount *nmp;
5821 struct nfs_vattr nvattr;
5822 struct nfsreq rq, *req = &rq;
5823
5824 nmp = NFSTONMP(dnp);
5825 if (!nmp)
5826 return (ENXIO);
5827
5828 if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_MAXNAME) &&
5829 (namelen > (int)nmp->nm_fsattr.nfsa_maxname))
5830 return (ENAMETOOLONG);
5831
5832 NVATTR_INIT(&nvattr);
5833
5834 /* check for lookup of "." */
5835 if ((name[0] == '.') && (namelen == 1)) {
5836 /* skip lookup, we know who we are */
5837 fh.fh_len = 0;
5838 newnp = dnp;
5839 goto nfsmout;
5840 }
5841
5842 error = nmp->nm_funcs->nf_lookup_rpc_async(dnp, name, namelen, ctx, &req);
5843 nfsmout_if(error);
5844 error = nmp->nm_funcs->nf_lookup_rpc_async_finish(dnp, name, namelen, ctx, req, &xid, &fh, &nvattr);
5845 nfsmout_if(!npp || error);
5846
5847 if (*npp) {
5848 np = *npp;
5849 if (fh.fh_len != np->n_fhsize) {
5850 u_char *oldbuf = (np->n_fhsize > NFS_SMALLFH) ? np->n_fhp : NULL;
5851 if (fh.fh_len > NFS_SMALLFH) {
5852 MALLOC_ZONE(np->n_fhp, u_char *, fh.fh_len, M_NFSBIGFH, M_WAITOK);
5853 if (!np->n_fhp) {
5854 np->n_fhp = oldbuf;
5855 error = ENOMEM;
5856 goto nfsmout;
5857 }
5858 } else {
5859 np->n_fhp = &np->n_fh[0];
5860 }
5861 if (oldbuf)
5862 FREE_ZONE(oldbuf, np->n_fhsize, M_NFSBIGFH);
5863 }
5864 bcopy(fh.fh_data, np->n_fhp, fh.fh_len);
5865 np->n_fhsize = fh.fh_len;
5866 nfs_node_lock_force(np);
5867 error = nfs_loadattrcache(np, &nvattr, &xid, 0);
5868 nfs_node_unlock(np);
5869 nfsmout_if(error);
5870 newnp = np;
5871 } else if (NFS_CMPFH(dnp, fh.fh_data, fh.fh_len)) {
5872 nfs_node_lock_force(dnp);
5873 if (dnp->n_xid <= xid)
5874 error = nfs_loadattrcache(dnp, &nvattr, &xid, 0);
5875 nfs_node_unlock(dnp);
5876 nfsmout_if(error);
5877 newnp = dnp;
5878 } else {
5879 struct componentname cn, *cnp = &cn;
5880 bzero(cnp, sizeof(*cnp));
5881 cnp->cn_nameptr = name;
5882 cnp->cn_namelen = namelen;
5883 error = nfs_nget(NFSTOMP(dnp), dnp, cnp, fh.fh_data, fh.fh_len,
5884 &nvattr, &xid, rq.r_auth, NG_MAKEENTRY, &np);
5885 nfsmout_if(error);
5886 newnp = np;
5887 }
5888
5889 nfsmout:
5890 if (npp && !*npp && !error)
5891 *npp = newnp;
5892 NVATTR_CLEANUP(&nvattr);
5893 return (error);
5894 }
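/*
 * Illustrative sketch (not built): the three npp modes described in the
 * comment above nfs_lookitup(). The helper name and the literal "foo" are
 * hypothetical.
 */
#if 0
static int
nfs_lookitup_usage_example(nfsnode_t dnp, vfs_context_t ctx)
{
	char name[] = "foo";
	nfsnode_t np = NULL;
	int error;

	/* npp == NULL: just probe whether the name exists */
	error = nfs_lookitup(dnp, name, 3, ctx, NULL);
	/* *npp == NULL: allocate a new nfsnode for the name */
	if (!error)
		error = nfs_lookitup(dnp, name, 3, ctx, &np);
	/* *npp != NULL: refresh the existing node's file handle and attrs */
	if (!error)
		error = nfs_lookitup(dnp, name, 3, ctx, &np);
	return (error);
}
#endif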
5895
5896 /*
5897 * set up and initialize a "._" file lookup structure used for
5898 * performing async lookups.
5899 */
5900 void
5901 nfs_dulookup_init(struct nfs_dulookup *dulp, nfsnode_t dnp, const char *name, int namelen, vfs_context_t ctx)
5902 {
5903 int error, du_namelen;
5904 vnode_t du_vp;
5905 struct nfsmount *nmp = NFSTONMP(dnp);
5906
5907 /* check for ._ file in name cache */
5908 dulp->du_flags = 0;
5909 bzero(&dulp->du_cn, sizeof(dulp->du_cn));
5910 du_namelen = namelen + 2;
5911 if (!nmp || NMFLAG(nmp, NONEGNAMECACHE))
5912 return;
5913 if ((namelen >= 2) && (name[0] == '.') && (name[1] == '_'))
5914 return;
5915 if (du_namelen >= (int)sizeof(dulp->du_smallname))
5916 MALLOC(dulp->du_cn.cn_nameptr, char *, du_namelen + 1, M_TEMP, M_WAITOK);
5917 else
5918 dulp->du_cn.cn_nameptr = dulp->du_smallname;
5919 if (!dulp->du_cn.cn_nameptr)
5920 return;
5921 dulp->du_cn.cn_namelen = du_namelen;
5922 snprintf(dulp->du_cn.cn_nameptr, du_namelen + 1, "._%s", name);
5923 dulp->du_cn.cn_nameptr[du_namelen] = '\0';
5924 dulp->du_cn.cn_nameiop = LOOKUP;
5925 dulp->du_cn.cn_flags = MAKEENTRY;
5926
5927 error = cache_lookup(NFSTOV(dnp), &du_vp, &dulp->du_cn);
5928 if (error == -1) {
5929 vnode_put(du_vp);
5930 } else if (!error) {
5931 nmp = NFSTONMP(dnp);
5932 if (nmp && (nmp->nm_vers > NFS_VER2) && NMFLAG(nmp, RDIRPLUS)) {
5933 /* if rdirplus, try dir buf cache lookup */
5934 nfsnode_t du_np = NULL;
5935 if (!nfs_dir_buf_cache_lookup(dnp, &du_np, &dulp->du_cn, ctx, 0) && du_np) {
5936 /* dir buf cache hit */
5937 du_vp = NFSTOV(du_np);
5938 vnode_put(du_vp);
5939 error = -1;
5940 }
5941 }
5942 if (!error)
5943 dulp->du_flags |= NFS_DULOOKUP_DOIT;
5944 }
5945 }
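/*
 * For example (illustrative): a lookup of "foo" primes an async lookup of
 * the AppleDouble sidecar "._foo"; du_namelen = namelen + 2 accounts for
 * the "._" prefix.
 */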
5946
5947 /*
5948 * start an async "._" file lookup request
5949 */
5950 void
5951 nfs_dulookup_start(struct nfs_dulookup *dulp, nfsnode_t dnp, vfs_context_t ctx)
5952 {
5953 struct nfsmount *nmp = NFSTONMP(dnp);
5954 struct nfsreq *req = &dulp->du_req;
5955
5956 if (!nmp || !(dulp->du_flags & NFS_DULOOKUP_DOIT) || (dulp->du_flags & NFS_DULOOKUP_INPROG))
5957 return;
5958 if (!nmp->nm_funcs->nf_lookup_rpc_async(dnp, dulp->du_cn.cn_nameptr,
5959 dulp->du_cn.cn_namelen, ctx, &req))
5960 dulp->du_flags |= NFS_DULOOKUP_INPROG;
5961 }
5962
5963 /*
5964 * finish an async "._" file lookup request and clean up the structure
5965 */
5966 void
5967 nfs_dulookup_finish(struct nfs_dulookup *dulp, nfsnode_t dnp, vfs_context_t ctx)
5968 {
5969 struct nfsmount *nmp = NFSTONMP(dnp);
5970 int error;
5971 nfsnode_t du_np;
5972 u_int64_t xid;
5973 fhandle_t fh;
5974 struct nfs_vattr nvattr;
5975
5976 if (!nmp || !(dulp->du_flags & NFS_DULOOKUP_INPROG))
5977 goto out;
5978
5979 NVATTR_INIT(&nvattr);
5980 error = nmp->nm_funcs->nf_lookup_rpc_async_finish(dnp, dulp->du_cn.cn_nameptr,
5981 dulp->du_cn.cn_namelen, ctx, &dulp->du_req, &xid, &fh, &nvattr);
5982 dulp->du_flags &= ~NFS_DULOOKUP_INPROG;
5983 if (error == ENOENT) {
5984 /* add a negative entry in the name cache */
5985 nfs_node_lock_force(dnp);
5986 cache_enter(NFSTOV(dnp), NULL, &dulp->du_cn);
5987 dnp->n_flag |= NNEGNCENTRIES;
5988 nfs_node_unlock(dnp);
5989 } else if (!error) {
5990 error = nfs_nget(NFSTOMP(dnp), dnp, &dulp->du_cn, fh.fh_data, fh.fh_len,
5991 &nvattr, &xid, dulp->du_req.r_auth, NG_MAKEENTRY, &du_np);
5992 if (!error) {
5993 nfs_node_unlock(du_np);
5994 vnode_put(NFSTOV(du_np));
5995 }
5996 }
5997 NVATTR_CLEANUP(&nvattr);
5998 out:
5999 if (dulp->du_flags & NFS_DULOOKUP_INPROG)
6000 nfs_request_async_cancel(&dulp->du_req);
6001 if (dulp->du_cn.cn_nameptr && (dulp->du_cn.cn_nameptr != dulp->du_smallname))
6002 FREE(dulp->du_cn.cn_nameptr, M_TEMP);
6003 }
6004
6005
6006 /*
6007 * NFS Version 3 commit RPC
6008 */
6009 int
6010 nfs3_commit_rpc(
6011 nfsnode_t np,
6012 uint64_t offset,
6013 uint64_t count,
6014 kauth_cred_t cred,
6015 uint64_t wverf)
6016 {
6017 struct nfsmount *nmp;
6018 int error = 0, lockerror, status, wccpostattr = 0, nfsvers;
6019 struct timespec premtime = { 0, 0 };
6020 u_int64_t xid, newwverf;
6021 uint32_t count32;
6022 struct nfsm_chain nmreq, nmrep;
6023
6024 nmp = NFSTONMP(np);
6025 FSDBG(521, np, offset, count, nmp ? nmp->nm_state : 0);
6026 if (!nmp)
6027 return (ENXIO);
6028 if (!(nmp->nm_state & NFSSTA_HASWRITEVERF))
6029 return (0);
6030 nfsvers = nmp->nm_vers;
6031
6032 if (count > UINT32_MAX)
6033 count32 = 0;
6034 else
6035 count32 = count;
6036
6037 nfsm_chain_null(&nmreq);
6038 nfsm_chain_null(&nmrep);
6039
6040 nfsm_chain_build_alloc_init(error, &nmreq, NFSX_FH(NFS_VER3));
6041 nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
6042 nfsm_chain_add_64(error, &nmreq, offset);
6043 nfsm_chain_add_32(error, &nmreq, count32);
6044 nfsm_chain_build_done(error, &nmreq);
6045 nfsmout_if(error);
6046 error = nfs_request2(np, NULL, &nmreq, NFSPROC_COMMIT,
6047 current_thread(), cred, NULL, 0, &nmrep, &xid, &status);
6048 if ((lockerror = nfs_node_lock(np)))
6049 error = lockerror;
6050 /* can we do anything useful with the wcc info? */
6051 nfsm_chain_get_wcc_data(error, &nmrep, np, &premtime, &wccpostattr, &xid);
6052 if (!lockerror)
6053 nfs_node_unlock(np);
6054 if (!error)
6055 error = status;
6056 nfsm_chain_get_64(error, &nmrep, newwverf);
6057 nfsmout_if(error);
6058 lck_mtx_lock(&nmp->nm_lock);
6059 if (nmp->nm_verf != newwverf)
6060 nmp->nm_verf = newwverf;
6061 if (wverf != newwverf)
6062 error = NFSERR_STALEWRITEVERF;
6063 lck_mtx_unlock(&nmp->nm_lock);
6064 nfsmout:
6065 nfsm_chain_cleanup(&nmreq);
6066 nfsm_chain_cleanup(&nmrep);
6067 return (error);
6068 }
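/*
 * Editorial note: the verifier check above implements the NFSv3 COMMIT
 * contract: if the verifier returned by COMMIT differs from the one the
 * data was written under (wverf), the server may have rebooted and lost
 * uncommitted writes, so NFSERR_STALEWRITEVERF tells the caller that the
 * dirty data must be written again.
 */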
6069
6070
6071 int
6072 nfs_vnop_blockmap(
6073 __unused struct vnop_blockmap_args /* {
6074 struct vnodeop_desc *a_desc;
6075 vnode_t a_vp;
6076 off_t a_foffset;
6077 size_t a_size;
6078 daddr64_t *a_bpn;
6079 size_t *a_run;
6080 void *a_poff;
6081 int a_flags;
6082 } */ *ap)
6083 {
6084 return (ENOTSUP);
6085 }
6086
6087
6088 /*
6089 * fsync vnode op. Just call nfs_flush().
6090 */
6091 /* ARGSUSED */
6092 int
6093 nfs_vnop_fsync(
6094 struct vnop_fsync_args /* {
6095 struct vnodeop_desc *a_desc;
6096 vnode_t a_vp;
6097 int a_waitfor;
6098 vfs_context_t a_context;
6099 } */ *ap)
6100 {
6101 return (nfs_flush(VTONFS(ap->a_vp), ap->a_waitfor, vfs_context_thread(ap->a_context), 0));
6102 }
6103
6104
6105 /*
6106 * Do an NFS pathconf RPC.
6107 */
6108 int
6109 nfs3_pathconf_rpc(
6110 nfsnode_t np,
6111 struct nfs_fsattr *nfsap,
6112 vfs_context_t ctx)
6113 {
6114 u_int64_t xid;
6115 int error = 0, lockerror, status, nfsvers;
6116 struct nfsm_chain nmreq, nmrep;
6117 struct nfsmount *nmp = NFSTONMP(np);
6118 uint32_t val = 0;
6119
6120 if (!nmp)
6121 return (ENXIO);
6122 nfsvers = nmp->nm_vers;
6123
6124 nfsm_chain_null(&nmreq);
6125 nfsm_chain_null(&nmrep);
6126
6127 /* fetch pathconf info from server */
6128 nfsm_chain_build_alloc_init(error, &nmreq, NFSX_FH(NFS_VER3));
6129 nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
6130 nfsm_chain_build_done(error, &nmreq);
6131 nfsmout_if(error);
6132 error = nfs_request(np, NULL, &nmreq, NFSPROC_PATHCONF, ctx, NULL, &nmrep, &xid, &status);
6133 if ((lockerror = nfs_node_lock(np)))
6134 error = lockerror;
6135 nfsm_chain_postop_attr_update(error, &nmrep, np, &xid);
6136 if (!lockerror)
6137 nfs_node_unlock(np);
6138 if (!error)
6139 error = status;
6140 nfsm_chain_get_32(error, &nmrep, nfsap->nfsa_maxlink);
6141 nfsm_chain_get_32(error, &nmrep, nfsap->nfsa_maxname);
6142 nfsm_chain_get_32(error, &nmrep, val);
6143 if (val)
6144 nfsap->nfsa_flags |= NFS_FSFLAG_NO_TRUNC;
6145 nfsm_chain_get_32(error, &nmrep, val);
6146 if (val)
6147 nfsap->nfsa_flags |= NFS_FSFLAG_CHOWN_RESTRICTED;
6148 nfsm_chain_get_32(error, &nmrep, val);
6149 if (val)
6150 nfsap->nfsa_flags |= NFS_FSFLAG_CASE_INSENSITIVE;
6151 nfsm_chain_get_32(error, &nmrep, val);
6152 if (val)
6153 nfsap->nfsa_flags |= NFS_FSFLAG_CASE_PRESERVING;
6154 NFS_BITMAP_SET(nfsap->nfsa_bitmap, NFS_FATTR_MAXLINK);
6155 NFS_BITMAP_SET(nfsap->nfsa_bitmap, NFS_FATTR_MAXNAME);
6156 NFS_BITMAP_SET(nfsap->nfsa_bitmap, NFS_FATTR_NO_TRUNC);
6157 NFS_BITMAP_SET(nfsap->nfsa_bitmap, NFS_FATTR_CHOWN_RESTRICTED);
6158 NFS_BITMAP_SET(nfsap->nfsa_bitmap, NFS_FATTR_CASE_INSENSITIVE);
6159 NFS_BITMAP_SET(nfsap->nfsa_bitmap, NFS_FATTR_CASE_PRESERVING);
6160 nfsmout:
6161 nfsm_chain_cleanup(&nmreq);
6162 nfsm_chain_cleanup(&nmrep);
6163 return (error);
6164 }
6165
6166 /* save pathconf info for NFSv3 mount */
6167 void
6168 nfs3_pathconf_cache(struct nfsmount *nmp, struct nfs_fsattr *nfsap)
6169 {
6170 nmp->nm_fsattr.nfsa_maxlink = nfsap->nfsa_maxlink;
6171 nmp->nm_fsattr.nfsa_maxname = nfsap->nfsa_maxname;
6172 nmp->nm_fsattr.nfsa_flags |= nfsap->nfsa_flags & NFS_FSFLAG_NO_TRUNC;
6173 nmp->nm_fsattr.nfsa_flags |= nfsap->nfsa_flags & NFS_FSFLAG_CHOWN_RESTRICTED;
6174 nmp->nm_fsattr.nfsa_flags |= nfsap->nfsa_flags & NFS_FSFLAG_CASE_INSENSITIVE;
6175 nmp->nm_fsattr.nfsa_flags |= nfsap->nfsa_flags & NFS_FSFLAG_CASE_PRESERVING;
6176 NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_MAXLINK);
6177 NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_MAXNAME);
6178 NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_NO_TRUNC);
6179 NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_CHOWN_RESTRICTED);
6180 NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_CASE_INSENSITIVE);
6181 NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_CASE_PRESERVING);
6182 nmp->nm_state |= NFSSTA_GOTPATHCONF;
6183 }
6184
6185 /*
6186 * Return POSIX pathconf information applicable to nfs.
6187 *
6188 * The NFS V2 protocol doesn't support this, so just return EINVAL
6189 * for V2.
6190 */
6191 /* ARGSUSED */
6192 int
6193 nfs_vnop_pathconf(
6194 struct vnop_pathconf_args /* {
6195 struct vnodeop_desc *a_desc;
6196 vnode_t a_vp;
6197 int a_name;
6198 int32_t *a_retval;
6199 vfs_context_t a_context;
6200 } */ *ap)
6201 {
6202 vnode_t vp = ap->a_vp;
6203 nfsnode_t np = VTONFS(vp);
6204 struct nfsmount *nmp;
6205 struct nfs_fsattr nfsa, *nfsap;
6206 int error = 0;
6207 uint64_t maxFileSize;
6208 uint nbits;
6209
6210 nmp = VTONMP(vp);
6211 if (!nmp)
6212 return (ENXIO);
6213
6214 switch (ap->a_name) {
6215 case _PC_LINK_MAX:
6216 case _PC_NAME_MAX:
6217 case _PC_CHOWN_RESTRICTED:
6218 case _PC_NO_TRUNC:
6219 case _PC_CASE_SENSITIVE:
6220 case _PC_CASE_PRESERVING:
6221 break;
6222 case _PC_FILESIZEBITS:
6223 if (nmp->nm_vers == NFS_VER2) {
6224 *ap->a_retval = 32;
6225 return (0);
6226 }
6227 break;
6228 case _PC_XATTR_SIZE_BITS:
6229 /* Do we support xattrs natively? */
6230 if (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR)
6231 break; /* Yes */
6232 /* No... so just return an error */
6233 /* FALLTHROUGH */
6234 default:
6235 /* don't bother contacting the server if we know the answer */
6236 return (EINVAL);
6237 }
6238
6239 if (nmp->nm_vers == NFS_VER2)
6240 return (EINVAL);
6241
6242 lck_mtx_lock(&nmp->nm_lock);
6243 if (nmp->nm_vers == NFS_VER3) {
6244 if (!(nmp->nm_state & NFSSTA_GOTPATHCONF)) {
6245 /* no pathconf info cached */
6246 lck_mtx_unlock(&nmp->nm_lock);
6247 NFS_CLEAR_ATTRIBUTES(nfsa.nfsa_bitmap);
6248 error = nfs3_pathconf_rpc(np, &nfsa, ap->a_context);
6249 if (error)
6250 return (error);
6251 nmp = VTONMP(vp);
6252 if (!nmp)
6253 return (ENXIO);
6254 lck_mtx_lock(&nmp->nm_lock);
6255 if (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_HOMOGENEOUS) {
6256 /* all files have the same pathconf info, */
6257 /* so cache a copy of the results */
6258 nfs3_pathconf_cache(nmp, &nfsa);
6259 }
6260 nfsap = &nfsa;
6261 } else {
6262 nfsap = &nmp->nm_fsattr;
6263 }
6264 } else if (!(nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_HOMOGENEOUS)) {
6265 /* no pathconf info cached */
6266 lck_mtx_unlock(&nmp->nm_lock);
6267 NFS_CLEAR_ATTRIBUTES(nfsa.nfsa_bitmap);
6268 error = nfs4_pathconf_rpc(np, &nfsa, ap->a_context);
6269 if (error)
6270 return (error);
6271 nmp = VTONMP(vp);
6272 if (!nmp)
6273 return (ENXIO);
6274 lck_mtx_lock(&nmp->nm_lock);
6275 nfsap = &nfsa;
6276 } else {
6277 nfsap = &nmp->nm_fsattr;
6278 }
6279
6280 switch (ap->a_name) {
6281 case _PC_LINK_MAX:
6282 if (NFS_BITMAP_ISSET(nfsap->nfsa_bitmap, NFS_FATTR_MAXLINK))
6283 *ap->a_retval = nfsap->nfsa_maxlink;
6284 else if ((nmp->nm_vers == NFS_VER4) && NFS_BITMAP_ISSET(np->n_vattr.nva_bitmap, NFS_FATTR_MAXLINK))
6285 *ap->a_retval = np->n_vattr.nva_maxlink;
6286 else
6287 error = EINVAL;
6288 break;
6289 case _PC_NAME_MAX:
6290 if (NFS_BITMAP_ISSET(nfsap->nfsa_bitmap, NFS_FATTR_MAXNAME))
6291 *ap->a_retval = nfsap->nfsa_maxname;
6292 else
6293 error = EINVAL;
6294 break;
6295 case _PC_CHOWN_RESTRICTED:
6296 if (NFS_BITMAP_ISSET(nfsap->nfsa_bitmap, NFS_FATTR_CHOWN_RESTRICTED))
6297 *ap->a_retval = (nfsap->nfsa_flags & NFS_FSFLAG_CHOWN_RESTRICTED) ? 200112 /* _POSIX_CHOWN_RESTRICTED */ : 0;
6298 else
6299 error = EINVAL;
6300 break;
6301 case _PC_NO_TRUNC:
6302 if (NFS_BITMAP_ISSET(nfsap->nfsa_bitmap, NFS_FATTR_NO_TRUNC))
6303 *ap->a_retval = (nfsap->nfsa_flags & NFS_FSFLAG_NO_TRUNC) ? 200112 /* _POSIX_NO_TRUNC */ : 0;
6304 else
6305 error = EINVAL;
6306 break;
6307 case _PC_CASE_SENSITIVE:
6308 if (NFS_BITMAP_ISSET(nfsap->nfsa_bitmap, NFS_FATTR_CASE_INSENSITIVE))
6309 *ap->a_retval = (nfsap->nfsa_flags & NFS_FSFLAG_CASE_INSENSITIVE) ? 0 : 1;
6310 else
6311 error = EINVAL;
6312 break;
6313 case _PC_CASE_PRESERVING:
6314 if (NFS_BITMAP_ISSET(nfsap->nfsa_bitmap, NFS_FATTR_CASE_PRESERVING))
6315 *ap->a_retval = (nfsap->nfsa_flags & NFS_FSFLAG_CASE_PRESERVING) ? 1 : 0;
6316 else
6317 error = EINVAL;
6318 break;
6319 case _PC_XATTR_SIZE_BITS: /* same as file size bits if named attrs supported */
6320 case _PC_FILESIZEBITS:
6321 if (!NFS_BITMAP_ISSET(nfsap->nfsa_bitmap, NFS_FATTR_MAXFILESIZE)) {
6322 *ap->a_retval = 64;
6323 error = 0;
6324 break;
6325 }
6326 maxFileSize = nfsap->nfsa_maxfilesize;
6327 nbits = 1;
6328 if (maxFileSize & 0xffffffff00000000ULL) {
6329 nbits += 32;
6330 maxFileSize >>= 32;
6331 }
6332 if (maxFileSize & 0xffff0000) {
6333 nbits += 16;
6334 maxFileSize >>= 16;
6335 }
6336 if (maxFileSize & 0xff00) {
6337 nbits += 8;
6338 maxFileSize >>= 8;
6339 }
6340 if (maxFileSize & 0xf0) {
6341 nbits += 4;
6342 maxFileSize >>= 4;
6343 }
6344 if (maxFileSize & 0xc) {
6345 nbits += 2;
6346 maxFileSize >>= 2;
6347 }
6348 if (maxFileSize & 0x2) {
6349 nbits += 1;
6350 }
6351 *ap->a_retval = nbits;
6352 break;
6353 default:
6354 error = EINVAL;
6355 }
6356
6357 lck_mtx_unlock(&nmp->nm_lock);
6358
6359 return (error);
6360 }
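/*
 * Editorial note: the _PC_FILESIZEBITS case above counts the bits needed
 * to represent nfsa_maxfilesize with a branching binary search. For
 * example, maxFileSize = 0x7fffffffffffffff yields
 * 1 + 32 + 16 + 8 + 4 + 2 = 63 bits.
 */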
6361
6362 /*
6363 * Read wrapper for special devices.
6364 */
6365 int
6366 nfsspec_vnop_read(
6367 struct vnop_read_args /* {
6368 struct vnodeop_desc *a_desc;
6369 vnode_t a_vp;
6370 struct uio *a_uio;
6371 int a_ioflag;
6372 vfs_context_t a_context;
6373 } */ *ap)
6374 {
6375 nfsnode_t np = VTONFS(ap->a_vp);
6376 struct timeval now;
6377 int error;
6378
6379 /*
6380 * Set access flag.
6381 */
6382 if ((error = nfs_node_lock(np)))
6383 return (error);
6384 np->n_flag |= NACC;
6385 microtime(&now);
6386 np->n_atim.tv_sec = now.tv_sec;
6387 np->n_atim.tv_nsec = now.tv_usec * 1000;
6388 nfs_node_unlock(np);
6389 return (VOCALL(spec_vnodeop_p, VOFFSET(vnop_read), ap));
6390 }
6391
6392 /*
6393 * Write wrapper for special devices.
6394 */
6395 int
6396 nfsspec_vnop_write(
6397 struct vnop_write_args /* {
6398 struct vnodeop_desc *a_desc;
6399 vnode_t a_vp;
6400 struct uio *a_uio;
6401 int a_ioflag;
6402 vfs_context_t a_context;
6403 } */ *ap)
6404 {
6405 nfsnode_t np = VTONFS(ap->a_vp);
6406 struct timeval now;
6407 int error;
6408
6409 /*
6410 * Set update flag.
6411 */
6412 if ((error = nfs_node_lock(np)))
6413 return (error);
6414 np->n_flag |= NUPD;
6415 microtime(&now);
6416 np->n_mtim.tv_sec = now.tv_sec;
6417 np->n_mtim.tv_nsec = now.tv_usec * 1000;
6418 nfs_node_unlock(np);
6419 return (VOCALL(spec_vnodeop_p, VOFFSET(vnop_write), ap));
6420 }
6421
6422 /*
6423 * Close wrapper for special devices.
6424 *
6425 * Update the times on the nfsnode then do device close.
6426 */
6427 int
6428 nfsspec_vnop_close(
6429 struct vnop_close_args /* {
6430 struct vnodeop_desc *a_desc;
6431 vnode_t a_vp;
6432 int a_fflag;
6433 vfs_context_t a_context;
6434 } */ *ap)
6435 {
6436 vnode_t vp = ap->a_vp;
6437 nfsnode_t np = VTONFS(vp);
6438 struct vnode_attr vattr;
6439 mount_t mp;
6440 int error;
6441
6442 if ((error = nfs_node_lock(np)))
6443 return (error);
6444 if (np->n_flag & (NACC | NUPD)) {
6445 np->n_flag |= NCHG;
6446 if (!vnode_isinuse(vp, 0) && (mp = vnode_mount(vp)) && !vfs_isrdonly(mp)) {
6447 VATTR_INIT(&vattr);
6448 if (np->n_flag & NACC) {
6449 vattr.va_access_time = np->n_atim;
6450 VATTR_SET_ACTIVE(&vattr, va_access_time);
6451 }
6452 if (np->n_flag & NUPD) {
6453 vattr.va_modify_time = np->n_mtim;
6454 VATTR_SET_ACTIVE(&vattr, va_modify_time);
6455 }
6456 nfs_node_unlock(np);
6457 vnode_setattr(vp, &vattr, ap->a_context);
6458 } else {
6459 nfs_node_unlock(np);
6460 }
6461 } else {
6462 nfs_node_unlock(np);
6463 }
6464 return (VOCALL(spec_vnodeop_p, VOFFSET(vnop_close), ap));
6465 }
6466
6467 #if FIFO
6468 extern vnop_t **fifo_vnodeop_p;
6469
6470 /*
6471 * Read wrapper for fifos.
6472 */
6473 int
6474 nfsfifo_vnop_read(
6475 struct vnop_read_args /* {
6476 struct vnodeop_desc *a_desc;
6477 vnode_t a_vp;
6478 struct uio *a_uio;
6479 int a_ioflag;
6480 vfs_context_t a_context;
6481 } */ *ap)
6482 {
6483 nfsnode_t np = VTONFS(ap->a_vp);
6484 struct timeval now;
6485 int error;
6486
6487 /*
6488 * Set access flag.
6489 */
6490 if ((error = nfs_node_lock(np)))
6491 return (error);
6492 np->n_flag |= NACC;
6493 microtime(&now);
6494 np->n_atim.tv_sec = now.tv_sec;
6495 np->n_atim.tv_nsec = now.tv_usec * 1000;
6496 nfs_node_unlock(np);
6497 return (VOCALL(fifo_vnodeop_p, VOFFSET(vnop_read), ap));
6498 }
6499
6500 /*
6501 * Write wrapper for fifos.
6502 */
6503 int
6504 nfsfifo_vnop_write(
6505 struct vnop_write_args /* {
6506 struct vnodeop_desc *a_desc;
6507 vnode_t a_vp;
6508 struct uio *a_uio;
6509 int a_ioflag;
6510 vfs_context_t a_context;
6511 } */ *ap)
6512 {
6513 nfsnode_t np = VTONFS(ap->a_vp);
6514 struct timeval now;
6515 int error;
6516
6517 /*
6518 * Set update flag.
6519 */
6520 if ((error = nfs_node_lock(np)))
6521 return (error);
6522 np->n_flag |= NUPD;
6523 microtime(&now);
6524 np->n_mtim.tv_sec = now.tv_sec;
6525 np->n_mtim.tv_nsec = now.tv_usec * 1000;
6526 nfs_node_unlock(np);
6527 return (VOCALL(fifo_vnodeop_p, VOFFSET(vnop_write), ap));
6528 }
6529
6530 /*
6531 * Close wrapper for fifos.
6532 *
6533 * Update the times on the nfsnode then do fifo close.
6534 */
6535 int
6536 nfsfifo_vnop_close(
6537 struct vnop_close_args /* {
6538 struct vnodeop_desc *a_desc;
6539 vnode_t a_vp;
6540 int a_fflag;
6541 vfs_context_t a_context;
6542 } */ *ap)
6543 {
6544 vnode_t vp = ap->a_vp;
6545 nfsnode_t np = VTONFS(vp);
6546 struct vnode_attr vattr;
6547 struct timeval now;
6548 mount_t mp;
6549 int error;
6550
6551 if ((error = nfs_node_lock(np)))
6552 return (error);
6553 if (np->n_flag & (NACC | NUPD)) {
6554 microtime(&now);
6555 if (np->n_flag & NACC) {
6556 np->n_atim.tv_sec = now.tv_sec;
6557 np->n_atim.tv_nsec = now.tv_usec * 1000;
6558 }
6559 if (np->n_flag & NUPD) {
6560 np->n_mtim.tv_sec = now.tv_sec;
6561 np->n_mtim.tv_nsec = now.tv_usec * 1000;
6562 }
6563 np->n_flag |= NCHG;
6564 if (!vnode_isinuse(vp, 1) && (mp = vnode_mount(vp)) && !vfs_isrdonly(mp)) {
6565 VATTR_INIT(&vattr);
6566 if (np->n_flag & NACC) {
6567 vattr.va_access_time = np->n_atim;
6568 VATTR_SET_ACTIVE(&vattr, va_access_time);
6569 }
6570 if (np->n_flag & NUPD) {
6571 vattr.va_modify_time = np->n_mtim;
6572 VATTR_SET_ACTIVE(&vattr, va_modify_time);
6573 }
6574 nfs_node_unlock(np);
6575 vnode_setattr(vp, &vattr, ap->a_context);
6576 } else {
6577 nfs_node_unlock(np);
6578 }
6579 } else {
6580 nfs_node_unlock(np);
6581 }
6582 return (VOCALL(fifo_vnodeop_p, VOFFSET(vnop_close), ap));
6583 }
6584 #endif /* FIFO */
6585
6586 /*ARGSUSED*/
6587 int
6588 nfs_vnop_ioctl(
6589 struct vnop_ioctl_args /* {
6590 struct vnodeop_desc *a_desc;
6591 vnode_t a_vp;
6592 u_int32_t a_command;
6593 caddr_t a_data;
6594 int a_fflag;
6595 vfs_context_t a_context;
6596 } */ *ap)
6597 {
6598 vfs_context_t ctx = ap->a_context;
6599 vnode_t vp = ap->a_vp;
6600 int error = ENOTTY;
6601
6602 switch (ap->a_command) {
6603
6604 case F_FULLFSYNC:
6605 if (vnode_vfsisrdonly(vp))
6606 return (EROFS);
6607 if (!VTONMP(vp))
6608 return (ENXIO);
6609 error = nfs_flush(VTONFS(vp), MNT_WAIT, vfs_context_thread(ctx), 0);
6610 break;
6611
6612 }
6613
6614 return (error);
6615 }
6616
6617 /*ARGSUSED*/
6618 int
6619 nfs_vnop_select(
6620 __unused struct vnop_select_args /* {
6621 struct vnodeop_desc *a_desc;
6622 vnode_t a_vp;
6623 int a_which;
6624 int a_fflags;
6625 void *a_wql;
6626 vfs_context_t a_context;
6627 } */ *ap)
6628 {
6629
6630 /*
6631 * We were once bogusly seltrue() which returns 1. Is this right?
6632 */
6633 return (1);
6634 }
6635
6636 /*
6637 * vnode OP for pagein using UPL
6638 *
6639 * No buffer I/O, just RPCs straight into the mapped pages.
6640 */
6641 int
6642 nfs_vnop_pagein(
6643 struct vnop_pagein_args /* {
6644 struct vnodeop_desc *a_desc;
6645 vnode_t a_vp;
6646 upl_t a_pl;
6647 vm_offset_t a_pl_offset;
6648 off_t a_f_offset;
6649 size_t a_size;
6650 int a_flags;
6651 vfs_context_t a_context;
6652 } */ *ap)
6653 {
6654 vnode_t vp = ap->a_vp;
6655 upl_t pl = ap->a_pl;
6656 size_t size = ap->a_size;
6657 off_t f_offset = ap->a_f_offset;
6658 vm_offset_t pl_offset = ap->a_pl_offset;
6659 int flags = ap->a_flags;
6660 thread_t thd;
6661 kauth_cred_t cred;
6662 nfsnode_t np = VTONFS(vp);
6663 size_t nmrsize, iosize, txsize, rxsize, retsize;
6664 off_t txoffset;
6665 struct nfsmount *nmp;
6666 int error = 0;
6667 vm_offset_t ioaddr, rxaddr;
6668 uio_t uio;
6669 char uio_buf [ UIO_SIZEOF(1) ];
6670 int nofreeupl = flags & UPL_NOCOMMIT;
6671 upl_page_info_t *plinfo;
6672 #define MAXPAGINGREQS 16 /* max outstanding RPCs for pagein/pageout */
6673 struct nfsreq *req[MAXPAGINGREQS];
6674 int nextsend, nextwait;
6675 uint32_t stategenid = 0, restart = 0;
6676 kern_return_t kret;
6677
6678 FSDBG(322, np, f_offset, size, flags);
6679 if (pl == (upl_t)NULL)
6680 panic("nfs_pagein: no upl");
6681
6682 if (size == 0) { /* size_t is unsigned, so "<= 0" meant "== 0" */
6683 printf("nfs_pagein: invalid size %ld", (long)size);
6684 if (!nofreeupl)
6685 (void) ubc_upl_abort_range(pl, pl_offset, size, 0);
6686 return (EINVAL);
6687 }
6688 if (f_offset < 0 || f_offset >= (off_t)np->n_size || (f_offset & PAGE_MASK_64)) {
6689 if (!nofreeupl)
6690 ubc_upl_abort_range(pl, pl_offset, size,
6691 UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY);
6692 return (EINVAL);
6693 }
6694
6695 thd = vfs_context_thread(ap->a_context);
6696 cred = ubc_getcred(vp);
6697 if (!IS_VALID_CRED(cred))
6698 cred = vfs_context_ucred(ap->a_context);
6699
6700 uio = uio_createwithbuffer(1, f_offset, UIO_SYSSPACE, UIO_READ,
6701 &uio_buf, sizeof(uio_buf));
6702
6703 nmp = VTONMP(vp);
6704 if (!nmp) {
6705 if (!nofreeupl)
6706 ubc_upl_abort_range(pl, pl_offset, size,
6707 UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY);
6708 return (ENXIO);
6709 }
6710 nmrsize = nmp->nm_rsize;
6711
6712 plinfo = ubc_upl_pageinfo(pl);
6713 kret = ubc_upl_map(pl, &ioaddr);
6714 if (kret != KERN_SUCCESS)
6715 panic("nfs_vnop_pagein: ubc_upl_map() failed with (%d)", kret);
6716 ioaddr += pl_offset;
6717
6718 tryagain:
6719 if (nmp->nm_vers >= NFS_VER4)
6720 stategenid = nmp->nm_stategenid;
6721 txsize = rxsize = size;
6722 txoffset = f_offset;
6723 rxaddr = ioaddr;
6724
6725 bzero(req, sizeof(req));
6726 nextsend = nextwait = 0;
6727 do {
6728 if (np->n_flag & NREVOKE) {
6729 error = EIO;
6730 break;
6731 }
6732 /* send requests while we need to and have available slots */
6733 while ((txsize > 0) && (req[nextsend] == NULL)) {
6734 iosize = MIN(nmrsize, txsize);
6735 if ((error = nmp->nm_funcs->nf_read_rpc_async(np, txoffset, iosize, thd, cred, NULL, &req[nextsend]))) {
6736 req[nextsend] = NULL;
6737 break;
6738 }
6739 txoffset += iosize;
6740 txsize -= iosize;
6741 nextsend = (nextsend + 1) % MAXPAGINGREQS;
6742 }
6743 /* wait while we need to and break out if more requests to send */
6744 while ((rxsize > 0) && req[nextwait]) {
6745 iosize = retsize = MIN(nmrsize, rxsize);
6746 uio_reset(uio, uio_offset(uio), UIO_SYSSPACE, UIO_READ);
6747 uio_addiov(uio, CAST_USER_ADDR_T(rxaddr), iosize);
6748 FSDBG(322, uio_offset(uio), uio_resid(uio), rxaddr, rxsize);
6749 #if UPL_DEBUG
6750 upl_ubc_alias_set(pl, (uintptr_t) current_thread(), (uintptr_t) 2);
6751 #endif /* UPL_DEBUG */
6752 OSAddAtomic64(1, &nfsstats.pageins);
6753 error = nmp->nm_funcs->nf_read_rpc_async_finish(np, req[nextwait], uio, &retsize, NULL);
6754 req[nextwait] = NULL;
6755 nextwait = (nextwait + 1) % MAXPAGINGREQS;
6756 if ((nmp->nm_vers >= NFS_VER4) && nfs_mount_state_error_should_restart(error)) {
6757 lck_mtx_lock(&nmp->nm_lock);
6758 if ((error != NFSERR_GRACE) && (stategenid == nmp->nm_stategenid)) {
6759 NP(np, "nfs_vnop_pagein: error %d, initiating recovery", error);
6760 nfs_need_recover(nmp, error);
6761 }
6762 lck_mtx_unlock(&nmp->nm_lock);
6763 restart++;
6764 goto cancel;
6765 }
6766 if (error) {
6767 FSDBG(322, uio_offset(uio), uio_resid(uio), error, -1);
6768 break;
6769 }
6770 if (retsize < iosize) {
6771 /* Just zero fill the rest of the valid area. */
6772 int zcnt = iosize - retsize;
6773 bzero((char *)rxaddr + retsize, zcnt);
6774 FSDBG(324, uio_offset(uio), retsize, zcnt, rxaddr);
6775 uio_update(uio, zcnt);
6776 }
6777 rxaddr += iosize;
6778 rxsize -= iosize;
6779 if (txsize)
6780 break;
6781 }
6782 } while (!error && (txsize || rxsize));
6783
6784 restart = 0;
6785
6786 if (error) {
6787 cancel:
6788 /* cancel any outstanding requests */
6789 while (req[nextwait]) {
6790 nfs_request_async_cancel(req[nextwait]);
6791 req[nextwait] = NULL;
6792 nextwait = (nextwait + 1) % MAXPAGINGREQS;
6793 }
6794 if (np->n_flag & NREVOKE) {
6795 error = EIO;
6796 } else if (restart) {
6797 if (restart <= nfs_mount_state_max_restarts(nmp)) { /* guard against no progress */
6798 if (error == NFSERR_GRACE)
6799 tsleep(&nmp->nm_state, (PZERO-1), "nfsgrace", 2*hz);
6800 if (!(error = nfs_mount_state_wait_for_recovery(nmp)))
6801 goto tryagain;
6802 } else {
6803 NP(np, "nfs_pagein: too many restarts, aborting");
6804 }
6805 }
6806 }
6807
6808 ubc_upl_unmap(pl);
6809
6810 if (!nofreeupl) {
6811 if (error)
6812 ubc_upl_abort_range(pl, pl_offset, size,
6813 UPL_ABORT_ERROR |
6814 UPL_ABORT_FREE_ON_EMPTY);
6815 else
6816 ubc_upl_commit_range(pl, pl_offset, size,
6817 UPL_COMMIT_CLEAR_DIRTY |
6818 UPL_COMMIT_FREE_ON_EMPTY);
6819 }
6820 return (error);
6821 }
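/*
 * Editorial note: the pagein loop above pipelines up to MAXPAGINGREQS
 * asynchronous READ RPCs through the req[] ring: nextsend issues requests
 * while slots and txsize remain, nextwait reaps replies in order into the
 * mapped UPL, and both indices advance modulo MAXPAGINGREQS. Short reads
 * are zero-filled so every page is fully valid before it is committed, and
 * NFSv4 state errors abort the ring and retry after recovery.
 */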
6822
6823
6824 /*
6825 * The following are needed only by nfs_pageout to know how to handle errors;
6826 * see the nfs_pageout comments for an explanation of the actions taken.
6827 * The errors here are copied from errno.h, and errors returned by servers
6828 * are expected to match the same numbers here. If not, our actions may be
6829 * erroneous.
6830 */
6831 char nfs_pageouterrorhandler(int);
6832 enum actiontype {NOACTION, DUMP, DUMPANDLOG, RETRY, RETRYWITHSLEEP, SEVER};
6833 #define NFS_ELAST 88
6834 static u_char errorcount[NFS_ELAST+1]; /* better be zeros when initialized */
6835 static const char errortooutcome[NFS_ELAST+1] = {
6836 NOACTION,
6837 DUMP, /* EPERM 1 Operation not permitted */
6838 DUMP, /* ENOENT 2 No such file or directory */
6839 DUMPANDLOG, /* ESRCH 3 No such process */
6840 RETRY, /* EINTR 4 Interrupted system call */
6841 DUMP, /* EIO 5 Input/output error */
6842 DUMP, /* ENXIO 6 Device not configured */
6843 DUMPANDLOG, /* E2BIG 7 Argument list too long */
6844 DUMPANDLOG, /* ENOEXEC 8 Exec format error */
6845 DUMPANDLOG, /* EBADF 9 Bad file descriptor */
6846 DUMPANDLOG, /* ECHILD 10 No child processes */
6847 DUMPANDLOG, /* EDEADLK 11 Resource deadlock avoided - was EAGAIN */
6848 RETRY, /* ENOMEM 12 Cannot allocate memory */
6849 DUMP, /* EACCES 13 Permission denied */
6850 DUMPANDLOG, /* EFAULT 14 Bad address */
6851 DUMPANDLOG, /* ENOTBLK 15 POSIX - Block device required */
6852 RETRY, /* EBUSY 16 Device busy */
6853 DUMP, /* EEXIST 17 File exists */
6854 DUMP, /* EXDEV 18 Cross-device link */
6855 DUMP, /* ENODEV 19 Operation not supported by device */
6856 DUMP, /* ENOTDIR 20 Not a directory */
6857 DUMP, /* EISDIR 21 Is a directory */
6858 DUMP, /* EINVAL 22 Invalid argument */
6859 DUMPANDLOG, /* ENFILE 23 Too many open files in system */
6860 DUMPANDLOG, /* EMFILE 24 Too many open files */
6861 DUMPANDLOG, /* ENOTTY 25 Inappropriate ioctl for device */
6862 DUMPANDLOG, /* ETXTBSY 26 Text file busy - POSIX */
6863 DUMP, /* EFBIG 27 File too large */
6864 DUMP, /* ENOSPC 28 No space left on device */
6865 DUMPANDLOG, /* ESPIPE 29 Illegal seek */
6866 DUMP, /* EROFS 30 Read-only file system */
6867 DUMP, /* EMLINK 31 Too many links */
6868 RETRY, /* EPIPE 32 Broken pipe */
6869 /* math software */
6870 DUMPANDLOG, /* EDOM 33 Numerical argument out of domain */
6871 DUMPANDLOG, /* ERANGE 34 Result too large */
6872 RETRY, /* EAGAIN/EWOULDBLOCK 35 Resource temporarily unavailable */
6873 DUMPANDLOG, /* EINPROGRESS 36 Operation now in progress */
6874 DUMPANDLOG, /* EALREADY 37 Operation already in progress */
6875 /* ipc/network software -- argument errors */
6876 DUMPANDLOG, /* ENOTSOCK 38 Socket operation on non-socket */
6877 DUMPANDLOG, /* EDESTADDRREQ 39 Destination address required */
6878 DUMPANDLOG, /* EMSGSIZE 40 Message too long */
6879 DUMPANDLOG, /* EPROTOTYPE 41 Protocol wrong type for socket */
6880 DUMPANDLOG, /* ENOPROTOOPT 42 Protocol not available */
6881 DUMPANDLOG, /* EPROTONOSUPPORT 43 Protocol not supported */
6882 DUMPANDLOG, /* ESOCKTNOSUPPORT 44 Socket type not supported */
6883 DUMPANDLOG, /* ENOTSUP 45 Operation not supported */
6884 DUMPANDLOG, /* EPFNOSUPPORT 46 Protocol family not supported */
6885 DUMPANDLOG, /* EAFNOSUPPORT 47 Address family not supported by protocol family */
6886 DUMPANDLOG, /* EADDRINUSE 48 Address already in use */
6887 DUMPANDLOG, /* EADDRNOTAVAIL 49 Can't assign requested address */
6888 /* ipc/network software -- operational errors */
6889 RETRY, /* ENETDOWN 50 Network is down */
6890 RETRY, /* ENETUNREACH 51 Network is unreachable */
6891 RETRY, /* ENETRESET 52 Network dropped connection on reset */
6892 RETRY, /* ECONNABORTED 53 Software caused connection abort */
6893 RETRY, /* ECONNRESET 54 Connection reset by peer */
6894 RETRY, /* ENOBUFS 55 No buffer space available */
6895 RETRY, /* EISCONN 56 Socket is already connected */
6896 RETRY, /* ENOTCONN 57 Socket is not connected */
6897 RETRY, /* ESHUTDOWN 58 Can't send after socket shutdown */
6898 RETRY, /* ETOOMANYREFS 59 Too many references: can't splice */
6899 RETRY, /* ETIMEDOUT 60 Operation timed out */
6900 RETRY, /* ECONNREFUSED 61 Connection refused */
6901
6902 DUMPANDLOG, /* ELOOP 62 Too many levels of symbolic links */
6903 DUMP, /* ENAMETOOLONG 63 File name too long */
6904 RETRY, /* EHOSTDOWN 64 Host is down */
6905 RETRY, /* EHOSTUNREACH 65 No route to host */
6906 DUMP, /* ENOTEMPTY 66 Directory not empty */
6907 /* quotas & mush */
6908 DUMPANDLOG, /* EPROCLIM 67 Too many processes */
6909 DUMPANDLOG, /* EUSERS 68 Too many users */
6910 DUMPANDLOG, /* EDQUOT 69 Disc quota exceeded */
6911 /* Network File System */
6912 DUMP, /* ESTALE 70 Stale NFS file handle */
6913 DUMP, /* EREMOTE 71 Too many levels of remote in path */
6914 DUMPANDLOG, /* EBADRPC 72 RPC struct is bad */
6915 DUMPANDLOG, /* ERPCMISMATCH 73 RPC version wrong */
6916 DUMPANDLOG, /* EPROGUNAVAIL 74 RPC prog. not avail */
6917 DUMPANDLOG, /* EPROGMISMATCH 75 Program version wrong */
6918 DUMPANDLOG, /* EPROCUNAVAIL 76 Bad procedure for program */
6919
6920 DUMPANDLOG, /* ENOLCK 77 No locks available */
6921 DUMPANDLOG, /* ENOSYS 78 Function not implemented */
6922 DUMPANDLOG, /* EFTYPE 79 Inappropriate file type or format */
6923 DUMPANDLOG, /* EAUTH 80 Authentication error */
6924 DUMPANDLOG, /* ENEEDAUTH 81 Need authenticator */
6925 /* Intelligent device errors */
6926 DUMPANDLOG, /* EPWROFF 82 Device power is off */
6927 DUMPANDLOG, /* EDEVERR 83 Device error, e.g. paper out */
6928 DUMPANDLOG, /* EOVERFLOW 84 Value too large to be stored in data type */
6929 /* Program loading errors */
6930 DUMPANDLOG, /* EBADEXEC 85 Bad executable */
6931 DUMPANDLOG, /* EBADARCH 86 Bad CPU type in executable */
6932 DUMPANDLOG, /* ESHLIBVERS 87 Shared library version mismatch */
6933 DUMPANDLOG, /* EBADMACHO 88 Malformed Mach-O file */
6934 };
6935
6936 char
6937 nfs_pageouterrorhandler(int error)
6938 {
6939 if (error > NFS_ELAST)
6940 return(DUMP);
6941 else
6942 return(errortooutcome[error]);
6943 }
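
/*
 * Illustrative sketch (not part of the original source): how the
 * error-to-action table above is consulted. The errno values named below
 * are standard; the actions mirror the handling in nfs_vnop_pageout()
 * further down, where the DUMP* actions abort the UPL with
 * UPL_ABORT_DUMP_PAGES and the RETRY* actions let vm try again.
 */
#if 0	/* illustrative only, not compiled */
static void
nfs_pageout_action_example(void)
{
	char action;

	action = nfs_pageouterrorhandler(ETIMEDOUT);	/* RETRY: transient, let vm try again */
	action = nfs_pageouterrorhandler(ESTALE);	/* DUMP: stale file handle, drop the pages */
	action = nfs_pageouterrorhandler(EBADRPC);	/* DUMPANDLOG: unexpected, drop and log */
	action = nfs_pageouterrorhandler(NFS_ELAST+1);	/* DUMP: anything past NFS_ELAST is dumped */
	(void)action;
}
#endif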
6944
6945
6946 /*
6947 * vnode OP for pageout using UPL
6948 *
6949 * No buffer I/O, just RPCs straight from the mapped pages.
6950 * File size changes are not permitted in pageout.
6951 */
6952 int
6953 nfs_vnop_pageout(
6954 struct vnop_pageout_args /* {
6955 struct vnodeop_desc *a_desc;
6956 vnode_t a_vp;
6957 upl_t a_pl;
6958 vm_offset_t a_pl_offset;
6959 off_t a_f_offset;
6960 size_t a_size;
6961 int a_flags;
6962 vfs_context_t a_context;
6963 } */ *ap)
6964 {
6965 vnode_t vp = ap->a_vp;
6966 upl_t pl = ap->a_pl;
6967 size_t size = ap->a_size;
6968 off_t f_offset = ap->a_f_offset;
6969 vm_offset_t pl_offset = ap->a_pl_offset;
6970 int flags = ap->a_flags;
6971 nfsnode_t np = VTONFS(vp);
6972 thread_t thd;
6973 kauth_cred_t cred;
6974 struct nfsbuf *bp;
6975 struct nfsmount *nmp = VTONMP(vp);
6976 daddr64_t lbn;
6977 int error = 0, iomode;
6978 off_t off, txoffset, rxoffset;
6979 vm_offset_t ioaddr, txaddr, rxaddr;
6980 uio_t auio;
6981 char uio_buf [ UIO_SIZEOF(1) ];
6982 int nofreeupl = flags & UPL_NOCOMMIT;
6983 size_t nmwsize, biosize, iosize, pgsize, txsize, rxsize, xsize, remsize;
6984 struct nfsreq *req[MAXPAGINGREQS];
6985 int nextsend, nextwait, wverfset, commit;
6986 uint64_t wverf, wverf2;
6987 uint32_t stategenid = 0, vrestart = 0, restart = 0, vrestarts = 0, restarts = 0;
6988 kern_return_t kret;
6989
6990 FSDBG(323, f_offset, size, pl, pl_offset);
6991
6992 if (pl == (upl_t)NULL)
6993 panic("nfs_pageout: no upl");
6994
6995 if (size <= 0) {
6996 printf("nfs_pageout: invalid size %ld\n", (long)size);
6997 if (!nofreeupl)
6998 ubc_upl_abort_range(pl, pl_offset, size, 0);
6999 return (EINVAL);
7000 }
7001
7002 if (!nmp) {
7003 if (!nofreeupl)
7004 ubc_upl_abort(pl, UPL_ABORT_DUMP_PAGES|UPL_ABORT_FREE_ON_EMPTY);
7005 return (ENXIO);
7006 }
7007 biosize = nmp->nm_biosize;
7008 nmwsize = nmp->nm_wsize;
7009
7010 nfs_data_lock_noupdate(np, NFS_DATA_LOCK_SHARED);
7011
7012 /*
7013 * Check to see whether the buffer is incore.
7014 * If incore and not busy, invalidate it from the cache.
7015 */
7016 for (iosize = 0; iosize < size; iosize += xsize) {
7017 off = f_offset + iosize;
7018 /* we need to make sure we do things on block boundaries */
7019 xsize = biosize - (off % biosize);
7020 if (off + xsize > f_offset + size)
7021 xsize = f_offset + size - off;
7022 lbn = (daddr64_t)(off / biosize);
7023 lck_mtx_lock(nfs_buf_mutex);
7024 if ((bp = nfs_buf_incore(np, lbn))) {
7025 FSDBG(323, off, bp, bp->nb_lflags, bp->nb_flags);
7026 if (nfs_buf_acquire(bp, NBAC_NOWAIT, 0, 0)) {
7027 lck_mtx_unlock(nfs_buf_mutex);
7028 nfs_data_unlock_noupdate(np);
7029 /* no panic. just tell vm we are busy */
7030 if (!nofreeupl)
7031 ubc_upl_abort_range(pl, pl_offset, size, 0);
7032 return (EBUSY);
7033 }
7034 if (bp->nb_dirtyend > 0) {
7035 /*
7036 * if there's a dirty range in the buffer, check
7037 * to see if it extends beyond the pageout region
7038 *
7039 * if the dirty region lies completely within the
7040 * pageout region, we just invalidate the buffer
7041 * because it's all being written out now anyway.
7042 *
7043 * if any of the dirty region lies outside the
7044 * pageout region, we'll try to clip the dirty
7045 * region to eliminate the portion that's being
7046 * paged out. If that's not possible, because
7047 * the dirty region extends before and after the
7048 * pageout region, then we'll just return EBUSY.
7049 */
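/*
 * Worked example (illustrative numbers): suppose the buffer starts at
 * boff = 0 and the pageout covers start = 4096 through end = 16384. If
 * the buffer's dirty range is [2048, 20480), it extends both before and
 * after the pageout region, cannot be clipped, and we return EBUSY below.
 * If instead the dirty range is [2048, 8192), only its head lies outside
 * the pageout region, so clipping nb_dirtyend down to start leaves just
 * [2048, 4096) dirty and we continue on to the next block.
 */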
7050 off_t boff, start, end;
7051 boff = NBOFF(bp);
7052 start = off;
7053 end = off + xsize;
7054 /* clip end to EOF */
7055 if (end > (off_t)np->n_size)
7056 end = np->n_size;
7057 start -= boff;
7058 end -= boff;
7059 if ((bp->nb_dirtyoff < start) &&
7060 (bp->nb_dirtyend > end)) {
7061 /*
7062 * we're not going to be able to clip the dirty region
7063 *
7064 * But before returning the bad news, move the
7065 * buffer to the start of the delwri list and
7066 * give the list a push to try to flush the
7067 * buffer out.
7068 */
7069 FSDBG(323, np, bp, 0xd00deebc, EBUSY);
7070 nfs_buf_remfree(bp);
7071 TAILQ_INSERT_HEAD(&nfsbufdelwri, bp, nb_free);
7072 nfsbufdelwricnt++;
7073 nfs_buf_drop(bp);
7074 nfs_buf_delwri_push(1);
7075 lck_mtx_unlock(nfs_buf_mutex);
7076 nfs_data_unlock_noupdate(np);
7077 if (!nofreeupl)
7078 ubc_upl_abort_range(pl, pl_offset, size, 0);
7079 return (EBUSY);
7080 }
7081 if ((bp->nb_dirtyoff < start) ||
7082 (bp->nb_dirtyend > end)) {
7083 /* clip dirty region, if necessary */
7084 if (bp->nb_dirtyoff < start)
7085 bp->nb_dirtyend = min(bp->nb_dirtyend, start);
7086 if (bp->nb_dirtyend > end)
7087 bp->nb_dirtyoff = max(bp->nb_dirtyoff, end);
7088 FSDBG(323, bp, bp->nb_dirtyoff, bp->nb_dirtyend, 0xd00dee00);
7089 /* we're leaving this block dirty */
7090 nfs_buf_drop(bp);
7091 lck_mtx_unlock(nfs_buf_mutex);
7092 continue;
7093 }
7094 }
7095 nfs_buf_remfree(bp);
7096 lck_mtx_unlock(nfs_buf_mutex);
7097 SET(bp->nb_flags, NB_INVAL);
7098 nfs_node_lock_force(np);
7099 if (ISSET(bp->nb_flags, NB_NEEDCOMMIT)) {
7100 CLR(bp->nb_flags, NB_NEEDCOMMIT);
7101 np->n_needcommitcnt--;
7102 CHECK_NEEDCOMMITCNT(np);
7103 }
7104 nfs_node_unlock(np);
7105 nfs_buf_release(bp, 1);
7106 } else {
7107 lck_mtx_unlock(nfs_buf_mutex);
7108 }
7109 }
7110
7111 thd = vfs_context_thread(ap->a_context);
7112 cred = ubc_getcred(vp);
7113 if (!IS_VALID_CRED(cred))
7114 cred = vfs_context_ucred(ap->a_context);
7115
7116 nfs_node_lock_force(np);
7117 if (np->n_flag & NWRITEERR) {
7118 error = np->n_error;
7119 nfs_node_unlock(np);
7120 nfs_data_unlock_noupdate(np);
7121 if (!nofreeupl)
7122 ubc_upl_abort_range(pl, pl_offset, size,
7123 UPL_ABORT_FREE_ON_EMPTY);
7124 return (error);
7125 }
7126 nfs_node_unlock(np);
7127
7128 if (f_offset < 0 || f_offset >= (off_t)np->n_size ||
7129 f_offset & PAGE_MASK_64 || size & PAGE_MASK_64) {
7130 nfs_data_unlock_noupdate(np);
7131 if (!nofreeupl)
7132 ubc_upl_abort_range(pl, pl_offset, size,
7133 UPL_ABORT_FREE_ON_EMPTY);
7134 return (EINVAL);
7135 }
7136
7137 kret = ubc_upl_map(pl, &ioaddr);
7138 if (kret != KERN_SUCCESS)
7139 panic("nfs_vnop_pageout: ubc_upl_map() failed with (%d)", kret);
7140 ioaddr += pl_offset;
7141
7142 if ((u_quad_t)f_offset + size > np->n_size)
7143 xsize = np->n_size - f_offset;
7144 else
7145 xsize = size;
7146
7147 pgsize = round_page_64(xsize);
7148 if ((size > pgsize) && !nofreeupl)
7149 ubc_upl_abort_range(pl, pl_offset + pgsize, size - pgsize,
7150 UPL_ABORT_FREE_ON_EMPTY);
7151
7152 /*
7153 * check for a partial page and clear the
7154 * contents past the end of the file before
7155 * releasing it to the VM page cache
7156 */
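/*
 * Illustrative example (hypothetical numbers): with 4KB pages, if
 * np->n_size is 10000 and the pageout covers f_offset = 8192 with
 * size = 4096, then io = 10000 - 8192 = 1808 and the bzero() below
 * clears the trailing 4096 - 1808 = 2288 bytes, so no stale data past
 * EOF is left in the VM page cache.
 */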
7157 if ((u_quad_t)f_offset < np->n_size && (u_quad_t)f_offset + size > np->n_size) {
7158 size_t io = np->n_size - f_offset;
7159 bzero((caddr_t)(ioaddr + io), size - io);
7160 FSDBG(321, np->n_size, f_offset, f_offset + io, size - io);
7161 }
7162 nfs_data_unlock_noupdate(np);
7163
7164 auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_WRITE,
7165 &uio_buf, sizeof(uio_buf));
7166
7167 tryagain:
7168 if (nmp->nm_vers >= NFS_VER4)
7169 stategenid = nmp->nm_stategenid;
7170 wverf = wverf2 = wverfset = 0;
7171 txsize = rxsize = xsize;
7172 txoffset = rxoffset = f_offset;
7173 txaddr = rxaddr = ioaddr;
7174 commit = NFS_WRITE_FILESYNC;
7175
7176 bzero(req, sizeof(req));
7177 nextsend = nextwait = 0;
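/*
 * The loop below pipelines up to MAXPAGINGREQS asynchronous WRITE RPCs
 * through the req[] ring: nextsend indexes the next free slot to issue
 * into and nextwait the oldest outstanding request to reap. The tx*
 * variables track what remains to be sent and the rx* variables what
 * remains to be reaped, letting sends run ahead of completions.
 */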
7178 do {
7179 if (np->n_flag & NREVOKE) {
7180 error = EIO;
7181 break;
7182 }
7183 /* send requests while we need to and have available slots */
7184 while ((txsize > 0) && (req[nextsend] == NULL)) {
7185 iosize = MIN(nmwsize, txsize);
7186 uio_reset(auio, txoffset, UIO_SYSSPACE, UIO_WRITE);
7187 uio_addiov(auio, CAST_USER_ADDR_T(txaddr), iosize);
7188 FSDBG(323, uio_offset(auio), iosize, txaddr, txsize);
7189 OSAddAtomic64(1, &nfsstats.pageouts);
7190 nfs_node_lock_force(np);
7191 np->n_numoutput++;
7192 nfs_node_unlock(np);
7193 vnode_startwrite(vp);
7194 iomode = NFS_WRITE_UNSTABLE;
7195 if ((error = nmp->nm_funcs->nf_write_rpc_async(np, auio, iosize, thd, cred, iomode, NULL, &req[nextsend]))) {
7196 req[nextsend] = NULL;
7197 vnode_writedone(vp);
7198 nfs_node_lock_force(np);
7199 np->n_numoutput--;
7200 nfs_node_unlock(np);
7201 break;
7202 }
7203 txaddr += iosize;
7204 txoffset += iosize;
7205 txsize -= iosize;
7206 nextsend = (nextsend + 1) % MAXPAGINGREQS;
7207 }
7208 /* wait for outstanding requests while we need to; break out if there are more requests to send */
7209 while ((rxsize > 0) && req[nextwait]) {
7210 iosize = remsize = MIN(nmwsize, rxsize);
7211 error = nmp->nm_funcs->nf_write_rpc_async_finish(np, req[nextwait], &iomode, &iosize, &wverf2);
7212 req[nextwait] = NULL;
7213 nextwait = (nextwait + 1) % MAXPAGINGREQS;
7214 vnode_writedone(vp);
7215 nfs_node_lock_force(np);
7216 np->n_numoutput--;
7217 nfs_node_unlock(np);
7218 if ((nmp->nm_vers >= NFS_VER4) && nfs_mount_state_error_should_restart(error)) {
7219 lck_mtx_lock(&nmp->nm_lock);
7220 if ((error != NFSERR_GRACE) && (stategenid == nmp->nm_stategenid)) {
7221 NP(np, "nfs_vnop_pageout: error %d, initiating recovery", error);
7222 nfs_need_recover(nmp, error);
7223 }
7224 lck_mtx_unlock(&nmp->nm_lock);
7225 restart = 1;
7226 goto cancel;
7227 }
7228 if (error) {
7229 FSDBG(323, rxoffset, rxsize, error, -1);
7230 break;
7231 }
7232 if (!wverfset) {
7233 wverf = wverf2;
7234 wverfset = 1;
7235 } else if (wverf != wverf2) {
7236 /* verifier changed, so we need to restart all the writes */
7237 vrestart = 1;
7238 goto cancel;
7239 }
7240 /* Retain the lowest commitment level returned. */
7241 if (iomode < commit)
7242 commit = iomode;
7243 rxaddr += iosize;
7244 rxoffset += iosize;
7245 rxsize -= iosize;
7246 remsize -= iosize;
7247 if (remsize > 0) {
7248 /* need to try sending the remainder */
7249 iosize = remsize;
7250 uio_reset(auio, rxoffset, UIO_SYSSPACE, UIO_WRITE);
7251 uio_addiov(auio, CAST_USER_ADDR_T(rxaddr), remsize);
7252 iomode = NFS_WRITE_UNSTABLE;
7253 error = nfs_write_rpc2(np, auio, thd, cred, &iomode, &wverf2);
7254 if ((nmp->nm_vers >= NFS_VER4) && nfs_mount_state_error_should_restart(error)) {
7255 NP(np, "nfs_vnop_pageout: restart: error %d", error);
7256 lck_mtx_lock(&nmp->nm_lock);
7257 if ((error != NFSERR_GRACE) && (stategenid == nmp->nm_stategenid)) {
7258 NP(np, "nfs_vnop_pageout: error %d, initiating recovery", error);
7259 nfs_need_recover(nmp, error);
7260 }
7261 lck_mtx_unlock(&nmp->nm_lock);
7262 restart = 1;
7263 goto cancel;
7264 }
7265 if (error) {
7266 FSDBG(323, rxoffset, rxsize, error, -1);
7267 break;
7268 }
7269 if (wverf != wverf2) {
7270 /* verifier changed, so we need to restart all the writes */
7271 vrestart = 1;
7272 goto cancel;
7273 }
7274 if (iomode < commit)
7275 commit = iomode;
7276 rxaddr += iosize;
7277 rxoffset += iosize;
7278 rxsize -= iosize;
7279 }
7280 if (txsize)
7281 break;
7282 }
7283 } while (!error && (txsize || rxsize));
7284
7285 vrestart = 0;
7286
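/*
 * If any reply came back with less than a FILESYNC commitment, the data
 * may only be in the server's cache, so a COMMIT is needed for the range.
 * NFSERR_STALEWRITEVERF indicates the server rebooted after the writes
 * were sent and may have lost them, so all the writes must be redone
 * (vrestart).
 */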
7287 if (!error && (commit != NFS_WRITE_FILESYNC)) {
7288 error = nmp->nm_funcs->nf_commit_rpc(np, f_offset, xsize, cred, wverf);
7289 if (error == NFSERR_STALEWRITEVERF) {
7290 vrestart = 1;
7291 error = EIO;
7292 }
7293 }
7294
7295 if (error) {
7296 cancel:
7297 /* cancel any outstanding requests */
7298 while (req[nextwait]) {
7299 nfs_request_async_cancel(req[nextwait]);
7300 req[nextwait] = NULL;
7301 nextwait = (nextwait + 1) % MAXPAGINGREQS;
7302 vnode_writedone(vp);
7303 nfs_node_lock_force(np);
7304 np->n_numoutput--;
7305 nfs_node_unlock(np);
7306 }
7307 if (np->n_flag & NREVOKE) {
7308 error = EIO;
7309 } else {
7310 if (vrestart) {
7311 if (++vrestarts <= 100) /* guard against no progress */
7312 goto tryagain;
7313 NP(np, "nfs_pageout: too many restarts, aborting");
7314 FSDBG(323, f_offset, xsize, ERESTART, -1);
7315 }
7316 if (restart) {
7317 if (restarts <= nfs_mount_state_max_restarts(nmp)) { /* guard against no progress */
7318 if (error == NFSERR_GRACE)
7319 tsleep(&nmp->nm_state, (PZERO-1), "nfsgrace", 2*hz);
7320 if (!(error = nfs_mount_state_wait_for_recovery(nmp)))
7321 goto tryagain;
7322 } else {
7323 NP(np, "nfs_pageout: too many restarts, aborting");
7324 FSDBG(323, f_offset, xsize, ERESTART, -1);
7325 }
7326 }
7327 }
7328 }
7329
7330 ubc_upl_unmap(pl);
7331
7332 /*
7333 * We've tried several different solutions for what to do when a pageout
7334 * gets an error. If we don't handle it and simply return the error to
7335 * the caller, vm will retry. That can end in endless looping between vm
7336 * and here, retrying the same page. Dumping the page back to vm takes it
7337 * out of vm's knowledge, and we lose whatever data existed. That is
7338 * risky, but in some cases necessary. For example, the initial fix here
7339 * was to do that for ESTALE: the server is telling us that the file is
7340 * no longer the same, and we would not want to keep paging out to it. We
7341 * have also seen some 151 errors from Auspex servers, and NFSv3 can
7342 * return errors higher than ELAST. Those, along with known NFS server
7343 * errors, we "dump" from vm. Errors we don't expect to occur we dump
7344 * and log for further analysis. Errors that could be transient, such as
7345 * networking errors, we let vm "retry". Lastly, errors that we retry but
7346 * that have the potential to storm the network get "retrywithsleep".
7347 * "sever" will be used in the future to dump all the pages of an object
7348 * for cases like ESTALE. All this is the basis for the actions returned
7349 * and our first guesses at error handling; tweaking is expected as more
7350 * statistics are gathered. Note that in the long run we may need a more
7351 * robust solution, such as some kind of persistent store for when vm can
7352 * neither dump nor keep retrying, but that would be a file architectural
7353 * change.
7354 */
7355 if (!nofreeupl) { /* otherwise stacked file system has to handle this */
7356 if (error) {
7357 int abortflags = 0;
7358 char action = nfs_pageouterrorhandler(error);
7359
7360 switch (action) {
7361 case DUMP:
7362 abortflags = UPL_ABORT_DUMP_PAGES|UPL_ABORT_FREE_ON_EMPTY;
7363 break;
7364 case DUMPANDLOG:
7365 abortflags = UPL_ABORT_DUMP_PAGES|UPL_ABORT_FREE_ON_EMPTY;
7366 if (error <= NFS_ELAST) {
7367 if ((errorcount[error] % 100) == 0)
7368 NP(np, "nfs_pageout: unexpected error %d. dumping vm page", error);
7369 errorcount[error]++;
7370 }
7371 break;
7372 case RETRY:
7373 abortflags = UPL_ABORT_FREE_ON_EMPTY;
7374 break;
7375 case RETRYWITHSLEEP:
7376 abortflags = UPL_ABORT_FREE_ON_EMPTY;
7377 /* priority is unused; PSOCK is just a placeholder */
7378 tsleep(&lbolt, PSOCK, "nfspageout", 0);
7379 break;
7380 case SEVER: /* not implemented */
7381 default:
7382 NP(np, "nfs_pageout: action %d not expected", action);
7383 break;
7384 }
7385
7386 ubc_upl_abort_range(pl, pl_offset, pgsize, abortflags);
7387 /* return error in all cases above */
7388
7389 } else {
7390 ubc_upl_commit_range(pl, pl_offset, pgsize,
7391 UPL_COMMIT_CLEAR_DIRTY |
7392 UPL_COMMIT_FREE_ON_EMPTY);
7393 }
7394 }
7395 return (error);
7396 }
7397
7398 /* Blktooff derives file offset given a logical block number */
7399 int
7400 nfs_vnop_blktooff(
7401 struct vnop_blktooff_args /* {
7402 struct vnodeop_desc *a_desc;
7403 vnode_t a_vp;
7404 daddr64_t a_lblkno;
7405 off_t *a_offset;
7406 } */ *ap)
7407 {
7408 int biosize;
7409 vnode_t vp = ap->a_vp;
7410 struct nfsmount *nmp = VTONMP(vp);
7411
7412 if (!nmp)
7413 return (ENXIO);
7414 biosize = nmp->nm_biosize;
7415
7416 *ap->a_offset = (off_t)(ap->a_lblkno * biosize);
7417
7418 return (0);
7419 }
7420
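/* Offtoblk derives the logical block number for a given file offset */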
7421 int
7422 nfs_vnop_offtoblk(
7423 struct vnop_offtoblk_args /* {
7424 struct vnodeop_desc *a_desc;
7425 vnode_t a_vp;
7426 off_t a_offset;
7427 daddr64_t *a_lblkno;
7428 } */ *ap)
7429 {
7430 int biosize;
7431 vnode_t vp = ap->a_vp;
7432 struct nfsmount *nmp = VTONMP(vp);
7433
7434 if (!nmp)
7435 return (ENXIO);
7436 biosize = nmp->nm_biosize;
7437
7438 *ap->a_lblkno = (daddr64_t)(ap->a_offset / biosize);
7439
7440 return (0);
7441 }
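
/*
 * Illustrative sketch (not part of the original source): blktooff and
 * offtoblk are inverses up to block granularity. With a hypothetical
 * 32KB biosize, block 3 maps to offset 98304, and any offset in
 * [98304, 131072) maps back to block 3.
 */
#if 0	/* illustrative only, not compiled */
static void
nfs_blk_off_example(void)
{
	int biosize = 32768;				/* assumed nm_biosize */
	off_t offset = (off_t)(3 * biosize);		/* 98304 */
	daddr64_t lblkno = (daddr64_t)(offset / biosize); /* 3 again */
	(void)lblkno;
}
#endif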
7442
7443 /*
7444 * vnode change monitoring
7445 */
7446 int
7447 nfs_vnop_monitor(
7448 struct vnop_monitor_args /* {
7449 struct vnodeop_desc *a_desc;
7450 vnode_t a_vp;
7451 uint32_t a_events;
7452 uint32_t a_flags;
7453 void *a_handle;
7454 vfs_context_t a_context;
7455 } */ *ap)
7456 {
7457 nfsnode_t np = VTONFS(ap->a_vp);
7458 struct nfsmount *nmp = VTONMP(ap->a_vp);
7459 int error = 0;
7460
7461 if (!nmp)
7462 return (ENXIO);
7463
7464 /* make sure that the vnode's monitoring status is up to date */
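/* np->n_monlink.le_next == NFSNOLIST is used below as the sentinel for "not on nm_monlist" */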
7465 lck_mtx_lock(&nmp->nm_lock);
7466 if (vnode_ismonitored(ap->a_vp)) {
7467 /* This vnode is currently being monitored, make sure we're tracking it. */
7468 if (np->n_monlink.le_next == NFSNOLIST) {
7469 LIST_INSERT_HEAD(&nmp->nm_monlist, np, n_monlink);
7470 nfs_mount_sock_thread_wake(nmp);
7471 }
7472 } else {
7473 /* This vnode is no longer being monitored, make sure we're not tracking it. */
7474 /* Wait for any in-progress getattr to complete first. */
7475 while (np->n_mflag & NMMONSCANINPROG) {
7476 struct timespec ts = { 1, 0 };
7477 np->n_mflag |= NMMONSCANWANT;
7478 msleep(&np->n_mflag, &nmp->nm_lock, PZERO-1, "nfswaitmonscan", &ts);
7479 }
7480 if (np->n_monlink.le_next != NFSNOLIST) {
7481 LIST_REMOVE(np, n_monlink);
7482 np->n_monlink.le_next = NFSNOLIST;
7483 }
7484 }
7485 lck_mtx_unlock(&nmp->nm_lock);
7486
7487 return (error);
7488 }
7489
7490 /*
7491 * Send a vnode notification for the given events.
7492 */
7493 void
7494 nfs_vnode_notify(nfsnode_t np, uint32_t events)
7495 {
7496 struct nfsmount *nmp = NFSTONMP(np);
7497 struct nfs_vattr nvattr;
7498 struct vnode_attr vattr, *vap = NULL;
7499 struct timeval now;
7500
7501 microuptime(&now);
7502 if ((np->n_evtstamp == now.tv_sec) || !nmp) {
7503 /* delay sending this notify */
7504 np->n_events |= events;
7505 return;
7506 }
7507 events |= np->n_events;
7508 np->n_events = 0;
7509 np->n_evtstamp = now.tv_sec;
7510
7511 vfs_get_notify_attributes(&vattr);
7512 if (!nfs_getattrcache(np, &nvattr, 0)) {
7513 vap = &vattr;
7514 VATTR_INIT(vap);
7515 VATTR_RETURN(vap, va_fsid, vfs_statfs(nmp->nm_mountp)->f_fsid.val[0]);
7516 VATTR_RETURN(vap, va_fileid, nvattr.nva_fileid);
7517 VATTR_RETURN(vap, va_mode, nvattr.nva_mode);
7518 VATTR_RETURN(vap, va_uid, nvattr.nva_uid);
7519 VATTR_RETURN(vap, va_gid, nvattr.nva_gid);
7520 VATTR_RETURN(vap, va_nlink, nvattr.nva_nlink);
7521 }
7522 vnode_notify(NFSTOV(np), events, vap);
7523 }