]> git.saurik.com Git - apple/xnu.git/blame_incremental - bsd/nfs/nfs_vnops.c
xnu-1504.15.3.tar.gz
[apple/xnu.git] / bsd / nfs / nfs_vnops.c
... / ...
CommitLineData
1/*
2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29/*
30 * Copyright (c) 1989, 1993
31 * The Regents of the University of California. All rights reserved.
32 *
33 * This code is derived from software contributed to Berkeley by
34 * Rick Macklem at The University of Guelph.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. All advertising materials mentioning features or use of this software
45 * must display the following acknowledgement:
46 * This product includes software developed by the University of
47 * California, Berkeley and its contributors.
48 * 4. Neither the name of the University nor the names of its contributors
49 * may be used to endorse or promote products derived from this software
50 * without specific prior written permission.
51 *
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62 * SUCH DAMAGE.
63 *
64 * @(#)nfs_vnops.c 8.16 (Berkeley) 5/27/95
65 * FreeBSD-Id: nfs_vnops.c,v 1.72 1997/11/07 09:20:48 phk Exp $
66 */
67
68
/*
 * vnode op calls for Sun NFS version 2 and 3
 * (plus the operations and operation tables shared with NFS version 4)
 */
72#include <sys/param.h>
73#include <sys/kernel.h>
74#include <sys/systm.h>
75#include <sys/resourcevar.h>
76#include <sys/proc_internal.h>
77#include <sys/kauth.h>
78#include <sys/mount_internal.h>
79#include <sys/malloc.h>
80#include <sys/kpi_mbuf.h>
81#include <sys/conf.h>
82#include <sys/vnode_internal.h>
83#include <sys/dirent.h>
84#include <sys/fcntl.h>
85#include <sys/lockf.h>
86#include <sys/ubc_internal.h>
87#include <sys/attr.h>
88#include <sys/signalvar.h>
89#include <sys/uio_internal.h>
90
91#include <vfs/vfs_support.h>
92
93#include <sys/vm.h>
94
95#include <sys/time.h>
96#include <kern/clock.h>
97#include <libkern/OSAtomic.h>
98
99#include <miscfs/fifofs/fifo.h>
100#include <miscfs/specfs/specdev.h>
101
102#include <nfs/rpcv2.h>
103#include <nfs/nfsproto.h>
104#include <nfs/nfs.h>
105#include <nfs/nfsnode.h>
106#include <nfs/nfs_gss.h>
107#include <nfs/nfsmount.h>
108#include <nfs/nfs_lock.h>
109#include <nfs/xdr_subs.h>
110#include <nfs/nfsm_subs.h>
111
112#include <net/if.h>
113#include <netinet/in.h>
114#include <netinet/in_var.h>
115
116#include <vm/vm_kern.h>
117#include <vm/vm_pageout.h>
118
119#include <kern/task.h>
120#include <kern/sched_prim.h>
121
/*
 * NFS vnode ops: prototypes for the entry points referenced by the
 * operation tables in this file.
 */

/* entry points used for every protocol version */
int	nfs_vnop_lookup(struct vnop_lookup_args *ap);
int	nfs_vnop_ioctl(struct vnop_ioctl_args *ap);
int	nfs_vnop_select(struct vnop_select_args *ap);
int	nfs_vnop_setattr(struct vnop_setattr_args *ap);
int	nfs_vnop_read(struct vnop_read_args *ap);
int	nfs_vnop_write(struct vnop_write_args *ap);
int	nfs_vnop_mmap(struct vnop_mmap_args *ap);
int	nfs_vnop_fsync(struct vnop_fsync_args *ap);
int	nfs_vnop_remove(struct vnop_remove_args *ap);
int	nfs_vnop_rename(struct vnop_rename_args *ap);
int	nfs_vnop_readdir(struct vnop_readdir_args *ap);
int	nfs_vnop_readlink(struct vnop_readlink_args *ap);
int	nfs_vnop_pathconf(struct vnop_pathconf_args *ap);
int	nfs_vnop_pagein(struct vnop_pagein_args *ap);
int	nfs_vnop_pageout(struct vnop_pageout_args *ap);
int	nfs_vnop_blktooff(struct vnop_blktooff_args *ap);
int	nfs_vnop_offtoblk(struct vnop_offtoblk_args *ap);
int	nfs_vnop_blockmap(struct vnop_blockmap_args *ap);

/* entry points used by the special-device tables */
int	nfsspec_vnop_read(struct vnop_read_args *ap);
int	nfsspec_vnop_write(struct vnop_write_args *ap);
int	nfsspec_vnop_close(struct vnop_close_args *ap);

#if FIFO
/* entry points used by the FIFO tables */
int	nfsfifo_vnop_read(struct vnop_read_args *ap);
int	nfsfifo_vnop_write(struct vnop_write_args *ap);
int	nfsfifo_vnop_close(struct vnop_close_args *ap);
#endif

/* nfs3_* entry points (installed in the nfsv2 tables) */
int	nfs3_vnop_create(struct vnop_create_args *ap);
int	nfs3_vnop_mknod(struct vnop_mknod_args *ap);
int	nfs3_vnop_getattr(struct vnop_getattr_args *ap);
int	nfs3_vnop_link(struct vnop_link_args *ap);
int	nfs3_vnop_mkdir(struct vnop_mkdir_args *ap);
int	nfs3_vnop_rmdir(struct vnop_rmdir_args *ap);
int	nfs3_vnop_symlink(struct vnop_symlink_args *ap);

160vnop_t **nfsv2_vnodeop_p;
161static struct vnodeopv_entry_desc nfsv2_vnodeop_entries[] = {
162 { &vnop_default_desc, (vnop_t *)vn_default_error },
163 { &vnop_lookup_desc, (vnop_t *)nfs_vnop_lookup }, /* lookup */
164 { &vnop_create_desc, (vnop_t *)nfs3_vnop_create }, /* create */
165 { &vnop_mknod_desc, (vnop_t *)nfs3_vnop_mknod }, /* mknod */
166 { &vnop_open_desc, (vnop_t *)nfs3_vnop_open }, /* open */
167 { &vnop_close_desc, (vnop_t *)nfs3_vnop_close }, /* close */
168 { &vnop_access_desc, (vnop_t *)nfs_vnop_access }, /* access */
169 { &vnop_getattr_desc, (vnop_t *)nfs3_vnop_getattr }, /* getattr */
170 { &vnop_setattr_desc, (vnop_t *)nfs_vnop_setattr }, /* setattr */
171 { &vnop_read_desc, (vnop_t *)nfs_vnop_read }, /* read */
172 { &vnop_write_desc, (vnop_t *)nfs_vnop_write }, /* write */
173 { &vnop_ioctl_desc, (vnop_t *)nfs_vnop_ioctl }, /* ioctl */
174 { &vnop_select_desc, (vnop_t *)nfs_vnop_select }, /* select */
175 { &vnop_revoke_desc, (vnop_t *)nfs_vnop_revoke }, /* revoke */
176 { &vnop_mmap_desc, (vnop_t *)nfs_vnop_mmap }, /* mmap */
177 { &vnop_fsync_desc, (vnop_t *)nfs_vnop_fsync }, /* fsync */
178 { &vnop_remove_desc, (vnop_t *)nfs_vnop_remove }, /* remove */
179 { &vnop_link_desc, (vnop_t *)nfs3_vnop_link }, /* link */
180 { &vnop_rename_desc, (vnop_t *)nfs_vnop_rename }, /* rename */
181 { &vnop_mkdir_desc, (vnop_t *)nfs3_vnop_mkdir }, /* mkdir */
182 { &vnop_rmdir_desc, (vnop_t *)nfs3_vnop_rmdir }, /* rmdir */
183 { &vnop_symlink_desc, (vnop_t *)nfs3_vnop_symlink }, /* symlink */
184 { &vnop_readdir_desc, (vnop_t *)nfs_vnop_readdir }, /* readdir */
185 { &vnop_readlink_desc, (vnop_t *)nfs_vnop_readlink }, /* readlink */
186 { &vnop_inactive_desc, (vnop_t *)nfs_vnop_inactive }, /* inactive */
187 { &vnop_reclaim_desc, (vnop_t *)nfs_vnop_reclaim }, /* reclaim */
188 { &vnop_strategy_desc, (vnop_t *)err_strategy }, /* strategy */
189 { &vnop_pathconf_desc, (vnop_t *)nfs_vnop_pathconf }, /* pathconf */
190 { &vnop_advlock_desc, (vnop_t *)nfs3_vnop_advlock }, /* advlock */
191 { &vnop_bwrite_desc, (vnop_t *)err_bwrite }, /* bwrite */
192 { &vnop_pagein_desc, (vnop_t *)nfs_vnop_pagein }, /* Pagein */
193 { &vnop_pageout_desc, (vnop_t *)nfs_vnop_pageout }, /* Pageout */
194 { &vnop_copyfile_desc, (vnop_t *)err_copyfile }, /* Copyfile */
195 { &vnop_blktooff_desc, (vnop_t *)nfs_vnop_blktooff }, /* blktooff */
196 { &vnop_offtoblk_desc, (vnop_t *)nfs_vnop_offtoblk }, /* offtoblk */
197 { &vnop_blockmap_desc, (vnop_t *)nfs_vnop_blockmap }, /* blockmap */
198 { NULL, NULL }
199};
200struct vnodeopv_desc nfsv2_vnodeop_opv_desc =
201 { &nfsv2_vnodeop_p, nfsv2_vnodeop_entries };
202
203vnop_t **nfsv4_vnodeop_p;
204static struct vnodeopv_entry_desc nfsv4_vnodeop_entries[] = {
205 { &vnop_default_desc, (vnop_t *)vn_default_error },
206 { &vnop_lookup_desc, (vnop_t *)nfs_vnop_lookup }, /* lookup */
207 { &vnop_create_desc, (vnop_t *)nfs4_vnop_create }, /* create */
208 { &vnop_mknod_desc, (vnop_t *)nfs4_vnop_mknod }, /* mknod */
209 { &vnop_open_desc, (vnop_t *)nfs4_vnop_open }, /* open */
210 { &vnop_close_desc, (vnop_t *)nfs4_vnop_close }, /* close */
211 { &vnop_access_desc, (vnop_t *)nfs_vnop_access }, /* access */
212 { &vnop_getattr_desc, (vnop_t *)nfs4_vnop_getattr }, /* getattr */
213 { &vnop_setattr_desc, (vnop_t *)nfs_vnop_setattr }, /* setattr */
214 { &vnop_read_desc, (vnop_t *)nfs4_vnop_read }, /* read */
215 { &vnop_write_desc, (vnop_t *)nfs_vnop_write }, /* write */
216 { &vnop_ioctl_desc, (vnop_t *)nfs_vnop_ioctl }, /* ioctl */
217 { &vnop_select_desc, (vnop_t *)nfs_vnop_select }, /* select */
218 { &vnop_revoke_desc, (vnop_t *)nfs_vnop_revoke }, /* revoke */
219 { &vnop_mmap_desc, (vnop_t *)nfs4_vnop_mmap }, /* mmap */
220 { &vnop_mnomap_desc, (vnop_t *)nfs4_vnop_mnomap }, /* mnomap */
221 { &vnop_fsync_desc, (vnop_t *)nfs_vnop_fsync }, /* fsync */
222 { &vnop_remove_desc, (vnop_t *)nfs_vnop_remove }, /* remove */
223 { &vnop_link_desc, (vnop_t *)nfs4_vnop_link }, /* link */
224 { &vnop_rename_desc, (vnop_t *)nfs_vnop_rename }, /* rename */
225 { &vnop_mkdir_desc, (vnop_t *)nfs4_vnop_mkdir }, /* mkdir */
226 { &vnop_rmdir_desc, (vnop_t *)nfs4_vnop_rmdir }, /* rmdir */
227 { &vnop_symlink_desc, (vnop_t *)nfs4_vnop_symlink }, /* symlink */
228 { &vnop_readdir_desc, (vnop_t *)nfs_vnop_readdir }, /* readdir */
229 { &vnop_readlink_desc, (vnop_t *)nfs_vnop_readlink }, /* readlink */
230 { &vnop_inactive_desc, (vnop_t *)nfs_vnop_inactive }, /* inactive */
231 { &vnop_reclaim_desc, (vnop_t *)nfs_vnop_reclaim }, /* reclaim */
232 { &vnop_strategy_desc, (vnop_t *)err_strategy }, /* strategy */
233 { &vnop_pathconf_desc, (vnop_t *)nfs_vnop_pathconf }, /* pathconf */
234 { &vnop_advlock_desc, (vnop_t *)nfs4_vnop_advlock }, /* advlock */
235 { &vnop_bwrite_desc, (vnop_t *)err_bwrite }, /* bwrite */
236 { &vnop_pagein_desc, (vnop_t *)nfs_vnop_pagein }, /* Pagein */
237 { &vnop_pageout_desc, (vnop_t *)nfs_vnop_pageout }, /* Pageout */
238 { &vnop_copyfile_desc, (vnop_t *)err_copyfile }, /* Copyfile */
239 { &vnop_blktooff_desc, (vnop_t *)nfs_vnop_blktooff }, /* blktooff */
240 { &vnop_offtoblk_desc, (vnop_t *)nfs_vnop_offtoblk }, /* offtoblk */
241 { &vnop_blockmap_desc, (vnop_t *)nfs_vnop_blockmap }, /* blockmap */
242 { NULL, NULL }
243};
244struct vnodeopv_desc nfsv4_vnodeop_opv_desc =
245 { &nfsv4_vnodeop_p, nfsv4_vnodeop_entries };
246
247/*
248 * Special device vnode ops
249 */
250vnop_t **spec_nfsv2nodeop_p;
251static struct vnodeopv_entry_desc spec_nfsv2nodeop_entries[] = {
252 { &vnop_default_desc, (vnop_t *)vn_default_error },
253 { &vnop_lookup_desc, (vnop_t *)spec_lookup }, /* lookup */
254 { &vnop_create_desc, (vnop_t *)spec_create }, /* create */
255 { &vnop_mknod_desc, (vnop_t *)spec_mknod }, /* mknod */
256 { &vnop_open_desc, (vnop_t *)spec_open }, /* open */
257 { &vnop_close_desc, (vnop_t *)nfsspec_vnop_close }, /* close */
258 { &vnop_getattr_desc, (vnop_t *)nfs3_vnop_getattr }, /* getattr */
259 { &vnop_setattr_desc, (vnop_t *)nfs_vnop_setattr }, /* setattr */
260 { &vnop_read_desc, (vnop_t *)nfsspec_vnop_read }, /* read */
261 { &vnop_write_desc, (vnop_t *)nfsspec_vnop_write }, /* write */
262 { &vnop_ioctl_desc, (vnop_t *)spec_ioctl }, /* ioctl */
263 { &vnop_select_desc, (vnop_t *)spec_select }, /* select */
264 { &vnop_revoke_desc, (vnop_t *)spec_revoke }, /* revoke */
265 { &vnop_mmap_desc, (vnop_t *)spec_mmap }, /* mmap */
266 { &vnop_fsync_desc, (vnop_t *)nfs_vnop_fsync }, /* fsync */
267 { &vnop_remove_desc, (vnop_t *)spec_remove }, /* remove */
268 { &vnop_link_desc, (vnop_t *)spec_link }, /* link */
269 { &vnop_rename_desc, (vnop_t *)spec_rename }, /* rename */
270 { &vnop_mkdir_desc, (vnop_t *)spec_mkdir }, /* mkdir */
271 { &vnop_rmdir_desc, (vnop_t *)spec_rmdir }, /* rmdir */
272 { &vnop_symlink_desc, (vnop_t *)spec_symlink }, /* symlink */
273 { &vnop_readdir_desc, (vnop_t *)spec_readdir }, /* readdir */
274 { &vnop_readlink_desc, (vnop_t *)spec_readlink }, /* readlink */
275 { &vnop_inactive_desc, (vnop_t *)nfs_vnop_inactive }, /* inactive */
276 { &vnop_reclaim_desc, (vnop_t *)nfs_vnop_reclaim }, /* reclaim */
277 { &vnop_strategy_desc, (vnop_t *)spec_strategy }, /* strategy */
278 { &vnop_pathconf_desc, (vnop_t *)spec_pathconf }, /* pathconf */
279 { &vnop_advlock_desc, (vnop_t *)spec_advlock }, /* advlock */
280 { &vnop_bwrite_desc, (vnop_t *)vn_bwrite }, /* bwrite */
281 { &vnop_pagein_desc, (vnop_t *)nfs_vnop_pagein }, /* Pagein */
282 { &vnop_pageout_desc, (vnop_t *)nfs_vnop_pageout }, /* Pageout */
283 { &vnop_blktooff_desc, (vnop_t *)nfs_vnop_blktooff }, /* blktooff */
284 { &vnop_offtoblk_desc, (vnop_t *)nfs_vnop_offtoblk }, /* offtoblk */
285 { &vnop_blockmap_desc, (vnop_t *)nfs_vnop_blockmap }, /* blockmap */
286 { NULL, NULL }
287};
288struct vnodeopv_desc spec_nfsv2nodeop_opv_desc =
289 { &spec_nfsv2nodeop_p, spec_nfsv2nodeop_entries };
290vnop_t **spec_nfsv4nodeop_p;
291static struct vnodeopv_entry_desc spec_nfsv4nodeop_entries[] = {
292 { &vnop_default_desc, (vnop_t *)vn_default_error },
293 { &vnop_lookup_desc, (vnop_t *)spec_lookup }, /* lookup */
294 { &vnop_create_desc, (vnop_t *)spec_create }, /* create */
295 { &vnop_mknod_desc, (vnop_t *)spec_mknod }, /* mknod */
296 { &vnop_open_desc, (vnop_t *)spec_open }, /* open */
297 { &vnop_close_desc, (vnop_t *)nfsspec_vnop_close }, /* close */
298 { &vnop_getattr_desc, (vnop_t *)nfs4_vnop_getattr }, /* getattr */
299 { &vnop_setattr_desc, (vnop_t *)nfs_vnop_setattr }, /* setattr */
300 { &vnop_read_desc, (vnop_t *)nfsspec_vnop_read }, /* read */
301 { &vnop_write_desc, (vnop_t *)nfsspec_vnop_write }, /* write */
302 { &vnop_ioctl_desc, (vnop_t *)spec_ioctl }, /* ioctl */
303 { &vnop_select_desc, (vnop_t *)spec_select }, /* select */
304 { &vnop_revoke_desc, (vnop_t *)spec_revoke }, /* revoke */
305 { &vnop_mmap_desc, (vnop_t *)spec_mmap }, /* mmap */
306 { &vnop_fsync_desc, (vnop_t *)nfs_vnop_fsync }, /* fsync */
307 { &vnop_remove_desc, (vnop_t *)spec_remove }, /* remove */
308 { &vnop_link_desc, (vnop_t *)spec_link }, /* link */
309 { &vnop_rename_desc, (vnop_t *)spec_rename }, /* rename */
310 { &vnop_mkdir_desc, (vnop_t *)spec_mkdir }, /* mkdir */
311 { &vnop_rmdir_desc, (vnop_t *)spec_rmdir }, /* rmdir */
312 { &vnop_symlink_desc, (vnop_t *)spec_symlink }, /* symlink */
313 { &vnop_readdir_desc, (vnop_t *)spec_readdir }, /* readdir */
314 { &vnop_readlink_desc, (vnop_t *)spec_readlink }, /* readlink */
315 { &vnop_inactive_desc, (vnop_t *)nfs_vnop_inactive }, /* inactive */
316 { &vnop_reclaim_desc, (vnop_t *)nfs_vnop_reclaim }, /* reclaim */
317 { &vnop_strategy_desc, (vnop_t *)spec_strategy }, /* strategy */
318 { &vnop_pathconf_desc, (vnop_t *)spec_pathconf }, /* pathconf */
319 { &vnop_advlock_desc, (vnop_t *)spec_advlock }, /* advlock */
320 { &vnop_bwrite_desc, (vnop_t *)vn_bwrite }, /* bwrite */
321 { &vnop_pagein_desc, (vnop_t *)nfs_vnop_pagein }, /* Pagein */
322 { &vnop_pageout_desc, (vnop_t *)nfs_vnop_pageout }, /* Pageout */
323 { &vnop_blktooff_desc, (vnop_t *)nfs_vnop_blktooff }, /* blktooff */
324 { &vnop_offtoblk_desc, (vnop_t *)nfs_vnop_offtoblk }, /* offtoblk */
325 { &vnop_blockmap_desc, (vnop_t *)nfs_vnop_blockmap }, /* blockmap */
326 { NULL, NULL }
327};
328struct vnodeopv_desc spec_nfsv4nodeop_opv_desc =
329 { &spec_nfsv4nodeop_p, spec_nfsv4nodeop_entries };
330
331#if FIFO
332vnop_t **fifo_nfsv2nodeop_p;
333static struct vnodeopv_entry_desc fifo_nfsv2nodeop_entries[] = {
334 { &vnop_default_desc, (vnop_t *)vn_default_error },
335 { &vnop_lookup_desc, (vnop_t *)fifo_lookup }, /* lookup */
336 { &vnop_create_desc, (vnop_t *)fifo_create }, /* create */
337 { &vnop_mknod_desc, (vnop_t *)fifo_mknod }, /* mknod */
338 { &vnop_open_desc, (vnop_t *)fifo_open }, /* open */
339 { &vnop_close_desc, (vnop_t *)nfsfifo_vnop_close }, /* close */
340 { &vnop_getattr_desc, (vnop_t *)nfs3_vnop_getattr }, /* getattr */
341 { &vnop_setattr_desc, (vnop_t *)nfs_vnop_setattr }, /* setattr */
342 { &vnop_read_desc, (vnop_t *)nfsfifo_vnop_read }, /* read */
343 { &vnop_write_desc, (vnop_t *)nfsfifo_vnop_write }, /* write */
344 { &vnop_ioctl_desc, (vnop_t *)fifo_ioctl }, /* ioctl */
345 { &vnop_select_desc, (vnop_t *)fifo_select }, /* select */
346 { &vnop_revoke_desc, (vnop_t *)fifo_revoke }, /* revoke */
347 { &vnop_mmap_desc, (vnop_t *)fifo_mmap }, /* mmap */
348 { &vnop_fsync_desc, (vnop_t *)nfs_vnop_fsync }, /* fsync */
349 { &vnop_remove_desc, (vnop_t *)fifo_remove }, /* remove */
350 { &vnop_link_desc, (vnop_t *)fifo_link }, /* link */
351 { &vnop_rename_desc, (vnop_t *)fifo_rename }, /* rename */
352 { &vnop_mkdir_desc, (vnop_t *)fifo_mkdir }, /* mkdir */
353 { &vnop_rmdir_desc, (vnop_t *)fifo_rmdir }, /* rmdir */
354 { &vnop_symlink_desc, (vnop_t *)fifo_symlink }, /* symlink */
355 { &vnop_readdir_desc, (vnop_t *)fifo_readdir }, /* readdir */
356 { &vnop_readlink_desc, (vnop_t *)fifo_readlink }, /* readlink */
357 { &vnop_inactive_desc, (vnop_t *)nfs_vnop_inactive }, /* inactive */
358 { &vnop_reclaim_desc, (vnop_t *)nfs_vnop_reclaim }, /* reclaim */
359 { &vnop_strategy_desc, (vnop_t *)fifo_strategy }, /* strategy */
360 { &vnop_pathconf_desc, (vnop_t *)fifo_pathconf }, /* pathconf */
361 { &vnop_advlock_desc, (vnop_t *)fifo_advlock }, /* advlock */
362 { &vnop_bwrite_desc, (vnop_t *)vn_bwrite }, /* bwrite */
363 { &vnop_pagein_desc, (vnop_t *)nfs_vnop_pagein }, /* Pagein */
364 { &vnop_pageout_desc, (vnop_t *)nfs_vnop_pageout }, /* Pageout */
365 { &vnop_blktooff_desc, (vnop_t *)nfs_vnop_blktooff }, /* blktooff */
366 { &vnop_offtoblk_desc, (vnop_t *)nfs_vnop_offtoblk }, /* offtoblk */
367 { &vnop_blockmap_desc, (vnop_t *)nfs_vnop_blockmap }, /* blockmap */
368 { NULL, NULL }
369};
370struct vnodeopv_desc fifo_nfsv2nodeop_opv_desc =
371 { &fifo_nfsv2nodeop_p, fifo_nfsv2nodeop_entries };
372
373vnop_t **fifo_nfsv4nodeop_p;
374static struct vnodeopv_entry_desc fifo_nfsv4nodeop_entries[] = {
375 { &vnop_default_desc, (vnop_t *)vn_default_error },
376 { &vnop_lookup_desc, (vnop_t *)fifo_lookup }, /* lookup */
377 { &vnop_create_desc, (vnop_t *)fifo_create }, /* create */
378 { &vnop_mknod_desc, (vnop_t *)fifo_mknod }, /* mknod */
379 { &vnop_open_desc, (vnop_t *)fifo_open }, /* open */
380 { &vnop_close_desc, (vnop_t *)nfsfifo_vnop_close }, /* close */
381 { &vnop_getattr_desc, (vnop_t *)nfs4_vnop_getattr }, /* getattr */
382 { &vnop_setattr_desc, (vnop_t *)nfs_vnop_setattr }, /* setattr */
383 { &vnop_read_desc, (vnop_t *)nfsfifo_vnop_read }, /* read */
384 { &vnop_write_desc, (vnop_t *)nfsfifo_vnop_write }, /* write */
385 { &vnop_ioctl_desc, (vnop_t *)fifo_ioctl }, /* ioctl */
386 { &vnop_select_desc, (vnop_t *)fifo_select }, /* select */
387 { &vnop_revoke_desc, (vnop_t *)fifo_revoke }, /* revoke */
388 { &vnop_mmap_desc, (vnop_t *)fifo_mmap }, /* mmap */
389 { &vnop_fsync_desc, (vnop_t *)nfs_vnop_fsync }, /* fsync */
390 { &vnop_remove_desc, (vnop_t *)fifo_remove }, /* remove */
391 { &vnop_link_desc, (vnop_t *)fifo_link }, /* link */
392 { &vnop_rename_desc, (vnop_t *)fifo_rename }, /* rename */
393 { &vnop_mkdir_desc, (vnop_t *)fifo_mkdir }, /* mkdir */
394 { &vnop_rmdir_desc, (vnop_t *)fifo_rmdir }, /* rmdir */
395 { &vnop_symlink_desc, (vnop_t *)fifo_symlink }, /* symlink */
396 { &vnop_readdir_desc, (vnop_t *)fifo_readdir }, /* readdir */
397 { &vnop_readlink_desc, (vnop_t *)fifo_readlink }, /* readlink */
398 { &vnop_inactive_desc, (vnop_t *)nfs_vnop_inactive }, /* inactive */
399 { &vnop_reclaim_desc, (vnop_t *)nfs_vnop_reclaim }, /* reclaim */
400 { &vnop_strategy_desc, (vnop_t *)fifo_strategy }, /* strategy */
401 { &vnop_pathconf_desc, (vnop_t *)fifo_pathconf }, /* pathconf */
402 { &vnop_advlock_desc, (vnop_t *)fifo_advlock }, /* advlock */
403 { &vnop_bwrite_desc, (vnop_t *)vn_bwrite }, /* bwrite */
404 { &vnop_pagein_desc, (vnop_t *)nfs_vnop_pagein }, /* Pagein */
405 { &vnop_pageout_desc, (vnop_t *)nfs_vnop_pageout }, /* Pageout */
406 { &vnop_blktooff_desc, (vnop_t *)nfs_vnop_blktooff }, /* blktooff */
407 { &vnop_offtoblk_desc, (vnop_t *)nfs_vnop_offtoblk }, /* offtoblk */
408 { &vnop_blockmap_desc, (vnop_t *)nfs_vnop_blockmap }, /* blockmap */
409 { NULL, NULL }
410};
411struct vnodeopv_desc fifo_nfsv4nodeop_opv_desc =
412 { &fifo_nfsv4nodeop_p, fifo_nfsv4nodeop_entries };
413#endif /* FIFO */
414
415
416int nfs_sillyrename(nfsnode_t,nfsnode_t,struct componentname *,vfs_context_t);
417
418/*
419 * Find the slot in the access cache for this UID.
420 * If adding and no existing slot is found, reuse slots in FIFO order.
421 * The index of the next slot to use is kept in the last entry of the n_mode array.
422 */
423int
424nfs_node_mode_slot(nfsnode_t np, uid_t uid, int add)
425{
426 int slot;
427
428 for (slot=0; slot < NFS_ACCESS_CACHE_SIZE; slot++)
429 if (np->n_modeuid[slot] == uid)
430 break;
431 if (slot == NFS_ACCESS_CACHE_SIZE) {
432 if (!add)
433 return (-1);
434 slot = np->n_mode[NFS_ACCESS_CACHE_SIZE];
435 np->n_mode[NFS_ACCESS_CACHE_SIZE] = (slot + 1) % NFS_ACCESS_CACHE_SIZE;
436 }
437 return (slot);
438}
439
440int
441nfs3_access_rpc(nfsnode_t np, u_int32_t *mode, vfs_context_t ctx)
442{
443 int error = 0, lockerror = ENOENT, status, slot;
444 uint32_t access = 0;
445 u_int64_t xid;
446 struct nfsm_chain nmreq, nmrep;
447 struct timeval now;
448 uid_t uid;
449
450 nfsm_chain_null(&nmreq);
451 nfsm_chain_null(&nmrep);
452
453 nfsm_chain_build_alloc_init(error, &nmreq, NFSX_FH(NFS_VER3) + NFSX_UNSIGNED);
454 nfsm_chain_add_fh(error, &nmreq, NFS_VER3, np->n_fhp, np->n_fhsize);
455 nfsm_chain_add_32(error, &nmreq, *mode);
456 nfsm_chain_build_done(error, &nmreq);
457 nfsmout_if(error);
458 error = nfs_request(np, NULL, &nmreq, NFSPROC_ACCESS, ctx,
459 &nmrep, &xid, &status);
460 if ((lockerror = nfs_node_lock(np)))
461 error = lockerror;
462 nfsm_chain_postop_attr_update(error, &nmrep, np, &xid);
463 if (!error)
464 error = status;
465 nfsm_chain_get_32(error, &nmrep, access);
466 nfsmout_if(error);
467
468 uid = kauth_cred_getuid(vfs_context_ucred(ctx));
469 slot = nfs_node_mode_slot(np, uid, 1);
470 np->n_modeuid[slot] = uid;
471 microuptime(&now);
472 np->n_modestamp[slot] = now.tv_sec;
473 np->n_mode[slot] = access;
474
475 /*
476 * If we asked for DELETE but didn't get it, the server
477 * may simply not support returning that bit (possible
478 * on UNIX systems). So, we'll assume that it is OK,
479 * and just let any subsequent delete action fail if it
480 * really isn't deletable.
481 */
482 if ((*mode & NFS_ACCESS_DELETE) &&
483 !(np->n_mode[slot] & NFS_ACCESS_DELETE))
484 np->n_mode[slot] |= NFS_ACCESS_DELETE;
485 /* pass back the mode returned with this request */
486 *mode = np->n_mode[slot];
487nfsmout:
488 if (!lockerror)
489 nfs_node_unlock(np);
490 nfsm_chain_cleanup(&nmreq);
491 nfsm_chain_cleanup(&nmrep);
492 return (error);
493}
494
495/*
496 * NFS access vnode op.
497 * For NFS version 2, just return ok. File accesses may fail later.
498 * For NFS version 3+, use the access RPC to check accessibility. If file modes
499 * are changed on the server, accesses might still fail later.
500 */
501int
502nfs_vnop_access(
503 struct vnop_access_args /* {
504 struct vnodeop_desc *a_desc;
505 vnode_t a_vp;
506 int a_action;
507 vfs_context_t a_context;
508 } */ *ap)
509{
510 vfs_context_t ctx = ap->a_context;
511 vnode_t vp = ap->a_vp;
512 int error = 0, slot, dorpc;
513 u_int32_t mode, wmode;
514 nfsnode_t np = VTONFS(vp);
515 struct nfsmount *nmp;
516 int nfsvers;
517 struct timeval now;
518 uid_t uid;
519
520 nmp = VTONMP(vp);
521 if (!nmp)
522 return (ENXIO);
523 nfsvers = nmp->nm_vers;
524
525 if (nfsvers == NFS_VER2) {
526 if ((ap->a_action & KAUTH_VNODE_WRITE_RIGHTS) &&
527 vfs_isrdonly(vnode_mount(vp)))
528 return (EROFS);
529 return (0);
530 }
531
532 /*
533 * For NFS v3, do an access rpc, otherwise you are stuck emulating
534 * ufs_access() locally using the vattr. This may not be correct,
535 * since the server may apply other access criteria such as
536 * client uid-->server uid mapping that we do not know about, but
537 * this is better than just returning anything that is lying about
538 * in the cache.
539 */
540
541 /*
542 * Convert KAUTH primitives to NFS access rights.
543 */
544 mode = 0;
545 if (vnode_isdir(vp)) {
546 /* directory */
547 if (ap->a_action &
548 (KAUTH_VNODE_LIST_DIRECTORY |
549 KAUTH_VNODE_READ_EXTATTRIBUTES))
550 mode |= NFS_ACCESS_READ;
551 if (ap->a_action & KAUTH_VNODE_SEARCH)
552 mode |= NFS_ACCESS_LOOKUP;
553 if (ap->a_action &
554 (KAUTH_VNODE_ADD_FILE |
555 KAUTH_VNODE_ADD_SUBDIRECTORY))
556 mode |= NFS_ACCESS_MODIFY | NFS_ACCESS_EXTEND;
557 if (ap->a_action & KAUTH_VNODE_DELETE_CHILD)
558 mode |= NFS_ACCESS_MODIFY;
559 } else {
560 /* file */
561 if (ap->a_action &
562 (KAUTH_VNODE_READ_DATA |
563 KAUTH_VNODE_READ_EXTATTRIBUTES))
564 mode |= NFS_ACCESS_READ;
565 if (ap->a_action & KAUTH_VNODE_WRITE_DATA)
566 mode |= NFS_ACCESS_MODIFY | NFS_ACCESS_EXTEND;
567 if (ap->a_action & KAUTH_VNODE_APPEND_DATA)
568 mode |= NFS_ACCESS_EXTEND;
569 if (ap->a_action & KAUTH_VNODE_EXECUTE)
570 mode |= NFS_ACCESS_EXECUTE;
571 }
572 /* common */
573 if (ap->a_action & KAUTH_VNODE_DELETE)
574 mode |= NFS_ACCESS_DELETE;
575 if (ap->a_action &
576 (KAUTH_VNODE_WRITE_ATTRIBUTES |
577 KAUTH_VNODE_WRITE_EXTATTRIBUTES |
578 KAUTH_VNODE_WRITE_SECURITY))
579 mode |= NFS_ACCESS_MODIFY;
580 /* XXX this is pretty dubious */
581 if (ap->a_action & KAUTH_VNODE_CHANGE_OWNER)
582 mode |= NFS_ACCESS_MODIFY;
583
584 /* if caching, always ask for every right */
585 if (nfs_access_cache_timeout > 0) {
586 wmode = NFS_ACCESS_READ | NFS_ACCESS_MODIFY |
587 NFS_ACCESS_EXTEND | NFS_ACCESS_EXECUTE |
588 NFS_ACCESS_DELETE | NFS_ACCESS_LOOKUP;
589 } else {
590 wmode = mode;
591 }
592
593 if ((error = nfs_node_lock(np)))
594 return (error);
595
596 /*
597 * Does our cached result allow us to give a definite yes to
598 * this request?
599 */
600 uid = kauth_cred_getuid(vfs_context_ucred(ctx));
601 slot = nfs_node_mode_slot(np, uid, 0);
602 dorpc = 1;
603 if (mode == 0) {
604 /* not asking for any rights understood by NFS, so don't bother doing an RPC */
605 /* OSAddAtomic(1, &nfsstats.accesscache_hits); */
606 dorpc = 0;
607 wmode = 0;
608 } else if (NMODEVALID(np, slot)) {
609 microuptime(&now);
610 if ((now.tv_sec < (np->n_modestamp[slot] + nfs_access_cache_timeout)) &&
611 ((np->n_mode[slot] & mode) == mode)) {
612 /* OSAddAtomic(1, &nfsstats.accesscache_hits); */
613 dorpc = 0;
614 wmode = np->n_mode[slot];
615 }
616 }
617 nfs_node_unlock(np);
618 if (dorpc) {
619 /* Either a no, or a don't know. Go to the wire. */
620 /* OSAddAtomic(1, &nfsstats.accesscache_misses); */
621 error = nmp->nm_funcs->nf_access_rpc(np, &wmode, ctx);
622 }
623 if (!error && ((wmode & mode) != mode))
624 error = EACCES;
625
626 return (error);
627}
628
629/*
630 * NFS open vnode op
631 */
632int
633nfs3_vnop_open(
634 struct vnop_open_args /* {
635 struct vnodeop_desc *a_desc;
636 vnode_t a_vp;
637 int a_mode;
638 vfs_context_t a_context;
639 } */ *ap)
640{
641 vfs_context_t ctx = ap->a_context;
642 vnode_t vp = ap->a_vp;
643 nfsnode_t np = VTONFS(vp);
644 struct nfsmount *nmp;
645 struct nfs_vattr nvattr;
646 enum vtype vtype;
647 int error;
648
649 nmp = VTONMP(vp);
650 if (!nmp)
651 return (ENXIO);
652
653 vtype = vnode_vtype(vp);
654 if ((vtype != VREG) && (vtype != VDIR) && (vtype != VLNK))
655 return (EACCES);
656 if (ISSET(np->n_flag, NUPDATESIZE))
657 nfs_data_update_size(np, 0);
658 if ((error = nfs_node_lock(np)))
659 return (error);
660 if (np->n_flag & NNEEDINVALIDATE) {
661 np->n_flag &= ~NNEEDINVALIDATE;
662 if (vtype == VDIR)
663 nfs_invaldir(np);
664 nfs_node_unlock(np);
665 nfs_vinvalbuf(vp, V_SAVE|V_IGNORE_WRITEERR, ctx, 1);
666 if ((error = nfs_node_lock(np)))
667 return (error);
668 }
669 if (vnode_vtype(NFSTOV(np)) == VREG)
670 np->n_lastrahead = -1;
671 if (np->n_flag & NMODIFIED) {
672 if (vtype == VDIR)
673 nfs_invaldir(np);
674 nfs_node_unlock(np);
675 if ((error = nfs_vinvalbuf(vp, V_SAVE|V_IGNORE_WRITEERR, ctx, 1)))
676 return (error);
677 } else {
678 nfs_node_unlock(np);
679 }
680 /* nfs_getattr() will check changed and purge caches */
681 return (nfs_getattr(np, &nvattr, ctx, NGA_UNCACHED));
682}
683
684/*
685 * NFS close vnode op
686 * What an NFS client should do upon close after writing is a debatable issue.
687 * Most NFS clients push delayed writes to the server upon close, basically for
688 * two reasons:
689 * 1 - So that any write errors may be reported back to the client process
690 * doing the close system call. By far the two most likely errors are
691 * NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure.
692 * 2 - To put a worst case upper bound on cache inconsistency between
693 * multiple clients for the file.
694 * There is also a consistency problem for Version 2 of the protocol w.r.t.
695 * not being able to tell if other clients are writing a file concurrently,
696 * since there is no way of knowing if the changed modify time in the reply
697 * is only due to the write for this client.
698 * (NFS Version 3 provides weak cache consistency data in the reply that
699 * should be sufficient to detect and handle this case.)
700 *
701 * The current code does the following:
702 * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers
703 * for NFS Version 3 - flush dirty buffers to the server but don't invalidate
704 * them.
705 */
706int
707nfs3_vnop_close(
708 struct vnop_close_args /* {
709 struct vnodeop_desc *a_desc;
710 vnode_t a_vp;
711 int a_fflag;
712 vfs_context_t a_context;
713 } */ *ap)
714{
715 vfs_context_t ctx = ap->a_context;
716 vnode_t vp = ap->a_vp;
717 nfsnode_t np = VTONFS(vp);
718 struct nfsmount *nmp;
719 int nfsvers;
720 int error = 0;
721
722 if (vnode_vtype(vp) != VREG)
723 return (0);
724 nmp = VTONMP(vp);
725 if (!nmp)
726 return (ENXIO);
727 nfsvers = nmp->nm_vers;
728
729 if (ISSET(np->n_flag, NUPDATESIZE))
730 nfs_data_update_size(np, 0);
731 if ((error = nfs_node_lock(np)))
732 return (error);
733 if (np->n_flag & NNEEDINVALIDATE) {
734 np->n_flag &= ~NNEEDINVALIDATE;
735 nfs_node_unlock(np);
736 nfs_vinvalbuf(vp, V_SAVE|V_IGNORE_WRITEERR, ctx, 1);
737 if ((error = nfs_node_lock(np)))
738 return (error);
739 }
740 if (np->n_flag & NMODIFIED) {
741 nfs_node_unlock(np);
742 if (nfsvers != NFS_VER2)
743 error = nfs_flush(np, MNT_WAIT, vfs_context_thread(ctx), 0);
744 else
745 error = nfs_vinvalbuf(vp, V_SAVE, ctx, 1);
746 if (error)
747 return (error);
748 nfs_node_lock_force(np);
749 NATTRINVALIDATE(np);
750 }
751 if (np->n_flag & NWRITEERR) {
752 np->n_flag &= ~NWRITEERR;
753 error = np->n_error;
754 }
755 nfs_node_unlock(np);
756 return (error);
757}
758
759
760int
761nfs3_getattr_rpc(
762 nfsnode_t np,
763 mount_t mp,
764 u_char *fhp,
765 size_t fhsize,
766 vfs_context_t ctx,
767 struct nfs_vattr *nvap,
768 u_int64_t *xidp)
769{
770 struct nfsmount *nmp = mp ? VFSTONFS(mp) : NFSTONMP(np);
771 int error = 0, status, nfsvers;
772 struct nfsm_chain nmreq, nmrep;
773
774 if (!nmp)
775 return (ENXIO);
776 nfsvers = nmp->nm_vers;
777
778 nfsm_chain_null(&nmreq);
779 nfsm_chain_null(&nmrep);
780
781 nfsm_chain_build_alloc_init(error, &nmreq, NFSX_FH(nfsvers));
782 if (nfsvers != NFS_VER2)
783 nfsm_chain_add_32(error, &nmreq, fhsize);
784 nfsm_chain_add_opaque(error, &nmreq, fhp, fhsize);
785 nfsm_chain_build_done(error, &nmreq);
786 nfsmout_if(error);
787 error = nfs_request(np, mp, &nmreq, NFSPROC_GETATTR, ctx,
788 &nmrep, xidp, &status);
789 if (!error)
790 error = status;
791 nfsmout_if(error);
792 error = nfs_parsefattr(&nmrep, nfsvers, nvap);
793nfsmout:
794 nfsm_chain_cleanup(&nmreq);
795 nfsm_chain_cleanup(&nmrep);
796 return (error);
797}
798
799
/*
 * Get the attributes of an NFS node into *nvap.
 *
 * When "uncached" is zero the attribute cache is tried first
 * (nfs_getattrcache); a result other than ENOENT ends the call.  If the
 * cache has nothing (ENOENT) and another getattr is already in progress
 * on the node (NGETATTRINPROG), the caller sleeps and then retries the
 * cache.  Otherwise this thread marks the getattr as in progress and goes
 * to the server:
 *   - for non-v2 mounts with access caching and attribute caching enabled,
 *     an ACCESS RPC is tried first and the cache is consulted again;
 *   - failing that, a GETATTR RPC is sent and its result loaded into the
 *     attribute cache.  If the reply was dropped as out-of-order (xid
 *     zeroed) the RPC is retried a bounded number of times.
 *
 * On success the new attributes are compared with what the node had
 * recorded: a changed directory has its name cache purged, and any
 * changed node has its buffers invalidated via nfs_vinvalbuf().
 *
 * Returns 0 on success, ENXIO when the mount is gone, or the error
 * produced by one of the steps above.
 */
800int
801nfs_getattr(nfsnode_t np, struct nfs_vattr *nvap, vfs_context_t ctx, int uncached)
802{
803 struct nfsmount *nmp;
804 int error = 0, nfsvers, inprogset = 0, wanted = 0, avoidfloods;
/* timeout handed to msleep() while waiting on another thread's getattr */
805 struct timespec ts = { 2, 0 };
806 u_int64_t xid;
807
808 FSDBG_TOP(513, np->n_size, np, np->n_vattr.nva_size, np->n_flag);
809
810 if (!(nmp = NFSTONMP(np)))
811 return (ENXIO);
812 nfsvers = nmp->nm_vers;
813
814 /* Update local times for special files. */
815 if (np->n_flag & (NACC | NUPD)) {
816 nfs_node_lock_force(np);
817 np->n_flag |= NCHG;
818 nfs_node_unlock(np);
819 }
820 /* Update size, if necessary */
821 if (ISSET(np->n_flag, NUPDATESIZE))
822 nfs_data_update_size(np, 0);
823
824 error = nfs_node_lock(np);
825 nfsmout_if(error);
826 if (!uncached) {
/*
 * Cached request: loop until the cache answers, or until no other
 * getattr is in progress and we can issue our own.
 */
827 while (1) {
828 error = nfs_getattrcache(np, nvap);
/* anything but ENOENT (success or a real error) ends the call */
829 if (!error || (error != ENOENT)) {
830 nfs_node_unlock(np);
831 goto nfsmout;
832 }
833 if (!ISSET(np->n_flag, NGETATTRINPROG))
834 break;
/* someone else is fetching: ask to be woken and wait (with timeout) */
835 SET(np->n_flag, NGETATTRWANT);
836 msleep(np, &np->n_lock, PZERO-1, "nfsgetattrwant", &ts);
837 if ((error = nfs_sigintr(NFSTONMP(np), NULL, vfs_context_thread(ctx), 0))) {
838 nfs_node_unlock(np);
839 goto nfsmout;
840 }
841 }
/* we now own the in-progress getattr; inprogset makes the exit path clear it */
842 SET(np->n_flag, NGETATTRINPROG);
843 inprogset = 1;
844 } else if (!ISSET(np->n_flag, NGETATTRINPROG)) {
/* uncached request: claim the in-progress flag only if nobody holds it */
845 SET(np->n_flag, NGETATTRINPROG);
846 inprogset = 1;
847 }
848 nfs_node_unlock(np);
849
/* look the mount up again before using it for the RPCs below */
850 nmp = NFSTONMP(np);
851 if (!nmp) {
852 error = ENXIO;
853 goto nfsmout;
854 }
855
856 /*
857 * Try to get both the attributes and access info by making an
858 * ACCESS call and seeing if it returns updated attributes.
859 * But don't bother if we aren't caching access info or if the
860 * attributes returned wouldn't be cached.
861 */
862 if ((nfsvers != NFS_VER2) && (nfs_access_cache_timeout > 0)) {
863 if (nfs_attrcachetimeout(np) > 0) {
864 /* OSAddAtomic(1, &nfsstats.accesscache_misses); */
865 u_int32_t mode = NFS_ACCESS_ALL;
866 error = nmp->nm_funcs->nf_access_rpc(np, &mode, ctx);
867 if (error)
868 goto nfsmout;
869 nfs_node_lock_force(np);
870 error = nfs_getattrcache(np, nvap);
871 nfs_node_unlock(np);
872 if (!error || (error != ENOENT))
873 goto nfsmout;
874 /* Well, that didn't work... just do a getattr... */
875 error = 0;
876 }
877 }
878
/* counts retries of the GETATTR RPC after out-of-order replies */
879 avoidfloods = 0;
880tryagain:
881 error = nmp->nm_funcs->nf_getattr_rpc(np, NULL, np->n_fhp, np->n_fhsize, ctx, nvap, &xid);
882 if (!error) {
883 nfs_node_lock_force(np);
884 error = nfs_loadattrcache(np, nvap, &xid, 0);
885 nfs_node_unlock(np);
886 }
887 nfsmout_if(error);
888 if (!xid) { /* out-of-order rpc - attributes were dropped */
889 FSDBG(513, -1, np, np->n_xid >> 32, np->n_xid);
890 if (avoidfloods++ < 20)
891 goto tryagain;
892 /* avoidfloods>1 is bizarre. at 20 pull the plug */
893 /* just return the last attributes we got */
894 }
895nfsmout:
/*
 * Common exit.  If this thread set NGETATTRINPROG, clear it together
 * with NGETATTRWANT and remember whether a waiter must be woken.
 */
896 nfs_node_lock_force(np);
897 if (inprogset) {
898 wanted = ISSET(np->n_flag, NGETATTRWANT);
899 CLR(np->n_flag, (NGETATTRINPROG | NGETATTRWANT));
900 }
901 if (!error) {
902 /* check if the node changed on us */
903 vnode_t vp = NFSTOV(np);
904 enum vtype vtype = vnode_vtype(vp);
905 if ((vtype == VDIR) && NFS_CHANGED_NC(nfsvers, np, nvap)) {
906 FSDBG(513, -1, np, 0, np);
907 np->n_flag &= ~NNEGNCENTRIES;
908 cache_purge(vp);
909 np->n_ncgen++;
910 NFS_CHANGED_UPDATE_NC(nfsvers, np, nvap);
911 }
912 if (NFS_CHANGED(nfsvers, np, nvap)) {
913 FSDBG(513, -1, np, -1, np);
914 if (vtype == VDIR)
915 nfs_invaldir(np);
/* node lock is dropped and waiters woken before the buffers are invalidated */
916 nfs_node_unlock(np);
917 if (wanted)
918 wakeup(np);
919 error = nfs_vinvalbuf(vp, V_SAVE, ctx, 1);
920 FSDBG(513, -1, np, -2, error);
/* record the new change info only if the invalidation succeeded */
921 if (!error) {
922 nfs_node_lock_force(np);
923 NFS_CHANGED_UPDATE(nfsvers, np, nvap);
924 nfs_node_unlock(np);
925 }
926 } else {
927 nfs_node_unlock(np);
928 if (wanted)
929 wakeup(np);
930 }
931 } else {
932 nfs_node_unlock(np);
933 if (wanted)
934 wakeup(np);
935 }
936 FSDBG_BOT(513, np->n_size, error, np->n_vattr.nva_size, np->n_flag);
937 return (error);
938}
939
940/*
941 * NFS getattr call from vfs.
942 */
943int
944nfs3_vnop_getattr(
945 struct vnop_getattr_args /* {
946 struct vnodeop_desc *a_desc;
947 vnode_t a_vp;
948 struct vnode_attr *a_vap;
949 vfs_context_t a_context;
950 } */ *ap)
951{
952 int error;
953 struct nfs_vattr nva;
954 struct vnode_attr *vap = ap->a_vap;
955 dev_t rdev;
956
957 error = nfs_getattr(VTONFS(ap->a_vp), &nva, ap->a_context, NGA_CACHED);
958 if (error)
959 return (error);
960
961 /* copy nva to *a_vap */
962 VATTR_RETURN(vap, va_type, nva.nva_type);
963 VATTR_RETURN(vap, va_mode, nva.nva_mode);
964 rdev = makedev(nva.nva_rawdev.specdata1, nva.nva_rawdev.specdata2);
965 VATTR_RETURN(vap, va_rdev, rdev);
966 VATTR_RETURN(vap, va_uid, nva.nva_uid);
967 VATTR_RETURN(vap, va_gid, nva.nva_gid);
968 VATTR_RETURN(vap, va_nlink, nva.nva_nlink);
969 VATTR_RETURN(vap, va_fileid, nva.nva_fileid);
970 VATTR_RETURN(vap, va_data_size, nva.nva_size);
971 VATTR_RETURN(vap, va_data_alloc, nva.nva_bytes);
972 VATTR_RETURN(vap, va_iosize, nfs_iosize);
973 vap->va_access_time.tv_sec = nva.nva_timesec[NFSTIME_ACCESS];
974 vap->va_access_time.tv_nsec = nva.nva_timensec[NFSTIME_ACCESS];
975 VATTR_SET_SUPPORTED(vap, va_access_time);
976 vap->va_modify_time.tv_sec = nva.nva_timesec[NFSTIME_MODIFY];
977 vap->va_modify_time.tv_nsec = nva.nva_timensec[NFSTIME_MODIFY];
978 VATTR_SET_SUPPORTED(vap, va_modify_time);
979 vap->va_change_time.tv_sec = nva.nva_timesec[NFSTIME_CHANGE];
980 vap->va_change_time.tv_nsec = nva.nva_timensec[NFSTIME_CHANGE];
981 VATTR_SET_SUPPORTED(vap, va_change_time);
982
983 // VATTR_RETURN(vap, va_encoding, 0xffff /* kTextEncodingUnknown */);
984 return (error);
985}
986
987/*
988 * NFS setattr call.
989 */
/*
 * Set attributes on an NFS vnode.
 *
 * Outline of the code below:
 *  - ENXIO if the vnode has no mount; EROFS on a read-only file system.
 *  - Size change (va_data_size active):
 *      directories fail with EISDIR; for VCHR/VBLK/VSOCK/VFIFO the size
 *      request is dropped (returning 0 when nothing else was requested);
 *      for everything else cached buffers are flushed/invalidated, on
 *      NFSv4+ the file is opened for write if it is not already, buffers
 *      beyond the new EOF are discarded (dirty data before the new EOF in
 *      the boundary block is written out first), and the local size is
 *      updated ahead of the RPC while holding the data lock exclusively.
 *  - Time change without a size change: dirty data of a modified regular
 *    file is flushed first.
 *  - Mode/uid/gid change: the cached mode is invalidated and an
 *    nfs_dulookup is started on the parent directory (when the parent
 *    and the name are available) around the RPC.
 *  - The attributes are sent with nmp->nm_funcs->nf_setattr_rpc().
 *  - If a size change failed, the original size is restored locally and
 *    sent to the server again; the data lock and any NFSv4 open taken
 *    for the setattr are released.
 *
 * Returns 0 on success or an errno value.
 */
990int
991nfs_vnop_setattr(
992 struct vnop_setattr_args /* {
993 struct vnodeop_desc *a_desc;
994 vnode_t a_vp;
995 struct vnode_attr *a_vap;
996 vfs_context_t a_context;
997 } */ *ap)
998{
999 vfs_context_t ctx = ap->a_context;
1000 vnode_t vp = ap->a_vp;
1001 nfsnode_t np = VTONFS(vp);
1002 struct nfsmount *nmp;
1003 struct vnode_attr *vap = ap->a_vap;
1004 int error = 0;
1005 int biosize, nfsvers;
1006 u_quad_t origsize;
1007 struct nfs_dulookup dul;
1008 nfsnode_t dnp = NULL;
1009 vnode_t dvp = NULL;
1010 const char *vname = NULL;
1011 struct nfs_open_owner *noop = NULL;
1012 struct nfs_open_file *nofp = NULL;
1013 struct nfs_vattr nvattr;
1014
1015 nmp = VTONMP(vp);
1016 if (!nmp)
1017 return (ENXIO);
1018 nfsvers = nmp->nm_vers;
1019 biosize = nmp->nm_biosize;
1020
1021 /* Disallow write attempts if the filesystem is mounted read-only. */
1022 if (vnode_vfsisrdonly(vp))
1023 return (EROFS);
1024
/* remember the size so it can be restored if the setattr fails */
1025 origsize = np->n_size;
1026 if (VATTR_IS_ACTIVE(vap, va_data_size)) {
1027 switch (vnode_vtype(vp)) {
1028 case VDIR:
1029 return (EISDIR);
1030 case VCHR:
1031 case VBLK:
1032 case VSOCK:
1033 case VFIFO:
/* size is meaningless here: done if nothing else was asked for, else drop the size request */
1034 if (!VATTR_IS_ACTIVE(vap, va_modify_time) &&
1035 !VATTR_IS_ACTIVE(vap, va_access_time) &&
1036 !VATTR_IS_ACTIVE(vap, va_mode) &&
1037 !VATTR_IS_ACTIVE(vap, va_uid) &&
1038 !VATTR_IS_ACTIVE(vap, va_gid)) {
1039 return (0);
1040 }
1041 VATTR_CLEAR_ACTIVE(vap, va_data_size);
1042 break;
1043 default:
1044 /*
1045 * Disallow write attempts if the filesystem is
1046 * mounted read-only.
1047 */
1048 if (vnode_vfsisrdonly(vp))
1049 return (EROFS);
1050 FSDBG_TOP(512, np->n_size, vap->va_data_size,
1051 np->n_vattr.nva_size, np->n_flag);
1052 /* clear NNEEDINVALIDATE, if set */
1053 if ((error = nfs_node_lock(np)))
1054 return (error);
1055 if (np->n_flag & NNEEDINVALIDATE)
1056 np->n_flag &= ~NNEEDINVALIDATE;
1057 nfs_node_unlock(np);
1058 /* flush everything */
/* (V_SAVE is passed only when the new size is non-zero) */
1059 error = nfs_vinvalbuf(vp, (vap->va_data_size ? V_SAVE : 0) , ctx, 1);
1060 if (error) {
1061 printf("nfs_setattr: nfs_vinvalbuf %d\n", error);
1062 FSDBG_BOT(512, np->n_size, vap->va_data_size, np->n_vattr.nva_size, -1);
1063 return (error);
1064 }
1065 if (nfsvers >= NFS_VER4) {
1066 /* setting file size requires having the file open for write access */
1067 noop = nfs_open_owner_find(nmp, vfs_context_ucred(ctx), 1);
1068 if (!noop)
1069 return (ENOMEM);
1070retryopen:
1071 error = nfs_open_file_find(np, noop, &nofp, 0, 0, 1);
1072 if (!error && (nofp->nof_flags & NFS_OPEN_FILE_LOST))
1073 error = EIO;
/* open file is flagged for reopen: reopen it and look it up again */
1074 if (!error && (nofp->nof_flags & NFS_OPEN_FILE_REOPEN)) {
1075 nfs4_reopen(nofp, vfs_context_thread(ctx));
1076 nofp = NULL;
1077 goto retryopen;
1078 }
1079 if (error) {
1080 nfs_open_owner_rele(noop);
1081 return (error);
1082 }
1083 if (!(nofp->nof_access & NFS_OPEN_SHARE_ACCESS_WRITE)) {
1084 /* we don't have the file open for write access, so open it */
1085 error = nfs_mount_state_in_use_start(nmp);
1086 if (!error)
1087 error = nfs_open_file_set_busy(nofp, vfs_context_thread(ctx));
/*
 * NOTE(review): if nfs_mount_state_in_use_start() succeeded but
 * nfs_open_file_set_busy() failed, this returns without a matching
 * nfs_mount_state_in_use_end() -- confirm whether that is intended.
 */
1088 if (error) {
1089 nfs_open_owner_rele(noop);
1090 return (error);
1091 }
1092 error = nfs4_open(np, nofp, NFS_OPEN_SHARE_ACCESS_WRITE, NFS_OPEN_SHARE_DENY_NONE, ctx);
/* NFS_OPEN_FILE_SETATTR marks the open as ours so it is closed again at the end */
1093 if (!error)
1094 nofp->nof_flags |= NFS_OPEN_FILE_SETATTR;
1095 if (nfs_mount_state_error_should_restart(error)) {
1096 nfs_open_file_clear_busy(nofp);
1097 nofp = NULL;
1098 }
1099 if (nfs_mount_state_in_use_end(nmp, error))
1100 goto retryopen;
1101 }
1102 }
/* held until the nfs_data_unlock() calls further down */
1103 nfs_data_lock(np, NFS_DATA_LOCK_EXCLUSIVE);
1104 if (np->n_size > vap->va_data_size) { /* shrinking? */
1105 daddr64_t obn, bn;
1106 int neweofoff, mustwrite;
1107 struct nfsbuf *bp;
1108
/* walk from the old last block (obn) down to the block holding the new EOF (bn) */
1109 obn = (np->n_size - 1) / biosize;
1110 bn = vap->va_data_size / biosize;
1111 for ( ; obn >= bn; obn--) {
1112 if (!nfs_buf_is_incore(np, obn))
1113 continue;
1114 error = nfs_buf_get(np, obn, biosize, NULL, NBLK_READ, &bp);
/*
 * NOTE(review): a failure here is skipped, but "error" keeps the
 * failing value if this was the last iteration -- confirm intended.
 */
1115 if (error)
1116 continue;
/* blocks entirely past the new EOF are simply invalidated */
1117 if (obn != bn) {
1118 FSDBG(512, bp, bp->nb_flags, 0, obn);
1119 SET(bp->nb_flags, NB_INVAL);
1120 nfs_buf_release(bp, 1);
1121 continue;
1122 }
/* boundary block: keep only dirty state that lies before the new EOF */
1123 mustwrite = 0;
1124 neweofoff = vap->va_data_size - NBOFF(bp);
1125 /* check for any dirty data before the new EOF */
1126 if ((bp->nb_dirtyend > 0) && (bp->nb_dirtyoff < neweofoff)) {
1127 /* clip dirty range to EOF */
1128 if (bp->nb_dirtyend > neweofoff) {
1129 bp->nb_dirtyend = neweofoff;
1130 if (bp->nb_dirtyoff >= bp->nb_dirtyend)
1131 bp->nb_dirtyoff = bp->nb_dirtyend = 0;
1132 }
1133 if ((bp->nb_dirtyend > 0) && (bp->nb_dirtyoff < neweofoff))
1134 mustwrite++;
1135 }
/* mask off dirty-page bits for pages at or beyond the (page-rounded) new EOF */
1136 bp->nb_dirty &= (1 << round_page_32(neweofoff)/PAGE_SIZE) - 1;
1137 if (bp->nb_dirty)
1138 mustwrite++;
1139 if (!mustwrite) {
1140 FSDBG(512, bp, bp->nb_flags, 0, obn);
1141 SET(bp->nb_flags, NB_INVAL);
1142 nfs_buf_release(bp, 1);
1143 continue;
1144 }
1145 /* gotta write out dirty data before invalidating */
1146 /* (NB_STABLE indicates that data writes should be FILESYNC) */
1147 /* (NB_NOCACHE indicates buffer should be discarded) */
1148 CLR(bp->nb_flags, (NB_DONE | NB_ERROR | NB_INVAL | NB_ASYNC | NB_READ));
1149 SET(bp->nb_flags, NB_STABLE | NB_NOCACHE);
/* give the buffer a write credential (the caller's) if it has none */
1150 if (!IS_VALID_CRED(bp->nb_wcred)) {
1151 kauth_cred_t cred = vfs_context_ucred(ctx);
1152 kauth_cred_ref(cred);
1153 bp->nb_wcred = cred;
1154 }
1155 error = nfs_buf_write(bp);
1156 // Note: bp has been released
1157 if (error) {
1158 FSDBG(512, bp, 0xd00dee, 0xbad, error);
1159 nfs_node_lock_force(np);
1160 np->n_error = error;
1161 np->n_flag |= NWRITEERR;
1162 /*
1163 * There was a write error and we need to
1164 * invalidate attrs and flush buffers in
1165 * order to sync up with the server.
1166 * (if this write was extending the file,
1167 * we may no longer know the correct size)
1168 */
1169 NATTRINVALIDATE(np);
1170 nfs_node_unlock(np);
/* the data lock is dropped around the invalidation and retaken afterwards */
1171 nfs_data_unlock(np);
1172 nfs_vinvalbuf(vp, V_SAVE|V_IGNORE_WRITEERR, ctx, 1);
1173 nfs_data_lock(np, NFS_DATA_LOCK_EXCLUSIVE);
/* the write error stays recorded on the node (NWRITEERR) but does not fail the setattr */
1174 error = 0;
1175 }
1176 }
1177 }
/* update the local view of the size before sending the RPC */
1178 if (vap->va_data_size != np->n_size)
1179 ubc_setsize(vp, (off_t)vap->va_data_size); /* XXX error? */
1180 origsize = np->n_size;
1181 np->n_size = np->n_vattr.nva_size = vap->va_data_size;
1182 nfs_node_lock_force(np);
1183 CLR(np->n_flag, NUPDATESIZE);
1184 nfs_node_unlock(np);
1185 FSDBG(512, np, np->n_size, np->n_vattr.nva_size, 0xf00d0001);
1186 }
1187 } else if (VATTR_IS_ACTIVE(vap, va_modify_time) ||
1188 VATTR_IS_ACTIVE(vap, va_access_time) ||
1189 (vap->va_vaflags & VA_UTIMES_NULL)) {
/* time change only: flush dirty data of a modified regular file first */
1190 if ((error = nfs_node_lock(np)))
1191 return (error);
1192 if ((np->n_flag & NMODIFIED) && (vnode_vtype(vp) == VREG)) {
1193 nfs_node_unlock(np);
1194 error = nfs_vinvalbuf(vp, V_SAVE, ctx, 1);
/* only an interrupted flush aborts the setattr */
1195 if (error == EINTR)
1196 return (error);
1197 } else {
1198 nfs_node_unlock(np);
1199 }
1200 }
1201 if (VATTR_IS_ACTIVE(vap, va_mode) ||
1202 VATTR_IS_ACTIVE(vap, va_uid) ||
1203 VATTR_IS_ACTIVE(vap, va_gid)) {
/*
 * NOTE(review): this early return releases the data lock but not
 * noop / the open file taken above on the NFSv4 size path -- confirm.
 */
1204 if ((error = nfs_node_lock(np))) {
1205 if (VATTR_IS_ACTIVE(vap, va_data_size))
1206 nfs_data_unlock(np);
1207 return (error);
1208 }
1209 NMODEINVALIDATE(np);
1210 nfs_node_unlock(np);
/* set up an nfs_dulookup on the parent directory, if parent and name are available */
1211 dvp = vnode_getparent(vp);
1212 vname = vnode_getname(vp);
1213 dnp = (dvp && vname) ? VTONFS(dvp) : NULL;
1214 if (dnp) {
1215 error = nfs_node_set_busy(dnp, vfs_context_thread(ctx));
/* failing to mark the parent busy just skips the dulookup */
1216 if (error) {
1217 dnp = NULL;
1218 error = 0;
1219 }
1220 }
1221 if (dnp) {
1222 nfs_dulookup_init(&dul, dnp, vname, strlen(vname), ctx);
1223 nfs_dulookup_start(&dul, dnp, ctx);
1224 }
1225 }
1226
1227retrysetattr:
/* for NFSv4+ size changes the RPC is bracketed by mount-state in-use start/end */
1228 if (VATTR_IS_ACTIVE(vap, va_data_size) && (nfsvers >= NFS_VER4))
1229 error = nfs_mount_state_in_use_start(nmp);
1230
1231 if (!error) {
1232 error = nmp->nm_funcs->nf_setattr_rpc(np, vap, ctx);
1233
1234 if (VATTR_IS_ACTIVE(vap, va_data_size) && (nfsvers >= NFS_VER4))
1235 if (nfs_mount_state_in_use_end(nmp, error))
1236 goto retrysetattr;
1237 }
1238
1239 if (VATTR_IS_ACTIVE(vap, va_mode) ||
1240 VATTR_IS_ACTIVE(vap, va_uid) ||
1241 VATTR_IS_ACTIVE(vap, va_gid)) {
/* finish the dulookup and release the parent / name references taken above */
1242 if (dnp) {
1243 nfs_dulookup_finish(&dul, dnp, ctx);
1244 nfs_node_clear_busy(dnp);
1245 }
1246 if (dvp != NULLVP)
1247 vnode_put(dvp);
1248 if (vname != NULL)
1249 vnode_putname(vname);
1250 }
1251
1252 FSDBG_BOT(512, np->n_size, vap->va_data_size, np->n_vattr.nva_size, error);
1253 if (VATTR_IS_ACTIVE(vap, va_data_size)) {
1254 if (error && (origsize != np->n_size)) {
1255 /* make every effort to resync file size w/ server... */
1256 int err; /* preserve "error" for return */
1257 np->n_size = np->n_vattr.nva_size = origsize;
1258 nfs_node_lock_force(np);
1259 CLR(np->n_flag, NUPDATESIZE);
1260 nfs_node_unlock(np);
1261 FSDBG(512, np, np->n_size, np->n_vattr.nva_size, 0xf00d0002);
1262 ubc_setsize(vp, (off_t)np->n_size); /* XXX check error */
1263 vap->va_data_size = origsize;
1264 err = nmp->nm_funcs->nf_setattr_rpc(np, vap, ctx);
1265 if (err)
1266 printf("nfs_vnop_setattr: nfs%d_setattr_rpc %d %d\n", nfsvers, error, err);
1267 }
1268 nfs_node_lock_force(np);
1269 /*
1270 * The size was just set. If the size is already marked for update, don't
1271 * trust the newsize (it may have been set while the setattr was in progress).
1272 * Clear the update flag and make sure we fetch new attributes so we are sure
1273 * we have the latest size.
1274 */
1275 if (ISSET(np->n_flag, NUPDATESIZE)) {
1276 CLR(np->n_flag, NUPDATESIZE);
1277 NATTRINVALIDATE(np);
1278 nfs_node_unlock(np);
1279 nfs_getattr(np, &nvattr, ctx, NGA_UNCACHED);
1280 } else {
1281 nfs_node_unlock(np);
1282 }
1283 nfs_data_unlock(np);
1284 if (nfsvers >= NFS_VER4) {
/* close the open that was made only for this setattr, then drop the owner reference */
1285 if (nofp->nof_flags & NFS_OPEN_FILE_SETATTR) {
1286 int err = nfs4_close(np, nofp, NFS_OPEN_SHARE_ACCESS_WRITE, NFS_OPEN_SHARE_DENY_NONE, ctx);
1287 if (err) {
1288 vname = vnode_getname(NFSTOV(np));
1289 printf("nfs_vnop_setattr: close error: %d, %s\n", err, vname);
1290 vnode_putname(vname);
1291 }
1292 nofp->nof_flags &= ~NFS_OPEN_FILE_SETATTR;
1293 nfs_open_file_clear_busy(nofp);
1294 }
1295 nfs_open_owner_rele(noop);
1296 }
1297 }
1298 return (error);
1299}
1300
1301/*
1302 * Do an NFS setattr RPC.
1303 */
/*
 * Build and send an NFSv2/v3 SETATTR RPC for node np from the active
 * attributes in *vap, then fold the reply back into the node.
 *
 * mode, uid, gid, data size, access time and modify time are marked as
 * supported.  A request to set non-zero va_flags is rejected: EINVAL if
 * other attributes are active as well, ENOTSUP otherwise.  A zero
 * va_flags request is ignored, returning 0 if it was the only attribute.
 *
 * The NFSv3 request encodes each attribute as a "set it" discriminator
 * followed by the value; the NFSv2 request encodes fixed fields with -1
 * meaning "don't change".
 *
 * After the reply is processed, the next XID is fetched; if it is not
 * the one directly following the SETATTR XID the cached attributes are
 * invalidated (see the comment in the body).
 *
 * Returns 0, ENXIO if the mount is gone, or the request/lock/status error.
 */
1304int
1305nfs3_setattr_rpc(
1306 nfsnode_t np,
1307 struct vnode_attr *vap,
1308 vfs_context_t ctx)
1309{
1310 struct nfsmount *nmp = NFSTONMP(np);
/* lockerror starts non-zero so the exit path only unlocks after a successful nfs_node_lock() */
1311 int error = 0, lockerror = ENOENT, status, wccpostattr = 0, nfsvers;
1312 u_int64_t xid, nextxid;
1313 struct nfsm_chain nmreq, nmrep;
1314
1315 if (!nmp)
1316 return (ENXIO);
1317 nfsvers = nmp->nm_vers;
1318
1319 VATTR_SET_SUPPORTED(vap, va_mode);
1320 VATTR_SET_SUPPORTED(vap, va_uid);
1321 VATTR_SET_SUPPORTED(vap, va_gid);
1322 VATTR_SET_SUPPORTED(vap, va_data_size);
1323 VATTR_SET_SUPPORTED(vap, va_access_time);
1324 VATTR_SET_SUPPORTED(vap, va_modify_time);
1325
1326 if (VATTR_IS_ACTIVE(vap, va_flags)) {
1327 if (vap->va_flags) { /* we don't support setting flags */
1328 if (vap->va_active & ~VNODE_ATTR_va_flags)
1329 return (EINVAL); /* return EINVAL if other attributes also set */
1330 else
1331 return (ENOTSUP); /* return ENOTSUP for chflags(2) */
1332 }
1333 /* no flags set, so we'll just ignore it */
1334 if (!(vap->va_active & ~VNODE_ATTR_va_flags))
1335 return (0); /* no (other) attributes to set, so nothing to do */
1336 }
1337
1338 nfsm_chain_null(&nmreq);
1339 nfsm_chain_null(&nmrep);
1340
1341 nfsm_chain_build_alloc_init(error, &nmreq,
1342 NFSX_FH(nfsvers) + NFSX_SATTR(nfsvers));
1343 nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
1344 if (nfsvers == NFS_VER3) {
/* v3: each attribute is a TRUE/FALSE "set it" flag, followed by the value when TRUE */
1345 if (VATTR_IS_ACTIVE(vap, va_mode)) {
1346 nfsm_chain_add_32(error, &nmreq, TRUE);
1347 nfsm_chain_add_32(error, &nmreq, vap->va_mode);
1348 } else {
1349 nfsm_chain_add_32(error, &nmreq, FALSE);
1350 }
1351 if (VATTR_IS_ACTIVE(vap, va_uid)) {
1352 nfsm_chain_add_32(error, &nmreq, TRUE);
1353 nfsm_chain_add_32(error, &nmreq, vap->va_uid);
1354 } else {
1355 nfsm_chain_add_32(error, &nmreq, FALSE);
1356 }
1357 if (VATTR_IS_ACTIVE(vap, va_gid)) {
1358 nfsm_chain_add_32(error, &nmreq, TRUE);
1359 nfsm_chain_add_32(error, &nmreq, vap->va_gid);
1360 } else {
1361 nfsm_chain_add_32(error, &nmreq, FALSE);
1362 }
1363 if (VATTR_IS_ACTIVE(vap, va_data_size)) {
1364 nfsm_chain_add_32(error, &nmreq, TRUE);
1365 nfsm_chain_add_64(error, &nmreq, vap->va_data_size);
1366 } else {
1367 nfsm_chain_add_32(error, &nmreq, FALSE);
1368 }
/* times: VA_UTIMES_NULL asks the server to stamp both atime and mtime */
1369 if (vap->va_vaflags & VA_UTIMES_NULL) {
1370 nfsm_chain_add_32(error, &nmreq, NFS_TIME_SET_TO_SERVER);
1371 nfsm_chain_add_32(error, &nmreq, NFS_TIME_SET_TO_SERVER);
1372 } else {
1373 if (VATTR_IS_ACTIVE(vap, va_access_time)) {
1374 nfsm_chain_add_32(error, &nmreq, NFS_TIME_SET_TO_CLIENT);
1375 nfsm_chain_add_32(error, &nmreq, vap->va_access_time.tv_sec);
1376 nfsm_chain_add_32(error, &nmreq, vap->va_access_time.tv_nsec);
1377 } else {
1378 nfsm_chain_add_32(error, &nmreq, NFS_TIME_DONT_CHANGE);
1379 }
1380 if (VATTR_IS_ACTIVE(vap, va_modify_time)) {
1381 nfsm_chain_add_32(error, &nmreq, NFS_TIME_SET_TO_CLIENT);
1382 nfsm_chain_add_32(error, &nmreq, vap->va_modify_time.tv_sec);
1383 nfsm_chain_add_32(error, &nmreq, vap->va_modify_time.tv_nsec);
1384 } else {
1385 nfsm_chain_add_32(error, &nmreq, NFS_TIME_DONT_CHANGE);
1386 }
1387 }
/* NOTE(review): presumably the SETATTR "guard" (ctime check) discriminator, sent as FALSE -- confirm against the protocol spec */
1388 nfsm_chain_add_32(error, &nmreq, FALSE);
1389 } else {
/* v2: fixed layout; an inactive attribute is sent as -1 */
1390 nfsm_chain_add_32(error, &nmreq, VATTR_IS_ACTIVE(vap, va_mode) ?
1391 vtonfsv2_mode(vnode_vtype(NFSTOV(np)), vap->va_mode) : -1);
1392 nfsm_chain_add_32(error, &nmreq, VATTR_IS_ACTIVE(vap, va_uid) ?
1393 vap->va_uid : (uint32_t)-1);
1394 nfsm_chain_add_32(error, &nmreq, VATTR_IS_ACTIVE(vap, va_gid) ?
1395 vap->va_gid : (uint32_t)-1);
1396 nfsm_chain_add_32(error, &nmreq, VATTR_IS_ACTIVE(vap, va_data_size) ?
1397 vap->va_data_size : (uint32_t)-1);
/* v2 times carry seconds plus tv_nsec / 1000; a tv_nsec of -1 is sent as 0xffffffff */
1398 if (VATTR_IS_ACTIVE(vap, va_access_time)) {
1399 nfsm_chain_add_32(error, &nmreq, vap->va_access_time.tv_sec);
1400 nfsm_chain_add_32(error, &nmreq, (vap->va_access_time.tv_nsec != -1) ?
1401 ((uint32_t)vap->va_access_time.tv_nsec / 1000) : 0xffffffff);
1402 } else {
1403 nfsm_chain_add_32(error, &nmreq, -1);
1404 nfsm_chain_add_32(error, &nmreq, -1);
1405 }
1406 if (VATTR_IS_ACTIVE(vap, va_modify_time)) {
1407 nfsm_chain_add_32(error, &nmreq, vap->va_modify_time.tv_sec);
1408 nfsm_chain_add_32(error, &nmreq, (vap->va_modify_time.tv_nsec != -1) ?
1409 ((uint32_t)vap->va_modify_time.tv_nsec / 1000) : 0xffffffff);
1410 } else {
1411 nfsm_chain_add_32(error, &nmreq, -1);
1412 nfsm_chain_add_32(error, &nmreq, -1);
1413 }
1414 }
1415 nfsm_chain_build_done(error, &nmreq);
1416 nfsmout_if(error);
1417 error = nfs_request(np, NULL, &nmreq, NFSPROC_SETATTR, ctx,
1418 &nmrep, &xid, &status);
/* the node lock is taken to process the reply; a lock failure replaces the request result */
1419 if ((lockerror = nfs_node_lock(np)))
1420 error = lockerror;
1421 if (nfsvers == NFS_VER3) {
1422 struct timespec premtime = { 0, 0 };
1423 nfsm_chain_get_wcc_data(error, &nmrep, np, &premtime, &wccpostattr, &xid);
1424 nfsmout_if(error);
1425 /* if file hadn't changed, update cached mtime */
1426 if (nfstimespeccmp(&np->n_mtime, &premtime, ==))
1427 NFS_CHANGED_UPDATE(nfsvers, np, &np->n_vattr);
1428 /* if directory hadn't changed, update namecache mtime */
1429 if ((vnode_vtype(NFSTOV(np)) == VDIR) &&
1430 nfstimespeccmp(&np->n_ncmtime, &premtime, ==))
1431 NFS_CHANGED_UPDATE_NC(nfsvers, np, &np->n_vattr);
/* no post-op attributes in the reply: cached attributes can't be trusted */
1432 if (!wccpostattr)
1433 NATTRINVALIDATE(np);
1434 error = status;
1435 } else {
1436 if (!error)
1437 error = status;
1438 nfsm_chain_loadattr(error, &nmrep, np, nfsvers, NULL, &xid);
1439 }
1440 /*
1441 * We just changed the attributes and we want to make sure that we
1442 * see the latest attributes. Get the next XID. If it's not the
1443 * next XID after the SETATTR XID, then it's possible that another
1444 * RPC was in flight at the same time and it might put stale attributes
1445 * in the cache. In that case, we invalidate the attributes and set
1446 * the attribute cache XID to guarantee that newer attributes will
1447 * get loaded next.
1448 */
1449 nextxid = 0;
1450 nfs_get_xid(&nextxid);
1451 if (nextxid != (xid + 1)) {
1452 np->n_xid = nextxid;
1453 NATTRINVALIDATE(np);
1454 }
1455nfsmout:
1456 if (!lockerror)
1457 nfs_node_unlock(np);
1458 nfsm_chain_cleanup(&nmreq);
1459 nfsm_chain_cleanup(&nmrep);
1460 return (error);
1461}
1462
1463/*
1464 * NFS lookup call, one step at a time...
1465 * First look in cache
1466 * If not found, unlock the directory nfsnode and do the RPC
1467 */
1468int
1469nfs_vnop_lookup(
1470 struct vnop_lookup_args /* {
1471 struct vnodeop_desc *a_desc;
1472 vnode_t a_dvp;
1473 vnode_t *a_vpp;
1474 struct componentname *a_cnp;
1475 vfs_context_t a_context;
1476 } */ *ap)
1477{
1478 vfs_context_t ctx = ap->a_context;
1479 struct componentname *cnp = ap->a_cnp;
1480 vnode_t dvp = ap->a_dvp;
1481 vnode_t *vpp = ap->a_vpp;
1482 int flags = cnp->cn_flags;
1483 vnode_t newvp;
1484 nfsnode_t dnp, np;
1485 struct nfsmount *nmp;
1486 mount_t mp;
1487 int nfsvers, error, busyerror = ENOENT, isdot, isdotdot, negnamecache;
1488 u_int64_t xid;
1489 struct nfs_vattr nvattr;
1490 int ngflags;
1491 struct vnop_access_args naa;
1492 fhandle_t fh;
1493 struct nfsreq rq, *req = &rq;
1494
1495 *vpp = NULLVP;
1496
1497 dnp = VTONFS(dvp);
1498
1499 mp = vnode_mount(dvp);
1500 nmp = VFSTONFS(mp);
1501 if (!nmp) {
1502 error = ENXIO;
1503 goto error_return;
1504 }
1505 nfsvers = nmp->nm_vers;
1506 negnamecache = !(nmp->nm_flag & NFSMNT_NONEGNAMECACHE);
1507
1508 if ((error = busyerror = nfs_node_set_busy(dnp, vfs_context_thread(ctx))))
1509 goto error_return;
1510 /* nfs_getattr() will check changed and purge caches */
1511 if ((error = nfs_getattr(dnp, &nvattr, ctx, NGA_CACHED)))
1512 goto error_return;
1513
1514 error = cache_lookup(dvp, vpp, cnp);
1515 switch (error) {
1516 case ENOENT:
1517 /* negative cache entry */
1518 goto error_return;
1519 case 0:
1520 /* cache miss */
1521 if ((nfsvers > NFS_VER2) && (nmp->nm_flag & NFSMNT_RDIRPLUS)) {
1522 /* if rdirplus, try dir buf cache lookup */
1523 error = nfs_dir_buf_cache_lookup(dnp, &np, cnp, ctx, 0);
1524 if (!error && np) {
1525 /* dir buf cache hit */
1526 *vpp = NFSTOV(np);
1527 error = -1;
1528 }
1529 }
1530 if (error != -1) /* cache miss */
1531 break;
1532 /* FALLTHROUGH */
1533 case -1:
1534 /* cache hit, not really an error */
1535 OSAddAtomic(1, &nfsstats.lookupcache_hits);
1536
1537 nfs_node_clear_busy(dnp);
1538
1539 /* check for directory access */
1540 naa.a_vp = dvp;
1541 naa.a_action = KAUTH_VNODE_SEARCH;
1542 naa.a_context = ctx;
1543
1544 /* compute actual success/failure based on accessibility */
1545 error = nfs_vnop_access(&naa);
1546 /* FALLTHROUGH */
1547 default:
1548 /* unexpected error from cache_lookup */
1549 goto error_return;
1550 }
1551
1552 /* skip lookup, if we know who we are: "." or ".." */
1553 isdot = isdotdot = 0;
1554 if (cnp->cn_nameptr[0] == '.') {
1555 if (cnp->cn_namelen == 1)
1556 isdot = 1;
1557 if ((cnp->cn_namelen == 2) && (cnp->cn_nameptr[1] == '.'))
1558 isdotdot = 1;
1559 }
1560 if (isdotdot || isdot) {
1561 fh.fh_len = 0;
1562 goto found;
1563 }
1564
1565 /* do we know this name is too long? */
1566 nmp = VTONMP(dvp);
1567 if (!nmp) {
1568 error = ENXIO;
1569 goto error_return;
1570 }
1571 if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_MAXNAME) &&
1572 (cnp->cn_namelen > (int)nmp->nm_fsattr.nfsa_maxname)) {
1573 error = ENAMETOOLONG;
1574 goto error_return;
1575 }
1576
1577 error = 0;
1578 newvp = NULLVP;
1579
1580 OSAddAtomic(1, &nfsstats.lookupcache_misses);
1581
1582 error = nmp->nm_funcs->nf_lookup_rpc_async(dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx, &req);
1583 nfsmout_if(error);
1584 error = nmp->nm_funcs->nf_lookup_rpc_async_finish(dnp, ctx, req, &xid, &fh, &nvattr);
1585 nfsmout_if(error);
1586
1587 /* is the file handle the same as this directory's file handle? */
1588 isdot = NFS_CMPFH(dnp, fh.fh_data, fh.fh_len);
1589
1590found:
1591 if (flags & ISLASTCN) {
1592 switch (cnp->cn_nameiop) {
1593 case DELETE:
1594 cnp->cn_flags &= ~MAKEENTRY;
1595 break;
1596 case RENAME:
1597 cnp->cn_flags &= ~MAKEENTRY;
1598 if (isdot) {
1599 error = EISDIR;
1600 goto error_return;
1601 }
1602 break;
1603 }
1604 }
1605
1606 if (isdotdot) {
1607 newvp = vnode_getparent(dvp);
1608 if (!newvp) {
1609 error = ENOENT;
1610 goto error_return;
1611 }
1612 } else if (isdot) {
1613 error = vnode_get(dvp);
1614 if (error)
1615 goto error_return;
1616 newvp = dvp;
1617 nfs_node_lock_force(dnp);
1618 if (fh.fh_len && (dnp->n_xid <= xid))
1619 nfs_loadattrcache(dnp, &nvattr, &xid, 0);
1620 nfs_node_unlock(dnp);
1621 } else {
1622 ngflags = (cnp->cn_flags & MAKEENTRY) ? NG_MAKEENTRY : 0;
1623 error = nfs_nget(mp, dnp, cnp, fh.fh_data, fh.fh_len, &nvattr, &xid, ngflags, &np);
1624 if (error)
1625 goto error_return;
1626 newvp = NFSTOV(np);
1627 nfs_node_unlock(np);
1628 }
1629 *vpp = newvp;
1630
1631nfsmout:
1632 if (error) {
1633 if (((cnp->cn_nameiop == CREATE) || (cnp->cn_nameiop == RENAME)) &&
1634 (flags & ISLASTCN) && (error == ENOENT)) {
1635 if (vnode_mount(dvp) && vnode_vfsisrdonly(dvp))
1636 error = EROFS;
1637 else
1638 error = EJUSTRETURN;
1639 }
1640 }
1641 if ((error == ENOENT) && (cnp->cn_flags & MAKEENTRY) &&
1642 (cnp->cn_nameiop != CREATE) && negnamecache) {
1643 /* add a negative entry in the name cache */
1644 nfs_node_lock_force(dnp);
1645 cache_enter(dvp, NULL, cnp);
1646 dnp->n_flag |= NNEGNCENTRIES;
1647 nfs_node_unlock(dnp);
1648 }
1649error_return:
1650 if (!busyerror)
1651 nfs_node_clear_busy(dnp);
1652 if (error && *vpp) {
1653 vnode_put(*vpp);
1654 *vpp = NULLVP;
1655 }
1656 return (error);
1657}
1658
1659/*
1660 * NFS read call.
1661 * Just call nfs_bioread() to do the work.
1662 */
1663int
1664nfs_vnop_read(
1665 struct vnop_read_args /* {
1666 struct vnodeop_desc *a_desc;
1667 vnode_t a_vp;
1668 struct uio *a_uio;
1669 int a_ioflag;
1670 vfs_context_t a_context;
1671 } */ *ap)
1672{
1673 if (vnode_vtype(ap->a_vp) != VREG)
1674 return (EPERM);
1675 return (nfs_bioread(VTONFS(ap->a_vp), ap->a_uio, ap->a_ioflag, ap->a_context));
1676}
1677
1678
1679/*
1680 * NFS readlink call
1681 */
1682int
1683nfs_vnop_readlink(
1684 struct vnop_readlink_args /* {
1685 struct vnodeop_desc *a_desc;
1686 vnode_t a_vp;
1687 struct uio *a_uio;
1688 vfs_context_t a_context;
1689 } */ *ap)
1690{
1691 vfs_context_t ctx = ap->a_context;
1692 nfsnode_t np = VTONFS(ap->a_vp);
1693 struct nfsmount *nmp;
1694 int error = 0, nfsvers;
1695 uint32_t buflen;
1696 uio_t uio = ap->a_uio;
1697 struct nfs_vattr nvattr;
1698 struct nfsbuf *bp = NULL;
1699
1700 if (vnode_vtype(ap->a_vp) != VLNK)
1701 return (EPERM);
1702
1703 if (uio_resid(uio) == 0)
1704 return (0);
1705 if (uio_offset(uio) < 0)
1706 return (EINVAL);
1707
1708 nmp = VTONMP(ap->a_vp);
1709 if (!nmp)
1710 return (ENXIO);
1711 nfsvers = nmp->nm_vers;
1712
1713 /* nfs_getattr() will check changed and purge caches */
1714 if ((error = nfs_getattr(np, &nvattr, ctx, NGA_CACHED))) {
1715 FSDBG(531, np, 0xd1e0001, 0, error);
1716 return (error);
1717 }
1718
1719 OSAddAtomic(1, &nfsstats.biocache_readlinks);
1720 error = nfs_buf_get(np, 0, NFS_MAXPATHLEN, vfs_context_thread(ctx), NBLK_READ, &bp);
1721 if (error) {
1722 FSDBG(531, np, 0xd1e0002, 0, error);
1723 return (error);
1724 }
1725 if (!ISSET(bp->nb_flags, NB_CACHE)) {
1726 OSAddAtomic(1, &nfsstats.readlink_bios);
1727 buflen = bp->nb_bufsize;
1728 error = nmp->nm_funcs->nf_readlink_rpc(np, bp->nb_data, &buflen, ctx);
1729 if (error) {
1730 SET(bp->nb_flags, NB_ERROR);
1731 bp->nb_error = error;
1732 } else {
1733 bp->nb_validoff = 0;
1734 bp->nb_validend = buflen;
1735 }
1736 }
1737 if (!error && (bp->nb_validend > 0))
1738 error = uiomove(bp->nb_data, bp->nb_validend, uio);
1739 FSDBG(531, np, bp->nb_validend, 0, error);
1740 nfs_buf_release(bp, 1);
1741 return (error);
1742}
1743
1744/*
1745 * Do a readlink RPC.
1746 */
1747int
1748nfs3_readlink_rpc(nfsnode_t np, char *buf, uint32_t *buflenp, vfs_context_t ctx)
1749{
1750 struct nfsmount *nmp;
1751 int error = 0, lockerror = ENOENT, nfsvers, status;
1752 uint32_t len;
1753 u_int64_t xid;
1754 struct nfsm_chain nmreq, nmrep;
1755
1756 nmp = NFSTONMP(np);
1757 if (!nmp)
1758 return (ENXIO);
1759 nfsvers = nmp->nm_vers;
1760 nfsm_chain_null(&nmreq);
1761 nfsm_chain_null(&nmrep);
1762
1763 nfsm_chain_build_alloc_init(error, &nmreq, NFSX_FH(nfsvers));
1764 nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
1765 nfsm_chain_build_done(error, &nmreq);
1766 nfsmout_if(error);
1767 error = nfs_request(np, NULL, &nmreq, NFSPROC_READLINK, ctx,
1768 &nmrep, &xid, &status);
1769 if ((lockerror = nfs_node_lock(np)))
1770 error = lockerror;
1771 if (nfsvers == NFS_VER3)
1772 nfsm_chain_postop_attr_update(error, &nmrep, np, &xid);
1773 if (!error)
1774 error = status;
1775 nfsm_chain_get_32(error, &nmrep, len);
1776 nfsmout_if(error);
1777 if ((nfsvers == NFS_VER2) && (len > *buflenp)) {
1778 error = EBADRPC;
1779 goto nfsmout;
1780 }
1781 if (len >= *buflenp) {
1782 if (np->n_size && (np->n_size < *buflenp))
1783 len = np->n_size;
1784 else
1785 len = *buflenp - 1;
1786 }
1787 nfsm_chain_get_opaque(error, &nmrep, len, buf);
1788 if (!error)
1789 *buflenp = len;
1790nfsmout:
1791 if (!lockerror)
1792 nfs_node_unlock(np);
1793 nfsm_chain_cleanup(&nmreq);
1794 nfsm_chain_cleanup(&nmrep);
1795 return (error);
1796}
1797
1798/*
1799 * NFS read RPC call
1800 * Ditto above
1801 */
1802int
1803nfs_read_rpc(nfsnode_t np, uio_t uio, vfs_context_t ctx)
1804{
1805 struct nfsmount *nmp;
1806 int error = 0, nfsvers, eof = 0;
1807 size_t nmrsize, len, retlen;
1808 user_ssize_t tsiz;
1809 off_t txoffset;
1810 struct nfsreq rq, *req = &rq;
1811 uint32_t stategenid = 0, restart = 0;
1812
1813 FSDBG_TOP(536, np, uio_offset(uio), uio_resid(uio), 0);
1814 nmp = NFSTONMP(np);
1815 if (!nmp)
1816 return (ENXIO);
1817 nfsvers = nmp->nm_vers;
1818 nmrsize = nmp->nm_rsize;
1819
1820 txoffset = uio_offset(uio);
1821 tsiz = uio_resid(uio);
1822 if ((nfsvers == NFS_VER2) && ((uint64_t)(txoffset + tsiz) > 0xffffffffULL)) {
1823 FSDBG_BOT(536, np, uio_offset(uio), uio_resid(uio), EFBIG);
1824 return (EFBIG);
1825 }
1826
1827 while (tsiz > 0) {
1828 len = retlen = (tsiz > (user_ssize_t)nmrsize) ? nmrsize : (size_t)tsiz;
1829 FSDBG(536, np, txoffset, len, 0);
1830 if (nmp->nm_vers >= NFS_VER4)
1831 stategenid = nmp->nm_stategenid;
1832 error = nmp->nm_funcs->nf_read_rpc_async(np, txoffset, len,
1833 vfs_context_thread(ctx), vfs_context_ucred(ctx), NULL, &req);
1834 if (!error)
1835 error = nmp->nm_funcs->nf_read_rpc_async_finish(np, req, uio, &retlen, &eof);
1836 if ((nmp->nm_vers >= NFS_VER4) && nfs_mount_state_error_should_restart(error) &&
1837 (++restart <= nfs_mount_state_max_restarts(nmp))) { /* guard against no progress */
1838 lck_mtx_lock(&nmp->nm_lock);
1839 if ((error != NFSERR_GRACE) && (stategenid == nmp->nm_stategenid) && !(nmp->nm_state & NFSSTA_RECOVER)) {
1840 printf("nfs_read_rpc: error %d, initiating recovery\n", error);
1841 nmp->nm_state |= NFSSTA_RECOVER;
1842 nfs_mount_sock_thread_wake(nmp);
1843 }
1844 lck_mtx_unlock(&nmp->nm_lock);
1845 if (error == NFSERR_GRACE)
1846 tsleep(&nmp->nm_state, (PZERO-1), "nfsgrace", 2*hz);
1847 if (!(error = nfs_mount_state_wait_for_recovery(nmp)))
1848 continue;
1849 }
1850 if (error)
1851 break;
1852 txoffset += retlen;
1853 tsiz -= retlen;
1854 if (nfsvers != NFS_VER2) {
1855 if (eof || (retlen == 0))
1856 tsiz = 0;
1857 } else if (retlen < len)
1858 tsiz = 0;
1859 }
1860
1861 FSDBG_BOT(536, np, eof, uio_resid(uio), error);
1862 return (error);
1863}
1864
1865int
1866nfs3_read_rpc_async(
1867 nfsnode_t np,
1868 off_t offset,
1869 size_t len,
1870 thread_t thd,
1871 kauth_cred_t cred,
1872 struct nfsreq_cbinfo *cb,
1873 struct nfsreq **reqp)
1874{
1875 struct nfsmount *nmp;
1876 int error = 0, nfsvers;
1877 struct nfsm_chain nmreq;
1878
1879 nmp = NFSTONMP(np);
1880 if (!nmp)
1881 return (ENXIO);
1882 nfsvers = nmp->nm_vers;
1883
1884 nfsm_chain_null(&nmreq);
1885 nfsm_chain_build_alloc_init(error, &nmreq, NFSX_FH(nfsvers) + 3 * NFSX_UNSIGNED);
1886 nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
1887 if (nfsvers == NFS_VER3) {
1888 nfsm_chain_add_64(error, &nmreq, offset);
1889 nfsm_chain_add_32(error, &nmreq, len);
1890 } else {
1891 nfsm_chain_add_32(error, &nmreq, offset);
1892 nfsm_chain_add_32(error, &nmreq, len);
1893 nfsm_chain_add_32(error, &nmreq, 0);
1894 }
1895 nfsm_chain_build_done(error, &nmreq);
1896 nfsmout_if(error);
1897 error = nfs_request_async(np, NULL, &nmreq, NFSPROC_READ, thd, cred, cb, reqp);
1898nfsmout:
1899 nfsm_chain_cleanup(&nmreq);
1900 return (error);
1901}
1902
1903int
1904nfs3_read_rpc_async_finish(
1905 nfsnode_t np,
1906 struct nfsreq *req,
1907 uio_t uio,
1908 size_t *lenp,
1909 int *eofp)
1910{
1911 int error = 0, lockerror, nfsvers, status, eof = 0;
1912 size_t retlen = 0;
1913 uint64_t xid;
1914 struct nfsmount *nmp;
1915 struct nfsm_chain nmrep;
1916
1917 nmp = NFSTONMP(np);
1918 if (!nmp) {
1919 nfs_request_async_cancel(req);
1920 return (ENXIO);
1921 }
1922 nfsvers = nmp->nm_vers;
1923
1924 nfsm_chain_null(&nmrep);
1925
1926 error = nfs_request_async_finish(req, &nmrep, &xid, &status);
1927 if (error == EINPROGRESS) /* async request restarted */
1928 return (error);
1929
1930 if ((lockerror = nfs_node_lock(np)))
1931 error = lockerror;
1932 if (nfsvers == NFS_VER3)
1933 nfsm_chain_postop_attr_update(error, &nmrep, np, &xid);
1934 if (!error)
1935 error = status;
1936 if (nfsvers == NFS_VER3) {
1937 nfsm_chain_adv(error, &nmrep, NFSX_UNSIGNED);
1938 nfsm_chain_get_32(error, &nmrep, eof);
1939 } else {
1940 nfsm_chain_loadattr(error, &nmrep, np, nfsvers, NULL, &xid);
1941 }
1942 if (!lockerror)
1943 nfs_node_unlock(np);
1944 nfsm_chain_get_32(error, &nmrep, retlen);
1945 if ((nfsvers == NFS_VER2) && (retlen > *lenp))
1946 error = EBADRPC;
1947 nfsmout_if(error);
1948 error = nfsm_chain_get_uio(&nmrep, MIN(retlen, *lenp), uio);
1949 if (eofp) {
1950 if (nfsvers == NFS_VER3) {
1951 if (!eof && !retlen)
1952 eof = 1;
1953 } else if (retlen < *lenp) {
1954 eof = 1;
1955 }
1956 *eofp = eof;
1957 }
1958 *lenp = MIN(retlen, *lenp);
1959nfsmout:
1960 nfsm_chain_cleanup(&nmrep);
1961 return (error);
1962}
1963
1964/*
1965 * NFS write call
1966 */
1967int
1968nfs_vnop_write(
1969 struct vnop_write_args /* {
1970 struct vnodeop_desc *a_desc;
1971 vnode_t a_vp;
1972 struct uio *a_uio;
1973 int a_ioflag;
1974 vfs_context_t a_context;
1975 } */ *ap)
1976{
1977 vfs_context_t ctx = ap->a_context;
1978 uio_t uio = ap->a_uio;
1979 vnode_t vp = ap->a_vp;
1980 nfsnode_t np = VTONFS(vp);
1981 int ioflag = ap->a_ioflag;
1982 struct nfsbuf *bp;
1983 struct nfs_vattr nvattr;
1984 struct nfsmount *nmp = VTONMP(vp);
1985 daddr64_t lbn;
1986 int biosize;
1987 int n, on, error = 0;
1988 off_t boff, start, end;
1989 uio_t auio;
1990 char auio_buf [ UIO_SIZEOF(1) ];
1991 thread_t thd;
1992 kauth_cred_t cred;
1993
1994 FSDBG_TOP(515, np, uio_offset(uio), uio_resid(uio), ioflag);
1995
1996 if (vnode_vtype(vp) != VREG) {
1997 FSDBG_BOT(515, np, uio_offset(uio), uio_resid(uio), EIO);
1998 return (EIO);
1999 }
2000
2001 thd = vfs_context_thread(ctx);
2002 cred = vfs_context_ucred(ctx);
2003
2004 nfs_data_lock(np, NFS_DATA_LOCK_SHARED);
2005
2006 if ((error = nfs_node_lock(np))) {
2007 nfs_data_unlock(np);
2008 FSDBG_BOT(515, np, uio_offset(uio), uio_resid(uio), error);
2009 return (error);
2010 }
2011 np->n_wrbusy++;
2012
2013 if (np->n_flag & NWRITEERR) {
2014 error = np->n_error;
2015 np->n_flag &= ~NWRITEERR;
2016 }
2017 if (np->n_flag & NNEEDINVALIDATE) {
2018 np->n_flag &= ~NNEEDINVALIDATE;
2019 nfs_node_unlock(np);
2020 nfs_data_unlock(np);
2021 nfs_vinvalbuf(vp, V_SAVE|V_IGNORE_WRITEERR, ctx, 1);
2022 nfs_data_lock(np, NFS_DATA_LOCK_SHARED);
2023 } else {
2024 nfs_node_unlock(np);
2025 }
2026 if (error)
2027 goto out;
2028
2029 biosize = nmp->nm_biosize;
2030
2031 if (ioflag & (IO_APPEND | IO_SYNC)) {
2032 nfs_node_lock_force(np);
2033 if (np->n_flag & NMODIFIED) {
2034 NATTRINVALIDATE(np);
2035 nfs_node_unlock(np);
2036 nfs_data_unlock(np);
2037 error = nfs_vinvalbuf(vp, V_SAVE, ctx, 1);
2038 nfs_data_lock(np, NFS_DATA_LOCK_SHARED);
2039 if (error) {
2040 FSDBG(515, np, uio_offset(uio), 0x10bad01, error);
2041 goto out;
2042 }
2043 } else {
2044 nfs_node_unlock(np);
2045 }
2046 if (ioflag & IO_APPEND) {
2047 nfs_data_unlock(np);
2048 /* nfs_getattr() will check changed and purge caches */
2049 error = nfs_getattr(np, &nvattr, ctx, NGA_UNCACHED);
2050 /* we'll be extending the file, so take the data lock exclusive */
2051 nfs_data_lock(np, NFS_DATA_LOCK_EXCLUSIVE);
2052 if (error) {
2053 FSDBG(515, np, uio_offset(uio), 0x10bad02, error);
2054 goto out;
2055 }
2056 uio_setoffset(uio, np->n_size);
2057 }
2058 }
2059 if (uio_offset(uio) < 0) {
2060 error = EINVAL;
2061 FSDBG_BOT(515, np, uio_offset(uio), 0xbad0ff, error);
2062 goto out;
2063 }
2064 if (uio_resid(uio) == 0)
2065 goto out;
2066
2067 if (((uio_offset(uio) + uio_resid(uio)) > (off_t)np->n_size) && !(ioflag & IO_APPEND)) {
2068 /* it looks like we'll be extending the file, so take the data lock exclusive */
2069 nfs_data_unlock(np);
2070 nfs_data_lock(np, NFS_DATA_LOCK_EXCLUSIVE);
2071 }
2072
2073 do {
2074 OSAddAtomic(1, &nfsstats.biocache_writes);
2075 lbn = uio_offset(uio) / biosize;
2076 on = uio_offset(uio) % biosize;
2077 n = biosize - on;
2078 if (uio_resid(uio) < n)
2079 n = uio_resid(uio);
2080again:
2081 /*
2082 * Get a cache block for writing. The range to be written is
2083 * (off..off+n) within the block. We ensure that the block
2084 * either has no dirty region or that the given range is
2085 * contiguous with the existing dirty region.
2086 */
2087 error = nfs_buf_get(np, lbn, biosize, thd, NBLK_WRITE, &bp);
2088 if (error)
2089 goto out;
2090 /* map the block because we know we're going to write to it */
2091 NFS_BUF_MAP(bp);
2092
2093 if (ioflag & IO_NOCACHE)
2094 SET(bp->nb_flags, NB_NOCACHE);
2095
2096 if (!IS_VALID_CRED(bp->nb_wcred)) {
2097 kauth_cred_ref(cred);
2098 bp->nb_wcred = cred;
2099 }
2100
2101 /*
2102 * If there's already a dirty range AND dirty pages in this block we
2103 * need to send a commit AND write the dirty pages before continuing.
2104 *
2105 * If there's already a dirty range OR dirty pages in this block
2106 * and the new write range is not contiguous with the existing range,
2107 * then force the buffer to be written out now.
2108 * (We used to just extend the dirty range to cover the valid,
2109 * but unwritten, data in between also. But writing ranges
2110 * of data that weren't actually written by an application
2111 * risks overwriting some other client's data with stale data
2112 * that's just masquerading as new written data.)
2113 */
2114 if (bp->nb_dirtyend > 0) {
2115 if (on > bp->nb_dirtyend || (on + n) < bp->nb_dirtyoff || bp->nb_dirty) {
2116 FSDBG(515, np, uio_offset(uio), bp, 0xd15c001);
2117 /* write/commit buffer "synchronously" */
2118 /* (NB_STABLE indicates that data writes should be FILESYNC) */
2119 CLR(bp->nb_flags, (NB_DONE | NB_ERROR | NB_INVAL));
2120 SET(bp->nb_flags, (NB_ASYNC | NB_STABLE));
2121 error = nfs_buf_write(bp);
2122 if (error)
2123 goto out;
2124 goto again;
2125 }
2126 } else if (bp->nb_dirty) {
2127 int firstpg, lastpg;
2128 u_int32_t pagemask;
2129 /* calculate write range pagemask */
2130 firstpg = on/PAGE_SIZE;
2131 lastpg = (on+n-1)/PAGE_SIZE;
2132 pagemask = ((1 << (lastpg+1)) - 1) & ~((1 << firstpg) - 1);
2133 /* check if there are dirty pages outside the write range */
2134 if (bp->nb_dirty & ~pagemask) {
2135 FSDBG(515, np, uio_offset(uio), bp, 0xd15c002);
2136 /* write/commit buffer "synchronously" */
2137 /* (NB_STABLE indicates that data writes should be FILESYNC) */
2138 CLR(bp->nb_flags, (NB_DONE | NB_ERROR | NB_INVAL));
2139 SET(bp->nb_flags, (NB_ASYNC | NB_STABLE));
2140 error = nfs_buf_write(bp);
2141 if (error)
2142 goto out;
2143 goto again;
2144 }
2145 /* if the first or last pages are already dirty */
2146 /* make sure that the dirty range encompasses those pages */
2147 if (NBPGDIRTY(bp,firstpg) || NBPGDIRTY(bp,lastpg)) {
2148 FSDBG(515, np, uio_offset(uio), bp, 0xd15c003);
2149 bp->nb_dirtyoff = min(on, firstpg * PAGE_SIZE);
2150 if (NBPGDIRTY(bp,lastpg)) {
2151 bp->nb_dirtyend = (lastpg+1) * PAGE_SIZE;
2152 /* clip to EOF */
2153 if (NBOFF(bp) + bp->nb_dirtyend > (off_t)np->n_size) {
2154 bp->nb_dirtyend = np->n_size - NBOFF(bp);
2155 if (bp->nb_dirtyoff >= bp->nb_dirtyend)
2156 bp->nb_dirtyoff = bp->nb_dirtyend = 0;
2157 }
2158 } else
2159 bp->nb_dirtyend = on+n;
2160 }
2161 }
2162
2163 /*
2164 * Are we extending the size of the file with this write?
2165 * If so, update file size now that we have the block.
2166 * If there was a partial buf at the old eof, validate
2167 * and zero the new bytes.
2168 */
2169 if ((uio_offset(uio) + n) > (off_t)np->n_size) {
2170 struct nfsbuf *eofbp = NULL;
2171 daddr64_t eofbn = np->n_size / biosize;
2172 int eofoff = np->n_size % biosize;
2173 int neweofoff = (uio_offset(uio) + n) % biosize;
2174
2175 FSDBG(515, 0xb1ffa000, uio_offset(uio) + n, eofoff, neweofoff);
2176
2177 if (eofoff && (eofbn < lbn) &&
2178 ((error = nfs_buf_get(np, eofbn, biosize, thd, NBLK_WRITE|NBLK_ONLYVALID, &eofbp))))
2179 goto out;
2180
2181 /* if we're extending within the same last block */
2182 /* and the block is flagged as being cached... */
2183 if ((lbn == eofbn) && ISSET(bp->nb_flags, NB_CACHE)) {
2184 /* ...check that all pages in buffer are valid */
2185 int endpg = ((neweofoff ? neweofoff : biosize) - 1)/PAGE_SIZE;
2186 u_int32_t pagemask;
2187 /* pagemask only has to extend to last page being written to */
2188 pagemask = (1 << (endpg+1)) - 1;
2189 FSDBG(515, 0xb1ffa001, bp->nb_valid, pagemask, 0);
2190 if ((bp->nb_valid & pagemask) != pagemask) {
2191 /* zerofill any hole */
2192 if (on > bp->nb_validend) {
2193 int i;
2194 for (i=bp->nb_validend/PAGE_SIZE; i <= (on - 1)/PAGE_SIZE; i++)
2195 NBPGVALID_SET(bp, i);
2196 NFS_BUF_MAP(bp);
2197 FSDBG(516, bp, bp->nb_validend, on - bp->nb_validend, 0xf01e);
2198 bzero((char *)bp->nb_data + bp->nb_validend,
2199 on - bp->nb_validend);
2200 }
2201 /* zerofill any trailing data in the last page */
2202 if (neweofoff) {
2203 NFS_BUF_MAP(bp);
2204 FSDBG(516, bp, neweofoff, PAGE_SIZE - (neweofoff & PAGE_MASK), 0xe0f);
2205 bzero((char *)bp->nb_data + neweofoff,
2206 PAGE_SIZE - (neweofoff & PAGE_MASK));
2207 }
2208 }
2209 }
2210 np->n_size = uio_offset(uio) + n;
2211 nfs_node_lock_force(np);
2212 CLR(np->n_flag, NUPDATESIZE);
2213 np->n_flag |= NMODIFIED;
2214 nfs_node_unlock(np);
2215 FSDBG(516, np, np->n_size, np->n_vattr.nva_size, 0xf00d0001);
2216 ubc_setsize(vp, (off_t)np->n_size); /* XXX errors */
2217 if (eofbp) {
2218 /*
2219 * We may need to zero any previously invalid data
2220 * after the old EOF in the previous EOF buffer.
2221 *
2222 * For the old last page, don't zero bytes if there
2223 * are invalid bytes in that page (i.e. the page isn't
2224 * currently valid).
2225 * For pages after the old last page, zero them and
2226 * mark them as valid.
2227 */
2228 char *d;
2229 int i;
2230 if (ioflag & IO_NOCACHE)
2231 SET(eofbp->nb_flags, NB_NOCACHE);
2232 NFS_BUF_MAP(eofbp);
2233 FSDBG(516, eofbp, eofoff, biosize - eofoff, 0xe0fff01e);
2234 d = eofbp->nb_data;
2235 i = eofoff/PAGE_SIZE;
2236 while (eofoff < biosize) {
2237 int poff = eofoff & PAGE_MASK;
2238 if (!poff || NBPGVALID(eofbp,i)) {
2239 bzero(d + eofoff, PAGE_SIZE - poff);
2240 NBPGVALID_SET(eofbp, i);
2241 }
2242 if (bp->nb_validend == eofoff)
2243 bp->nb_validend += PAGE_SIZE - poff;
2244 eofoff += PAGE_SIZE - poff;
2245 i++;
2246 }
2247 nfs_buf_release(eofbp, 1);
2248 }
2249 }
2250 /*
2251 * If dirtyend exceeds file size, chop it down. This should
2252 * not occur unless there is a race.
2253 */
2254 if (NBOFF(bp) + bp->nb_dirtyend > (off_t)np->n_size) {
2255 bp->nb_dirtyend = np->n_size - NBOFF(bp);
2256 if (bp->nb_dirtyoff >= bp->nb_dirtyend)
2257 bp->nb_dirtyoff = bp->nb_dirtyend = 0;
2258 }
2259 /*
2260 * UBC doesn't handle partial pages, so we need to make sure
2261 * that any pages left in the page cache are completely valid.
2262 *
2263 * Writes that are smaller than a block are delayed if they
2264 * don't extend to the end of the block.
2265 *
2266 * If the block isn't (completely) cached, we may need to read
2267 * in some parts of pages that aren't covered by the write.
2268 * If the write offset (on) isn't page aligned, we'll need to
2269 * read the start of the first page being written to. Likewise,
2270 * if the offset of the end of the write (on+n) isn't page aligned,
2271 * we'll need to read the end of the last page being written to.
2272 *
2273 * Notes:
2274 * We don't want to read anything we're just going to write over.
2275 * We don't want to issue multiple I/Os if we don't have to
2276 * (because they're synchronous rpcs).
2277 * We don't want to read anything we already have modified in the
2278 * page cache.
2279 */
2280 if (!ISSET(bp->nb_flags, NB_NOCACHE) && !ISSET(bp->nb_flags, NB_CACHE) && (n < biosize)) {
2281 int firstpg, lastpg, dirtypg;
2282 int firstpgoff, lastpgoff;
2283 start = end = -1;
2284 firstpg = on/PAGE_SIZE;
2285 firstpgoff = on & PAGE_MASK;
2286 lastpg = (on+n-1)/PAGE_SIZE;
2287 lastpgoff = (on+n) & PAGE_MASK;
2288 if (firstpgoff && !NBPGVALID(bp,firstpg)) {
2289 /* need to read start of first page */
2290 start = firstpg * PAGE_SIZE;
2291 end = start + firstpgoff;
2292 }
2293 if (lastpgoff && !NBPGVALID(bp,lastpg)) {
2294 /* need to read end of last page */
2295 if (start < 0)
2296 start = (lastpg * PAGE_SIZE) + lastpgoff;
2297 end = (lastpg + 1) * PAGE_SIZE;
2298 }
2299 if (end > start) {
2300 /* need to read the data in range: start...end-1 */
2301
2302 /* first, check for dirty pages in between */
2303 /* if there are, we'll have to do two reads because */
2304 /* we don't want to overwrite the dirty pages. */
2305 for (dirtypg=start/PAGE_SIZE; dirtypg <= (end-1)/PAGE_SIZE; dirtypg++)
2306 if (NBPGDIRTY(bp,dirtypg))
2307 break;
2308
2309 /* if start is at beginning of page, try */
2310 /* to get any preceeding pages as well. */
2311 if (!(start & PAGE_MASK)) {
2312 /* stop at next dirty/valid page or start of block */
2313 for (; start > 0; start-=PAGE_SIZE)
2314 if (NBPGVALID(bp,((start-1)/PAGE_SIZE)))
2315 break;
2316 }
2317
2318 NFS_BUF_MAP(bp);
2319 /* setup uio for read(s) */
2320 boff = NBOFF(bp);
2321 auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ,
2322 &auio_buf, sizeof(auio_buf));
2323
2324 if (dirtypg <= (end-1)/PAGE_SIZE) {
2325 /* there's a dirty page in the way, so just do two reads */
2326 /* we'll read the preceding data here */
2327 uio_reset(auio, boff + start, UIO_SYSSPACE, UIO_READ);
2328 uio_addiov(auio, CAST_USER_ADDR_T(bp->nb_data + start), on - start);
2329 error = nfs_read_rpc(np, auio, ctx);
2330 if (error) /* couldn't read the data, so treat buffer as NOCACHE */
2331 SET(bp->nb_flags, (NB_NOCACHE|NB_STABLE));
2332 if (uio_resid(auio) > 0) {
2333 FSDBG(516, bp, (caddr_t)uio_curriovbase(auio) - bp->nb_data, uio_resid(auio), 0xd00dee01);
2334 bzero(CAST_DOWN(caddr_t, uio_curriovbase(auio)), uio_resid(auio));
2335 }
2336 if (!error) {
2337 /* update validoff/validend if necessary */
2338 if ((bp->nb_validoff < 0) || (bp->nb_validoff > start))
2339 bp->nb_validoff = start;
2340 if ((bp->nb_validend < 0) || (bp->nb_validend < on))
2341 bp->nb_validend = on;
2342 if ((off_t)np->n_size > boff + bp->nb_validend)
2343 bp->nb_validend = min(np->n_size - (boff + start), biosize);
2344 /* validate any pages before the write offset */
2345 for (; start < on/PAGE_SIZE; start+=PAGE_SIZE)
2346 NBPGVALID_SET(bp, start/PAGE_SIZE);
2347 }
2348 /* adjust start to read any trailing data */
2349 start = on+n;
2350 }
2351
2352 /* if end is at end of page, try to */
2353 /* get any following pages as well. */
2354 if (!(end & PAGE_MASK)) {
2355 /* stop at next valid page or end of block */
2356 for (; end < biosize; end+=PAGE_SIZE)
2357 if (NBPGVALID(bp,end/PAGE_SIZE))
2358 break;
2359 }
2360
2361 if (((boff+start) >= (off_t)np->n_size) ||
2362 ((start >= on) && ((boff + on + n) >= (off_t)np->n_size))) {
2363 /*
2364 * Either this entire read is beyond the current EOF
2365 * or the range that we won't be modifying (on+n...end)
2366 * is all beyond the current EOF.
2367 * No need to make a trip across the network to
2368 * read nothing. So, just zero the buffer instead.
2369 */
2370 FSDBG(516, bp, start, end - start, 0xd00dee00);
2371 bzero(bp->nb_data + start, end - start);
2372 error = 0;
2373 } else if (!ISSET(bp->nb_flags, NB_NOCACHE)) {
2374 /* now we'll read the (rest of the) data */
2375 uio_reset(auio, boff + start, UIO_SYSSPACE, UIO_READ);
2376 uio_addiov(auio, CAST_USER_ADDR_T(bp->nb_data + start), end - start);
2377 error = nfs_read_rpc(np, auio, ctx);
2378 if (error) /* couldn't read the data, so treat buffer as NOCACHE */
2379 SET(bp->nb_flags, (NB_NOCACHE|NB_STABLE));
2380 if (uio_resid(auio) > 0) {
2381 FSDBG(516, bp, (caddr_t)uio_curriovbase(auio) - bp->nb_data, uio_resid(auio), 0xd00dee02);
2382 bzero(CAST_DOWN(caddr_t, uio_curriovbase(auio)), uio_resid(auio));
2383 }
2384 }
2385 if (!error) {
2386 /* update validoff/validend if necessary */
2387 if ((bp->nb_validoff < 0) || (bp->nb_validoff > start))
2388 bp->nb_validoff = start;
2389 if ((bp->nb_validend < 0) || (bp->nb_validend < end))
2390 bp->nb_validend = end;
2391 if ((off_t)np->n_size > boff + bp->nb_validend)
2392 bp->nb_validend = min(np->n_size - (boff + start), biosize);
2393 /* validate any pages before the write offset's page */
2394 for (; start < (off_t)trunc_page_32(on); start+=PAGE_SIZE)
2395 NBPGVALID_SET(bp, start/PAGE_SIZE);
2396 /* validate any pages after the range of pages being written to */
2397 for (; (end - 1) > (off_t)round_page_32(on+n-1); end-=PAGE_SIZE)
2398 NBPGVALID_SET(bp, (end-1)/PAGE_SIZE);
2399 }
2400 /* Note: pages being written to will be validated when written */
2401 }
2402 }
2403
2404 if (ISSET(bp->nb_flags, NB_ERROR)) {
2405 error = bp->nb_error;
2406 nfs_buf_release(bp, 1);
2407 goto out;
2408 }
2409
2410 nfs_node_lock_force(np);
2411 np->n_flag |= NMODIFIED;
2412 nfs_node_unlock(np);
2413
2414 NFS_BUF_MAP(bp);
2415 error = uiomove((char *)bp->nb_data + on, n, uio);
2416 if (error) {
2417 SET(bp->nb_flags, NB_ERROR);
2418 nfs_buf_release(bp, 1);
2419 goto out;
2420 }
2421
2422 /* validate any pages written to */
2423 start = on & ~PAGE_MASK;
2424 for (; start < on+n; start += PAGE_SIZE) {
2425 NBPGVALID_SET(bp, start/PAGE_SIZE);
2426 /*
2427 * This may seem a little weird, but we don't actually set the
2428 * dirty bits for writes. This is because we keep the dirty range
2429 * in the nb_dirtyoff/nb_dirtyend fields. Also, particularly for
2430 * delayed writes, when we give the pages back to the VM we don't
2431 * want to keep them marked dirty, because when we later write the
2432 * buffer we won't be able to tell which pages were written dirty
2433 * and which pages were mmapped and dirtied.
2434 */
2435 }
2436 if (bp->nb_dirtyend > 0) {
2437 bp->nb_dirtyoff = min(on, bp->nb_dirtyoff);
2438 bp->nb_dirtyend = max((on + n), bp->nb_dirtyend);
2439 } else {
2440 bp->nb_dirtyoff = on;
2441 bp->nb_dirtyend = on + n;
2442 }
2443 if (bp->nb_validend <= 0 || bp->nb_validend < bp->nb_dirtyoff ||
2444 bp->nb_validoff > bp->nb_dirtyend) {
2445 bp->nb_validoff = bp->nb_dirtyoff;
2446 bp->nb_validend = bp->nb_dirtyend;
2447 } else {
2448 bp->nb_validoff = min(bp->nb_validoff, bp->nb_dirtyoff);
2449 bp->nb_validend = max(bp->nb_validend, bp->nb_dirtyend);
2450 }
2451 if (!ISSET(bp->nb_flags, NB_CACHE))
2452 nfs_buf_normalize_valid_range(np, bp);
2453
2454 /*
2455 * Since this block is being modified, it must be written
2456 * again and not just committed.
2457 */
2458 if (ISSET(bp->nb_flags, NB_NEEDCOMMIT)) {
2459 nfs_node_lock_force(np);
2460 if (ISSET(bp->nb_flags, NB_NEEDCOMMIT)) {
2461 np->n_needcommitcnt--;
2462 CHECK_NEEDCOMMITCNT(np);
2463 }
2464 CLR(bp->nb_flags, NB_NEEDCOMMIT);
2465 nfs_node_unlock(np);
2466 }
2467
2468 if (ioflag & IO_SYNC) {
2469 error = nfs_buf_write(bp);
2470 if (error)
2471 goto out;
2472 } else if (((n + on) == biosize) || (ioflag & IO_APPEND) ||
2473 (ioflag & IO_NOCACHE) || ISSET(bp->nb_flags, NB_NOCACHE)) {
2474 SET(bp->nb_flags, NB_ASYNC);
2475 error = nfs_buf_write(bp);
2476 if (error)
2477 goto out;
2478 } else {
2479 /* If the block wasn't already delayed: charge for the write */
2480 if (!ISSET(bp->nb_flags, NB_DELWRI)) {
2481 proc_t p = vfs_context_proc(ctx);
2482 if (p && p->p_stats)
2483 OSIncrementAtomicLong(&p->p_stats->p_ru.ru_oublock);
2484 }
2485 nfs_buf_write_delayed(bp);
2486 }
2487 if (np->n_needcommitcnt >= NFS_A_LOT_OF_NEEDCOMMITS)
2488 nfs_flushcommits(np, 1);
2489
2490 } while (uio_resid(uio) > 0 && n > 0);
2491
2492out:
2493 nfs_node_lock_force(np);
2494 np->n_wrbusy--;
2495 nfs_node_unlock(np);
2496 nfs_data_unlock(np);
2497 FSDBG_BOT(515, np, uio_offset(uio), uio_resid(uio), error);
2498 return (error);
2499}
2500
2501
2502/*
2503 * NFS write call
2504 */
2505int
2506nfs_write_rpc(
2507 nfsnode_t np,
2508 uio_t uio,
2509 vfs_context_t ctx,
2510 int *iomodep,
2511 uint64_t *wverfp)
2512{
2513 return nfs_write_rpc2(np, uio, vfs_context_thread(ctx), vfs_context_ucred(ctx), iomodep, wverfp);
2514}
2515
2516int
2517nfs_write_rpc2(
2518 nfsnode_t np,
2519 uio_t uio,
2520 thread_t thd,
2521 kauth_cred_t cred,
2522 int *iomodep,
2523 uint64_t *wverfp)
2524{
2525 struct nfsmount *nmp;
2526 int error = 0, nfsvers;
2527 int backup, wverfset, commit, committed;
2528 uint64_t wverf = 0, wverf2;
2529 size_t nmwsize, totalsize, tsiz, len, rlen;
2530 struct nfsreq rq, *req = &rq;
2531 uint32_t stategenid = 0, vrestart = 0, restart = 0;
2532
2533#if DIAGNOSTIC
2534 /* XXX limitation based on need to back up uio on short write */
2535 if (uio_iovcnt(uio) != 1)
2536 panic("nfs3_write_rpc: iovcnt > 1");
2537#endif
2538 FSDBG_TOP(537, np, uio_offset(uio), uio_resid(uio), *iomodep);
2539 nmp = NFSTONMP(np);
2540 if (!nmp)
2541 return (ENXIO);
2542 nfsvers = nmp->nm_vers;
2543 nmwsize = nmp->nm_wsize;
2544
2545 wverfset = 0;
2546 committed = NFS_WRITE_FILESYNC;
2547
2548 totalsize = tsiz = uio_resid(uio);
2549 if ((nfsvers == NFS_VER2) && ((uint64_t)(uio_offset(uio) + tsiz) > 0xffffffffULL)) {
2550 FSDBG_BOT(537, np, uio_offset(uio), uio_resid(uio), EFBIG);
2551 return (EFBIG);
2552 }
2553
2554 while (tsiz > 0) {
2555 len = (tsiz > nmwsize) ? nmwsize : tsiz;
2556 FSDBG(537, np, uio_offset(uio), len, 0);
2557 if (nmp->nm_vers >= NFS_VER4)
2558 stategenid = nmp->nm_stategenid;
2559 error = nmp->nm_funcs->nf_write_rpc_async(np, uio, len, thd, cred, *iomodep, NULL, &req);
2560 if (!error)
2561 error = nmp->nm_funcs->nf_write_rpc_async_finish(np, req, &commit, &rlen, &wverf2);
2562 nmp = NFSTONMP(np);
2563 if (!nmp)
2564 error = ENXIO;
2565 if ((nmp->nm_vers >= NFS_VER4) && nfs_mount_state_error_should_restart(error) &&
2566 (++restart <= nfs_mount_state_max_restarts(nmp))) { /* guard against no progress */
2567 lck_mtx_lock(&nmp->nm_lock);
2568 if ((error != NFSERR_GRACE) && (stategenid == nmp->nm_stategenid) && !(nmp->nm_state & NFSSTA_RECOVER)) {
2569 printf("nfs_write_rpc: error %d, initiating recovery\n", error);
2570 nmp->nm_state |= NFSSTA_RECOVER;
2571 nfs_mount_sock_thread_wake(nmp);
2572 }
2573 lck_mtx_unlock(&nmp->nm_lock);
2574 if (error == NFSERR_GRACE)
2575 tsleep(&nmp->nm_state, (PZERO-1), "nfsgrace", 2*hz);
2576 if (!(error = nfs_mount_state_wait_for_recovery(nmp)))
2577 continue;
2578 }
2579 if (error)
2580 break;
2581 if (nfsvers == NFS_VER2) {
2582 tsiz -= len;
2583 continue;
2584 }
2585
2586 /* check for a short write */
2587 if (rlen < len) {
2588 backup = len - rlen;
2589 uio_pushback(uio, backup);
2590 len = rlen;
2591 }
2592
2593 /* return lowest commit level returned */
2594 if (commit < committed)
2595 committed = commit;
2596
2597 tsiz -= len;
2598
2599 /* check write verifier */
2600 if (!wverfset) {
2601 wverf = wverf2;
2602 wverfset = 1;
2603 } else if (wverf != wverf2) {
2604 /* verifier changed, so we need to restart all the writes */
2605 if (++vrestart > 100) {
2606 /* give up after too many restarts */
2607 error = EIO;
2608 break;
2609 }
2610 backup = totalsize - tsiz;
2611 uio_pushback(uio, backup);
2612 committed = NFS_WRITE_FILESYNC;
2613 wverfset = 0;
2614 tsiz = totalsize;
2615 }
2616 }
2617 if (wverfset && wverfp)
2618 *wverfp = wverf;
2619 *iomodep = committed;
2620 if (error)
2621 uio_setresid(uio, tsiz);
2622 FSDBG_BOT(537, np, committed, uio_resid(uio), error);
2623 return (error);
2624}
2625
2626int
2627nfs3_write_rpc_async(
2628 nfsnode_t np,
2629 uio_t uio,
2630 size_t len,
2631 thread_t thd,
2632 kauth_cred_t cred,
2633 int iomode,
2634 struct nfsreq_cbinfo *cb,
2635 struct nfsreq **reqp)
2636{
2637 struct nfsmount *nmp;
2638 int error = 0, nfsvers;
2639 struct nfsm_chain nmreq;
2640
2641 nmp = NFSTONMP(np);
2642 if (!nmp)
2643 return (ENXIO);
2644 nfsvers = nmp->nm_vers;
2645
2646 nfsm_chain_null(&nmreq);
2647 nfsm_chain_build_alloc_init(error, &nmreq,
2648 NFSX_FH(nfsvers) + 5 * NFSX_UNSIGNED + nfsm_rndup(len));
2649 nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
2650 if (nfsvers == NFS_VER3) {
2651 nfsm_chain_add_64(error, &nmreq, uio_offset(uio));
2652 nfsm_chain_add_32(error, &nmreq, len);
2653 nfsm_chain_add_32(error, &nmreq, iomode);
2654 } else {
2655 nfsm_chain_add_32(error, &nmreq, 0);
2656 nfsm_chain_add_32(error, &nmreq, uio_offset(uio));
2657 nfsm_chain_add_32(error, &nmreq, 0);
2658 }
2659 nfsm_chain_add_32(error, &nmreq, len);
2660 nfsmout_if(error);
2661 error = nfsm_chain_add_uio(&nmreq, uio, len);
2662 nfsm_chain_build_done(error, &nmreq);
2663 nfsmout_if(error);
2664 error = nfs_request_async(np, NULL, &nmreq, NFSPROC_WRITE, thd, cred, cb, reqp);
2665nfsmout:
2666 nfsm_chain_cleanup(&nmreq);
2667 return (error);
2668}
2669
2670int
2671nfs3_write_rpc_async_finish(
2672 nfsnode_t np,
2673 struct nfsreq *req,
2674 int *iomodep,
2675 size_t *rlenp,
2676 uint64_t *wverfp)
2677{
2678 struct nfsmount *nmp;
2679 int error = 0, lockerror = ENOENT, nfsvers, status;
2680 int updatemtime = 0, wccpostattr = 0, rlen, committed = NFS_WRITE_FILESYNC;
2681 u_int64_t xid, wverf;
2682 mount_t mp;
2683 struct nfsm_chain nmrep;
2684
2685 nmp = NFSTONMP(np);
2686 if (!nmp) {
2687 nfs_request_async_cancel(req);
2688 return (ENXIO);
2689 }
2690 nfsvers = nmp->nm_vers;
2691
2692 nfsm_chain_null(&nmrep);
2693
2694 error = nfs_request_async_finish(req, &nmrep, &xid, &status);
2695 if (error == EINPROGRESS) /* async request restarted */
2696 return (error);
2697 nmp = NFSTONMP(np);
2698 if (!nmp)
2699 error = ENXIO;
2700 if (!error && (lockerror = nfs_node_lock(np)))
2701 error = lockerror;
2702 if (nfsvers == NFS_VER3) {
2703 struct timespec premtime = { 0, 0 };
2704 nfsm_chain_get_wcc_data(error, &nmrep, np, &premtime, &wccpostattr, &xid);
2705 if (nfstimespeccmp(&np->n_mtime, &premtime, ==))
2706 updatemtime = 1;
2707 if (!error)
2708 error = status;
2709 nfsm_chain_get_32(error, &nmrep, rlen);
2710 nfsmout_if(error);
2711 *rlenp = rlen;
2712 if (rlen <= 0)
2713 error = NFSERR_IO;
2714 nfsm_chain_get_32(error, &nmrep, committed);
2715 nfsm_chain_get_64(error, &nmrep, wverf);
2716 nfsmout_if(error);
2717 if (wverfp)
2718 *wverfp = wverf;
2719 lck_mtx_lock(&nmp->nm_lock);
2720 if (!(nmp->nm_state & NFSSTA_HASWRITEVERF)) {
2721 nmp->nm_verf = wverf;
2722 nmp->nm_state |= NFSSTA_HASWRITEVERF;
2723 } else if (nmp->nm_verf != wverf) {
2724 nmp->nm_verf = wverf;
2725 }
2726 lck_mtx_unlock(&nmp->nm_lock);
2727 } else {
2728 if (!error)
2729 error = status;
2730 nfsm_chain_loadattr(error, &nmrep, np, nfsvers, NULL, &xid);
2731 nfsmout_if(error);
2732 }
2733 if (updatemtime)
2734 NFS_CHANGED_UPDATE(nfsvers, np, &np->n_vattr);
2735nfsmout:
2736 if (!lockerror)
2737 nfs_node_unlock(np);
2738 nfsm_chain_cleanup(&nmrep);
2739 if ((committed != NFS_WRITE_FILESYNC) && nfs_allow_async &&
2740 ((mp = NFSTOMP(np))) && (vfs_flags(mp) & MNT_ASYNC))
2741 committed = NFS_WRITE_FILESYNC;
2742 *iomodep = committed;
2743 return (error);
2744}
2745
2746/*
2747 * NFS mknod vnode op
2748 *
2749 * For NFS v2 this is a kludge. Use a create RPC but with the IFMT bits of the
2750 * mode set to specify the file type and the size field for rdev.
2751 */
2752int
2753nfs3_vnop_mknod(
2754 struct vnop_mknod_args /* {
2755 struct vnodeop_desc *a_desc;
2756 vnode_t a_dvp;
2757 vnode_t *a_vpp;
2758 struct componentname *a_cnp;
2759 struct vnode_attr *a_vap;
2760 vfs_context_t a_context;
2761 } */ *ap)
2762{
2763 vnode_t dvp = ap->a_dvp;
2764 vnode_t *vpp = ap->a_vpp;
2765 struct componentname *cnp = ap->a_cnp;
2766 struct vnode_attr *vap = ap->a_vap;
2767 vfs_context_t ctx = ap->a_context;
2768 vnode_t newvp = NULL;
2769 nfsnode_t np = NULL;
2770 struct nfsmount *nmp;
2771 nfsnode_t dnp = VTONFS(dvp);
2772 struct nfs_vattr nvattr, dnvattr;
2773 fhandle_t fh;
2774 int error = 0, lockerror = ENOENT, busyerror = ENOENT, status, wccpostattr = 0;
2775 struct timespec premtime = { 0, 0 };
2776 u_int32_t rdev;
2777 u_int64_t xid, dxid;
2778 int nfsvers, gotuid, gotgid;
2779 struct nfsm_chain nmreq, nmrep;
2780
2781 nmp = VTONMP(dvp);
2782 if (!nmp)
2783 return (ENXIO);
2784 nfsvers = nmp->nm_vers;
2785
2786 if (!VATTR_IS_ACTIVE(vap, va_type))
2787 return (EINVAL);
2788 if (vap->va_type == VCHR || vap->va_type == VBLK) {
2789 if (!VATTR_IS_ACTIVE(vap, va_rdev))
2790 return (EINVAL);
2791 rdev = vap->va_rdev;
2792 } else if (vap->va_type == VFIFO || vap->va_type == VSOCK)
2793 rdev = 0xffffffff;
2794 else {
2795 return (ENOTSUP);
2796 }
2797 if ((nfsvers == NFS_VER2) && (cnp->cn_namelen > NFS_MAXNAMLEN))
2798 return (ENAMETOOLONG);
2799
2800 VATTR_SET_SUPPORTED(vap, va_mode);
2801 VATTR_SET_SUPPORTED(vap, va_uid);
2802 VATTR_SET_SUPPORTED(vap, va_gid);
2803 VATTR_SET_SUPPORTED(vap, va_data_size);
2804 VATTR_SET_SUPPORTED(vap, va_access_time);
2805 VATTR_SET_SUPPORTED(vap, va_modify_time);
2806 gotuid = VATTR_IS_ACTIVE(vap, va_uid);
2807 gotgid = VATTR_IS_ACTIVE(vap, va_gid);
2808
2809 nfsm_chain_null(&nmreq);
2810 nfsm_chain_null(&nmrep);
2811
2812 nfsm_chain_build_alloc_init(error, &nmreq,
2813 NFSX_FH(nfsvers) + 4 * NFSX_UNSIGNED +
2814 nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(nfsvers));
2815 nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
2816 nfsm_chain_add_string(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen);
2817 if (nfsvers == NFS_VER3) {
2818 nfsm_chain_add_32(error, &nmreq, vtonfs_type(vap->va_type, nfsvers));
2819 nfsm_chain_add_v3sattr(error, &nmreq, vap);
2820 if (vap->va_type == VCHR || vap->va_type == VBLK) {
2821 nfsm_chain_add_32(error, &nmreq, major(vap->va_rdev));
2822 nfsm_chain_add_32(error, &nmreq, minor(vap->va_rdev));
2823 }
2824 } else {
2825 nfsm_chain_add_v2sattr(error, &nmreq, vap, rdev);
2826 }
2827 nfsm_chain_build_done(error, &nmreq);
2828 if (!error)
2829 error = busyerror = nfs_node_set_busy(dnp, vfs_context_thread(ctx));
2830 nfsmout_if(error);
2831
2832 error = nfs_request(dnp, NULL, &nmreq, NFSPROC_MKNOD, ctx, &nmrep, &xid, &status);
2833
2834 if ((lockerror = nfs_node_lock(dnp)))
2835 error = lockerror;
2836 /* XXX no EEXIST kludge here? */
2837 dxid = xid;
2838 if (!error && !status) {
2839 if (dnp->n_flag & NNEGNCENTRIES) {
2840 dnp->n_flag &= ~NNEGNCENTRIES;
2841 cache_purge_negatives(dvp);
2842 }
2843 error = nfsm_chain_get_fh_attr(&nmrep, dnp, ctx, nfsvers, &xid, &fh, &nvattr);
2844 }
2845 if (nfsvers == NFS_VER3)
2846 nfsm_chain_get_wcc_data(error, &nmrep, dnp, &premtime, &wccpostattr, &dxid);
2847 if (!error)
2848 error = status;
2849nfsmout:
2850 nfsm_chain_cleanup(&nmreq);
2851 nfsm_chain_cleanup(&nmrep);
2852
2853 if (!lockerror) {
2854 dnp->n_flag |= NMODIFIED;
2855 /* if directory hadn't changed, update namecache mtime */
2856 if (nfstimespeccmp(&dnp->n_ncmtime, &premtime, ==))
2857 NFS_CHANGED_UPDATE_NC(nfsvers, dnp, &dnp->n_vattr);
2858 nfs_node_unlock(dnp);
2859 /* nfs_getattr() will check changed and purge caches */
2860 nfs_getattr(dnp, &dnvattr, ctx, wccpostattr ? NGA_CACHED : NGA_UNCACHED);
2861 }
2862
2863 if (!error && fh.fh_len)
2864 error = nfs_nget(NFSTOMP(dnp), dnp, cnp, fh.fh_data, fh.fh_len, &nvattr, &xid, NG_MAKEENTRY, &np);
2865 if (!error && !np)
2866 error = nfs_lookitup(dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx, &np);
2867 if (!error && np)
2868 newvp = NFSTOV(np);
2869 if (!busyerror)
2870 nfs_node_clear_busy(dnp);
2871
2872 if (!error && (gotuid || gotgid) &&
2873 (!newvp || nfs_getattrcache(np, &nvattr) ||
2874 (gotuid && (nvattr.nva_uid != vap->va_uid)) ||
2875 (gotgid && (nvattr.nva_gid != vap->va_gid)))) {
2876 /* clear ID bits if server didn't use them (or we can't tell) */
2877 VATTR_CLEAR_SUPPORTED(vap, va_uid);
2878 VATTR_CLEAR_SUPPORTED(vap, va_gid);
2879 }
2880 if (error) {
2881 if (newvp) {
2882 nfs_node_unlock(np);
2883 vnode_put(newvp);
2884 }
2885 } else {
2886 *vpp = newvp;
2887 nfs_node_unlock(np);
2888 }
2889 return (error);
2890}
2891
2892static uint32_t create_verf;
2893/*
2894 * NFS file create call
2895 */
2896int
2897nfs3_vnop_create(
2898 struct vnop_create_args /* {
2899 struct vnodeop_desc *a_desc;
2900 vnode_t a_dvp;
2901 vnode_t *a_vpp;
2902 struct componentname *a_cnp;
2903 struct vnode_attr *a_vap;
2904 vfs_context_t a_context;
2905 } */ *ap)
2906{
2907 vfs_context_t ctx = ap->a_context;
2908 vnode_t dvp = ap->a_dvp;
2909 struct vnode_attr *vap = ap->a_vap;
2910 struct componentname *cnp = ap->a_cnp;
2911 struct nfs_vattr nvattr, dnvattr;
2912 fhandle_t fh;
2913 nfsnode_t np = NULL;
2914 struct nfsmount *nmp;
2915 nfsnode_t dnp = VTONFS(dvp);
2916 vnode_t newvp = NULL;
2917 int error = 0, lockerror = ENOENT, busyerror = ENOENT, status, wccpostattr = 0, fmode = 0;
2918 struct timespec premtime = { 0, 0 };
2919 int nfsvers, gotuid, gotgid;
2920 u_int64_t xid, dxid;
2921 uint32_t val;
2922 struct nfsm_chain nmreq, nmrep;
2923 struct nfsreq *req;
2924 struct nfs_dulookup dul;
2925
2926 nmp = VTONMP(dvp);
2927 if (!nmp)
2928 return (ENXIO);
2929 nfsvers = nmp->nm_vers;
2930
2931 if ((nfsvers == NFS_VER2) && (cnp->cn_namelen > NFS_MAXNAMLEN))
2932 return (ENAMETOOLONG);
2933
2934 VATTR_SET_SUPPORTED(vap, va_mode);
2935 VATTR_SET_SUPPORTED(vap, va_uid);
2936 VATTR_SET_SUPPORTED(vap, va_gid);
2937 VATTR_SET_SUPPORTED(vap, va_data_size);
2938 VATTR_SET_SUPPORTED(vap, va_access_time);
2939 VATTR_SET_SUPPORTED(vap, va_modify_time);
2940 gotuid = VATTR_IS_ACTIVE(vap, va_uid);
2941 gotgid = VATTR_IS_ACTIVE(vap, va_gid);
2942
2943 if (vap->va_vaflags & VA_EXCLUSIVE)
2944 fmode |= O_EXCL;
2945
2946again:
2947 req = NULL;
2948 error = busyerror = nfs_node_set_busy(dnp, vfs_context_thread(ctx));
2949 nfs_dulookup_init(&dul, dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx);
2950
2951 nfsm_chain_null(&nmreq);
2952 nfsm_chain_null(&nmrep);
2953
2954 nfsm_chain_build_alloc_init(error, &nmreq,
2955 NFSX_FH(nfsvers) + 2 * NFSX_UNSIGNED +
2956 nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(nfsvers));
2957 nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
2958 nfsm_chain_add_string(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen);
2959 if (nfsvers == NFS_VER3) {
2960 if (fmode & O_EXCL) {
2961 nfsm_chain_add_32(error, &nmreq, NFS_CREATE_EXCLUSIVE);
2962 lck_rw_lock_shared(in_ifaddr_rwlock);
2963 if (!TAILQ_EMPTY(&in_ifaddrhead))
2964 val = IA_SIN(in_ifaddrhead.tqh_first)->sin_addr.s_addr;
2965 else
2966 val = create_verf;
2967 lck_rw_done(in_ifaddr_rwlock);
2968 nfsm_chain_add_32(error, &nmreq, val);
2969 ++create_verf;
2970 nfsm_chain_add_32(error, &nmreq, create_verf);
2971 } else {
2972 nfsm_chain_add_32(error, &nmreq, NFS_CREATE_UNCHECKED);
2973 nfsm_chain_add_v3sattr(error, &nmreq, vap);
2974 }
2975 } else {
2976 nfsm_chain_add_v2sattr(error, &nmreq, vap, 0);
2977 }
2978 nfsm_chain_build_done(error, &nmreq);
2979 nfsmout_if(error);
2980
2981 error = nfs_request_async(dnp, NULL, &nmreq, NFSPROC_CREATE,
2982 vfs_context_thread(ctx), vfs_context_ucred(ctx), NULL, &req);
2983 if (!error) {
2984 nfs_dulookup_start(&dul, dnp, ctx);
2985 error = nfs_request_async_finish(req, &nmrep, &xid, &status);
2986 }
2987
2988 if ((lockerror = nfs_node_lock(dnp)))
2989 error = lockerror;
2990 dxid = xid;
2991 if (!error && !status) {
2992 if (dnp->n_flag & NNEGNCENTRIES) {
2993 dnp->n_flag &= ~NNEGNCENTRIES;
2994 cache_purge_negatives(dvp);
2995 }
2996 error = nfsm_chain_get_fh_attr(&nmrep, dnp, ctx, nfsvers, &xid, &fh, &nvattr);
2997 }
2998 if (nfsvers == NFS_VER3)
2999 nfsm_chain_get_wcc_data(error, &nmrep, dnp, &premtime, &wccpostattr, &dxid);
3000 if (!error)
3001 error = status;
3002nfsmout:
3003 nfsm_chain_cleanup(&nmreq);
3004 nfsm_chain_cleanup(&nmrep);
3005
3006 if (!lockerror) {
3007 dnp->n_flag |= NMODIFIED;
3008 /* if directory hadn't changed, update namecache mtime */
3009 if (nfstimespeccmp(&dnp->n_ncmtime, &premtime, ==))
3010 NFS_CHANGED_UPDATE_NC(nfsvers, dnp, &dnp->n_vattr);
3011 nfs_node_unlock(dnp);
3012 /* nfs_getattr() will check changed and purge caches */
3013 nfs_getattr(dnp, &dnvattr, ctx, wccpostattr ? NGA_CACHED : NGA_UNCACHED);
3014 }
3015
3016 if (!error && fh.fh_len)
3017 error = nfs_nget(NFSTOMP(dnp), dnp, cnp, fh.fh_data, fh.fh_len, &nvattr, &xid, NG_MAKEENTRY, &np);
3018 if (!error && !np)
3019 error = nfs_lookitup(dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx, &np);
3020 if (!error && np)
3021 newvp = NFSTOV(np);
3022
3023 nfs_dulookup_finish(&dul, dnp, ctx);
3024 if (!busyerror)
3025 nfs_node_clear_busy(dnp);
3026
3027 if (error) {
3028 if ((nfsvers == NFS_VER3) && (fmode & O_EXCL) && (error == NFSERR_NOTSUPP)) {
3029 fmode &= ~O_EXCL;
3030 goto again;
3031 }
3032 if (newvp) {
3033 nfs_node_unlock(np);
3034 vnode_put(newvp);
3035 }
3036 } else if ((nfsvers == NFS_VER3) && (fmode & O_EXCL)) {
3037 nfs_node_unlock(np);
3038 error = nfs3_setattr_rpc(np, vap, ctx);
3039 if (error && (gotuid || gotgid)) {
3040 /* it's possible the server didn't like our attempt to set IDs. */
3041 /* so, let's try it again without those */
3042 VATTR_CLEAR_ACTIVE(vap, va_uid);
3043 VATTR_CLEAR_ACTIVE(vap, va_gid);
3044 error = nfs3_setattr_rpc(np, vap, ctx);
3045 }
3046 if (error)
3047 vnode_put(newvp);
3048 else
3049 nfs_node_lock_force(np);
3050 }
3051 if (!error)
3052 *ap->a_vpp = newvp;
3053 if (!error && (gotuid || gotgid) &&
3054 (!newvp || nfs_getattrcache(np, &nvattr) ||
3055 (gotuid && (nvattr.nva_uid != vap->va_uid)) ||
3056 (gotgid && (nvattr.nva_gid != vap->va_gid)))) {
3057 /* clear ID bits if server didn't use them (or we can't tell) */
3058 VATTR_CLEAR_SUPPORTED(vap, va_uid);
3059 VATTR_CLEAR_SUPPORTED(vap, va_gid);
3060 }
3061 if (!error)
3062 nfs_node_unlock(np);
3063 return (error);
3064}
3065
3066/*
3067 * NFS file remove call
3068 * To try and make NFS semantics closer to UFS semantics, a file that has
3069 * other processes using the vnode is renamed instead of removed and then
3070 * removed later on the last close.
3071 * - If vnode_isinuse()
3072 * If a rename is not already in the works
3073 * call nfs_sillyrename() to set it up
3074 * else
3075 * do the remove RPC
3076 */
3077int
3078nfs_vnop_remove(
3079 struct vnop_remove_args /* {
3080 struct vnodeop_desc *a_desc;
3081 vnode_t a_dvp;
3082 vnode_t a_vp;
3083 struct componentname *a_cnp;
3084 int a_flags;
3085 vfs_context_t a_context;
3086 } */ *ap)
3087{
3088 vfs_context_t ctx = ap->a_context;
3089 vnode_t vp = ap->a_vp;
3090 vnode_t dvp = ap->a_dvp;
3091 struct componentname *cnp = ap->a_cnp;
3092 nfsnode_t dnp = VTONFS(dvp);
3093 nfsnode_t np = VTONFS(vp);
3094 int error = 0, nfsvers, inuse, gotattr = 0, flushed = 0, setsize = 0;
3095 struct nfs_vattr nvattr;
3096 struct nfsmount *nmp;
3097 struct nfs_dulookup dul;
3098
3099 /* XXX prevent removing a sillyrenamed file? */
3100
3101 nmp = NFSTONMP(dnp);
3102 if (!nmp)
3103 return (ENXIO);
3104 nfsvers = nmp->nm_vers;
3105
3106again_relock:
3107 error = nfs_node_set_busy2(dnp, np, vfs_context_thread(ctx));
3108 if (error)
3109 return (error);
3110
3111 /* lock the node while we remove the file */
3112 lck_mtx_lock(nfs_node_hash_mutex);
3113 while (np->n_hflag & NHLOCKED) {
3114 np->n_hflag |= NHLOCKWANT;
3115 msleep(np, nfs_node_hash_mutex, PINOD, "nfs_remove", NULL);
3116 }
3117 np->n_hflag |= NHLOCKED;
3118 lck_mtx_unlock(nfs_node_hash_mutex);
3119
3120 nfs_dulookup_init(&dul, dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx);
3121again:
3122 inuse = vnode_isinuse(vp, 0);
3123 if ((ap->a_flags & VNODE_REMOVE_NODELETEBUSY) && inuse) {
3124 /* Caller requested Carbon delete semantics, but file is busy */
3125 error = EBUSY;
3126 goto out;
3127 }
3128 if (inuse && !gotattr) {
3129 if (nfs_getattr(np, &nvattr, ctx, NGA_CACHED))
3130 nvattr.nva_nlink = 1;
3131 gotattr = 1;
3132 goto again;
3133 }
3134 if (!inuse || (np->n_sillyrename && (nvattr.nva_nlink > 1))) {
3135
3136 if (!inuse && !flushed) { /* flush all the buffers first */
3137 /* unlock the node */
3138 lck_mtx_lock(nfs_node_hash_mutex);
3139 np->n_hflag &= ~NHLOCKED;
3140 if (np->n_hflag & NHLOCKWANT) {
3141 np->n_hflag &= ~NHLOCKWANT;
3142 wakeup(np);
3143 }
3144 lck_mtx_unlock(nfs_node_hash_mutex);
3145 nfs_node_clear_busy2(dnp, np);
3146 error = nfs_vinvalbuf(vp, V_SAVE, ctx, 1);
3147 FSDBG(260, np, np->n_size, np->n_vattr.nva_size, 0xf00d0011);
3148 flushed = 1;
3149 if (error == EINTR) {
3150 nfs_node_lock_force(np);
3151 NATTRINVALIDATE(np);
3152 nfs_node_unlock(np);
3153 return (error);
3154 }
3155 goto again_relock;
3156 }
3157
3158 if ((nmp->nm_vers >= NFS_VER4) && (np->n_openflags & N_DELEG_MASK)) {
3159 lck_mtx_lock(&np->n_openlock);
3160 np->n_openflags &= ~N_DELEG_MASK;
3161 lck_mtx_unlock(&np->n_openlock);
3162 nfs4_delegreturn_rpc(nmp, np->n_fhp, np->n_fhsize, &np->n_dstateid,
3163 vfs_context_thread(ctx), vfs_context_ucred(ctx));
3164 }
3165
3166 /*
3167 * Purge the name cache so that the chance of a lookup for
3168 * the name succeeding while the remove is in progress is
3169 * minimized.
3170 */
3171 nfs_name_cache_purge(dnp, np, cnp, ctx);
3172
3173 nfs_dulookup_start(&dul, dnp, ctx);
3174
3175 /* Do the rpc */
3176 error = nmp->nm_funcs->nf_remove_rpc(dnp, cnp->cn_nameptr, cnp->cn_namelen,
3177 vfs_context_thread(ctx), vfs_context_ucred(ctx));
3178
3179 /*
3180 * Kludge City: If the first reply to the remove rpc is lost..
3181 * the reply to the retransmitted request will be ENOENT
3182 * since the file was in fact removed
3183 * Therefore, we cheat and return success.
3184 */
3185 if (error == ENOENT)
3186 error = 0;
3187
3188 if (!error && !inuse && !np->n_sillyrename) {
3189 /*
3190 * removal succeeded, it's not in use, and not silly renamed so
3191 * remove nfsnode from hash now so we can't accidentally find it
3192 * again if another object gets created with the same filehandle
3193 * before this vnode gets reclaimed
3194 */
3195 lck_mtx_lock(nfs_node_hash_mutex);
3196 if (np->n_hflag & NHHASHED) {
3197 LIST_REMOVE(np, n_hash);
3198 np->n_hflag &= ~NHHASHED;
3199 FSDBG(266, 0, np, np->n_flag, 0xb1eb1e);
3200 }
3201 lck_mtx_unlock(nfs_node_hash_mutex);
3202 /* clear flags now: won't get nfs_vnop_inactive for recycled vnode */
3203 /* clear all flags other than these */
3204 nfs_node_lock_force(np);
3205 np->n_flag &= (NMODIFIED);
3206 NATTRINVALIDATE(np);
3207 nfs_node_unlock(np);
3208 vnode_recycle(vp);
3209 setsize = 1;
3210 } else {
3211 nfs_node_lock_force(np);
3212 NATTRINVALIDATE(np);
3213 nfs_node_unlock(np);
3214 }
3215 } else if (!np->n_sillyrename) {
3216 nfs_dulookup_start(&dul, dnp, ctx);
3217 error = nfs_sillyrename(dnp, np, cnp, ctx);
3218 nfs_node_lock_force(np);
3219 NATTRINVALIDATE(np);
3220 nfs_node_unlock(np);
3221 } else {
3222 nfs_node_lock_force(np);
3223 NATTRINVALIDATE(np);
3224 nfs_node_unlock(np);
3225 nfs_dulookup_start(&dul, dnp, ctx);
3226 }
3227
3228 /* nfs_getattr() will check changed and purge caches */
3229 nfs_getattr(dnp, &nvattr, ctx, NGA_CACHED);
3230 nfs_dulookup_finish(&dul, dnp, ctx);
3231out:
3232 /* unlock the node */
3233 lck_mtx_lock(nfs_node_hash_mutex);
3234 np->n_hflag &= ~NHLOCKED;
3235 if (np->n_hflag & NHLOCKWANT) {
3236 np->n_hflag &= ~NHLOCKWANT;
3237 wakeup(np);
3238 }
3239 lck_mtx_unlock(nfs_node_hash_mutex);
3240 nfs_node_clear_busy2(dnp, np);
3241 if (setsize)
3242 ubc_setsize(vp, 0);
3243 return (error);
3244}
3245
3246/*
3247 * NFS silly-renamed file removal function called from nfs_vnop_inactive
3248 */
3249int
3250nfs_removeit(struct nfs_sillyrename *nsp)
3251{
3252 struct nfsmount *nmp = NFSTONMP(nsp->nsr_dnp);
3253 if (!nmp)
3254 return (ENXIO);
3255 return nmp->nm_funcs->nf_remove_rpc(nsp->nsr_dnp, nsp->nsr_name, nsp->nsr_namlen, NULL, nsp->nsr_cred);
3256}
3257
3258/*
3259 * NFS remove rpc, called from nfs_remove() and nfs_removeit().
3260 */
3261int
3262nfs3_remove_rpc(
3263 nfsnode_t dnp,
3264 char *name,
3265 int namelen,
3266 thread_t thd,
3267 kauth_cred_t cred)
3268{
3269 int error = 0, lockerror = ENOENT, status, wccpostattr = 0;
3270 struct timespec premtime = { 0, 0 };
3271 struct nfsmount *nmp;
3272 int nfsvers;
3273 u_int64_t xid;
3274 struct nfsm_chain nmreq, nmrep;
3275
3276 nmp = NFSTONMP(dnp);
3277 if (!nmp)
3278 return (ENXIO);
3279 nfsvers = nmp->nm_vers;
3280 if ((nfsvers == NFS_VER2) && (namelen > NFS_MAXNAMLEN))
3281 return (ENAMETOOLONG);
3282
3283 nfsm_chain_null(&nmreq);
3284 nfsm_chain_null(&nmrep);
3285
3286 nfsm_chain_build_alloc_init(error, &nmreq,
3287 NFSX_FH(nfsvers) + NFSX_UNSIGNED + nfsm_rndup(namelen));
3288 nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
3289 nfsm_chain_add_string(error, &nmreq, name, namelen);
3290 nfsm_chain_build_done(error, &nmreq);
3291 nfsmout_if(error);
3292
3293 error = nfs_request2(dnp, NULL, &nmreq, NFSPROC_REMOVE, thd, cred, 0, &nmrep, &xid, &status);
3294
3295 if ((lockerror = nfs_node_lock(dnp)))
3296 error = lockerror;
3297 if (nfsvers == NFS_VER3)
3298 nfsm_chain_get_wcc_data(error, &nmrep, dnp, &premtime, &wccpostattr, &xid);
3299 nfsmout_if(error);
3300 dnp->n_flag |= NMODIFIED;
3301 /* if directory hadn't changed, update namecache mtime */
3302 if (nfstimespeccmp(&dnp->n_ncmtime, &premtime, ==))
3303 NFS_CHANGED_UPDATE_NC(nfsvers, dnp, &dnp->n_vattr);
3304 if (!wccpostattr)
3305 NATTRINVALIDATE(dnp);
3306 if (!error)
3307 error = status;
3308nfsmout:
3309 if (!lockerror)
3310 nfs_node_unlock(dnp);
3311 nfsm_chain_cleanup(&nmreq);
3312 nfsm_chain_cleanup(&nmrep);
3313 return (error);
3314}
3315
3316/*
3317 * NFS file rename call
3318 */
3319int
3320nfs_vnop_rename(
3321 struct vnop_rename_args /* {
3322 struct vnodeop_desc *a_desc;
3323 vnode_t a_fdvp;
3324 vnode_t a_fvp;
3325 struct componentname *a_fcnp;
3326 vnode_t a_tdvp;
3327 vnode_t a_tvp;
3328 struct componentname *a_tcnp;
3329 vfs_context_t a_context;
3330 } */ *ap)
3331{
3332 vfs_context_t ctx = ap->a_context;
3333 vnode_t fdvp = ap->a_fdvp;
3334 vnode_t fvp = ap->a_fvp;
3335 vnode_t tdvp = ap->a_tdvp;
3336 vnode_t tvp = ap->a_tvp;
3337 nfsnode_t fdnp, fnp, tdnp, tnp;
3338 struct componentname *tcnp = ap->a_tcnp;
3339 struct componentname *fcnp = ap->a_fcnp;
3340 int error, nfsvers, inuse=0, tvprecycle=0, locked=0;
3341 mount_t fmp, tdmp, tmp;
3342 struct nfs_vattr nvattr;
3343 struct nfsmount *nmp;
3344
3345 fdnp = VTONFS(fdvp);
3346 fnp = VTONFS(fvp);
3347 tdnp = VTONFS(tdvp);
3348 tnp = tvp ? VTONFS(tvp) : NULL;
3349
3350 nmp = NFSTONMP(fdnp);
3351 if (!nmp)
3352 return (ENXIO);
3353 nfsvers = nmp->nm_vers;
3354
3355 error = nfs_node_set_busy4(fdnp, fnp, tdnp, tnp, vfs_context_thread(ctx));
3356 if (error)
3357 return (error);
3358
3359 if (tvp && (tvp != fvp)) {
3360 /* lock the node while we rename over the existing file */
3361 lck_mtx_lock(nfs_node_hash_mutex);
3362 while (tnp->n_hflag & NHLOCKED) {
3363 tnp->n_hflag |= NHLOCKWANT;
3364 msleep(tnp, nfs_node_hash_mutex, PINOD, "nfs_rename", NULL);
3365 }
3366 tnp->n_hflag |= NHLOCKED;
3367 lck_mtx_unlock(nfs_node_hash_mutex);
3368 locked = 1;
3369 }
3370
3371 /* Check for cross-device rename */
3372 fmp = vnode_mount(fvp);
3373 tmp = tvp ? vnode_mount(tvp) : NULL;
3374 tdmp = vnode_mount(tdvp);
3375 if ((fmp != tdmp) || (tvp && (fmp != tmp))) {
3376 error = EXDEV;
3377 goto out;
3378 }
3379
3380 /* XXX prevent renaming from/over a sillyrenamed file? */
3381
3382 /*
3383 * If the tvp exists and is in use, sillyrename it before doing the
3384 * rename of the new file over it.
3385 * XXX Can't sillyrename a directory.
3386 * Don't sillyrename if source and target are same vnode (hard
3387 * links or case-variants)
3388 */
3389 if (tvp && (tvp != fvp))
3390 inuse = vnode_isinuse(tvp, 0);
3391 if (inuse && !tnp->n_sillyrename && (vnode_vtype(tvp) != VDIR)) {
3392 error = nfs_sillyrename(tdnp, tnp, tcnp, ctx);
3393 if (error) {
3394 /* sillyrename failed. Instead of pressing on, return error */
3395 goto out; /* should not be ENOENT. */
3396 } else {
3397 /* sillyrename succeeded.*/
3398 tvp = NULL;
3399 }
3400 } else if (tvp && (nmp->nm_vers >= NFS_VER4) && (tnp->n_openflags & N_DELEG_MASK)) {
3401 lck_mtx_lock(&tnp->n_openlock);
3402 tnp->n_openflags &= ~N_DELEG_MASK;
3403 lck_mtx_unlock(&tnp->n_openlock);
3404 nfs4_delegreturn_rpc(nmp, tnp->n_fhp, tnp->n_fhsize, &tnp->n_dstateid,
3405 vfs_context_thread(ctx), vfs_context_ucred(ctx));
3406 }
3407
3408 error = nmp->nm_funcs->nf_rename_rpc(fdnp, fcnp->cn_nameptr, fcnp->cn_namelen,
3409 tdnp, tcnp->cn_nameptr, tcnp->cn_namelen, ctx);
3410
3411 /*
3412 * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry.
3413 */
3414 if (error == ENOENT)
3415 error = 0;
3416
3417 if (tvp && (tvp != fvp) && !tnp->n_sillyrename) {
3418 nfs_node_lock_force(tnp);
3419 tvprecycle = (!error && !vnode_isinuse(tvp, 0) &&
3420 (nfs_getattrcache(tnp, &nvattr) || (nvattr.nva_nlink == 1)));
3421 nfs_node_unlock(tnp);
3422 lck_mtx_lock(nfs_node_hash_mutex);
3423 if (tvprecycle && (tnp->n_hflag & NHHASHED)) {
3424 /*
3425 * remove nfsnode from hash now so we can't accidentally find it
3426 * again if another object gets created with the same filehandle
3427 * before this vnode gets reclaimed
3428 */
3429 LIST_REMOVE(tnp, n_hash);
3430 tnp->n_hflag &= ~NHHASHED;
3431 FSDBG(266, 0, tnp, tnp->n_flag, 0xb1eb1e);
3432 }
3433 lck_mtx_unlock(nfs_node_hash_mutex);
3434 }
3435
3436 /* purge the old name cache entries and enter the new one */
3437 nfs_name_cache_purge(fdnp, fnp, fcnp, ctx);
3438 if (tvp) {
3439 nfs_name_cache_purge(tdnp, tnp, tcnp, ctx);
3440 if (tvprecycle) {
3441 /* clear flags now: won't get nfs_vnop_inactive for recycled vnode */
3442 /* clear all flags other than these */
3443 nfs_node_lock_force(tnp);
3444 tnp->n_flag &= (NMODIFIED);
3445 nfs_node_unlock(tnp);
3446 vnode_recycle(tvp);
3447 }
3448 }
3449 if (!error) {
3450 nfs_node_lock_force(tdnp);
3451 if (tdnp->n_flag & NNEGNCENTRIES) {
3452 tdnp->n_flag &= ~NNEGNCENTRIES;
3453 cache_purge_negatives(tdvp);
3454 }
3455 nfs_node_unlock(tdnp);
3456 nfs_node_lock_force(fnp);
3457 cache_enter(tdvp, fvp, tcnp);
3458 if (tdvp != fdvp) { /* update parent pointer */
3459 if (fnp->n_parent && !vnode_get(fnp->n_parent)) {
3460 /* remove ref from old parent */
3461 vnode_rele(fnp->n_parent);
3462 vnode_put(fnp->n_parent);
3463 }
3464 fnp->n_parent = tdvp;
3465 if (tdvp && !vnode_get(tdvp)) {
3466 /* add ref to new parent */
3467 vnode_ref(tdvp);
3468 vnode_put(tdvp);
3469 } else {
3470 fnp->n_parent = NULL;
3471 }
3472 }
3473 nfs_node_unlock(fnp);
3474 }
3475out:
3476 /* nfs_getattr() will check changed and purge caches */
3477 nfs_getattr(fdnp, &nvattr, ctx, NGA_CACHED);
3478 nfs_getattr(tdnp, &nvattr, ctx, NGA_CACHED);
3479 if (locked) {
3480 /* unlock node */
3481 lck_mtx_lock(nfs_node_hash_mutex);
3482 tnp->n_hflag &= ~NHLOCKED;
3483 if (tnp->n_hflag & NHLOCKWANT) {
3484 tnp->n_hflag &= ~NHLOCKWANT;
3485 wakeup(tnp);
3486 }
3487 lck_mtx_unlock(nfs_node_hash_mutex);
3488 }
3489 nfs_node_clear_busy4(fdnp, fnp, tdnp, tnp);
3490 return (error);
3491}
3492
3493/*
3494 * Do an NFS rename rpc. Called from nfs_vnop_rename() and nfs_sillyrename().
3495 */
3496int
3497nfs3_rename_rpc(
3498 nfsnode_t fdnp,
3499 char *fnameptr,
3500 int fnamelen,
3501 nfsnode_t tdnp,
3502 char *tnameptr,
3503 int tnamelen,
3504 vfs_context_t ctx)
3505{
3506 int error = 0, lockerror = ENOENT, status, fwccpostattr = 0, twccpostattr = 0;
3507 struct timespec fpremtime = { 0, 0 }, tpremtime = { 0, 0 };
3508 struct nfsmount *nmp;
3509 int nfsvers;
3510 u_int64_t xid, txid;
3511 struct nfsm_chain nmreq, nmrep;
3512
3513 nmp = NFSTONMP(fdnp);
3514 if (!nmp)
3515 return (ENXIO);
3516 nfsvers = nmp->nm_vers;
3517 if ((nfsvers == NFS_VER2) &&
3518 ((fnamelen > NFS_MAXNAMLEN) || (tnamelen > NFS_MAXNAMLEN)))
3519 return (ENAMETOOLONG);
3520
3521 nfsm_chain_null(&nmreq);
3522 nfsm_chain_null(&nmrep);
3523
3524 nfsm_chain_build_alloc_init(error, &nmreq,
3525 (NFSX_FH(nfsvers) + NFSX_UNSIGNED) * 2 +
3526 nfsm_rndup(fnamelen) + nfsm_rndup(tnamelen));
3527 nfsm_chain_add_fh(error, &nmreq, nfsvers, fdnp->n_fhp, fdnp->n_fhsize);
3528 nfsm_chain_add_string(error, &nmreq, fnameptr, fnamelen);
3529 nfsm_chain_add_fh(error, &nmreq, nfsvers, tdnp->n_fhp, tdnp->n_fhsize);
3530 nfsm_chain_add_string(error, &nmreq, tnameptr, tnamelen);
3531 nfsm_chain_build_done(error, &nmreq);
3532 nfsmout_if(error);
3533
3534 error = nfs_request(fdnp, NULL, &nmreq, NFSPROC_RENAME, ctx, &nmrep, &xid, &status);
3535
3536 if ((lockerror = nfs_node_lock2(fdnp, tdnp)))
3537 error = lockerror;
3538 if (nfsvers == NFS_VER3) {
3539 txid = xid;
3540 nfsm_chain_get_wcc_data(error, &nmrep, fdnp, &fpremtime, &fwccpostattr, &xid);
3541 nfsm_chain_get_wcc_data(error, &nmrep, tdnp, &tpremtime, &twccpostattr, &txid);
3542 }
3543 if (!error)
3544 error = status;
3545nfsmout:
3546 nfsm_chain_cleanup(&nmreq);
3547 nfsm_chain_cleanup(&nmrep);
3548 if (!lockerror) {
3549 fdnp->n_flag |= NMODIFIED;
3550 /* if directory hadn't changed, update namecache mtime */
3551 if (nfstimespeccmp(&fdnp->n_ncmtime, &fpremtime, ==))
3552 NFS_CHANGED_UPDATE_NC(nfsvers, fdnp, &fdnp->n_vattr);
3553 if (!fwccpostattr)
3554 NATTRINVALIDATE(fdnp);
3555 tdnp->n_flag |= NMODIFIED;
3556 /* if directory hadn't changed, update namecache mtime */
3557 if (nfstimespeccmp(&tdnp->n_ncmtime, &tpremtime, ==))
3558 NFS_CHANGED_UPDATE_NC(nfsvers, tdnp, &tdnp->n_vattr);
3559 if (!twccpostattr)
3560 NATTRINVALIDATE(tdnp);
3561 nfs_node_unlock2(fdnp, tdnp);
3562 }
3563 return (error);
3564}
3565
3566/*
3567 * NFS hard link create call
3568 */
3569int
3570nfs3_vnop_link(
3571 struct vnop_link_args /* {
3572 struct vnodeop_desc *a_desc;
3573 vnode_t a_vp;
3574 vnode_t a_tdvp;
3575 struct componentname *a_cnp;
3576 vfs_context_t a_context;
3577 } */ *ap)
3578{
3579 vfs_context_t ctx = ap->a_context;
3580 vnode_t vp = ap->a_vp;
3581 vnode_t tdvp = ap->a_tdvp;
3582 struct componentname *cnp = ap->a_cnp;
3583 int error = 0, lockerror = ENOENT, status, wccpostattr = 0, attrflag = 0;
3584 struct timespec premtime = { 0, 0 };
3585 struct nfsmount *nmp;
3586 nfsnode_t np = VTONFS(vp);
3587 nfsnode_t tdnp = VTONFS(tdvp);
3588 int nfsvers;
3589 u_int64_t xid, txid;
3590 struct nfsm_chain nmreq, nmrep;
3591
3592 if (vnode_mount(vp) != vnode_mount(tdvp))
3593 return (EXDEV);
3594
3595 nmp = VTONMP(vp);
3596 if (!nmp)
3597 return (ENXIO);
3598 nfsvers = nmp->nm_vers;
3599 if ((nfsvers == NFS_VER2) && (cnp->cn_namelen > NFS_MAXNAMLEN))
3600 return (ENAMETOOLONG);
3601
3602 /*
3603 * Push all writes to the server, so that the attribute cache
3604 * doesn't get "out of sync" with the server.
3605 * XXX There should be a better way!
3606 */
3607 nfs_flush(np, MNT_WAIT, vfs_context_thread(ctx), V_IGNORE_WRITEERR);
3608
3609 error = nfs_node_set_busy2(tdnp, np, vfs_context_thread(ctx));
3610 if (error)
3611 return (error);
3612
3613 nfsm_chain_null(&nmreq);
3614 nfsm_chain_null(&nmrep);
3615
3616 nfsm_chain_build_alloc_init(error, &nmreq,
3617 NFSX_FH(nfsvers)*2 + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen));
3618 nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
3619 nfsm_chain_add_fh(error, &nmreq, nfsvers, tdnp->n_fhp, tdnp->n_fhsize);
3620 nfsm_chain_add_string(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen);
3621 nfsm_chain_build_done(error, &nmreq);
3622 nfsmout_if(error);
3623 error = nfs_request(np, NULL, &nmreq, NFSPROC_LINK, ctx,
3624 &nmrep, &xid, &status);
3625
3626 if ((lockerror = nfs_node_lock2(tdnp, np))) {
3627 error = lockerror;
3628 goto nfsmout;
3629 }
3630 if (nfsvers == NFS_VER3) {
3631 txid = xid;
3632 nfsm_chain_postop_attr_update_flag(error, &nmrep, np, attrflag, &xid);
3633 nfsm_chain_get_wcc_data(error, &nmrep, tdnp, &premtime, &wccpostattr, &txid);
3634 }
3635 if (!error)
3636 error = status;
3637nfsmout:
3638 nfsm_chain_cleanup(&nmreq);
3639 nfsm_chain_cleanup(&nmrep);
3640 if (!lockerror) {
3641 if (!attrflag)
3642 NATTRINVALIDATE(np);
3643 tdnp->n_flag |= NMODIFIED;
3644 /* if directory hadn't changed, update namecache mtime */
3645 if (nfstimespeccmp(&tdnp->n_ncmtime, &premtime, ==))
3646 NFS_CHANGED_UPDATE_NC(nfsvers, tdnp, &tdnp->n_vattr);
3647 if (!wccpostattr)
3648 NATTRINVALIDATE(tdnp);
3649 if (!error && (tdnp->n_flag & NNEGNCENTRIES)) {
3650 tdnp->n_flag &= ~NNEGNCENTRIES;
3651 cache_purge_negatives(tdvp);
3652 }
3653 nfs_node_unlock2(tdnp, np);
3654 }
3655 nfs_node_clear_busy2(tdnp, np);
3656 /*
3657 * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
3658 */
3659 if (error == EEXIST)
3660 error = 0;
3661 return (error);
3662}
3663
3664/*
3665 * NFS symbolic link create call
3666 */
3667int
3668nfs3_vnop_symlink(
3669 struct vnop_symlink_args /* {
3670 struct vnodeop_desc *a_desc;
3671 vnode_t a_dvp;
3672 vnode_t *a_vpp;
3673 struct componentname *a_cnp;
3674 struct vnode_attr *a_vap;
3675 char *a_target;
3676 vfs_context_t a_context;
3677 } */ *ap)
3678{
3679 vfs_context_t ctx = ap->a_context;
3680 vnode_t dvp = ap->a_dvp;
3681 struct vnode_attr *vap = ap->a_vap;
3682 struct componentname *cnp = ap->a_cnp;
3683 struct nfs_vattr nvattr, dnvattr;
3684 fhandle_t fh;
3685 int slen, error = 0, lockerror = ENOENT, busyerror = ENOENT, status, wccpostattr = 0;
3686 struct timespec premtime = { 0, 0 };
3687 vnode_t newvp = NULL;
3688 int nfsvers, gotuid, gotgid;
3689 u_int64_t xid, dxid;
3690 nfsnode_t np = NULL;
3691 nfsnode_t dnp = VTONFS(dvp);
3692 struct nfsmount *nmp;
3693 struct nfsm_chain nmreq, nmrep;
3694 struct nfsreq *req = NULL;
3695 struct nfs_dulookup dul;
3696
3697 nmp = VTONMP(dvp);
3698 if (!nmp)
3699 return (ENXIO);
3700 nfsvers = nmp->nm_vers;
3701
3702 slen = strlen(ap->a_target);
3703 if ((nfsvers == NFS_VER2) &&
3704 ((cnp->cn_namelen > NFS_MAXNAMLEN) || (slen > NFS_MAXPATHLEN)))
3705 return (ENAMETOOLONG);
3706
3707 VATTR_SET_SUPPORTED(vap, va_mode);
3708 VATTR_SET_SUPPORTED(vap, va_uid);
3709 VATTR_SET_SUPPORTED(vap, va_gid);
3710 VATTR_SET_SUPPORTED(vap, va_data_size);
3711 VATTR_SET_SUPPORTED(vap, va_access_time);
3712 VATTR_SET_SUPPORTED(vap, va_modify_time);
3713 gotuid = VATTR_IS_ACTIVE(vap, va_uid);
3714 gotgid = VATTR_IS_ACTIVE(vap, va_gid);
3715
3716 error = busyerror = nfs_node_set_busy(dnp, vfs_context_thread(ctx));
3717 nfs_dulookup_init(&dul, dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx);
3718
3719 nfsm_chain_null(&nmreq);
3720 nfsm_chain_null(&nmrep);
3721
3722 nfsm_chain_build_alloc_init(error, &nmreq,
3723 NFSX_FH(nfsvers) + 2 * NFSX_UNSIGNED +
3724 nfsm_rndup(cnp->cn_namelen) + nfsm_rndup(slen) + NFSX_SATTR(nfsvers));
3725 nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
3726 nfsm_chain_add_string(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen);
3727 if (nfsvers == NFS_VER3)
3728 nfsm_chain_add_v3sattr(error, &nmreq, vap);
3729 nfsm_chain_add_string(error, &nmreq, ap->a_target, slen);
3730 if (nfsvers == NFS_VER2)
3731 nfsm_chain_add_v2sattr(error, &nmreq, vap, -1);
3732 nfsm_chain_build_done(error, &nmreq);
3733 nfsmout_if(error);
3734
3735 error = nfs_request_async(dnp, NULL, &nmreq, NFSPROC_SYMLINK,
3736 vfs_context_thread(ctx), vfs_context_ucred(ctx), NULL, &req);
3737 if (!error) {
3738 nfs_dulookup_start(&dul, dnp, ctx);
3739 error = nfs_request_async_finish(req, &nmrep, &xid, &status);
3740 }
3741
3742 if ((lockerror = nfs_node_lock(dnp)))
3743 error = lockerror;
3744 dxid = xid;
3745 if (!error && !status) {
3746 if (dnp->n_flag & NNEGNCENTRIES) {
3747 dnp->n_flag &= ~NNEGNCENTRIES;
3748 cache_purge_negatives(dvp);
3749 }
3750 if (nfsvers == NFS_VER3)
3751 error = nfsm_chain_get_fh_attr(&nmrep, dnp, ctx, nfsvers, &xid, &fh, &nvattr);
3752 else
3753 fh.fh_len = 0;
3754 }
3755 if (nfsvers == NFS_VER3)
3756 nfsm_chain_get_wcc_data(error, &nmrep, dnp, &premtime, &wccpostattr, &dxid);
3757 if (!error)
3758 error = status;
3759nfsmout:
3760 nfsm_chain_cleanup(&nmreq);
3761 nfsm_chain_cleanup(&nmrep);
3762
3763 if (!lockerror) {
3764 dnp->n_flag |= NMODIFIED;
3765 /* if directory hadn't changed, update namecache mtime */
3766 if (nfstimespeccmp(&dnp->n_ncmtime, &premtime, ==))
3767 NFS_CHANGED_UPDATE_NC(nfsvers, dnp, &dnp->n_vattr);
3768 nfs_node_unlock(dnp);
3769 /* nfs_getattr() will check changed and purge caches */
3770 nfs_getattr(dnp, &dnvattr, ctx, wccpostattr ? NGA_CACHED : NGA_UNCACHED);
3771 }
3772
3773 if (!error && fh.fh_len)
3774 error = nfs_nget(NFSTOMP(dnp), dnp, cnp, fh.fh_data, fh.fh_len, &nvattr, &xid, NG_MAKEENTRY, &np);
3775 if (!error && np)
3776 newvp = NFSTOV(np);
3777
3778 nfs_dulookup_finish(&dul, dnp, ctx);
3779
3780 /*
3781 * Kludge: Map EEXIST => 0 assuming that you have a reply to a retry
3782 * if we can succeed in looking up the symlink.
3783 */
3784 if ((error == EEXIST) || (!error && !newvp)) {
3785 if (newvp) {
3786 nfs_node_unlock(np);
3787 vnode_put(newvp);
3788 newvp = NULL;
3789 }
3790 error = nfs_lookitup(dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx, &np);
3791 if (!error) {
3792 newvp = NFSTOV(np);
3793 if (vnode_vtype(newvp) != VLNK)
3794 error = EEXIST;
3795 }
3796 }
3797 if (!busyerror)
3798 nfs_node_clear_busy(dnp);
3799 if (!error && (gotuid || gotgid) &&
3800 (!newvp || nfs_getattrcache(np, &nvattr) ||
3801 (gotuid && (nvattr.nva_uid != vap->va_uid)) ||
3802 (gotgid && (nvattr.nva_gid != vap->va_gid)))) {
3803 /* clear ID bits if server didn't use them (or we can't tell) */
3804 VATTR_CLEAR_SUPPORTED(vap, va_uid);
3805 VATTR_CLEAR_SUPPORTED(vap, va_gid);
3806 }
3807 if (error) {
3808 if (newvp) {
3809 nfs_node_unlock(np);
3810 vnode_put(newvp);
3811 }
3812 } else {
3813 nfs_node_unlock(np);
3814 *ap->a_vpp = newvp;
3815 }
3816 return (error);
3817}
3818
3819/*
3820 * NFS make dir call
3821 */
3822int
3823nfs3_vnop_mkdir(
3824 struct vnop_mkdir_args /* {
3825 struct vnodeop_desc *a_desc;
3826 vnode_t a_dvp;
3827 vnode_t *a_vpp;
3828 struct componentname *a_cnp;
3829 struct vnode_attr *a_vap;
3830 vfs_context_t a_context;
3831 } */ *ap)
3832{
3833 vfs_context_t ctx = ap->a_context;
3834 vnode_t dvp = ap->a_dvp;
3835 struct vnode_attr *vap = ap->a_vap;
3836 struct componentname *cnp = ap->a_cnp;
3837 struct nfs_vattr nvattr, dnvattr;
3838 nfsnode_t np = NULL;
3839 struct nfsmount *nmp;
3840 nfsnode_t dnp = VTONFS(dvp);
3841 vnode_t newvp = NULL;
3842 int error = 0, lockerror = ENOENT, busyerror = ENOENT, status, wccpostattr = 0;
3843 struct timespec premtime = { 0, 0 };
3844 int nfsvers, gotuid, gotgid;
3845 u_int64_t xid, dxid;
3846 fhandle_t fh;
3847 struct nfsm_chain nmreq, nmrep;
3848 struct nfsreq *req = NULL;
3849 struct nfs_dulookup dul;
3850
3851 nmp = VTONMP(dvp);
3852 if (!nmp)
3853 return (ENXIO);
3854 nfsvers = nmp->nm_vers;
3855 if ((nfsvers == NFS_VER2) && (cnp->cn_namelen > NFS_MAXNAMLEN))
3856 return (ENAMETOOLONG);
3857
3858 VATTR_SET_SUPPORTED(vap, va_mode);
3859 VATTR_SET_SUPPORTED(vap, va_uid);
3860 VATTR_SET_SUPPORTED(vap, va_gid);
3861 VATTR_SET_SUPPORTED(vap, va_data_size);
3862 VATTR_SET_SUPPORTED(vap, va_access_time);
3863 VATTR_SET_SUPPORTED(vap, va_modify_time);
3864 gotuid = VATTR_IS_ACTIVE(vap, va_uid);
3865 gotgid = VATTR_IS_ACTIVE(vap, va_gid);
3866
3867 error = busyerror = nfs_node_set_busy(dnp, vfs_context_thread(ctx));
3868 nfs_dulookup_init(&dul, dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx);
3869
3870 nfsm_chain_null(&nmreq);
3871 nfsm_chain_null(&nmrep);
3872
3873 nfsm_chain_build_alloc_init(error, &nmreq,
3874 NFSX_FH(nfsvers) + NFSX_UNSIGNED +
3875 nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(nfsvers));
3876 nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
3877 nfsm_chain_add_string(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen);
3878 if (nfsvers == NFS_VER3)
3879 nfsm_chain_add_v3sattr(error, &nmreq, vap);
3880 else
3881 nfsm_chain_add_v2sattr(error, &nmreq, vap, -1);
3882 nfsm_chain_build_done(error, &nmreq);
3883 nfsmout_if(error);
3884
3885 error = nfs_request_async(dnp, NULL, &nmreq, NFSPROC_MKDIR,
3886 vfs_context_thread(ctx), vfs_context_ucred(ctx), NULL, &req);
3887 if (!error) {
3888 nfs_dulookup_start(&dul, dnp, ctx);
3889 error = nfs_request_async_finish(req, &nmrep, &xid, &status);
3890 }
3891
3892 if ((lockerror = nfs_node_lock(dnp)))
3893 error = lockerror;
3894 dxid = xid;
3895 if (!error && !status) {
3896 if (dnp->n_flag & NNEGNCENTRIES) {
3897 dnp->n_flag &= ~NNEGNCENTRIES;
3898 cache_purge_negatives(dvp);
3899 }
3900 error = nfsm_chain_get_fh_attr(&nmrep, dnp, ctx, nfsvers, &xid, &fh, &nvattr);
3901 }
3902 if (nfsvers == NFS_VER3)
3903 nfsm_chain_get_wcc_data(error, &nmrep, dnp, &premtime, &wccpostattr, &dxid);
3904 if (!error)
3905 error = status;
3906nfsmout:
3907 nfsm_chain_cleanup(&nmreq);
3908 nfsm_chain_cleanup(&nmrep);
3909
3910 if (!lockerror) {
3911 dnp->n_flag |= NMODIFIED;
3912 /* if directory hadn't changed, update namecache mtime */
3913 if (nfstimespeccmp(&dnp->n_ncmtime, &premtime, ==))
3914 NFS_CHANGED_UPDATE_NC(nfsvers, dnp, &dnp->n_vattr);
3915 nfs_node_unlock(dnp);
3916 /* nfs_getattr() will check changed and purge caches */
3917 nfs_getattr(dnp, &dnvattr, ctx, wccpostattr ? NGA_CACHED : NGA_UNCACHED);
3918 }
3919
3920 if (!error && fh.fh_len)
3921 error = nfs_nget(NFSTOMP(dnp), dnp, cnp, fh.fh_data, fh.fh_len, &nvattr, &xid, NG_MAKEENTRY, &np);
3922 if (!error && np)
3923 newvp = NFSTOV(np);
3924
3925 nfs_dulookup_finish(&dul, dnp, ctx);
3926
3927 /*
3928 * Kludge: Map EEXIST => 0 assuming that you have a reply to a retry
3929 * if we can succeed in looking up the directory.
3930 */
3931 if ((error == EEXIST) || (!error && !newvp)) {
3932 if (newvp) {
3933 nfs_node_unlock(np);
3934 vnode_put(newvp);
3935 newvp = NULL;
3936 }
3937 error = nfs_lookitup(dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx, &np);
3938 if (!error) {
3939 newvp = NFSTOV(np);
3940 if (vnode_vtype(newvp) != VDIR)
3941 error = EEXIST;
3942 }
3943 }
3944 if (!busyerror)
3945 nfs_node_clear_busy(dnp);
3946 if (!error && (gotuid || gotgid) &&
3947 (!newvp || nfs_getattrcache(np, &nvattr) ||
3948 (gotuid && (nvattr.nva_uid != vap->va_uid)) ||
3949 (gotgid && (nvattr.nva_gid != vap->va_gid)))) {
3950 /* clear ID bits if server didn't use them (or we can't tell) */
3951 VATTR_CLEAR_SUPPORTED(vap, va_uid);
3952 VATTR_CLEAR_SUPPORTED(vap, va_gid);
3953 }
3954 if (error) {
3955 if (newvp) {
3956 nfs_node_unlock(np);
3957 vnode_put(newvp);
3958 }
3959 } else {
3960 nfs_node_unlock(np);
3961 *ap->a_vpp = newvp;
3962 }
3963 return (error);
3964}
3965
3966/*
3967 * NFS remove directory call
3968 */
3969int
3970nfs3_vnop_rmdir(
3971 struct vnop_rmdir_args /* {
3972 struct vnodeop_desc *a_desc;
3973 vnode_t a_dvp;
3974 vnode_t a_vp;
3975 struct componentname *a_cnp;
3976 vfs_context_t a_context;
3977 } */ *ap)
3978{
3979 vfs_context_t ctx = ap->a_context;
3980 vnode_t vp = ap->a_vp;
3981 vnode_t dvp = ap->a_dvp;
3982 struct componentname *cnp = ap->a_cnp;
3983 int error = 0, lockerror = ENOENT, status, wccpostattr = 0;
3984 struct timespec premtime = { 0, 0 };
3985 struct nfsmount *nmp;
3986 nfsnode_t np = VTONFS(vp);
3987 nfsnode_t dnp = VTONFS(dvp);
3988 struct nfs_vattr dnvattr;
3989 int nfsvers;
3990 u_int64_t xid;
3991 struct nfsm_chain nmreq, nmrep;
3992 struct nfsreq *req = NULL;
3993 struct nfs_dulookup dul;
3994
3995 nmp = VTONMP(vp);
3996 if (!nmp)
3997 return (ENXIO);
3998 nfsvers = nmp->nm_vers;
3999 if ((nfsvers == NFS_VER2) && (cnp->cn_namelen > NFS_MAXNAMLEN))
4000 return (ENAMETOOLONG);
4001
4002 if ((error = nfs_node_set_busy2(dnp, np, vfs_context_thread(ctx))))
4003 return (error);
4004
4005 nfs_dulookup_init(&dul, dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx);
4006
4007 nfsm_chain_null(&nmreq);
4008 nfsm_chain_null(&nmrep);
4009
4010 nfsm_chain_build_alloc_init(error, &nmreq,
4011 NFSX_FH(nfsvers) + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen));
4012 nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
4013 nfsm_chain_add_string(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen);
4014 nfsm_chain_build_done(error, &nmreq);
4015 nfsmout_if(error);
4016
4017 error = nfs_request_async(dnp, NULL, &nmreq, NFSPROC_RMDIR,
4018 vfs_context_thread(ctx), vfs_context_ucred(ctx), NULL, &req);
4019 if (!error) {
4020 nfs_dulookup_start(&dul, dnp, ctx);
4021 error = nfs_request_async_finish(req, &nmrep, &xid, &status);
4022 }
4023
4024 if ((lockerror = nfs_node_lock(dnp)))
4025 error = lockerror;
4026 if (nfsvers == NFS_VER3)
4027 nfsm_chain_get_wcc_data(error, &nmrep, dnp, &premtime, &wccpostattr, &xid);
4028 if (!error)
4029 error = status;
4030nfsmout:
4031 nfsm_chain_cleanup(&nmreq);
4032 nfsm_chain_cleanup(&nmrep);
4033
4034 if (!lockerror) {
4035 dnp->n_flag |= NMODIFIED;
4036 /* if directory hadn't changed, update namecache mtime */
4037 if (nfstimespeccmp(&dnp->n_ncmtime, &premtime, ==))
4038 NFS_CHANGED_UPDATE_NC(nfsvers, dnp, &dnp->n_vattr);
4039 nfs_node_unlock(dnp);
4040 nfs_name_cache_purge(dnp, np, cnp, ctx);
4041 /* nfs_getattr() will check changed and purge caches */
4042 nfs_getattr(dnp, &dnvattr, ctx, wccpostattr ? NGA_CACHED : NGA_UNCACHED);
4043 }
4044 nfs_dulookup_finish(&dul, dnp, ctx);
4045 nfs_node_clear_busy2(dnp, np);
4046
4047 /*
4048 * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry.
4049 */
4050 if (error == ENOENT)
4051 error = 0;
4052 if (!error) {
4053 /*
4054 * remove nfsnode from hash now so we can't accidentally find it
4055 * again if another object gets created with the same filehandle
4056 * before this vnode gets reclaimed
4057 */
4058 lck_mtx_lock(nfs_node_hash_mutex);
4059 if (np->n_hflag & NHHASHED) {
4060 LIST_REMOVE(np, n_hash);
4061 np->n_hflag &= ~NHHASHED;
4062 FSDBG(266, 0, np, np->n_flag, 0xb1eb1e);
4063 }
4064 lck_mtx_unlock(nfs_node_hash_mutex);
4065 }
4066 return (error);
4067}
4068
4069/*
4070 * NFS readdir call
4071 *
4072 * The incoming "offset" is a directory cookie indicating where in the
4073 * directory entries should be read from. A zero cookie means start at
4074 * the beginning of the directory. Any other cookie will be a cookie
4075 * returned from the server.
4076 *
4077 * Using that cookie, determine which buffer (and where in that buffer)
4078 * to start returning entries from. Buffer logical block numbers are
4079 * the cookies they start at. If a buffer is found that is not full,
4080 * call into the bio/RPC code to fill it. The RPC code will probably
4081 * fill several buffers (dropping the first, requiring a re-get).
4082 *
4083 * When done copying entries to the buffer, set the offset to the current
4084 * entry's cookie and enter that cookie in the cookie cache.
4085 *
4086 * Note: because the getdirentries(2) API returns a long-typed offset,
4087 * the incoming offset is a potentially truncated cookie (ptc).
4088 * The cookie matching code is aware of this and will fall back to
4089 * matching only 32 bits of the cookie.
4090 */
4091int
4092nfs_vnop_readdir(
4093 struct vnop_readdir_args /* {
4094 struct vnodeop_desc *a_desc;
4095 vnode_t a_vp;
4096 struct uio *a_uio;
4097 int a_flags;
4098 int *a_eofflag;
4099 int *a_numdirent;
4100 vfs_context_t a_context;
4101 } */ *ap)
4102{
4103 vfs_context_t ctx = ap->a_context;
4104 vnode_t dvp = ap->a_vp;
4105 nfsnode_t dnp = VTONFS(dvp);
4106 struct nfsmount *nmp;
4107 uio_t uio = ap->a_uio;
4108 int error, nfsvers, extended, numdirent, bigcookies, ptc, done;
4109 struct nfs_vattr nvattr;
4110 uint16_t i, iptc, rlen, nlen;
4111 uint64_t cookie, nextcookie, lbn = 0;
4112 struct nfsbuf *bp = NULL;
4113 struct nfs_dir_buf_header *ndbhp;
4114 struct direntry *dp, *dpptc;
4115 struct dirent dent;
4116 char *cp = NULL;
4117 thread_t thd;
4118
4119 nmp = VTONMP(dvp);
4120 if (!nmp)
4121 return (ENXIO);
4122 nfsvers = nmp->nm_vers;
4123 bigcookies = (nmp->nm_state & NFSSTA_BIGCOOKIES);
4124 extended = (ap->a_flags & VNODE_READDIR_EXTENDED);
4125
4126 if (vnode_vtype(dvp) != VDIR)
4127 return (EPERM);
4128
4129 if (ap->a_eofflag)
4130 *ap->a_eofflag = 0;
4131
4132 if (uio_resid(uio) == 0)
4133 return (0);
4134
4135 thd = vfs_context_thread(ctx);
4136 numdirent = done = 0;
4137 nextcookie = uio_offset(uio);
4138 ptc = bigcookies && NFS_DIR_COOKIE_POTENTIALLY_TRUNCATED(nextcookie);
4139
4140 if ((error = nfs_node_lock(dnp)))
4141 goto out;
4142
4143 if (dnp->n_flag & NNEEDINVALIDATE) {
4144 dnp->n_flag &= ~NNEEDINVALIDATE;
4145 nfs_invaldir(dnp);
4146 nfs_node_unlock(dnp);
4147 error = nfs_vinvalbuf(dvp, 0, ctx, 1);
4148 if (!error)
4149 error = nfs_node_lock(dnp);
4150 if (error)
4151 goto out;
4152 }
4153
4154 /*
4155 * check for need to invalidate when (re)starting at beginning
4156 */
4157 if (!nextcookie) {
4158 if (dnp->n_flag & NMODIFIED) {
4159 nfs_invaldir(dnp);
4160 nfs_node_unlock(dnp);
4161 if ((error = nfs_vinvalbuf(dvp, 0, ctx, 1)))
4162 goto out;
4163 } else {
4164 nfs_node_unlock(dnp);
4165 }
4166 /* nfs_getattr() will check changed and purge caches */
4167 if ((error = nfs_getattr(dnp, &nvattr, ctx, NGA_UNCACHED)))
4168 goto out;
4169 } else {
4170 nfs_node_unlock(dnp);
4171 }
4172
4173 error = nfs_dir_cookie_to_lbn(dnp, nextcookie, &ptc, &lbn);
4174 if (error) {
4175 if (error < 0) { /* just hit EOF cookie */
4176 done = 1;
4177 error = 0;
4178 }
4179 if (ap->a_eofflag)
4180 *ap->a_eofflag = 1;
4181 }
4182
4183 while (!error && !done) {
4184 OSAddAtomic(1, &nfsstats.biocache_readdirs);
4185 cookie = nextcookie;
4186getbuffer:
4187 error = nfs_buf_get(dnp, lbn, NFS_DIRBLKSIZ, thd, NBLK_READ, &bp);
4188 if (error)
4189 goto out;
4190 ndbhp = (struct nfs_dir_buf_header*)bp->nb_data;
4191 if (!ISSET(bp->nb_flags, NB_CACHE) || !ISSET(ndbhp->ndbh_flags, NDB_FULL)) {
4192 if (!ISSET(bp->nb_flags, NB_CACHE)) { /* initialize the buffer */
4193 ndbhp->ndbh_flags = 0;
4194 ndbhp->ndbh_count = 0;
4195 ndbhp->ndbh_entry_end = sizeof(*ndbhp);
4196 ndbhp->ndbh_ncgen = dnp->n_ncgen;
4197 }
4198 error = nfs_buf_readdir(bp, ctx);
4199 if (error == NFSERR_DIRBUFDROPPED)
4200 goto getbuffer;
4201 if (error)
4202 nfs_buf_release(bp, 1);
4203 if (error && (error != ENXIO) && (error != ETIMEDOUT) && (error != EINTR) && (error != ERESTART)) {
4204 if (!nfs_node_lock(dnp)) {
4205 nfs_invaldir(dnp);
4206 nfs_node_unlock(dnp);
4207 }
4208 nfs_vinvalbuf(dvp, 0, ctx, 1);
4209 if (error == NFSERR_BAD_COOKIE)
4210 error = ENOENT;
4211 }
4212 if (error)
4213 goto out;
4214 }
4215
4216 /* find next entry to return */
4217 dp = NFS_DIR_BUF_FIRST_DIRENTRY(bp);
4218 i = 0;
4219 if ((lbn != cookie) && !(ptc && NFS_DIR_COOKIE_SAME32(lbn, cookie))) {
4220 dpptc = NULL;
4221 iptc = 0;
4222 for (; (i < ndbhp->ndbh_count) && (cookie != dp->d_seekoff); i++) {
4223 if (ptc && !dpptc && NFS_DIR_COOKIE_SAME32(cookie, dp->d_seekoff)) {
4224 iptc = i;
4225 dpptc = dp;
4226 }
4227 nextcookie = dp->d_seekoff;
4228 dp = NFS_DIRENTRY_NEXT(dp);
4229 }
4230 if ((i == ndbhp->ndbh_count) && dpptc) {
4231 i = iptc;
4232 dp = dpptc;
4233 }
4234 if (i < ndbhp->ndbh_count) {
4235 nextcookie = dp->d_seekoff;
4236 dp = NFS_DIRENTRY_NEXT(dp);
4237 i++;
4238 }
4239 }
4240 ptc = 0; /* only have to deal with ptc on first cookie */
4241
4242 /* return as many entries as we can */
4243 for (; i < ndbhp->ndbh_count; i++) {
4244 if (extended) {
4245 rlen = dp->d_reclen;
4246 cp = (char*)dp;
4247 } else {
4248 if (!cp) {
4249 cp = (char*)&dent;
4250 bzero(cp, sizeof(dent));
4251 }
4252 if (dp->d_namlen > (sizeof(dent.d_name) - 1))
4253 nlen = sizeof(dent.d_name) - 1;
4254 else
4255 nlen = dp->d_namlen;
4256 rlen = NFS_DIRENT_LEN(nlen);
4257 dent.d_reclen = rlen;
4258 dent.d_ino = dp->d_ino;
4259 dent.d_type = dp->d_type;
4260 dent.d_namlen = nlen;
4261 strlcpy(dent.d_name, dp->d_name, nlen + 1);
4262 }
4263 /* check that the record fits */
4264 if (rlen > uio_resid(uio)) {
4265 done = 1;
4266 break;
4267 }
4268 if ((error = uiomove(cp, rlen, uio)))
4269 break;
4270 numdirent++;
4271 nextcookie = dp->d_seekoff;
4272 dp = NFS_DIRENTRY_NEXT(dp);
4273 }
4274
4275 if (i == ndbhp->ndbh_count) {
4276 /* hit end of buffer, move to next buffer */
4277 lbn = nextcookie;
4278 /* if we also hit EOF, we're done */
4279 if (ISSET(ndbhp->ndbh_flags, NDB_EOF)) {
4280 done = 1;
4281 if (ap->a_eofflag)
4282 *ap->a_eofflag = 1;
4283 }
4284 }
4285 if (!error)
4286 uio_setoffset(uio, nextcookie);
4287 if (!error && !done && (nextcookie == cookie)) {
4288 printf("nfs readdir cookie didn't change 0x%llx, %d/%d\n", cookie, i, ndbhp->ndbh_count);
4289 error = EIO;
4290 }
4291 nfs_buf_release(bp, 1);
4292 }
4293
4294 if (!error)
4295 nfs_dir_cookie_cache(dnp, nextcookie, lbn);
4296
4297 if (ap->a_numdirent)
4298 *ap->a_numdirent = numdirent;
4299out:
4300 return (error);
4301}
4302
4303
4304/*
4305 * Invalidate cached directory information, except for the actual directory
4306 * blocks (which are invalidated separately).
4307 */
4308void
4309nfs_invaldir(nfsnode_t dnp)
4310{
4311 if (vnode_vtype(NFSTOV(dnp)) != VDIR)
4312 return;
4313 dnp->n_eofcookie = 0;
4314 dnp->n_cookieverf = 0;
4315 if (!dnp->n_cookiecache)
4316 return;
4317 dnp->n_cookiecache->free = 0;
4318 dnp->n_cookiecache->mru = -1;
4319 memset(dnp->n_cookiecache->next, -1, NFSNUMCOOKIES);
4320}
4321
4322/*
4323 * calculate how much space is available for additional directory entries.
4324 */
4325uint32_t
4326nfs_dir_buf_freespace(struct nfsbuf *bp, int rdirplus)
4327{
4328 struct nfs_dir_buf_header *ndbhp = (struct nfs_dir_buf_header*)bp->nb_data;
4329 uint32_t space;
4330
4331 if (!ndbhp)
4332 return (0);
4333 space = bp->nb_bufsize - ndbhp->ndbh_entry_end;
4334 if (rdirplus)
4335 space -= ndbhp->ndbh_count * sizeof(struct nfs_vattr);
4336 return (space);
4337}
4338
4339/*
4340 * add/update a cookie->lbn entry in the directory cookie cache
4341 */
4342void
4343nfs_dir_cookie_cache(nfsnode_t dnp, uint64_t cookie, uint64_t lbn)
4344{
4345 struct nfsdmap *ndcc;
4346 int8_t i, prev;
4347
4348 if (!cookie)
4349 return;
4350
4351 if (nfs_node_lock(dnp))
4352 return;
4353
4354 if (cookie == dnp->n_eofcookie) { /* EOF cookie */
4355 nfs_node_unlock(dnp);
4356 return;
4357 }
4358
4359 ndcc = dnp->n_cookiecache;
4360 if (!ndcc) {
4361 /* allocate the cookie cache structure */
4362 MALLOC_ZONE(dnp->n_cookiecache, struct nfsdmap *,
4363 sizeof(struct nfsdmap), M_NFSDIROFF, M_WAITOK);
4364 if (!dnp->n_cookiecache) {
4365 nfs_node_unlock(dnp);
4366 return;
4367 }
4368 ndcc = dnp->n_cookiecache;
4369 ndcc->free = 0;
4370 ndcc->mru = -1;
4371 memset(ndcc->next, -1, NFSNUMCOOKIES);
4372 }
4373
4374 /*
4375 * Search the list for this cookie.
4376 * Keep track of previous and last entries.
4377 */
4378 prev = -1;
4379 i = ndcc->mru;
4380 while ((i != -1) && (cookie != ndcc->cookies[i].key)) {
4381 if (ndcc->next[i] == -1) /* stop on last entry so we can reuse */
4382 break;
4383 prev = i;
4384 i = ndcc->next[i];
4385 }
4386 if ((i != -1) && (cookie == ndcc->cookies[i].key)) {
4387 /* found it, remove from list */
4388 if (prev != -1)
4389 ndcc->next[prev] = ndcc->next[i];
4390 else
4391 ndcc->mru = ndcc->next[i];
4392 } else {
4393 /* not found, use next free entry or reuse last entry */
4394 if (ndcc->free != NFSNUMCOOKIES)
4395 i = ndcc->free++;
4396 else
4397 ndcc->next[prev] = -1;
4398 ndcc->cookies[i].key = cookie;
4399 ndcc->cookies[i].lbn = lbn;
4400 }
4401 /* insert cookie at head of MRU list */
4402 ndcc->next[i] = ndcc->mru;
4403 ndcc->mru = i;
4404 nfs_node_unlock(dnp);
4405}
4406
4407/*
4408 * Try to map the given directory cookie to a directory buffer (return lbn).
4409 * If we have a possibly truncated cookie (ptc), check for 32-bit matches too.
4410 */
4411int
4412nfs_dir_cookie_to_lbn(nfsnode_t dnp, uint64_t cookie, int *ptc, uint64_t *lbnp)
4413{
4414 struct nfsdmap *ndcc = dnp->n_cookiecache;
4415 int8_t i, eofptc, iptc, found;
4416 struct nfsmount *nmp;
4417 struct nfsbuf *bp, *lastbp;
4418 struct nfsbuflists blist;
4419 struct direntry *dp, *dpptc;
4420 struct nfs_dir_buf_header *ndbhp;
4421
4422 if (!cookie) { /* initial cookie */
4423 *lbnp = 0;
4424 *ptc = 0;
4425 return (0);
4426 }
4427
4428 if (nfs_node_lock(dnp))
4429 return (ENOENT);
4430
4431 if (cookie == dnp->n_eofcookie) { /* EOF cookie */
4432 nfs_node_unlock(dnp);
4433 OSAddAtomic(1, &nfsstats.direofcache_hits);
4434 *ptc = 0;
4435 return (-1);
4436 }
4437 /* note if cookie is a 32-bit match with the EOF cookie */
4438 eofptc = *ptc ? NFS_DIR_COOKIE_SAME32(cookie, dnp->n_eofcookie) : 0;
4439 iptc = -1;
4440
4441 /* search the list for the cookie */
4442 for (i = ndcc ? ndcc->mru : -1; i >= 0; i = ndcc->next[i]) {
4443 if (ndcc->cookies[i].key == cookie) {
4444 /* found a match for this cookie */
4445 *lbnp = ndcc->cookies[i].lbn;
4446 nfs_node_unlock(dnp);
4447 OSAddAtomic(1, &nfsstats.direofcache_hits);
4448 *ptc = 0;
4449 return (0);
4450 }
4451 /* check for 32-bit match */
4452 if (*ptc && (iptc == -1) && NFS_DIR_COOKIE_SAME32(ndcc->cookies[i].key, cookie))
4453 iptc = i;
4454 }
4455 /* exact match not found */
4456 if (eofptc) {
4457 /* but 32-bit match hit the EOF cookie */
4458 nfs_node_unlock(dnp);
4459 OSAddAtomic(1, &nfsstats.direofcache_hits);
4460 return (-1);
4461 }
4462 if (iptc >= 0) {
4463 /* but 32-bit match got a hit */
4464 *lbnp = ndcc->cookies[iptc].lbn;
4465 nfs_node_unlock(dnp);
4466 OSAddAtomic(1, &nfsstats.direofcache_hits);
4467 return (0);
4468 }
4469 nfs_node_unlock(dnp);
4470
4471 /*
4472 * No match found in the cookie cache... hmm...
4473 * Let's search the directory's buffers for the cookie.
4474 */
4475 nmp = NFSTONMP(dnp);
4476 if (!nmp)
4477 return (ENXIO);
4478 dpptc = NULL;
4479 found = 0;
4480
4481 lck_mtx_lock(nfs_buf_mutex);
4482 /*
4483 * Scan the list of buffers, keeping them in order.
4484 * Note that itercomplete inserts each of the remaining buffers
4485 * into the head of list (thus reversing the elements). So, we
4486 * make sure to iterate through all buffers, inserting them after
4487 * each other, to keep them in order.
4488 * Also note: the LIST_INSERT_AFTER(lastbp) is only safe because
4489 * we don't drop nfs_buf_mutex.
4490 */
4491 if (!nfs_buf_iterprepare(dnp, &blist, NBI_CLEAN)) {
4492 lastbp = NULL;
4493 while ((bp = LIST_FIRST(&blist))) {
4494 LIST_REMOVE(bp, nb_vnbufs);
4495 if (!lastbp)
4496 LIST_INSERT_HEAD(&dnp->n_cleanblkhd, bp, nb_vnbufs);
4497 else
4498 LIST_INSERT_AFTER(lastbp, bp, nb_vnbufs);
4499 lastbp = bp;
4500 if (found)
4501 continue;
4502 nfs_buf_refget(bp);
4503 if (nfs_buf_acquire(bp, NBAC_NOWAIT, 0, 0)) {
4504 /* just skip this buffer */
4505 nfs_buf_refrele(bp);
4506 continue;
4507 }
4508 nfs_buf_refrele(bp);
4509
4510 /* scan the buffer for the cookie */
4511 ndbhp = (struct nfs_dir_buf_header*)bp->nb_data;
4512 dp = NFS_DIR_BUF_FIRST_DIRENTRY(bp);
4513 dpptc = NULL;
4514 for (i=0; (i < ndbhp->ndbh_count) && (cookie != dp->d_seekoff); i++) {
4515 if (*ptc && !dpptc && NFS_DIR_COOKIE_SAME32(cookie, dp->d_seekoff)) {
4516 dpptc = dp;
4517 iptc = i;
4518 }
4519 dp = NFS_DIRENTRY_NEXT(dp);
4520 }
4521 if ((i == ndbhp->ndbh_count) && dpptc) {
4522 /* found only a PTC match */
4523 dp = dpptc;
4524 i = iptc;
4525 } else if (i < ndbhp->ndbh_count) {
4526 *ptc = 0;
4527 }
4528 if (i < (ndbhp->ndbh_count-1)) {
4529 /* next entry is *in* this buffer: return this block */
4530 *lbnp = bp->nb_lblkno;
4531 found = 1;
4532 } else if (i == (ndbhp->ndbh_count-1)) {
4533 /* next entry refers to *next* buffer: return next block */
4534 *lbnp = dp->d_seekoff;
4535 found = 1;
4536 }
4537 nfs_buf_drop(bp);
4538 }
4539 nfs_buf_itercomplete(dnp, &blist, NBI_CLEAN);
4540 }
4541 lck_mtx_unlock(nfs_buf_mutex);
4542 if (found) {
4543 OSAddAtomic(1, &nfsstats.direofcache_hits);
4544 return (0);
4545 }
4546
4547 /* still not found... oh well, just start a new block */
4548 *lbnp = cookie;
4549 OSAddAtomic(1, &nfsstats.direofcache_misses);
4550 return (0);
4551}
4552
4553/*
4554 * scan a directory buffer for the given name
4555 * Returns: ESRCH if not found, ENOENT if found invalid, 0 if found
4556 * Note: should only be called with RDIRPLUS directory buffers
4557 */
4558
4559#define NDBS_PURGE 1
4560#define NDBS_UPDATE 2
4561
4562int
4563nfs_dir_buf_search(
4564 struct nfsbuf *bp,
4565 struct componentname *cnp,
4566 fhandle_t *fhp,
4567 struct nfs_vattr *nvap,
4568 uint64_t *xidp,
4569 time_t *attrstampp,
4570 daddr64_t *nextlbnp,
4571 int flags)
4572{
4573 struct direntry *dp;
4574 struct nfs_dir_buf_header *ndbhp;
4575 struct nfs_vattr *nvattrp;
4576 daddr64_t nextlbn = 0;
4577 int i, error = ESRCH, fhlen;
4578
4579 /* scan the buffer for the name */
4580 ndbhp = (struct nfs_dir_buf_header*)bp->nb_data;
4581 dp = NFS_DIR_BUF_FIRST_DIRENTRY(bp);
4582 for (i=0; i < ndbhp->ndbh_count; i++) {
4583 nextlbn = dp->d_seekoff;
4584 if ((cnp->cn_namelen == dp->d_namlen) && !strcmp(cnp->cn_nameptr, dp->d_name)) {
4585 fhlen = dp->d_name[dp->d_namlen+1];
4586 nvattrp = NFS_DIR_BUF_NVATTR(bp, i);
4587 if ((ndbhp->ndbh_ncgen != bp->nb_np->n_ncgen) || (fhp->fh_len == 0) ||
4588 (nvattrp->nva_type == VNON) || (nvattrp->nva_fileid == 0)) {
4589 /* entry is no longer valid */
4590 error = ENOENT;
4591 break;
4592 }
4593 if (flags == NDBS_PURGE) {
4594 dp->d_fileno = 0;
4595 bzero(nvattrp, sizeof(*nvattrp));
4596 error = ENOENT;
4597 break;
4598 }
4599 if (flags == NDBS_UPDATE) {
4600 /* update direntry's attrs if fh matches */
4601 if ((fhp->fh_len == fhlen) && !bcmp(&dp->d_name[dp->d_namlen+2], fhp->fh_data, fhlen)) {
4602 bcopy(nvap, nvattrp, sizeof(*nvap));
4603 dp->d_fileno = nvattrp->nva_fileid;
4604 nvattrp->nva_fileid = *xidp;
4605 *(time_t*)(&dp->d_name[dp->d_namlen+2+fhp->fh_len]) = *attrstampp;
4606 }
4607 error = 0;
4608 break;
4609 }
4610 /* copy out fh, attrs, attrstamp, and xid */
4611 fhp->fh_len = fhlen;
4612 bcopy(&dp->d_name[dp->d_namlen+2], fhp->fh_data, MAX(fhp->fh_len, (int)sizeof(fhp->fh_data)));
4613 *attrstampp = *(time_t*)(&dp->d_name[dp->d_namlen+2+fhp->fh_len]);
4614 bcopy(nvattrp, nvap, sizeof(*nvap));
4615 *xidp = nvap->nva_fileid;
4616 nvap->nva_fileid = dp->d_fileno;
4617 error = 0;
4618 break;
4619 }
4620 dp = NFS_DIRENTRY_NEXT(dp);
4621 }
4622 if (nextlbnp)
4623 *nextlbnp = nextlbn;
4624 return (error);
4625}
4626
4627/*
4628 * Look up a name in a directory's buffers.
4629 * Note: should only be called with RDIRPLUS directory buffers
4630 */
4631int
4632nfs_dir_buf_cache_lookup(nfsnode_t dnp, nfsnode_t *npp, struct componentname *cnp, vfs_context_t ctx, int purge)
4633{
4634 nfsnode_t newnp;
4635 struct nfsmount *nmp;
4636 int error = 0, slpflag, slptimeo, i, found = 0, count = 0;
4637 u_int64_t xid;
4638 struct nfs_vattr nvattr;
4639 fhandle_t fh;
4640 time_t attrstamp = 0;
4641 thread_t thd = vfs_context_thread(ctx);
4642 struct nfsbuf *bp, *lastbp, *foundbp;
4643 struct nfsbuflists blist;
4644 daddr64_t lbn, nextlbn;
4645 int dotunder = (cnp->cn_namelen > 2) && (cnp->cn_nameptr[0] == '.') && (cnp->cn_nameptr[1] == '_');
4646
4647 if (!(nmp = NFSTONMP(dnp)))
4648 return (ENXIO);
4649 slpflag = (nmp->nm_flag & NFSMNT_INT) ? PCATCH : 0;
4650 slptimeo = 0;
4651 if (!purge)
4652 *npp = NULL;
4653
4654 /* first check most recent buffer (and next one too) */
4655 lbn = dnp->n_lastdbl;
4656 for (i=0; i < 2; i++) {
4657 if ((error = nfs_buf_get(dnp, lbn, NFS_DIRBLKSIZ, thd, NBLK_READ|NBLK_ONLYVALID, &bp)))
4658 return (error);
4659 if (!bp)
4660 break;
4661 count++;
4662 error = nfs_dir_buf_search(bp, cnp, &fh, &nvattr, &xid, &attrstamp, &nextlbn, purge ? NDBS_PURGE : 0);
4663 nfs_buf_release(bp, 0);
4664 if (error == ESRCH) {
4665 error = 0;
4666 } else {
4667 found = 1;
4668 break;
4669 }
4670 lbn = nextlbn;
4671 }
4672
4673 lck_mtx_lock(nfs_buf_mutex);
4674 if (found) {
4675 dnp->n_lastdbl = lbn;
4676 goto done;
4677 }
4678
4679 /*
4680 * Scan the list of buffers, keeping them in order.
4681 * Note that itercomplete inserts each of the remaining buffers
4682 * into the head of list (thus reversing the elements). So, we
4683 * make sure to iterate through all buffers, inserting them after
4684 * each other, to keep them in order.
4685 * Also note: the LIST_INSERT_AFTER(lastbp) is only safe because
4686 * we don't drop nfs_buf_mutex.
4687 */
4688 if (!nfs_buf_iterprepare(dnp, &blist, NBI_CLEAN)) {
4689 lastbp = foundbp = NULL;
4690 while ((bp = LIST_FIRST(&blist))) {
4691 LIST_REMOVE(bp, nb_vnbufs);
4692 if (!lastbp)
4693 LIST_INSERT_HEAD(&dnp->n_cleanblkhd, bp, nb_vnbufs);
4694 else
4695 LIST_INSERT_AFTER(lastbp, bp, nb_vnbufs);
4696 lastbp = bp;
4697 if (error || found)
4698 continue;
4699 if (!purge && dotunder && (count > 100)) /* don't waste too much time looking for ._ files */
4700 continue;
4701 nfs_buf_refget(bp);
4702 lbn = bp->nb_lblkno;
4703 if (nfs_buf_acquire(bp, NBAC_NOWAIT, 0, 0)) {
4704 /* just skip this buffer */
4705 nfs_buf_refrele(bp);
4706 continue;
4707 }
4708 nfs_buf_refrele(bp);
4709 count++;
4710 error = nfs_dir_buf_search(bp, cnp, &fh, &nvattr, &xid, &attrstamp, NULL, purge ? NDBS_PURGE : 0);
4711 if (error == ESRCH) {
4712 error = 0;
4713 } else {
4714 found = 1;
4715 foundbp = bp;
4716 }
4717 nfs_buf_drop(bp);
4718 }
4719 if (found) {
4720 LIST_REMOVE(foundbp, nb_vnbufs);
4721 LIST_INSERT_HEAD(&dnp->n_cleanblkhd, foundbp, nb_vnbufs);
4722 dnp->n_lastdbl = foundbp->nb_lblkno;
4723 }
4724 nfs_buf_itercomplete(dnp, &blist, NBI_CLEAN);
4725 }
4726done:
4727 lck_mtx_unlock(nfs_buf_mutex);
4728
4729 if (!error && found && !purge) {
4730 error = nfs_nget(NFSTOMP(dnp), dnp, cnp, fh.fh_data, fh.fh_len,
4731 &nvattr, &xid, NG_MAKEENTRY, &newnp);
4732 if (error)
4733 return (error);
4734 newnp->n_attrstamp = attrstamp;
4735 *npp = newnp;
4736 nfs_node_unlock(newnp);
4737 /* check if the dir buffer's attrs are out of date */
4738 if (!nfs_getattr(newnp, &nvattr, ctx, NGA_CACHED) &&
4739 (newnp->n_attrstamp != attrstamp)) {
4740 /* they are, so update them */
4741 error = nfs_buf_get(dnp, lbn, NFS_DIRBLKSIZ, thd, NBLK_READ|NBLK_ONLYVALID, &bp);
4742 if (!error && bp) {
4743 attrstamp = newnp->n_attrstamp;
4744 xid = newnp->n_xid;
4745 nfs_dir_buf_search(bp, cnp, &fh, &nvattr, &xid, &attrstamp, NULL, NDBS_UPDATE);
4746 nfs_buf_release(bp, 0);
4747 }
4748 error = 0;
4749 }
4750 }
4751
4752 return (error);
4753}
4754
4755/*
4756 * Purge name cache entries for the given node.
4757 * For RDIRPLUS, also invalidate the entry in the directory's buffers.
4758 */
4759void
4760nfs_name_cache_purge(nfsnode_t dnp, nfsnode_t np, struct componentname *cnp, vfs_context_t ctx)
4761{
4762 struct nfsmount *nmp = NFSTONMP(dnp);
4763
4764 cache_purge(NFSTOV(np));
4765 if (nmp && (nmp->nm_vers > NFS_VER2) && (nmp->nm_flag & NFSMNT_RDIRPLUS))
4766 nfs_dir_buf_cache_lookup(dnp, NULL, cnp, ctx, 1);
4767}
4768
4769/*
4770 * NFS V3 readdir (plus) RPC.
4771 */
4772int
4773nfs3_readdir_rpc(nfsnode_t dnp, struct nfsbuf *bp, vfs_context_t ctx)
4774{
4775 struct nfsmount *nmp;
4776 int error = 0, lockerror, nfsvers, rdirplus, bigcookies;
4777 int i, status, attrflag, fhflag, more_entries = 1, eof, bp_dropped = 0;
4778 uint32_t nmreaddirsize, nmrsize;
4779 uint32_t namlen, skiplen, fhlen, xlen, attrlen, reclen, space_free, space_needed;
4780 uint64_t cookie, lastcookie, xid, savedxid, fileno;
4781 struct nfsm_chain nmreq, nmrep, nmrepsave;
4782 fhandle_t fh;
4783 struct nfs_vattr *nvattrp;
4784 struct nfs_dir_buf_header *ndbhp;
4785 struct direntry *dp;
4786 char *padstart, padlen;
4787 struct timeval now;
4788
4789 nmp = NFSTONMP(dnp);
4790 if (!nmp)
4791 return (ENXIO);
4792 nfsvers = nmp->nm_vers;
4793 nmreaddirsize = nmp->nm_readdirsize;
4794 nmrsize = nmp->nm_rsize;
4795 bigcookies = nmp->nm_state & NFSSTA_BIGCOOKIES;
4796noplus:
4797 rdirplus = ((nfsvers > NFS_VER2) && (nmp->nm_flag & NFSMNT_RDIRPLUS)) ? 1 : 0;
4798
4799 if ((lockerror = nfs_node_lock(dnp)))
4800 return (lockerror);
4801
4802 /* determine cookie to use, and move dp to the right offset */
4803 ndbhp = (struct nfs_dir_buf_header*)bp->nb_data;
4804 dp = NFS_DIR_BUF_FIRST_DIRENTRY(bp);
4805 if (ndbhp->ndbh_count) {
4806 for (i=0; i < ndbhp->ndbh_count-1; i++)
4807 dp = NFS_DIRENTRY_NEXT(dp);
4808 cookie = dp->d_seekoff;
4809 dp = NFS_DIRENTRY_NEXT(dp);
4810 } else {
4811 cookie = bp->nb_lblkno;
4812 /* increment with every buffer read */
4813 OSAddAtomic(1, &nfsstats.readdir_bios);
4814 }
4815 lastcookie = cookie;
4816
4817 /*
4818 * Loop around doing readdir(plus) RPCs of size nm_readdirsize until
4819 * the buffer is full (or we hit EOF). Then put the remainder of the
4820 * results in the next buffer(s).
4821 */
4822 nfsm_chain_null(&nmreq);
4823 nfsm_chain_null(&nmrep);
4824 while (nfs_dir_buf_freespace(bp, rdirplus) && !(ndbhp->ndbh_flags & NDB_FULL)) {
4825 nfsm_chain_build_alloc_init(error, &nmreq,
4826 NFSX_FH(nfsvers) + NFSX_READDIR(nfsvers) + NFSX_UNSIGNED);
4827 nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
4828 if (nfsvers == NFS_VER3) {
4829 /* opaque values don't need swapping, but as long */
4830 /* as we are consistent about it, it should be ok */
4831 nfsm_chain_add_64(error, &nmreq, cookie);
4832 nfsm_chain_add_64(error, &nmreq, dnp->n_cookieverf);
4833 } else {
4834 nfsm_chain_add_32(error, &nmreq, cookie);
4835 }
4836 nfsm_chain_add_32(error, &nmreq, nmreaddirsize);
4837 if (rdirplus)
4838 nfsm_chain_add_32(error, &nmreq, nmrsize);
4839 nfsm_chain_build_done(error, &nmreq);
4840 nfs_node_unlock(dnp);
4841 lockerror = ENOENT;
4842 nfsmout_if(error);
4843
4844 error = nfs_request(dnp, NULL, &nmreq,
4845 rdirplus ? NFSPROC_READDIRPLUS : NFSPROC_READDIR,
4846 ctx, &nmrep, &xid, &status);
4847
4848 if ((lockerror = nfs_node_lock(dnp)))
4849 error = lockerror;
4850
4851 savedxid = xid;
4852 if (nfsvers == NFS_VER3)
4853 nfsm_chain_postop_attr_update(error, &nmrep, dnp, &xid);
4854 if (!error)
4855 error = status;
4856 if (nfsvers == NFS_VER3)
4857 nfsm_chain_get_64(error, &nmrep, dnp->n_cookieverf);
4858 nfsm_chain_get_32(error, &nmrep, more_entries);
4859
4860 if (!lockerror) {
4861 nfs_node_unlock(dnp);
4862 lockerror = ENOENT;
4863 }
4864 if (error == NFSERR_NOTSUPP) {
4865 /* oops... it doesn't look like readdirplus is supported */
4866 lck_mtx_lock(&nmp->nm_lock);
4867 nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
4868 lck_mtx_unlock(&nmp->nm_lock);
4869 goto noplus;
4870 }
4871 nfsmout_if(error);
4872
4873 if (rdirplus)
4874 microuptime(&now);
4875
4876 /* loop through the entries packing them into the buffer */
4877 while (more_entries) {
4878 if (nfsvers == NFS_VER3)
4879 nfsm_chain_get_64(error, &nmrep, fileno);
4880 else
4881 nfsm_chain_get_32(error, &nmrep, fileno);
4882 nfsm_chain_get_32(error, &nmrep, namlen);
4883 nfsmout_if(error);
4884 /* just truncate names that don't fit in direntry.d_name */
4885 if (namlen <= 0) {
4886 error = EBADRPC;
4887 goto nfsmout;
4888 }
4889 if (namlen > (sizeof(dp->d_name)-1)) {
4890 skiplen = namlen - sizeof(dp->d_name) + 1;
4891 namlen = sizeof(dp->d_name) - 1;
4892 } else {
4893 skiplen = 0;
4894 }
4895 /* guess that fh size will be same as parent */
4896 fhlen = rdirplus ? (1 + dnp->n_fhsize) : 0;
4897 xlen = rdirplus ? (fhlen + sizeof(time_t)) : 0;
4898 attrlen = rdirplus ? sizeof(struct nfs_vattr) : 0;
4899 reclen = NFS_DIRENTRY_LEN(namlen + xlen);
4900 space_needed = reclen + attrlen;
4901 space_free = nfs_dir_buf_freespace(bp, rdirplus);
4902 if (space_needed > space_free) {
4903 /*
4904 * We still have entries to pack, but we've
4905 * run out of room in the current buffer.
4906 * So we need to move to the next buffer.
4907 * The block# for the next buffer is the
4908 * last cookie in the current buffer.
4909 */
4910nextbuffer:
4911 ndbhp->ndbh_flags |= NDB_FULL;
4912 nfs_buf_release(bp, 0);
4913 bp_dropped = 1;
4914 bp = NULL;
4915 error = nfs_buf_get(dnp, lastcookie, NFS_DIRBLKSIZ, vfs_context_thread(ctx), NBLK_READ, &bp);
4916 nfsmout_if(error);
4917 /* initialize buffer */
4918 ndbhp = (struct nfs_dir_buf_header*)bp->nb_data;
4919 ndbhp->ndbh_flags = 0;
4920 ndbhp->ndbh_count = 0;
4921 ndbhp->ndbh_entry_end = sizeof(*ndbhp);
4922 ndbhp->ndbh_ncgen = dnp->n_ncgen;
4923 space_free = nfs_dir_buf_freespace(bp, rdirplus);
4924 dp = NFS_DIR_BUF_FIRST_DIRENTRY(bp);
4925 /* increment with every buffer read */
4926 OSAddAtomic(1, &nfsstats.readdir_bios);
4927 }
4928 nmrepsave = nmrep;
4929 dp->d_fileno = fileno;
4930 dp->d_namlen = namlen;
4931 dp->d_reclen = reclen;
4932 dp->d_type = DT_UNKNOWN;
4933 nfsm_chain_get_opaque(error, &nmrep, namlen, dp->d_name);
4934 nfsmout_if(error);
4935 dp->d_name[namlen] = '\0';
4936 if (skiplen)
4937 nfsm_chain_adv(error, &nmrep,
4938 nfsm_rndup(namlen + skiplen) - nfsm_rndup(namlen));
4939 if (nfsvers == NFS_VER3)
4940 nfsm_chain_get_64(error, &nmrep, cookie);
4941 else
4942 nfsm_chain_get_32(error, &nmrep, cookie);
4943 nfsmout_if(error);
4944 dp->d_seekoff = cookie;
4945 if (!bigcookies && (cookie >> 32) && (nmp == NFSTONMP(dnp))) {
4946 /* we've got a big cookie, make sure flag is set */
4947 lck_mtx_lock(&nmp->nm_lock);
4948 nmp->nm_state |= NFSSTA_BIGCOOKIES;
4949 lck_mtx_unlock(&nmp->nm_lock);
4950 bigcookies = 1;
4951 }
4952 if (rdirplus) {
4953 nvattrp = NFS_DIR_BUF_NVATTR(bp, ndbhp->ndbh_count);
4954 /* check for attributes */
4955 nfsm_chain_get_32(error, &nmrep, attrflag);
4956 nfsmout_if(error);
4957 if (attrflag) {
4958 /* grab attributes */
4959 error = nfs_parsefattr(&nmrep, NFS_VER3, nvattrp);
4960 nfsmout_if(error);
4961 dp->d_type = IFTODT(VTTOIF(nvattrp->nva_type));
4962 /* fileid is already in d_fileno, so stash xid in attrs */
4963 nvattrp->nva_fileid = savedxid;
4964 } else {
4965 /* mark the attributes invalid */
4966 bzero(nvattrp, sizeof(struct nfs_vattr));
4967 }
4968 /* check for file handle */
4969 nfsm_chain_get_32(error, &nmrep, fhflag);
4970 nfsmout_if(error);
4971 if (fhflag) {
4972 nfsm_chain_get_fh(error, &nmrep, NFS_VER3, &fh);
4973 nfsmout_if(error);
4974 fhlen = fh.fh_len + 1;
4975 xlen = fhlen + sizeof(time_t);
4976 reclen = NFS_DIRENTRY_LEN(namlen + xlen);
4977 space_needed = reclen + attrlen;
4978 if (space_needed > space_free) {
4979 /* didn't actually have the room... move on to next buffer */
4980 nmrep = nmrepsave;
4981 goto nextbuffer;
4982 }
4983 /* pack the file handle into the record */
4984 dp->d_name[dp->d_namlen+1] = fh.fh_len;
4985 bcopy(fh.fh_data, &dp->d_name[dp->d_namlen+2], fh.fh_len);
4986 } else {
4987 /* mark the file handle invalid */
4988 fh.fh_len = 0;
4989 fhlen = fh.fh_len + 1;
4990 xlen = fhlen + sizeof(time_t);
4991 reclen = NFS_DIRENTRY_LEN(namlen + xlen);
4992 bzero(&dp->d_name[dp->d_namlen+1], fhlen);
4993 }
4994 *(time_t*)(&dp->d_name[dp->d_namlen+1+fhlen]) = now.tv_sec;
4995 dp->d_reclen = reclen;
4996 }
4997 padstart = dp->d_name + dp->d_namlen + 1 + xlen;
4998 ndbhp->ndbh_count++;
4999 lastcookie = cookie;
5000 /* advance to next direntry in buffer */
5001 dp = NFS_DIRENTRY_NEXT(dp);
5002 ndbhp->ndbh_entry_end = (char*)dp - bp->nb_data;
5003 /* zero out the pad bytes */
5004 padlen = (char*)dp - padstart;
5005 if (padlen > 0)
5006 bzero(padstart, padlen);
5007 /* check for more entries */
5008 nfsm_chain_get_32(error, &nmrep, more_entries);
5009 nfsmout_if(error);
5010 }
5011 /* Finally, get the eof boolean */
5012 nfsm_chain_get_32(error, &nmrep, eof);
5013 nfsmout_if(error);
5014 if (eof) {
5015 ndbhp->ndbh_flags |= (NDB_FULL|NDB_EOF);
5016 nfs_node_lock_force(dnp);
5017 dnp->n_eofcookie = lastcookie;
5018 nfs_node_unlock(dnp);
5019 } else {
5020 more_entries = 1;
5021 }
5022 if (bp_dropped) {
5023 nfs_buf_release(bp, 0);
5024 bp = NULL;
5025 break;
5026 }
5027 if ((lockerror = nfs_node_lock(dnp)))
5028 error = lockerror;
5029 nfsmout_if(error);
5030 nfsm_chain_cleanup(&nmrep);
5031 nfsm_chain_null(&nmreq);
5032 }
5033nfsmout:
5034 if (bp_dropped && bp)
5035 nfs_buf_release(bp, 0);
5036 if (!lockerror)
5037 nfs_node_unlock(dnp);
5038 nfsm_chain_cleanup(&nmreq);
5039 nfsm_chain_cleanup(&nmrep);
5040 return (bp_dropped ? NFSERR_DIRBUFDROPPED : error);
5041}
5042
5043/*
5044 * Silly rename. To make the NFS filesystem that is stateless look a little
5045 * more like the "ufs" a remove of an active vnode is translated to a rename
5046 * to a funny looking filename that is removed by nfs_vnop_inactive on the
5047 * nfsnode. There is the potential for another process on a different client
5048 * to create the same funny name between when the lookitup() fails and the
5049 * rename() completes, but...
5050 */
5051
5052/* format of "random" silly names - includes a number and pid */
5053/* (note: shouldn't exceed size of nfs_sillyrename.nsr_name) */
5054#define NFS_SILLYNAME_FORMAT ".nfs.%08x.%04x"
5055/* starting from zero isn't silly enough */
5056static uint32_t nfs_sillyrename_number = 0x20051025;
5057
5058int
5059nfs_sillyrename(
5060 nfsnode_t dnp,
5061 nfsnode_t np,
5062 struct componentname *cnp,
5063 vfs_context_t ctx)
5064{
5065 struct nfs_sillyrename *nsp;
5066 int error;
5067 short pid;
5068 kauth_cred_t cred;
5069 uint32_t num;
5070 struct nfsmount *nmp;
5071
5072 nmp = NFSTONMP(dnp);
5073 if (!nmp)
5074 return (ENXIO);
5075
5076 nfs_name_cache_purge(dnp, np, cnp, ctx);
5077
5078 MALLOC_ZONE(nsp, struct nfs_sillyrename *,
5079 sizeof (struct nfs_sillyrename), M_NFSREQ, M_WAITOK);
5080 if (!nsp)
5081 return (ENOMEM);
5082 cred = vfs_context_ucred(ctx);
5083 kauth_cred_ref(cred);
5084 nsp->nsr_cred = cred;
5085 nsp->nsr_dnp = dnp;
5086 error = vnode_ref(NFSTOV(dnp));
5087 if (error)
5088 goto bad_norele;
5089
5090 /* Fudge together a funny name */
5091 pid = vfs_context_pid(ctx);
5092 num = OSAddAtomic(1, &nfs_sillyrename_number);
5093 nsp->nsr_namlen = snprintf(nsp->nsr_name, sizeof(nsp->nsr_name),
5094 NFS_SILLYNAME_FORMAT, num, (pid & 0xffff));
5095 if (nsp->nsr_namlen >= (int)sizeof(nsp->nsr_name))
5096 nsp->nsr_namlen = sizeof(nsp->nsr_name) - 1;
5097
5098 /* Try lookitups until we get one that isn't there */
5099 while (nfs_lookitup(dnp, nsp->nsr_name, nsp->nsr_namlen, ctx, NULL) == 0) {
5100 num = OSAddAtomic(1, &nfs_sillyrename_number);
5101 nsp->nsr_namlen = snprintf(nsp->nsr_name, sizeof(nsp->nsr_name),
5102 NFS_SILLYNAME_FORMAT, num, (pid & 0xffff));
5103 if (nsp->nsr_namlen >= (int)sizeof(nsp->nsr_name))
5104 nsp->nsr_namlen = sizeof(nsp->nsr_name) - 1;
5105 }
5106
5107 /* now, do the rename */
5108 error = nmp->nm_funcs->nf_rename_rpc(dnp, cnp->cn_nameptr, cnp->cn_namelen,
5109 dnp, nsp->nsr_name, nsp->nsr_namlen, ctx);
5110 if (!error) {
5111 nfs_node_lock_force(dnp);
5112 if (dnp->n_flag & NNEGNCENTRIES) {
5113 dnp->n_flag &= ~NNEGNCENTRIES;
5114 cache_purge_negatives(NFSTOV(dnp));
5115 }
5116 nfs_node_unlock(dnp);
5117 }
5118 FSDBG(267, dnp, np, num, error);
5119 if (error)
5120 goto bad;
5121 error = nfs_lookitup(dnp, nsp->nsr_name, nsp->nsr_namlen, ctx, &np);
5122 nfs_node_lock_force(np);
5123 np->n_sillyrename = nsp;
5124 nfs_node_unlock(np);
5125 return (0);
5126bad:
5127 vnode_rele(NFSTOV(dnp));
5128bad_norele:
5129 nsp->nsr_cred = NOCRED;
5130 kauth_cred_unref(&cred);
5131 FREE_ZONE(nsp, sizeof(*nsp), M_NFSREQ);
5132 return (error);
5133}
5134
5135int
5136nfs3_lookup_rpc_async(
5137 nfsnode_t dnp,
5138 char *name,
5139 int namelen,
5140 vfs_context_t ctx,
5141 struct nfsreq **reqp)
5142{
5143 struct nfsmount *nmp;
5144 struct nfsm_chain nmreq;
5145 int error = 0, nfsvers;
5146
5147 nmp = NFSTONMP(dnp);
5148 if (!nmp)
5149 return (ENXIO);
5150 nfsvers = nmp->nm_vers;
5151
5152 nfsm_chain_null(&nmreq);
5153
5154 nfsm_chain_build_alloc_init(error, &nmreq,
5155 NFSX_FH(nfsvers) + NFSX_UNSIGNED + nfsm_rndup(namelen));
5156 nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
5157 nfsm_chain_add_string(error, &nmreq, name, namelen);
5158 nfsm_chain_build_done(error, &nmreq);
5159 nfsmout_if(error);
5160 error = nfs_request_async(dnp, NULL, &nmreq, NFSPROC_LOOKUP,
5161 vfs_context_thread(ctx), vfs_context_ucred(ctx), NULL, reqp);
5162nfsmout:
5163 nfsm_chain_cleanup(&nmreq);
5164 return (error);
5165}
5166
5167int
5168nfs3_lookup_rpc_async_finish(
5169 nfsnode_t dnp,
5170 vfs_context_t ctx,
5171 struct nfsreq *req,
5172 u_int64_t *xidp,
5173 fhandle_t *fhp,
5174 struct nfs_vattr *nvap)
5175{
5176 int error = 0, lockerror = ENOENT, status, nfsvers, attrflag;
5177 u_int64_t xid;
5178 struct nfsmount *nmp;
5179 struct nfsm_chain nmrep;
5180
5181 nmp = NFSTONMP(dnp);
5182 nfsvers = nmp->nm_vers;
5183
5184 nfsm_chain_null(&nmrep);
5185
5186 error = nfs_request_async_finish(req, &nmrep, xidp, &status);
5187
5188 if ((lockerror = nfs_node_lock(dnp)))
5189 error = lockerror;
5190 xid = *xidp;
5191 if (error || status) {
5192 if (nfsvers == NFS_VER3)
5193 nfsm_chain_postop_attr_update(error, &nmrep, dnp, &xid);
5194 if (!error)
5195 error = status;
5196 goto nfsmout;
5197 }
5198
5199 nfsmout_if(error || !fhp || !nvap);
5200
5201 /* get the file handle */
5202 nfsm_chain_get_fh(error, &nmrep, nfsvers, fhp);
5203
5204 /* get the attributes */
5205 if (nfsvers == NFS_VER3) {
5206 nfsm_chain_postop_attr_get(error, &nmrep, attrflag, nvap);
5207 nfsm_chain_postop_attr_update(error, &nmrep, dnp, &xid);
5208 if (!error && !attrflag)
5209 error = nfs3_getattr_rpc(NULL, NFSTOMP(dnp), fhp->fh_data, fhp->fh_len, ctx, nvap, xidp);
5210 } else {
5211 error = nfs_parsefattr(&nmrep, nfsvers, nvap);
5212 }
5213nfsmout:
5214 if (!lockerror)
5215 nfs_node_unlock(dnp);
5216 nfsm_chain_cleanup(&nmrep);
5217 return (error);
5218}
5219
5220/*
5221 * Look up a file name and optionally either update the file handle or
5222 * allocate an nfsnode, depending on the value of npp.
5223 * npp == NULL --> just do the lookup
5224 * *npp == NULL --> allocate a new nfsnode and make sure attributes are
5225 * handled too
5226 * *npp != NULL --> update the file handle in the vnode
5227 */
5228int
5229nfs_lookitup(
5230 nfsnode_t dnp,
5231 char *name,
5232 int namelen,
5233 vfs_context_t ctx,
5234 nfsnode_t *npp)
5235{
5236 int error = 0;
5237 nfsnode_t np, newnp = NULL;
5238 u_int64_t xid;
5239 fhandle_t fh;
5240 struct nfsmount *nmp;
5241 struct nfs_vattr nvattr;
5242 struct nfsreq rq, *req = &rq;
5243
5244 nmp = NFSTONMP(dnp);
5245 if (!nmp)
5246 return (ENXIO);
5247
5248 if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_MAXNAME) &&
5249 (namelen > (int)nmp->nm_fsattr.nfsa_maxname))
5250 return (ENAMETOOLONG);
5251
5252 /* check for lookup of "." */
5253 if ((name[0] == '.') && (namelen == 1)) {
5254 /* skip lookup, we know who we are */
5255 fh.fh_len = 0;
5256 newnp = dnp;
5257 goto nfsmout;
5258 }
5259
5260 error = nmp->nm_funcs->nf_lookup_rpc_async(dnp, name, namelen, ctx, &req);
5261 nfsmout_if(error);
5262 error = nmp->nm_funcs->nf_lookup_rpc_async_finish(dnp, ctx, req, &xid, &fh, &nvattr);
5263 nfsmout_if(!npp || error);
5264
5265 if (*npp) {
5266 np = *npp;
5267 if (fh.fh_len != np->n_fhsize) {
5268 u_char *oldbuf = (np->n_fhsize > NFS_SMALLFH) ? np->n_fhp : NULL;
5269 if (fh.fh_len > NFS_SMALLFH) {
5270 MALLOC_ZONE(np->n_fhp, u_char *, fh.fh_len, M_NFSBIGFH, M_WAITOK);
5271 if (!np->n_fhp) {
5272 np->n_fhp = oldbuf;
5273 error = ENOMEM;
5274 goto nfsmout;
5275 }
5276 } else {
5277 np->n_fhp = &np->n_fh[0];
5278 }
5279 if (oldbuf)
5280 FREE_ZONE(oldbuf, np->n_fhsize, M_NFSBIGFH);
5281 }
5282 bcopy(fh.fh_data, np->n_fhp, fh.fh_len);
5283 np->n_fhsize = fh.fh_len;
5284 nfs_node_lock_force(np);
5285 error = nfs_loadattrcache(np, &nvattr, &xid, 0);
5286 nfs_node_unlock(np);
5287 nfsmout_if(error);
5288 newnp = np;
5289 } else if (NFS_CMPFH(dnp, fh.fh_data, fh.fh_len)) {
5290 nfs_node_lock_force(dnp);
5291 if (dnp->n_xid <= xid)
5292 error = nfs_loadattrcache(dnp, &nvattr, &xid, 0);
5293 nfs_node_unlock(dnp);
5294 nfsmout_if(error);
5295 newnp = dnp;
5296 } else {
5297 struct componentname cn, *cnp = &cn;
5298 bzero(cnp, sizeof(*cnp));
5299 cnp->cn_nameptr = name;
5300 cnp->cn_namelen = namelen;
5301 error = nfs_nget(NFSTOMP(dnp), dnp, cnp, fh.fh_data, fh.fh_len,
5302 &nvattr, &xid, NG_MAKEENTRY, &np);
5303 nfsmout_if(error);
5304 newnp = np;
5305 }
5306
5307nfsmout:
5308 if (npp && !*npp && !error)
5309 *npp = newnp;
5310 return (error);
5311}
5312
5313/*
5314 * set up and initialize a "._" file lookup structure used for
5315 * performing async lookups.
5316 */
5317void
5318nfs_dulookup_init(struct nfs_dulookup *dulp, nfsnode_t dnp, const char *name, int namelen, vfs_context_t ctx)
5319{
5320 int error, du_namelen;
5321 vnode_t du_vp;
5322
5323 /* check for ._ file in name cache */
5324 dulp->du_flags = 0;
5325 bzero(&dulp->du_cn, sizeof(dulp->du_cn));
5326 du_namelen = namelen + 2;
5327 if ((namelen >= 2) && (name[0] == '.') && (name[1] == '_'))
5328 return;
5329 if (du_namelen >= (int)sizeof(dulp->du_smallname))
5330 MALLOC(dulp->du_cn.cn_nameptr, char *, du_namelen + 1, M_TEMP, M_WAITOK);
5331 else
5332 dulp->du_cn.cn_nameptr = dulp->du_smallname;
5333 if (!dulp->du_cn.cn_nameptr)
5334 return;
5335 dulp->du_cn.cn_namelen = du_namelen;
5336 snprintf(dulp->du_cn.cn_nameptr, du_namelen + 1, "._%s", name);
5337 dulp->du_cn.cn_nameptr[du_namelen] = '\0';
5338 dulp->du_cn.cn_nameiop = LOOKUP;
5339 dulp->du_cn.cn_flags = MAKEENTRY;
5340
5341 error = cache_lookup(NFSTOV(dnp), &du_vp, &dulp->du_cn);
5342 if (error == -1) {
5343 vnode_put(du_vp);
5344 } else if (!error) {
5345 struct nfsmount *nmp = NFSTONMP(dnp);
5346 if (nmp && (nmp->nm_vers > NFS_VER2) && (nmp->nm_flag & NFSMNT_RDIRPLUS)) {
5347 /* if rdirplus, try dir buf cache lookup */
5348 nfsnode_t du_np = NULL;
5349 if (!nfs_dir_buf_cache_lookup(dnp, &du_np, &dulp->du_cn, ctx, 0) && du_np) {
5350 /* dir buf cache hit */
5351 du_vp = NFSTOV(du_np);
5352 vnode_put(du_vp);
5353 error = -1;
5354 }
5355 }
5356 if (!error)
5357 dulp->du_flags |= NFS_DULOOKUP_DOIT;
5358 }
5359}
5360
5361/*
5362 * start an async "._" file lookup request
5363 */
5364void
5365nfs_dulookup_start(struct nfs_dulookup *dulp, nfsnode_t dnp, vfs_context_t ctx)
5366{
5367 struct nfsmount *nmp = NFSTONMP(dnp);
5368 struct nfsreq *req = &dulp->du_req;
5369
5370 if (!nmp || !(dulp->du_flags & NFS_DULOOKUP_DOIT))
5371 return;
5372 if (!nmp->nm_funcs->nf_lookup_rpc_async(dnp, dulp->du_cn.cn_nameptr,
5373 dulp->du_cn.cn_namelen, ctx, &req))
5374 dulp->du_flags |= NFS_DULOOKUP_INPROG;
5375}
5376
5377/*
5378 * finish an async "._" file lookup request and clean up the structure
5379 */
5380void
5381nfs_dulookup_finish(struct nfs_dulookup *dulp, nfsnode_t dnp, vfs_context_t ctx)
5382{
5383 struct nfsmount *nmp = NFSTONMP(dnp);
5384 int error;
5385 nfsnode_t du_np;
5386 u_int64_t xid;
5387 fhandle_t fh;
5388 struct nfs_vattr nvattr;
5389
5390 if (!nmp || !(dulp->du_flags & NFS_DULOOKUP_INPROG))
5391 goto out;
5392
5393 error = nmp->nm_funcs->nf_lookup_rpc_async_finish(dnp, ctx, &dulp->du_req, &xid, &fh, &nvattr);
5394 dulp->du_flags &= ~NFS_DULOOKUP_INPROG;
5395 if (error == ENOENT) {
5396 /* add a negative entry in the name cache */
5397 nfs_node_lock_force(dnp);
5398 cache_enter(NFSTOV(dnp), NULL, &dulp->du_cn);
5399 dnp->n_flag |= NNEGNCENTRIES;
5400 nfs_node_unlock(dnp);
5401 } else if (!error) {
5402 error = nfs_nget(NFSTOMP(dnp), dnp, &dulp->du_cn, fh.fh_data, fh.fh_len,
5403 &nvattr, &xid, NG_MAKEENTRY, &du_np);
5404 if (!error) {
5405 nfs_node_unlock(du_np);
5406 vnode_put(NFSTOV(du_np));
5407 }
5408 }
5409out:
5410 if (dulp->du_flags & NFS_DULOOKUP_INPROG)
5411 nfs_request_async_cancel(&dulp->du_req);
5412 if (dulp->du_cn.cn_nameptr && (dulp->du_cn.cn_nameptr != dulp->du_smallname))
5413 FREE(dulp->du_cn.cn_nameptr, M_TEMP);
5414}
5415
5416
5417/*
5418 * NFS Version 3 commit RPC
5419 */
5420int
5421nfs3_commit_rpc(
5422 nfsnode_t np,
5423 u_int64_t offset,
5424 u_int64_t count,
5425 kauth_cred_t cred)
5426{
5427 struct nfsmount *nmp;
5428 int error = 0, lockerror, status, wccpostattr = 0, nfsvers;
5429 struct timespec premtime = { 0, 0 };
5430 u_int64_t xid, wverf;
5431 uint32_t count32;
5432 struct nfsm_chain nmreq, nmrep;
5433
5434 nmp = NFSTONMP(np);
5435 FSDBG(521, np, offset, count, nmp ? nmp->nm_state : 0);
5436 if (!nmp)
5437 return (ENXIO);
5438 if (!(nmp->nm_state & NFSSTA_HASWRITEVERF))
5439 return (0);
5440 nfsvers = nmp->nm_vers;
5441
5442 if (count > UINT32_MAX)
5443 count32 = 0;
5444 else
5445 count32 = count;
5446
5447 nfsm_chain_null(&nmreq);
5448 nfsm_chain_null(&nmrep);
5449
5450 nfsm_chain_build_alloc_init(error, &nmreq, NFSX_FH(NFS_VER3));
5451 nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
5452 nfsm_chain_add_64(error, &nmreq, offset);
5453 nfsm_chain_add_32(error, &nmreq, count32);
5454 nfsm_chain_build_done(error, &nmreq);
5455 nfsmout_if(error);
5456 error = nfs_request2(np, NULL, &nmreq, NFSPROC_COMMIT,
5457 current_thread(), cred, 0, &nmrep, &xid, &status);
5458 if ((lockerror = nfs_node_lock(np)))
5459 error = lockerror;
5460 /* can we do anything useful with the wcc info? */
5461 nfsm_chain_get_wcc_data(error, &nmrep, np, &premtime, &wccpostattr, &xid);
5462 if (!lockerror)
5463 nfs_node_unlock(np);
5464 if (!error)
5465 error = status;
5466 nfsm_chain_get_64(error, &nmrep, wverf);
5467 nfsmout_if(error);
5468 lck_mtx_lock(&nmp->nm_lock);
5469 if (nmp->nm_verf != wverf) {
5470 nmp->nm_verf = wverf;
5471 error = NFSERR_STALEWRITEVERF;
5472 }
5473 lck_mtx_unlock(&nmp->nm_lock);
5474nfsmout:
5475 nfsm_chain_cleanup(&nmreq);
5476 nfsm_chain_cleanup(&nmrep);
5477 return (error);
5478}
5479
5480
5481int
5482nfs_vnop_blockmap(
5483 __unused struct vnop_blockmap_args /* {
5484 struct vnodeop_desc *a_desc;
5485 vnode_t a_vp;
5486 off_t a_foffset;
5487 size_t a_size;
5488 daddr64_t *a_bpn;
5489 size_t *a_run;
5490 void *a_poff;
5491 int a_flags;
5492 } */ *ap)
5493{
5494 return (ENOTSUP);
5495}
5496
5497/*
5498 * Mmap a file
5499 *
5500 * NB Currently unsupported.
5501 */
5502/*ARGSUSED*/
5503int
5504nfs_vnop_mmap(
5505 __unused struct vnop_mmap_args /* {
5506 struct vnodeop_desc *a_desc;
5507 vnode_t a_vp;
5508 int a_fflags;
5509 vfs_context_t a_context;
5510 } */ *ap)
5511{
5512 return (EINVAL);
5513}
5514
5515/*
5516 * fsync vnode op. Just call nfs_flush().
5517 */
5518/* ARGSUSED */
5519int
5520nfs_vnop_fsync(
5521 struct vnop_fsync_args /* {
5522 struct vnodeop_desc *a_desc;
5523 vnode_t a_vp;
5524 int a_waitfor;
5525 vfs_context_t a_context;
5526 } */ *ap)
5527{
5528 return (nfs_flush(VTONFS(ap->a_vp), ap->a_waitfor, vfs_context_thread(ap->a_context), 0));
5529}
5530
5531
5532/*
5533 * Do an NFS pathconf RPC.
5534 */
5535int
5536nfs3_pathconf_rpc(
5537 nfsnode_t np,
5538 struct nfs_fsattr *nfsap,
5539 vfs_context_t ctx)
5540{
5541 u_int64_t xid;
5542 int error = 0, lockerror, status, nfsvers;
5543 struct nfsm_chain nmreq, nmrep;
5544 struct nfsmount *nmp = NFSTONMP(np);
5545 uint32_t val = 0;
5546
5547 if (!nmp)
5548 return (ENXIO);
5549 nfsvers = nmp->nm_vers;
5550
5551 nfsm_chain_null(&nmreq);
5552 nfsm_chain_null(&nmrep);
5553
5554 /* fetch pathconf info from server */
5555 nfsm_chain_build_alloc_init(error, &nmreq, NFSX_FH(NFS_VER3));
5556 nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
5557 nfsm_chain_build_done(error, &nmreq);
5558 nfsmout_if(error);
5559 error = nfs_request(np, NULL, &nmreq, NFSPROC_PATHCONF, ctx,
5560 &nmrep, &xid, &status);
5561 if ((lockerror = nfs_node_lock(np)))
5562 error = lockerror;
5563 nfsm_chain_postop_attr_update(error, &nmrep, np, &xid);
5564 if (!lockerror)
5565 nfs_node_unlock(np);
5566 if (!error)
5567 error = status;
5568 nfsm_chain_get_32(error, &nmrep, nfsap->nfsa_maxlink);
5569 nfsm_chain_get_32(error, &nmrep, nfsap->nfsa_maxname);
5570 nfsm_chain_get_32(error, &nmrep, val);
5571 if (val)
5572 nfsap->nfsa_flags |= NFS_FSFLAG_NO_TRUNC;
5573 nfsm_chain_get_32(error, &nmrep, val);
5574 if (val)
5575 nfsap->nfsa_flags |= NFS_FSFLAG_CHOWN_RESTRICTED;
5576 nfsm_chain_get_32(error, &nmrep, val);
5577 if (val)
5578 nfsap->nfsa_flags |= NFS_FSFLAG_CASE_INSENSITIVE;
5579 nfsm_chain_get_32(error, &nmrep, val);
5580 if (val)
5581 nfsap->nfsa_flags |= NFS_FSFLAG_CASE_PRESERVING;
5582 NFS_BITMAP_SET(nfsap->nfsa_bitmap, NFS_FATTR_MAXLINK);
5583 NFS_BITMAP_SET(nfsap->nfsa_bitmap, NFS_FATTR_MAXNAME);
5584 NFS_BITMAP_SET(nfsap->nfsa_bitmap, NFS_FATTR_NO_TRUNC);
5585 NFS_BITMAP_SET(nfsap->nfsa_bitmap, NFS_FATTR_CHOWN_RESTRICTED);
5586 NFS_BITMAP_SET(nfsap->nfsa_bitmap, NFS_FATTR_CASE_INSENSITIVE);
5587 NFS_BITMAP_SET(nfsap->nfsa_bitmap, NFS_FATTR_CASE_PRESERVING);
5588nfsmout:
5589 nfsm_chain_cleanup(&nmreq);
5590 nfsm_chain_cleanup(&nmrep);
5591 return (error);
5592}
5593
5594/* save pathconf info for NFSv3 mount */
5595void
5596nfs3_pathconf_cache(struct nfsmount *nmp, struct nfs_fsattr *nfsap)
5597{
5598 nmp->nm_fsattr.nfsa_maxlink = nfsap->nfsa_maxlink;
5599 nmp->nm_fsattr.nfsa_maxname = nfsap->nfsa_maxname;
5600 nmp->nm_fsattr.nfsa_flags |= nfsap->nfsa_flags & NFS_FSFLAG_NO_TRUNC;
5601 nmp->nm_fsattr.nfsa_flags |= nfsap->nfsa_flags & NFS_FSFLAG_CHOWN_RESTRICTED;
5602 nmp->nm_fsattr.nfsa_flags |= nfsap->nfsa_flags & NFS_FSFLAG_CASE_INSENSITIVE;
5603 nmp->nm_fsattr.nfsa_flags |= nfsap->nfsa_flags & NFS_FSFLAG_CASE_PRESERVING;
5604 NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_MAXLINK);
5605 NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_MAXNAME);
5606 NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_NO_TRUNC);
5607 NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_CHOWN_RESTRICTED);
5608 NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_CASE_INSENSITIVE);
5609 NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_CASE_PRESERVING);
5610 nmp->nm_state |= NFSSTA_GOTPATHCONF;
5611}
5612
5613/*
5614 * Return POSIX pathconf information applicable to nfs.
5615 *
5616 * The NFS V2 protocol doesn't support this, so just return EINVAL
5617 * for V2.
5618 */
5619/* ARGSUSED */
5620int
5621nfs_vnop_pathconf(
5622 struct vnop_pathconf_args /* {
5623 struct vnodeop_desc *a_desc;
5624 vnode_t a_vp;
5625 int a_name;
5626 int32_t *a_retval;
5627 vfs_context_t a_context;
5628 } */ *ap)
5629{
5630 vnode_t vp = ap->a_vp;
5631 nfsnode_t np = VTONFS(vp);
5632 struct nfsmount *nmp;
5633 struct nfs_fsattr nfsa, *nfsap;
5634 int error = 0;
5635 uint64_t maxFileSize;
5636 uint nbits;
5637
5638 nmp = VTONMP(vp);
5639 if (!nmp)
5640 return (ENXIO);
5641
5642 switch (ap->a_name) {
5643 case _PC_LINK_MAX:
5644 case _PC_NAME_MAX:
5645 case _PC_CHOWN_RESTRICTED:
5646 case _PC_NO_TRUNC:
5647 case _PC_CASE_SENSITIVE:
5648 case _PC_CASE_PRESERVING:
5649 break;
5650 case _PC_FILESIZEBITS:
5651 if (nmp->nm_vers == NFS_VER2) {
5652 *ap->a_retval = 32;
5653 return (0);
5654 }
5655 break;
5656 default:
5657 /* don't bother contacting the server if we know the answer */
5658 return (EINVAL);
5659 }
5660
5661 if (nmp->nm_vers == NFS_VER2)
5662 return (EINVAL);
5663
5664 lck_mtx_lock(&nmp->nm_lock);
5665 if (nmp->nm_vers == NFS_VER3) {
5666 if (!(nmp->nm_state & NFSSTA_GOTPATHCONF)) {
5667 /* no pathconf info cached */
5668 lck_mtx_unlock(&nmp->nm_lock);
5669 NFS_CLEAR_ATTRIBUTES(nfsa.nfsa_bitmap);
5670 error = nfs3_pathconf_rpc(np, &nfsa, ap->a_context);
5671 if (error)
5672 return (error);
5673 nmp = VTONMP(vp);
5674 if (!nmp)
5675 return (ENXIO);
5676 lck_mtx_lock(&nmp->nm_lock);
5677 if (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_HOMOGENEOUS) {
5678 /* all files have the same pathconf info, */
5679 /* so cache a copy of the results */
5680 nfs3_pathconf_cache(nmp, &nfsa);
5681 }
5682 nfsap = &nfsa;
5683 } else {
5684 nfsap = &nmp->nm_fsattr;
5685 }
5686 } else if (!(nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_HOMOGENEOUS)) {
5687 /* no pathconf info cached */
5688 lck_mtx_unlock(&nmp->nm_lock);
5689 NFS_CLEAR_ATTRIBUTES(nfsa.nfsa_bitmap);
5690 error = nfs4_pathconf_rpc(np, &nfsa, ap->a_context);
5691 if (error)
5692 return (error);
5693 nmp = VTONMP(vp);
5694 if (!nmp)
5695 return (ENXIO);
5696 lck_mtx_lock(&nmp->nm_lock);
5697 nfsap = &nfsa;
5698 } else {
5699 nfsap = &nmp->nm_fsattr;
5700 }
5701
5702 switch (ap->a_name) {
5703 case _PC_LINK_MAX:
5704 if (NFS_BITMAP_ISSET(nfsap->nfsa_bitmap, NFS_FATTR_MAXLINK))
5705 *ap->a_retval = nfsap->nfsa_maxlink;
5706 else if ((nmp->nm_vers == NFS_VER4) && NFS_BITMAP_ISSET(np->n_vattr.nva_bitmap, NFS_FATTR_MAXLINK))
5707 *ap->a_retval = np->n_vattr.nva_maxlink;
5708 else
5709 error = EINVAL;
5710 break;
5711 case _PC_NAME_MAX:
5712 if (NFS_BITMAP_ISSET(nfsap->nfsa_bitmap, NFS_FATTR_MAXNAME))
5713 *ap->a_retval = nfsap->nfsa_maxname;
5714 else
5715 error = EINVAL;
5716 break;
5717 case _PC_CHOWN_RESTRICTED:
5718 if (NFS_BITMAP_ISSET(nfsap->nfsa_bitmap, NFS_FATTR_CHOWN_RESTRICTED))
5719 *ap->a_retval = (nfsap->nfsa_flags & NFS_FSFLAG_CHOWN_RESTRICTED) ? 200112 /* _POSIX_CHOWN_RESTRICTED */ : 0;
5720 else
5721 error = EINVAL;
5722 break;
5723 case _PC_NO_TRUNC:
5724 if (NFS_BITMAP_ISSET(nfsap->nfsa_bitmap, NFS_FATTR_NO_TRUNC))
5725 *ap->a_retval = (nfsap->nfsa_flags & NFS_FSFLAG_NO_TRUNC) ? 200112 /* _POSIX_NO_TRUNC */ : 0;
5726 else
5727 error = EINVAL;
5728 break;
5729 case _PC_CASE_SENSITIVE:
5730 if (NFS_BITMAP_ISSET(nfsap->nfsa_bitmap, NFS_FATTR_CASE_INSENSITIVE))
5731 *ap->a_retval = (nfsap->nfsa_flags & NFS_FSFLAG_CASE_INSENSITIVE) ? 0 : 1;
5732 else
5733 error = EINVAL;
5734 break;
5735 case _PC_CASE_PRESERVING:
5736 if (NFS_BITMAP_ISSET(nfsap->nfsa_bitmap, NFS_FATTR_CASE_PRESERVING))
5737 *ap->a_retval = (nfsap->nfsa_flags & NFS_FSFLAG_CASE_PRESERVING) ? 1 : 0;
5738 else
5739 error = EINVAL;
5740 break;
5741 case _PC_FILESIZEBITS:
5742 if (!NFS_BITMAP_ISSET(nfsap->nfsa_bitmap, NFS_FATTR_MAXFILESIZE)) {
5743 *ap->a_retval = 64;
5744 error = 0;
5745 break;
5746 }
5747 maxFileSize = nfsap->nfsa_maxfilesize;
5748 nbits = 1;
5749 if (maxFileSize & 0xffffffff00000000ULL) {
5750 nbits += 32;
5751 maxFileSize >>= 32;
5752 }
5753 if (maxFileSize & 0xffff0000) {
5754 nbits += 16;
5755 maxFileSize >>= 16;
5756 }
5757 if (maxFileSize & 0xff00) {
5758 nbits += 8;
5759 maxFileSize >>= 8;
5760 }
5761 if (maxFileSize & 0xf0) {
5762 nbits += 4;
5763 maxFileSize >>= 4;
5764 }
5765 if (maxFileSize & 0xc) {
5766 nbits += 2;
5767 maxFileSize >>= 2;
5768 }
5769 if (maxFileSize & 0x2) {
5770 nbits += 1;
5771 }
5772 *ap->a_retval = nbits;
5773 break;
5774 default:
5775 error = EINVAL;
5776 }
5777
5778 lck_mtx_unlock(&nmp->nm_lock);
5779
5780 return (error);
5781}
5782
5783/*
5784 * Read wrapper for special devices.
5785 */
5786int
5787nfsspec_vnop_read(
5788 struct vnop_read_args /* {
5789 struct vnodeop_desc *a_desc;
5790 vnode_t a_vp;
5791 struct uio *a_uio;
5792 int a_ioflag;
5793 vfs_context_t a_context;
5794 } */ *ap)
5795{
5796 nfsnode_t np = VTONFS(ap->a_vp);
5797 struct timeval now;
5798 int error;
5799
5800 /*
5801 * Set access flag.
5802 */
5803 if ((error = nfs_node_lock(np)))
5804 return (error);
5805 np->n_flag |= NACC;
5806 microtime(&now);
5807 np->n_atim.tv_sec = now.tv_sec;
5808 np->n_atim.tv_nsec = now.tv_usec * 1000;
5809 nfs_node_unlock(np);
5810 return (VOCALL(spec_vnodeop_p, VOFFSET(vnop_read), ap));
5811}
5812
5813/*
5814 * Write wrapper for special devices.
5815 */
5816int
5817nfsspec_vnop_write(
5818 struct vnop_write_args /* {
5819 struct vnodeop_desc *a_desc;
5820 vnode_t a_vp;
5821 struct uio *a_uio;
5822 int a_ioflag;
5823 vfs_context_t a_context;
5824 } */ *ap)
5825{
5826 nfsnode_t np = VTONFS(ap->a_vp);
5827 struct timeval now;
5828 int error;
5829
5830 /*
5831 * Set update flag.
5832 */
5833 if ((error = nfs_node_lock(np)))
5834 return (error);
5835 np->n_flag |= NUPD;
5836 microtime(&now);
5837 np->n_mtim.tv_sec = now.tv_sec;
5838 np->n_mtim.tv_nsec = now.tv_usec * 1000;
5839 nfs_node_unlock(np);
5840 return (VOCALL(spec_vnodeop_p, VOFFSET(vnop_write), ap));
5841}
5842
5843/*
5844 * Close wrapper for special devices.
5845 *
5846 * Update the times on the nfsnode then do device close.
5847 */
5848int
5849nfsspec_vnop_close(
5850 struct vnop_close_args /* {
5851 struct vnodeop_desc *a_desc;
5852 vnode_t a_vp;
5853 int a_fflag;
5854 vfs_context_t a_context;
5855 } */ *ap)
5856{
5857 vnode_t vp = ap->a_vp;
5858 nfsnode_t np = VTONFS(vp);
5859 struct vnode_attr vattr;
5860 mount_t mp;
5861 int error;
5862
5863 if ((error = nfs_node_lock(np)))
5864 return (error);
5865 if (np->n_flag & (NACC | NUPD)) {
5866 np->n_flag |= NCHG;
5867 if (!vnode_isinuse(vp, 0) && (mp = vnode_mount(vp)) && !vfs_isrdonly(mp)) {
5868 VATTR_INIT(&vattr);
5869 if (np->n_flag & NACC) {
5870 vattr.va_access_time = np->n_atim;
5871 VATTR_SET_ACTIVE(&vattr, va_access_time);
5872 }
5873 if (np->n_flag & NUPD) {
5874 vattr.va_modify_time = np->n_mtim;
5875 VATTR_SET_ACTIVE(&vattr, va_modify_time);
5876 }
5877 nfs_node_unlock(np);
5878 vnode_setattr(vp, &vattr, ap->a_context);
5879 } else {
5880 nfs_node_unlock(np);
5881 }
5882 } else {
5883 nfs_node_unlock(np);
5884 }
5885 return (VOCALL(spec_vnodeop_p, VOFFSET(vnop_close), ap));
5886}
5887
5888#if FIFO
5889extern vnop_t **fifo_vnodeop_p;
5890
5891/*
5892 * Read wrapper for fifos.
5893 */
5894int
5895nfsfifo_vnop_read(
5896 struct vnop_read_args /* {
5897 struct vnodeop_desc *a_desc;
5898 vnode_t a_vp;
5899 struct uio *a_uio;
5900 int a_ioflag;
5901 vfs_context_t a_context;
5902 } */ *ap)
5903{
5904 nfsnode_t np = VTONFS(ap->a_vp);
5905 struct timeval now;
5906 int error;
5907
5908 /*
5909 * Set access flag.
5910 */
5911 if ((error = nfs_node_lock(np)))
5912 return (error);
5913 np->n_flag |= NACC;
5914 microtime(&now);
5915 np->n_atim.tv_sec = now.tv_sec;
5916 np->n_atim.tv_nsec = now.tv_usec * 1000;
5917 nfs_node_unlock(np);
5918 return (VOCALL(fifo_vnodeop_p, VOFFSET(vnop_read), ap));
5919}
5920
5921/*
5922 * Write wrapper for fifos.
5923 */
5924int
5925nfsfifo_vnop_write(
5926 struct vnop_write_args /* {
5927 struct vnodeop_desc *a_desc;
5928 vnode_t a_vp;
5929 struct uio *a_uio;
5930 int a_ioflag;
5931 vfs_context_t a_context;
5932 } */ *ap)
5933{
5934 nfsnode_t np = VTONFS(ap->a_vp);
5935 struct timeval now;
5936 int error;
5937
5938 /*
5939 * Set update flag.
5940 */
5941 if ((error = nfs_node_lock(np)))
5942 return (error);
5943 np->n_flag |= NUPD;
5944 microtime(&now);
5945 np->n_mtim.tv_sec = now.tv_sec;
5946 np->n_mtim.tv_nsec = now.tv_usec * 1000;
5947 nfs_node_unlock(np);
5948 return (VOCALL(fifo_vnodeop_p, VOFFSET(vnop_write), ap));
5949}
5950
5951/*
5952 * Close wrapper for fifos.
5953 *
5954 * Update the times on the nfsnode then do fifo close.
5955 */
5956int
5957nfsfifo_vnop_close(
5958 struct vnop_close_args /* {
5959 struct vnodeop_desc *a_desc;
5960 vnode_t a_vp;
5961 int a_fflag;
5962 vfs_context_t a_context;
5963 } */ *ap)
5964{
5965 vnode_t vp = ap->a_vp;
5966 nfsnode_t np = VTONFS(vp);
5967 struct vnode_attr vattr;
5968 struct timeval now;
5969 mount_t mp;
5970 int error;
5971
5972 if ((error = nfs_node_lock(np)))
5973 return (error);
5974 if (np->n_flag & (NACC | NUPD)) {
5975 microtime(&now);
5976 if (np->n_flag & NACC) {
5977 np->n_atim.tv_sec = now.tv_sec;
5978 np->n_atim.tv_nsec = now.tv_usec * 1000;
5979 }
5980 if (np->n_flag & NUPD) {
5981 np->n_mtim.tv_sec = now.tv_sec;
5982 np->n_mtim.tv_nsec = now.tv_usec * 1000;
5983 }
5984 np->n_flag |= NCHG;
5985 if (!vnode_isinuse(vp, 1) && (mp = vnode_mount(vp)) && !vfs_isrdonly(mp)) {
5986 VATTR_INIT(&vattr);
5987 if (np->n_flag & NACC) {
5988 vattr.va_access_time = np->n_atim;
5989 VATTR_SET_ACTIVE(&vattr, va_access_time);
5990 }
5991 if (np->n_flag & NUPD) {
5992 vattr.va_modify_time = np->n_mtim;
5993 VATTR_SET_ACTIVE(&vattr, va_modify_time);
5994 }
5995 nfs_node_unlock(np);
5996 vnode_setattr(vp, &vattr, ap->a_context);
5997 } else {
5998 nfs_node_unlock(np);
5999 }
6000 } else {
6001 nfs_node_unlock(np);
6002 }
6003 return (VOCALL(fifo_vnodeop_p, VOFFSET(vnop_close), ap));
6004}
6005#endif /* FIFO */
6006
6007/*ARGSUSED*/
6008int
6009nfs_vnop_ioctl(
6010 __unused struct vnop_ioctl_args /* {
6011 struct vnodeop_desc *a_desc;
6012 vnode_t a_vp;
6013 u_int32_t a_command;
6014 caddr_t a_data;
6015 int a_fflag;
6016 vfs_context_t a_context;
6017 } */ *ap)
6018{
6019
6020 /*
6021 * XXX we were once bogusly enoictl() which returned this (ENOTTY).
6022 * Probably we should return ENODEV.
6023 */
6024 return (ENOTTY);
6025}
6026
6027/*ARGSUSED*/
6028int
6029nfs_vnop_select(
6030 __unused struct vnop_select_args /* {
6031 struct vnodeop_desc *a_desc;
6032 vnode_t a_vp;
6033 int a_which;
6034 int a_fflags;
6035 void *a_wql;
6036 vfs_context_t a_context;
6037 } */ *ap)
6038{
6039
6040 /*
6041 * We were once bogusly seltrue() which returns 1. Is this right?
6042 */
6043 return (1);
6044}
6045
6046/*
6047 * vnode OP for pagein using UPL
6048 *
6049 * No buffer I/O, just RPCs straight into the mapped pages.
6050 */
6051int
6052nfs_vnop_pagein(
6053 struct vnop_pagein_args /* {
6054 struct vnodeop_desc *a_desc;
6055 vnode_t a_vp;
6056 upl_t a_pl;
6057 vm_offset_t a_pl_offset;
6058 off_t a_f_offset;
6059 size_t a_size;
6060 int a_flags;
6061 vfs_context_t a_context;
6062 } */ *ap)
6063{
6064 vnode_t vp = ap->a_vp;
6065 upl_t pl = ap->a_pl;
6066 size_t size = ap->a_size;
6067 off_t f_offset = ap->a_f_offset;
6068 vm_offset_t pl_offset = ap->a_pl_offset;
6069 int flags = ap->a_flags;
6070 thread_t thd;
6071 kauth_cred_t cred;
6072 nfsnode_t np = VTONFS(vp);
6073 size_t nmrsize, iosize, txsize, rxsize, retsize;
6074 off_t txoffset;
6075 struct nfsmount *nmp;
6076 int error = 0;
6077 vm_offset_t ioaddr, rxaddr;
6078 uio_t uio;
6079 char uio_buf [ UIO_SIZEOF(1) ];
6080 int nofreeupl = flags & UPL_NOCOMMIT;
6081 upl_page_info_t *plinfo;
6082#define MAXPAGINGREQS 16 /* max outstanding RPCs for pagein/pageout */
6083 struct nfsreq *req[MAXPAGINGREQS];
6084 int nextsend, nextwait;
6085 uint32_t stategenid = 0, restart = 0;
6086 kern_return_t kret;
6087
6088 FSDBG(322, np, f_offset, size, flags);
6089 if (pl == (upl_t)NULL)
6090 panic("nfs_pagein: no upl");
6091
6092 if (size <= 0) {
6093 printf("nfs_pagein: invalid size %ld", size);
6094 if (!nofreeupl)
6095 (void) ubc_upl_abort(pl, 0);
6096 return (EINVAL);
6097 }
6098 if (f_offset < 0 || f_offset >= (off_t)np->n_size || (f_offset & PAGE_MASK_64)) {
6099 if (!nofreeupl)
6100 ubc_upl_abort_range(pl, pl_offset, size,
6101 UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY);
6102 return (EINVAL);
6103 }
6104
6105 thd = vfs_context_thread(ap->a_context);
6106 cred = ubc_getcred(vp);
6107 if (!IS_VALID_CRED(cred))
6108 cred = vfs_context_ucred(ap->a_context);
6109
6110 uio = uio_createwithbuffer(1, f_offset, UIO_SYSSPACE, UIO_READ,
6111 &uio_buf, sizeof(uio_buf));
6112
6113 nmp = VTONMP(vp);
6114 if (!nmp) {
6115 if (!nofreeupl)
6116 ubc_upl_abort_range(pl, pl_offset, size,
6117 UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY);
6118 return (ENXIO);
6119 }
6120 nmrsize = nmp->nm_rsize;
6121
6122 plinfo = ubc_upl_pageinfo(pl);
6123 kret = ubc_upl_map(pl, &ioaddr);
6124 if (kret != KERN_SUCCESS)
6125 panic("nfs_vnop_pagein: ubc_upl_map() failed with (%d)", kret);
6126 ioaddr += pl_offset;
6127
6128tryagain:
6129 if (nmp->nm_vers >= NFS_VER4)
6130 stategenid = nmp->nm_stategenid;
6131 txsize = rxsize = size;
6132 txoffset = f_offset;
6133 rxaddr = ioaddr;
6134
6135 bzero(req, sizeof(req));
6136 nextsend = nextwait = 0;
6137 do {
6138 /* send requests while we need to and have available slots */
6139 while ((txsize > 0) && (req[nextsend] == NULL)) {
6140 iosize = MIN(nmrsize, txsize);
6141 if ((error = nmp->nm_funcs->nf_read_rpc_async(np, txoffset, iosize, thd, cred, NULL, &req[nextsend]))) {
6142 req[nextsend] = NULL;
6143 break;
6144 }
6145 txoffset += iosize;
6146 txsize -= iosize;
6147 nextsend = (nextsend + 1) % MAXPAGINGREQS;
6148 }
6149 /* wait while we need to and break out if more requests to send */
6150 while ((rxsize > 0) && req[nextwait]) {
6151 iosize = retsize = MIN(nmrsize, rxsize);
6152 uio_reset(uio, uio_offset(uio), UIO_SYSSPACE, UIO_READ);
6153 uio_addiov(uio, CAST_USER_ADDR_T(rxaddr), iosize);
6154 FSDBG(322, uio_offset(uio), uio_resid(uio), rxaddr, rxsize);
6155#if UPL_DEBUG
6156 upl_ubc_alias_set(pl, (uintptr_t) current_thread(), (uintptr_t) 2);
6157#endif /* UPL_DEBUG */
6158 OSAddAtomic(1, &nfsstats.pageins);
6159 error = nmp->nm_funcs->nf_read_rpc_async_finish(np, req[nextwait], uio, &retsize, NULL);
6160 req[nextwait] = NULL;
6161 nextwait = (nextwait + 1) % MAXPAGINGREQS;
6162 if ((nmp->nm_vers >= NFS_VER4) && nfs_mount_state_error_should_restart(error)) {
6163 lck_mtx_lock(&nmp->nm_lock);
6164 if ((error != NFSERR_GRACE) && (stategenid == nmp->nm_stategenid) && !(nmp->nm_state & NFSSTA_RECOVER)) {
6165 printf("nfs_vnop_pagein: error %d, initiating recovery\n", error);
6166 nmp->nm_state |= NFSSTA_RECOVER;
6167 nfs_mount_sock_thread_wake(nmp);
6168 }
6169 lck_mtx_unlock(&nmp->nm_lock);
6170 if (error == NFSERR_GRACE)
6171 tsleep(&nmp->nm_state, (PZERO-1), "nfsgrace", 2*hz);
6172 restart++;
6173 goto cancel;
6174 }
6175 if (error) {
6176 FSDBG(322, uio_offset(uio), uio_resid(uio), error, -1);
6177 break;
6178 }
6179 if (retsize < iosize) {
6180 /* Just zero fill the rest of the valid area. */
6181 int zcnt = iosize - retsize;
6182 bzero((char *)rxaddr + retsize, zcnt);
6183 FSDBG(324, uio_offset(uio), retsize, zcnt, rxaddr);
6184 uio_update(uio, zcnt);
6185 }
6186 rxaddr += iosize;
6187 rxsize -= iosize;
6188 if (txsize)
6189 break;
6190 }
6191 } while (!error && (txsize || rxsize));
6192
6193 restart = 0;
6194
6195 if (error) {
6196cancel:
6197 /* cancel any outstanding requests */
6198 while (req[nextwait]) {
6199 nfs_request_async_cancel(req[nextwait]);
6200 req[nextwait] = NULL;
6201 nextwait = (nextwait + 1) % MAXPAGINGREQS;
6202 }
6203 if (restart) {
6204 if ((restart <= nfs_mount_state_max_restarts(nmp)) && /* guard against no progress */
6205 (!(error = nfs_mount_state_wait_for_recovery(nmp))))
6206 goto tryagain;
6207 printf("nfs_pagein: too many restarts, aborting.\n");
6208 }
6209 }
6210
6211 ubc_upl_unmap(pl);
6212
6213 if (!nofreeupl) {
6214 if (error)
6215 ubc_upl_abort_range(pl, pl_offset, size,
6216 UPL_ABORT_ERROR |
6217 UPL_ABORT_FREE_ON_EMPTY);
6218 else
6219 ubc_upl_commit_range(pl, pl_offset, size,
6220 UPL_COMMIT_CLEAR_DIRTY |
6221 UPL_COMMIT_FREE_ON_EMPTY);
6222 }
6223 return (error);
6224}
6225
6226
/*
 * the following are needed only by nfs_pageout to know how to handle errors
 * see nfs_pageout comments on explanation of actions.
 * the errors here are copied from errno.h and errors returned by servers
 * are expected to match the same numbers here. If not, our actions may be
 * erroneous.
 */
char nfs_pageouterrorhandler(int);
enum actiontype {NOACTION, DUMP, DUMPANDLOG, RETRY, RETRYWITHSLEEP, SEVER};
#define NFS_ELAST 88
static u_char errorcount[NFS_ELAST+1]; /* better be zeros when initialized */
/* indexed by errno value, 0..NFS_ELAST */
static const char errortooutcome[NFS_ELAST+1] = {
	NOACTION,
	DUMP,			/* EPERM	1	Operation not permitted */
	DUMP,			/* ENOENT	2	No such file or directory */
	DUMPANDLOG,		/* ESRCH	3	No such process */
	RETRY,			/* EINTR	4	Interrupted system call */
	DUMP,			/* EIO		5	Input/output error */
	DUMP,			/* ENXIO	6	Device not configured */
	DUMPANDLOG,		/* E2BIG	7	Argument list too long */
	DUMPANDLOG,		/* ENOEXEC	8	Exec format error */
	DUMPANDLOG,		/* EBADF	9	Bad file descriptor */
	DUMPANDLOG,		/* ECHILD	10	No child processes */
	DUMPANDLOG,		/* EDEADLK	11	Resource deadlock avoided - was EAGAIN */
	RETRY,			/* ENOMEM	12	Cannot allocate memory */
	DUMP,			/* EACCES	13	Permission denied */
	DUMPANDLOG,		/* EFAULT	14	Bad address */
	DUMPANDLOG,		/* ENOTBLK	15	POSIX - Block device required */
	RETRY,			/* EBUSY	16	Device busy */
	DUMP,			/* EEXIST	17	File exists */
	DUMP,			/* EXDEV	18	Cross-device link */
	DUMP,			/* ENODEV	19	Operation not supported by device */
	DUMP,			/* ENOTDIR	20	Not a directory */
	DUMP,			/* EISDIR	21	Is a directory */
	DUMP,			/* EINVAL	22	Invalid argument */
	DUMPANDLOG,		/* ENFILE	23	Too many open files in system */
	DUMPANDLOG,		/* EMFILE	24	Too many open files */
	DUMPANDLOG,		/* ENOTTY	25	Inappropriate ioctl for device */
	DUMPANDLOG,		/* ETXTBSY	26	Text file busy - POSIX */
	DUMP,			/* EFBIG	27	File too large */
	DUMP,			/* ENOSPC	28	No space left on device */
	DUMPANDLOG,		/* ESPIPE	29	Illegal seek */
	DUMP,			/* EROFS	30	Read-only file system */
	DUMP,			/* EMLINK	31	Too many links */
	RETRY,			/* EPIPE	32	Broken pipe */
	/* math software */
	DUMPANDLOG,		/* EDOM		33	Numerical argument out of domain */
	DUMPANDLOG,		/* ERANGE	34	Result too large */
	RETRY,			/* EAGAIN/EWOULDBLOCK	35	Resource temporarily unavailable */
	DUMPANDLOG,		/* EINPROGRESS	36	Operation now in progress */
	DUMPANDLOG,		/* EALREADY	37	Operation already in progress */
	/* ipc/network software -- argument errors */
	DUMPANDLOG,		/* ENOTSOCK	38	Socket operation on non-socket */
	DUMPANDLOG,		/* EDESTADDRREQ	39	Destination address required */
	DUMPANDLOG,		/* EMSGSIZE	40	Message too long */
	DUMPANDLOG,		/* EPROTOTYPE	41	Protocol wrong type for socket */
	DUMPANDLOG,		/* ENOPROTOOPT	42	Protocol not available */
	DUMPANDLOG,		/* EPROTONOSUPPORT	43	Protocol not supported */
	DUMPANDLOG,		/* ESOCKTNOSUPPORT	44	Socket type not supported */
	DUMPANDLOG,		/* ENOTSUP	45	Operation not supported */
	DUMPANDLOG,		/* EPFNOSUPPORT	46	Protocol family not supported */
	DUMPANDLOG,		/* EAFNOSUPPORT	47	Address family not supported by protocol family */
	DUMPANDLOG,		/* EADDRINUSE	48	Address already in use */
	DUMPANDLOG,		/* EADDRNOTAVAIL	49	Can't assign requested address */
	/* ipc/network software -- operational errors */
	RETRY,			/* ENETDOWN	50	Network is down */
	RETRY,			/* ENETUNREACH	51	Network is unreachable */
	RETRY,			/* ENETRESET	52	Network dropped connection on reset */
	RETRY,			/* ECONNABORTED	53	Software caused connection abort */
	RETRY,			/* ECONNRESET	54	Connection reset by peer */
	RETRY,			/* ENOBUFS	55	No buffer space available */
	RETRY,			/* EISCONN	56	Socket is already connected */
	RETRY,			/* ENOTCONN	57	Socket is not connected */
	RETRY,			/* ESHUTDOWN	58	Can't send after socket shutdown */
	RETRY,			/* ETOOMANYREFS	59	Too many references: can't splice */
	RETRY,			/* ETIMEDOUT	60	Operation timed out */
	RETRY,			/* ECONNREFUSED	61	Connection refused */

	DUMPANDLOG,		/* ELOOP	62	Too many levels of symbolic links */
	DUMP,			/* ENAMETOOLONG	63	File name too long */
	RETRY,			/* EHOSTDOWN	64	Host is down */
	RETRY,			/* EHOSTUNREACH	65	No route to host */
	DUMP,			/* ENOTEMPTY	66	Directory not empty */
	/* quotas & mush */
	DUMPANDLOG,		/* EPROCLIM	67	Too many processes */
	DUMPANDLOG,		/* EUSERS	68	Too many users */
	DUMPANDLOG,		/* EDQUOT	69	Disc quota exceeded */
	/* Network File System */
	DUMP,			/* ESTALE	70	Stale NFS file handle */
	DUMP,			/* EREMOTE	71	Too many levels of remote in path */
	DUMPANDLOG,		/* EBADRPC	72	RPC struct is bad */
	DUMPANDLOG,		/* ERPCMISMATCH	73	RPC version wrong */
	DUMPANDLOG,		/* EPROGUNAVAIL	74	RPC prog. not avail */
	DUMPANDLOG,		/* EPROGMISMATCH	75	Program version wrong */
	DUMPANDLOG,		/* EPROCUNAVAIL	76	Bad procedure for program */

	DUMPANDLOG,		/* ENOLCK	77	No locks available */
	DUMPANDLOG,		/* ENOSYS	78	Function not implemented */
	DUMPANDLOG,		/* EFTYPE	79	Inappropriate file type or format */
	DUMPANDLOG,		/* EAUTH	80	Authentication error */
	DUMPANDLOG,		/* ENEEDAUTH	81	Need authenticator */
	/* Intelligent device errors */
	DUMPANDLOG,		/* EPWROFF	82	Device power is off */
	DUMPANDLOG,		/* EDEVERR	83	Device error, e.g. paper out */
	DUMPANDLOG,		/* EOVERFLOW	84	Value too large to be stored in data type */
	/* Program loading errors */
	DUMPANDLOG,		/* EBADEXEC	85	Bad executable */
	DUMPANDLOG,		/* EBADARCH	86	Bad CPU type in executable */
	DUMPANDLOG,		/* ESHLIBVERS	87	Shared library version mismatch */
	DUMPANDLOG,		/* EBADMACHO	88	Malformed Macho file */
};

/*
 * Map a pageout error code to the action (enum actiontype) to take.
 *
 * Codes in 0..NFS_ELAST are looked up in errortooutcome[].  Anything
 * outside that range -- larger server-specific codes, or a negative
 * value -- yields DUMP rather than indexing outside the table.
 */
char
nfs_pageouterrorhandler(int error)
{
	if ((error < 0) || (error > NFS_ELAST))
		return (DUMP);
	return (errortooutcome[error]);
}
6347
6348
6349/*
6350 * vnode OP for pageout using UPL
6351 *
6352 * No buffer I/O, just RPCs straight from the mapped pages.
6353 * File size changes are not permitted in pageout.
6354 */
6355int
6356nfs_vnop_pageout(
6357 struct vnop_pageout_args /* {
6358 struct vnodeop_desc *a_desc;
6359 vnode_t a_vp;
6360 upl_t a_pl;
6361 vm_offset_t a_pl_offset;
6362 off_t a_f_offset;
6363 size_t a_size;
6364 int a_flags;
6365 vfs_context_t a_context;
6366 } */ *ap)
6367{
6368 vnode_t vp = ap->a_vp;
6369 upl_t pl = ap->a_pl;
6370 size_t size = ap->a_size;
6371 off_t f_offset = ap->a_f_offset;
6372 vm_offset_t pl_offset = ap->a_pl_offset;
6373 int flags = ap->a_flags;
6374 nfsnode_t np = VTONFS(vp);
6375 thread_t thd;
6376 kauth_cred_t cred;
6377 struct nfsbuf *bp;
6378 struct nfsmount *nmp = VTONMP(vp);
6379 daddr64_t lbn;
6380 int error = 0, iomode;
6381 off_t off, txoffset, rxoffset;
6382 vm_offset_t ioaddr, txaddr, rxaddr;
6383 uio_t auio;
6384 char uio_buf [ UIO_SIZEOF(1) ];
6385 int nofreeupl = flags & UPL_NOCOMMIT;
6386 size_t nmwsize, biosize, iosize, pgsize, txsize, rxsize, xsize, remsize;
6387 struct nfsreq *req[MAXPAGINGREQS];
6388 int nextsend, nextwait, wverfset, commit;
6389 uint64_t wverf, wverf2;
6390 uint32_t stategenid = 0, vrestart = 0, restart = 0, vrestarts = 0, restarts = 0;
6391 kern_return_t kret;
6392
6393 FSDBG(323, f_offset, size, pl, pl_offset);
6394
6395 if (pl == (upl_t)NULL)
6396 panic("nfs_pageout: no upl");
6397
6398 if (size <= 0) {
6399 printf("nfs_pageout: invalid size %ld", size);
6400 if (!nofreeupl)
6401 ubc_upl_abort(pl, 0);
6402 return (EINVAL);
6403 }
6404
6405 if (!nmp) {
6406 if (!nofreeupl)
6407 ubc_upl_abort(pl, UPL_ABORT_DUMP_PAGES|UPL_ABORT_FREE_ON_EMPTY);
6408 return (ENXIO);
6409 }
6410 biosize = nmp->nm_biosize;
6411 nmwsize = nmp->nm_wsize;
6412
6413 nfs_data_lock_noupdate(np, NFS_DATA_LOCK_SHARED);
6414
6415 /*
6416 * Check to see whether the buffer is incore.
6417 * If incore and not busy, invalidate it from the cache.
6418 */
6419 for (iosize = 0; iosize < size; iosize += xsize) {
6420 off = f_offset + iosize;
6421 /* need make sure we do things on block boundaries */
6422 xsize = biosize - (off % biosize);
6423 if (off + xsize > f_offset + size)
6424 xsize = f_offset + size - off;
6425 lbn = (daddr64_t)(off / biosize);
6426 lck_mtx_lock(nfs_buf_mutex);
6427 if ((bp = nfs_buf_incore(np, lbn))) {
6428 FSDBG(323, off, bp, bp->nb_lflags, bp->nb_flags);
6429 if (nfs_buf_acquire(bp, NBAC_NOWAIT, 0, 0)) {
6430 lck_mtx_unlock(nfs_buf_mutex);
6431 nfs_data_unlock_noupdate(np);
6432 /* no panic. just tell vm we are busy */
6433 if (!nofreeupl)
6434 ubc_upl_abort(pl, 0);
6435 return (EBUSY);
6436 }
6437 if (bp->nb_dirtyend > 0) {
6438 /*
6439 * if there's a dirty range in the buffer, check
6440 * to see if it extends beyond the pageout region
6441 *
6442 * if the dirty region lies completely within the
6443 * pageout region, we just invalidate the buffer
6444 * because it's all being written out now anyway.
6445 *
6446 * if any of the dirty region lies outside the
6447 * pageout region, we'll try to clip the dirty
6448 * region to eliminate the portion that's being
6449 * paged out. If that's not possible, because
6450 * the dirty region extends before and after the
6451 * pageout region, then we'll just return EBUSY.
6452 */
6453 off_t boff, start, end;
6454 boff = NBOFF(bp);
6455 start = off;
6456 end = off + xsize;
6457 /* clip end to EOF */
6458 if (end > (off_t)np->n_size)
6459 end = np->n_size;
6460 start -= boff;
6461 end -= boff;
6462 if ((bp->nb_dirtyoff < start) &&
6463 (bp->nb_dirtyend > end)) {
6464 /*
6465 * not gonna be able to clip the dirty region
6466 *
6467 * But before returning the bad news, move the
6468 * buffer to the start of the delwri list and
6469 * give the list a push to try to flush the
6470 * buffer out.
6471 */
6472 FSDBG(323, np, bp, 0xd00deebc, EBUSY);
6473 nfs_buf_remfree(bp);
6474 TAILQ_INSERT_HEAD(&nfsbufdelwri, bp, nb_free);
6475 nfsbufdelwricnt++;
6476 nfs_buf_drop(bp);
6477 nfs_buf_delwri_push(1);
6478 lck_mtx_unlock(nfs_buf_mutex);
6479 nfs_data_unlock_noupdate(np);
6480 if (!nofreeupl)
6481 ubc_upl_abort(pl, 0);
6482 return (EBUSY);
6483 }
6484 if ((bp->nb_dirtyoff < start) ||
6485 (bp->nb_dirtyend > end)) {
6486 /* clip dirty region, if necessary */
6487 if (bp->nb_dirtyoff < start)
6488 bp->nb_dirtyend = min(bp->nb_dirtyend, start);
6489 if (bp->nb_dirtyend > end)
6490 bp->nb_dirtyoff = max(bp->nb_dirtyoff, end);
6491 FSDBG(323, bp, bp->nb_dirtyoff, bp->nb_dirtyend, 0xd00dee00);
6492 /* we're leaving this block dirty */
6493 nfs_buf_drop(bp);
6494 lck_mtx_unlock(nfs_buf_mutex);
6495 continue;
6496 }
6497 }
6498 nfs_buf_remfree(bp);
6499 lck_mtx_unlock(nfs_buf_mutex);
6500 SET(bp->nb_flags, NB_INVAL);
6501 nfs_node_lock_force(np);
6502 if (ISSET(bp->nb_flags, NB_NEEDCOMMIT)) {
6503 CLR(bp->nb_flags, NB_NEEDCOMMIT);
6504 np->n_needcommitcnt--;
6505 CHECK_NEEDCOMMITCNT(np);
6506 }
6507 nfs_node_unlock(np);
6508 nfs_buf_release(bp, 1);
6509 } else {
6510 lck_mtx_unlock(nfs_buf_mutex);
6511 }
6512 }
6513
6514 thd = vfs_context_thread(ap->a_context);
6515 cred = ubc_getcred(vp);
6516 if (!IS_VALID_CRED(cred))
6517 cred = vfs_context_ucred(ap->a_context);
6518
6519 nfs_node_lock_force(np);
6520 if (np->n_flag & NWRITEERR) {
6521 error = np->n_error;
6522 nfs_node_unlock(np);
6523 nfs_data_unlock_noupdate(np);
6524 if (!nofreeupl)
6525 ubc_upl_abort_range(pl, pl_offset, size,
6526 UPL_ABORT_FREE_ON_EMPTY);
6527 return (error);
6528 }
6529 nfs_node_unlock(np);
6530
6531 if (f_offset < 0 || f_offset >= (off_t)np->n_size ||
6532 f_offset & PAGE_MASK_64 || size & PAGE_MASK_64) {
6533 nfs_data_unlock_noupdate(np);
6534 if (!nofreeupl)
6535 ubc_upl_abort_range(pl, pl_offset, size,
6536 UPL_ABORT_FREE_ON_EMPTY);
6537 return (EINVAL);
6538 }
6539
6540 kret = ubc_upl_map(pl, &ioaddr);
6541 if (kret != KERN_SUCCESS)
6542 panic("nfs_vnop_pageout: ubc_upl_map() failed with (%d)", kret);
6543 ioaddr += pl_offset;
6544
6545 if ((u_quad_t)f_offset + size > np->n_size)
6546 xsize = np->n_size - f_offset;
6547 else
6548 xsize = size;
6549
6550 pgsize = round_page_64(xsize);
6551 if ((size > pgsize) && !nofreeupl)
6552 ubc_upl_abort_range(pl, pl_offset + pgsize, size - pgsize,
6553 UPL_ABORT_FREE_ON_EMPTY);
6554
6555 /*
6556 * check for partial page and clear the
6557 * contents past end of the file before
6558 * releasing it in the VM page cache
6559 */
6560 if ((u_quad_t)f_offset < np->n_size && (u_quad_t)f_offset + size > np->n_size) {
6561 size_t io = np->n_size - f_offset;
6562 bzero((caddr_t)(ioaddr + io), size - io);
6563 FSDBG(321, np->n_size, f_offset, f_offset + io, size - io);
6564 }
6565 nfs_data_unlock_noupdate(np);
6566
6567 auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_WRITE,
6568 &uio_buf, sizeof(uio_buf));
6569
6570tryagain:
6571 if (nmp->nm_vers >= NFS_VER4)
6572 stategenid = nmp->nm_stategenid;
6573 wverf = wverf2 = wverfset = 0;
6574 txsize = rxsize = xsize;
6575 txoffset = rxoffset = f_offset;
6576 txaddr = rxaddr = ioaddr;
6577 commit = NFS_WRITE_FILESYNC;
6578
6579 bzero(req, sizeof(req));
6580 nextsend = nextwait = 0;
6581 do {
6582 /* send requests while we need to and have available slots */
6583 while ((txsize > 0) && (req[nextsend] == NULL)) {
6584 iosize = MIN(nmwsize, txsize);
6585 uio_reset(auio, txoffset, UIO_SYSSPACE, UIO_WRITE);
6586 uio_addiov(auio, CAST_USER_ADDR_T(txaddr), iosize);
6587 FSDBG(323, uio_offset(auio), iosize, txaddr, txsize);
6588 OSAddAtomic(1, &nfsstats.pageouts);
6589 nfs_node_lock_force(np);
6590 np->n_numoutput++;
6591 nfs_node_unlock(np);
6592 vnode_startwrite(vp);
6593 iomode = NFS_WRITE_UNSTABLE;
6594 if ((error = nmp->nm_funcs->nf_write_rpc_async(np, auio, iosize, thd, cred, iomode, NULL, &req[nextsend]))) {
6595 req[nextsend] = NULL;
6596 vnode_writedone(vp);
6597 nfs_node_lock_force(np);
6598 np->n_numoutput--;
6599 nfs_node_unlock(np);
6600 break;
6601 }
6602 txaddr += iosize;
6603 txoffset += iosize;
6604 txsize -= iosize;
6605 nextsend = (nextsend + 1) % MAXPAGINGREQS;
6606 }
6607 /* wait while we need to and break out if more requests to send */
6608 while ((rxsize > 0) && req[nextwait]) {
6609 iosize = remsize = MIN(nmwsize, rxsize);
6610 error = nmp->nm_funcs->nf_write_rpc_async_finish(np, req[nextwait], &iomode, &iosize, &wverf2);
6611 req[nextwait] = NULL;
6612 nextwait = (nextwait + 1) % MAXPAGINGREQS;
6613 vnode_writedone(vp);
6614 nfs_node_lock_force(np);
6615 np->n_numoutput--;
6616 nfs_node_unlock(np);
6617 if ((nmp->nm_vers >= NFS_VER4) && nfs_mount_state_error_should_restart(error)) {
6618 lck_mtx_lock(&nmp->nm_lock);
6619 if ((error != NFSERR_GRACE) && (stategenid == nmp->nm_stategenid) && !(nmp->nm_state & NFSSTA_RECOVER)) {
6620 printf("nfs_vnop_pageout: error %d, initiating recovery\n", error);
6621 nmp->nm_state |= NFSSTA_RECOVER;
6622 nfs_mount_sock_thread_wake(nmp);
6623 }
6624 lck_mtx_unlock(&nmp->nm_lock);
6625 if (error == NFSERR_GRACE)
6626 tsleep(&nmp->nm_state, (PZERO-1), "nfsgrace", 2*hz);
6627 restart = 1;
6628 goto cancel;
6629 }
6630 if (error) {
6631 FSDBG(323, rxoffset, rxsize, error, -1);
6632 break;
6633 }
6634 if (!wverfset) {
6635 wverf = wverf2;
6636 wverfset = 1;
6637 } else if (wverf != wverf2) {
6638 /* verifier changed, so we need to restart all the writes */
6639 vrestart = 1;
6640 goto cancel;
6641 }
6642 /* Retain the lowest commitment level returned. */
6643 if (iomode < commit)
6644 commit = iomode;
6645 rxaddr += iosize;
6646 rxoffset += iosize;
6647 rxsize -= iosize;
6648 remsize -= iosize;
6649 if (remsize > 0) {
6650 /* need to try sending the remainder */
6651 iosize = remsize;
6652 uio_reset(auio, rxoffset, UIO_SYSSPACE, UIO_WRITE);
6653 uio_addiov(auio, CAST_USER_ADDR_T(rxaddr), remsize);
6654 iomode = NFS_WRITE_UNSTABLE;
6655 error = nfs_write_rpc2(np, auio, thd, cred, &iomode, &wverf2);
6656 if ((nmp->nm_vers >= NFS_VER4) && nfs_mount_state_error_should_restart(error)) {
6657 printf("nfs_vnop_pageout: restart: error %d\n", error);
6658 lck_mtx_lock(&nmp->nm_lock);
6659 if ((error != NFSERR_GRACE) && (stategenid == nmp->nm_stategenid) && !(nmp->nm_state & NFSSTA_RECOVER)) {
6660 printf("nfs_vnop_pageout: error %d, initiating recovery\n", error);
6661 nmp->nm_state |= NFSSTA_RECOVER;
6662 nfs_mount_sock_thread_wake(nmp);
6663 }
6664 lck_mtx_unlock(&nmp->nm_lock);
6665 if (error == NFSERR_GRACE)
6666 tsleep(&nmp->nm_state, (PZERO-1), "nfsgrace", 2*hz);
6667 restart = 1;
6668 goto cancel;
6669 }
6670 if (error) {
6671 FSDBG(323, rxoffset, rxsize, error, -1);
6672 break;
6673 }
6674 if (wverf != wverf2) {
6675 /* verifier changed, so we need to restart all the writes */
6676 vrestart = 1;
6677 goto cancel;
6678 }
6679 if (iomode < commit)
6680 commit = iomode;
6681 rxaddr += iosize;
6682 rxoffset += iosize;
6683 rxsize -= iosize;
6684 }
6685 if (txsize)
6686 break;
6687 }
6688 } while (!error && (txsize || rxsize));
6689
6690 vrestart = 0;
6691
6692 if (!error && (commit != NFS_WRITE_FILESYNC)) {
6693 error = nmp->nm_funcs->nf_commit_rpc(np, f_offset, xsize, cred);
6694 if (error == NFSERR_STALEWRITEVERF) {
6695 vrestart = 1;
6696 error = EIO;
6697 }
6698 }
6699
6700 if (error) {
6701cancel:
6702 /* cancel any outstanding requests */
6703 while (req[nextwait]) {
6704 nfs_request_async_cancel(req[nextwait]);
6705 req[nextwait] = NULL;
6706 nextwait = (nextwait + 1) % MAXPAGINGREQS;
6707 vnode_writedone(vp);
6708 nfs_node_lock_force(np);
6709 np->n_numoutput--;
6710 nfs_node_unlock(np);
6711 }
6712 if (vrestart) {
6713 if (++vrestarts <= 100) /* guard against no progress */
6714 goto tryagain;
6715 printf("nfs_pageout: too many restarts, aborting.\n");
6716 FSDBG(323, f_offset, xsize, ERESTART, -1);
6717 }
6718 if (restart) {
6719 if ((restarts <= nfs_mount_state_max_restarts(nmp)) && /* guard against no progress */
6720 (!(error = nfs_mount_state_wait_for_recovery(nmp))))
6721 goto tryagain;
6722 printf("nfs_pageout: too many restarts, aborting.\n");
6723 FSDBG(323, f_offset, xsize, ERESTART, -1);
6724 }
6725 }
6726
6727 ubc_upl_unmap(pl);
6728
6729 /*
6730 * We've had several different solutions on what to do when the pageout
6731 * gets an error. If we don't handle it, and return an error to the
6732 * caller, vm, it will retry . This can end in endless looping
6733 * between vm and here doing retries of the same page. Doing a dump
6734 * back to vm, will get it out of vm's knowledge and we lose whatever
6735 * data existed. This is risky, but in some cases necessary. For
6736 * example, the initial fix here was to do that for ESTALE. In that case
6737 * the server is telling us that the file is no longer the same. We
6738 * would not want to keep paging out to that. We also saw some 151
6739 * errors from Auspex server and NFSv3 can return errors higher than
6740 * ELAST. Those along with NFS known server errors we will "dump" from
6741 * vm. Errors we don't expect to occur, we dump and log for further
6742 * analysis. Errors that could be transient, networking ones,
6743 * we let vm "retry". Lastly, errors that we retry, but may have potential
6744 * to storm the network, we "retrywithsleep". "sever" will be used in
6745 * in the future to dump all pages of object for cases like ESTALE.
6746 * All this is the basis for the states returned and first guesses on
6747 * error handling. Tweaking expected as more statistics are gathered.
6748 * Note, in the long run we may need another more robust solution to
6749 * have some kind of persistant store when the vm cannot dump nor keep
6750 * retrying as a solution, but this would be a file architectural change
6751 */
6752 if (!nofreeupl) { /* otherwise stacked file system has to handle this */
6753 if (error) {
6754 int abortflags = 0;
6755 char action = nfs_pageouterrorhandler(error);
6756
6757 switch (action) {
6758 case DUMP:
6759 abortflags = UPL_ABORT_DUMP_PAGES|UPL_ABORT_FREE_ON_EMPTY;
6760 break;
6761 case DUMPANDLOG:
6762 abortflags = UPL_ABORT_DUMP_PAGES|UPL_ABORT_FREE_ON_EMPTY;
6763 if (error <= NFS_ELAST) {
6764 if ((errorcount[error] % 100) == 0)
6765 printf("nfs_pageout: unexpected error %d. dumping vm page\n", error);
6766 errorcount[error]++;
6767 }
6768 break;
6769 case RETRY:
6770 abortflags = UPL_ABORT_FREE_ON_EMPTY;
6771 break;
6772 case RETRYWITHSLEEP:
6773 abortflags = UPL_ABORT_FREE_ON_EMPTY;
6774 /* pri unused. PSOCK for placeholder. */
6775 tsleep(&lbolt, PSOCK, "nfspageout", 0);
6776 break;
6777 case SEVER: /* not implemented */
6778 default:
6779 printf("nfs_pageout: action %d not expected\n", action);
6780 break;
6781 }
6782
6783 ubc_upl_abort_range(pl, pl_offset, pgsize, abortflags);
6784 /* return error in all cases above */
6785
6786 } else {
6787 ubc_upl_commit_range(pl, pl_offset, pgsize,
6788 UPL_COMMIT_CLEAR_DIRTY |
6789 UPL_COMMIT_FREE_ON_EMPTY);
6790 }
6791 }
6792 return (error);
6793}
6794
6795/* Blktooff derives file offset given a logical block number */
6796int
6797nfs_vnop_blktooff(
6798 struct vnop_blktooff_args /* {
6799 struct vnodeop_desc *a_desc;
6800 vnode_t a_vp;
6801 daddr64_t a_lblkno;
6802 off_t *a_offset;
6803 } */ *ap)
6804{
6805 int biosize;
6806 vnode_t vp = ap->a_vp;
6807 struct nfsmount *nmp = VTONMP(vp);
6808
6809 if (!nmp)
6810 return (ENXIO);
6811 biosize = nmp->nm_biosize;
6812
6813 *ap->a_offset = (off_t)(ap->a_lblkno * biosize);
6814
6815 return (0);
6816}
6817
6818int
6819nfs_vnop_offtoblk(
6820 struct vnop_offtoblk_args /* {
6821 struct vnodeop_desc *a_desc;
6822 vnode_t a_vp;
6823 off_t a_offset;
6824 daddr64_t *a_lblkno;
6825 } */ *ap)
6826{
6827 int biosize;
6828 vnode_t vp = ap->a_vp;
6829 struct nfsmount *nmp = VTONMP(vp);
6830
6831 if (!nmp)
6832 return (ENXIO);
6833 biosize = nmp->nm_biosize;
6834
6835 *ap->a_lblkno = (daddr64_t)(ap->a_offset / biosize);
6836
6837 return (0);
6838}
6839