[apple/xnu.git] / bsd / nfs / nfs_vnops.c
1 /*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
23 /*
24 * Copyright (c) 1989, 1993
25 * The Regents of the University of California. All rights reserved.
26 *
27 * This code is derived from software contributed to Berkeley by
28 * Rick Macklem at The University of Guelph.
29 *
30 * Redistribution and use in source and binary forms, with or without
31 * modification, are permitted provided that the following conditions
32 * are met:
33 * 1. Redistributions of source code must retain the above copyright
34 * notice, this list of conditions and the following disclaimer.
35 * 2. Redistributions in binary form must reproduce the above copyright
36 * notice, this list of conditions and the following disclaimer in the
37 * documentation and/or other materials provided with the distribution.
38 * 3. All advertising materials mentioning features or use of this software
39 * must display the following acknowledgement:
40 * This product includes software developed by the University of
41 * California, Berkeley and its contributors.
42 * 4. Neither the name of the University nor the names of its contributors
43 * may be used to endorse or promote products derived from this software
44 * without specific prior written permission.
45 *
46 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
47 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
48 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
49 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
50 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
51 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
52 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
53 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
54 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
55 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
56 * SUCH DAMAGE.
57 *
58 * @(#)nfs_vnops.c 8.16 (Berkeley) 5/27/95
59 * FreeBSD-Id: nfs_vnops.c,v 1.72 1997/11/07 09:20:48 phk Exp $
60 */
61
62
63 /*
64 * vnode op calls for Sun NFS version 2 and 3
65 */
66
67 #include <sys/param.h>
68 #include <sys/kernel.h>
69 #include <sys/systm.h>
70 #include <sys/resourcevar.h>
71 #include <sys/proc.h>
72 #include <sys/mount.h>
73 #include <sys/buf.h>
74 #include <sys/malloc.h>
75 #include <sys/mbuf.h>
76 #include <sys/conf.h>
77 #include <sys/namei.h>
78 #include <sys/vnode.h>
79 #include <sys/dirent.h>
80 #include <sys/fcntl.h>
81 #include <sys/lockf.h>
82 #include <sys/ubc.h>
83
84 #include <ufs/ufs/dir.h>
85 #include <vfs/vfs_support.h>
86
87 #include <sys/vm.h>
88 #include <machine/spl.h>
89 #include <vm/vm_pageout.h>
90
91 #include <sys/time.h>
92 #include <kern/clock.h>
93
94 #include <miscfs/fifofs/fifo.h>
95 #include <miscfs/specfs/specdev.h>
96
97 #include <nfs/rpcv2.h>
98 #include <nfs/nfsproto.h>
99 #include <nfs/nfs.h>
100 #include <nfs/nfsnode.h>
101 #include <nfs/nfsmount.h>
102 #include <nfs/xdr_subs.h>
103 #include <nfs/nfsm_subs.h>
104 #include <nfs/nqnfs.h>
105
106 #include <net/if.h>
107 #include <netinet/in.h>
108 #include <netinet/in_var.h>
109 #include <kern/task.h>
110 #include <vm/vm_kern.h>
111
112 #include <sys/kdebug.h>
113
114 #define TRUE 1
115 #define FALSE 0
116
117 static int nfsspec_read __P((struct vop_read_args *));
118 static int nfsspec_write __P((struct vop_write_args *));
119 static int nfsfifo_read __P((struct vop_read_args *));
120 static int nfsfifo_write __P((struct vop_write_args *));
121 static int nfsspec_close __P((struct vop_close_args *));
122 static int nfsfifo_close __P((struct vop_close_args *));
123 #define nfs_poll vop_nopoll
124 static int nfs_ioctl __P((struct vop_ioctl_args *));
125 static int nfs_select __P((struct vop_select_args *));
126 static int nfs_flush __P((struct vnode *,struct ucred *,int,struct proc *,int));
127 static int nfs_setattrrpc __P((struct vnode *,struct vattr *,struct ucred *,struct proc *));
128 static int nfs_lookup __P((struct vop_lookup_args *));
129 static int nfs_create __P((struct vop_create_args *));
130 static int nfs_mknod __P((struct vop_mknod_args *));
131 static int nfs_open __P((struct vop_open_args *));
132 static int nfs_close __P((struct vop_close_args *));
133 static int nfs_access __P((struct vop_access_args *));
134 static int nfs_getattr __P((struct vop_getattr_args *));
135 static int nfs_setattr __P((struct vop_setattr_args *));
136 static int nfs_read __P((struct vop_read_args *));
137 static int nfs_mmap __P((struct vop_mmap_args *));
138 static int nfs_fsync __P((struct vop_fsync_args *));
139 static int nfs_remove __P((struct vop_remove_args *));
140 static int nfs_link __P((struct vop_link_args *));
141 static int nfs_rename __P((struct vop_rename_args *));
142 static int nfs_mkdir __P((struct vop_mkdir_args *));
143 static int nfs_rmdir __P((struct vop_rmdir_args *));
144 static int nfs_symlink __P((struct vop_symlink_args *));
145 static int nfs_readdir __P((struct vop_readdir_args *));
146 static int nfs_bmap __P((struct vop_bmap_args *));
147 static int nfs_strategy __P((struct vop_strategy_args *));
148 static int nfs_lookitup __P((struct vnode *,char *,int,struct ucred *,struct proc *,struct nfsnode **));
149 static int nfs_sillyrename __P((struct vnode *,struct vnode *,struct componentname *));
150 static int nfsspec_access __P((struct vop_access_args *));
151 static int nfs_readlink __P((struct vop_readlink_args *));
152 static int nfs_print __P((struct vop_print_args *));
153 static int nfs_pathconf __P((struct vop_pathconf_args *));
154 static int nfs_advlock __P((struct vop_advlock_args *));
155 static int nfs_blkatoff __P((struct vop_blkatoff_args *));
156 static int nfs_bwrite __P((struct vop_bwrite_args *));
157 static int nfs_valloc __P((struct vop_valloc_args *));
158 static int nfs_vfree __P((struct vop_vfree_args *));
159 static int nfs_truncate __P((struct vop_truncate_args *));
160 static int nfs_update __P((struct vop_update_args *));
161 static int nfs_pagein __P((struct vop_pagein_args *));
162 static int nfs_pageout __P((struct vop_pageout_args *));
163 static int nfs_blktooff __P((struct vop_blktooff_args *));
164 static int nfs_offtoblk __P((struct vop_offtoblk_args *));
165 static int nfs_cmap __P((struct vop_cmap_args *));
166
167 /*
168 * Global vfs data structures for nfs
169 */
170 vop_t **nfsv2_vnodeop_p;
171 static struct vnodeopv_entry_desc nfsv2_vnodeop_entries[] = {
172 { &vop_default_desc, (vop_t *)vn_default_error },
173 { &vop_lookup_desc, (vop_t *)nfs_lookup }, /* lookup */
174 { &vop_create_desc, (vop_t *)nfs_create }, /* create */
175 { &vop_mknod_desc, (vop_t *)nfs_mknod }, /* mknod */
176 { &vop_open_desc, (vop_t *)nfs_open }, /* open */
177 { &vop_close_desc, (vop_t *)nfs_close }, /* close */
178 { &vop_access_desc, (vop_t *)nfs_access }, /* access */
179 { &vop_getattr_desc, (vop_t *)nfs_getattr }, /* getattr */
180 { &vop_setattr_desc, (vop_t *)nfs_setattr }, /* setattr */
181 { &vop_read_desc, (vop_t *)nfs_read }, /* read */
182 { &vop_write_desc, (vop_t *)nfs_write }, /* write */
183 { &vop_lease_desc, (vop_t *)nfs_lease_check }, /* lease */
184 { &vop_ioctl_desc, (vop_t *)nfs_ioctl }, /* ioctl */
185 { &vop_select_desc, (vop_t *)nfs_select }, /* select */
186 { &vop_revoke_desc, (vop_t *)nfs_revoke }, /* revoke */
187 { &vop_mmap_desc, (vop_t *)nfs_mmap }, /* mmap */
188 { &vop_fsync_desc, (vop_t *)nfs_fsync }, /* fsync */
189 { &vop_seek_desc, (vop_t *)nfs_seek }, /* seek */
190 { &vop_remove_desc, (vop_t *)nfs_remove }, /* remove */
191 { &vop_link_desc, (vop_t *)nfs_link }, /* link */
192 { &vop_rename_desc, (vop_t *)nfs_rename }, /* rename */
193 { &vop_mkdir_desc, (vop_t *)nfs_mkdir }, /* mkdir */
194 { &vop_rmdir_desc, (vop_t *)nfs_rmdir }, /* rmdir */
195 { &vop_symlink_desc, (vop_t *)nfs_symlink }, /* symlink */
196 { &vop_readdir_desc, (vop_t *)nfs_readdir }, /* readdir */
197 { &vop_readlink_desc, (vop_t *)nfs_readlink }, /* readlink */
198 { &vop_abortop_desc, (vop_t *)nfs_abortop }, /* abortop */
199 { &vop_inactive_desc, (vop_t *)nfs_inactive }, /* inactive */
200 { &vop_reclaim_desc, (vop_t *)nfs_reclaim }, /* reclaim */
201 { &vop_lock_desc, (vop_t *)nfs_lock }, /* lock */
202 { &vop_unlock_desc, (vop_t *)nfs_unlock }, /* unlock */
203 { &vop_bmap_desc, (vop_t *)nfs_bmap }, /* bmap */
204 { &vop_strategy_desc, (vop_t *)nfs_strategy }, /* strategy */
205 { &vop_print_desc, (vop_t *)nfs_print }, /* print */
206 { &vop_islocked_desc, (vop_t *)nfs_islocked }, /* islocked */
207 { &vop_pathconf_desc, (vop_t *)nfs_pathconf }, /* pathconf */
208 { &vop_advlock_desc, (vop_t *)nfs_advlock }, /* advlock */
209 { &vop_blkatoff_desc, (vop_t *)nfs_blkatoff }, /* blkatoff */
210 { &vop_valloc_desc, (vop_t *)nfs_valloc }, /* valloc */
211 { &vop_reallocblks_desc, (vop_t *)nfs_reallocblks }, /* reallocblks */
212 { &vop_vfree_desc, (vop_t *)nfs_vfree }, /* vfree */
213 { &vop_truncate_desc, (vop_t *)nfs_truncate }, /* truncate */
214 { &vop_update_desc, (vop_t *)nfs_update }, /* update */
215 { &vop_bwrite_desc, (vop_t *)nfs_bwrite }, /* bwrite */
216 { &vop_pagein_desc, (vop_t *)nfs_pagein }, /* Pagein */
217 { &vop_pageout_desc, (vop_t *)nfs_pageout }, /* Pageout */
218 { &vop_copyfile_desc, (vop_t *)err_copyfile }, /* Copyfile */
219 { &vop_blktooff_desc, (vop_t *)nfs_blktooff }, /* blktooff */
220 { &vop_offtoblk_desc, (vop_t *)nfs_offtoblk }, /* offtoblk */
221 { &vop_cmap_desc, (vop_t *)nfs_cmap }, /* cmap */
222 { NULL, NULL }
223 };
224 struct vnodeopv_desc nfsv2_vnodeop_opv_desc =
225 { &nfsv2_vnodeop_p, nfsv2_vnodeop_entries };
226 #ifdef __FreeBSD__
227 VNODEOP_SET(nfsv2_vnodeop_opv_desc);
228 #endif
229
230 /*
231 * Special device vnode ops
232 */
233 vop_t **spec_nfsv2nodeop_p;
234 static struct vnodeopv_entry_desc spec_nfsv2nodeop_entries[] = {
235 { &vop_default_desc, (vop_t *)vn_default_error },
236 { &vop_lookup_desc, (vop_t *)spec_lookup }, /* lookup */
237 { &vop_create_desc, (vop_t *)spec_create }, /* create */
238 { &vop_mknod_desc, (vop_t *)spec_mknod }, /* mknod */
239 { &vop_open_desc, (vop_t *)spec_open }, /* open */
240 { &vop_close_desc, (vop_t *)nfsspec_close }, /* close */
241 { &vop_access_desc, (vop_t *)nfsspec_access }, /* access */
242 { &vop_getattr_desc, (vop_t *)nfs_getattr }, /* getattr */
243 { &vop_setattr_desc, (vop_t *)nfs_setattr }, /* setattr */
244 { &vop_read_desc, (vop_t *)nfsspec_read }, /* read */
245 { &vop_write_desc, (vop_t *)nfsspec_write }, /* write */
246 { &vop_lease_desc, (vop_t *)spec_lease_check }, /* lease */
247 { &vop_ioctl_desc, (vop_t *)spec_ioctl }, /* ioctl */
248 { &vop_select_desc, (vop_t *)spec_select }, /* select */
249 { &vop_revoke_desc, (vop_t *)spec_revoke }, /* revoke */
250 { &vop_mmap_desc, (vop_t *)spec_mmap }, /* mmap */
251 { &vop_fsync_desc, (vop_t *)nfs_fsync }, /* fsync */
252 { &vop_seek_desc, (vop_t *)spec_seek }, /* seek */
253 { &vop_remove_desc, (vop_t *)spec_remove }, /* remove */
254 { &vop_link_desc, (vop_t *)spec_link }, /* link */
255 { &vop_rename_desc, (vop_t *)spec_rename }, /* rename */
256 { &vop_mkdir_desc, (vop_t *)spec_mkdir }, /* mkdir */
257 { &vop_rmdir_desc, (vop_t *)spec_rmdir }, /* rmdir */
258 { &vop_symlink_desc, (vop_t *)spec_symlink }, /* symlink */
259 { &vop_readdir_desc, (vop_t *)spec_readdir }, /* readdir */
260 { &vop_readlink_desc, (vop_t *)spec_readlink }, /* readlink */
261 { &vop_abortop_desc, (vop_t *)spec_abortop }, /* abortop */
262 { &vop_inactive_desc, (vop_t *)nfs_inactive }, /* inactive */
263 { &vop_reclaim_desc, (vop_t *)nfs_reclaim }, /* reclaim */
264 { &vop_lock_desc, (vop_t *)nfs_lock }, /* lock */
265 { &vop_unlock_desc, (vop_t *)nfs_unlock }, /* unlock */
266 { &vop_bmap_desc, (vop_t *)spec_bmap }, /* bmap */
267 { &vop_strategy_desc, (vop_t *)spec_strategy }, /* strategy */
268 { &vop_print_desc, (vop_t *)nfs_print }, /* print */
269 { &vop_islocked_desc, (vop_t *)nfs_islocked }, /* islocked */
270 { &vop_pathconf_desc, (vop_t *)spec_pathconf }, /* pathconf */
271 { &vop_advlock_desc, (vop_t *)spec_advlock }, /* advlock */
272 { &vop_blkatoff_desc, (vop_t *)spec_blkatoff }, /* blkatoff */
273 { &vop_valloc_desc, (vop_t *)spec_valloc }, /* valloc */
274 { &vop_reallocblks_desc, (vop_t *)spec_reallocblks }, /* reallocblks */
275 { &vop_vfree_desc, (vop_t *)spec_vfree }, /* vfree */
276 { &vop_truncate_desc, (vop_t *)spec_truncate }, /* truncate */
277 { &vop_update_desc, (vop_t *)nfs_update }, /* update */
278 { &vop_bwrite_desc, (vop_t *)vn_bwrite }, /* bwrite */
279 { &vop_devblocksize_desc, (vop_t *)spec_devblocksize }, /* devblocksize */
280 { &vop_pagein_desc, (vop_t *)nfs_pagein }, /* Pagein */
281 { &vop_pageout_desc, (vop_t *)nfs_pageout }, /* Pageout */
282 { &vop_blktooff_desc, (vop_t *)nfs_blktooff }, /* blktooff */
283 { &vop_offtoblk_desc, (vop_t *)nfs_offtoblk }, /* offtoblk */
284 { &vop_cmap_desc, (vop_t *)nfs_cmap }, /* cmap */
285 { NULL, NULL }
286 };
287 struct vnodeopv_desc spec_nfsv2nodeop_opv_desc =
288 { &spec_nfsv2nodeop_p, spec_nfsv2nodeop_entries };
289 #ifdef __FreeBSD__
290 VNODEOP_SET(spec_nfsv2nodeop_opv_desc);
291 #endif
292
293 vop_t **fifo_nfsv2nodeop_p;
294 static struct vnodeopv_entry_desc fifo_nfsv2nodeop_entries[] = {
295 { &vop_default_desc, (vop_t *)vn_default_error },
296 { &vop_lookup_desc, (vop_t *)fifo_lookup }, /* lookup */
297 { &vop_create_desc, (vop_t *)fifo_create }, /* create */
298 { &vop_mknod_desc, (vop_t *)fifo_mknod }, /* mknod */
299 { &vop_open_desc, (vop_t *)fifo_open }, /* open */
300 { &vop_close_desc, (vop_t *)nfsfifo_close }, /* close */
301 { &vop_access_desc, (vop_t *)nfsspec_access }, /* access */
302 { &vop_getattr_desc, (vop_t *)nfs_getattr }, /* getattr */
303 { &vop_setattr_desc, (vop_t *)nfs_setattr }, /* setattr */
304 { &vop_read_desc, (vop_t *)nfsfifo_read }, /* read */
305 { &vop_write_desc, (vop_t *)nfsfifo_write }, /* write */
306 { &vop_lease_desc, (vop_t *)fifo_lease_check }, /* lease */
307 { &vop_ioctl_desc, (vop_t *)fifo_ioctl }, /* ioctl */
308 { &vop_select_desc, (vop_t *)fifo_select }, /* select */
309 { &vop_revoke_desc, (vop_t *)fifo_revoke }, /* revoke */
310 { &vop_mmap_desc, (vop_t *)fifo_mmap }, /* mmap */
311 { &vop_fsync_desc, (vop_t *)nfs_fsync }, /* fsync */
312 { &vop_seek_desc, (vop_t *)fifo_seek }, /* seek */
313 { &vop_remove_desc, (vop_t *)fifo_remove }, /* remove */
314 { &vop_link_desc, (vop_t *)fifo_link }, /* link */
315 { &vop_rename_desc, (vop_t *)fifo_rename }, /* rename */
316 { &vop_mkdir_desc, (vop_t *)fifo_mkdir }, /* mkdir */
317 { &vop_rmdir_desc, (vop_t *)fifo_rmdir }, /* rmdir */
318 { &vop_symlink_desc, (vop_t *)fifo_symlink }, /* symlink */
319 { &vop_readdir_desc, (vop_t *)fifo_readdir }, /* readdir */
320 { &vop_readlink_desc, (vop_t *)fifo_readlink }, /* readlink */
321 { &vop_abortop_desc, (vop_t *)fifo_abortop }, /* abortop */
322 { &vop_inactive_desc, (vop_t *)nfs_inactive }, /* inactive */
323 { &vop_reclaim_desc, (vop_t *)nfs_reclaim }, /* reclaim */
324 { &vop_lock_desc, (vop_t *)nfs_lock }, /* lock */
325 { &vop_unlock_desc, (vop_t *)nfs_unlock }, /* unlock */
326 { &vop_bmap_desc, (vop_t *)fifo_bmap }, /* bmap */
327 { &vop_strategy_desc, (vop_t *)fifo_strategy }, /* strategy */
328 { &vop_print_desc, (vop_t *)nfs_print }, /* print */
329 { &vop_islocked_desc, (vop_t *)nfs_islocked }, /* islocked */
330 { &vop_pathconf_desc, (vop_t *)fifo_pathconf }, /* pathconf */
331 { &vop_advlock_desc, (vop_t *)fifo_advlock }, /* advlock */
332 { &vop_blkatoff_desc, (vop_t *)fifo_blkatoff }, /* blkatoff */
333 { &vop_valloc_desc, (vop_t *)fifo_valloc }, /* valloc */
334 { &vop_reallocblks_desc, (vop_t *)fifo_reallocblks }, /* reallocblks */
335 { &vop_vfree_desc, (vop_t *)fifo_vfree }, /* vfree */
336 { &vop_truncate_desc, (vop_t *)fifo_truncate }, /* truncate */
337 { &vop_update_desc, (vop_t *)nfs_update }, /* update */
338 { &vop_bwrite_desc, (vop_t *)vn_bwrite }, /* bwrite */
339 { &vop_pagein_desc, (vop_t *)nfs_pagein }, /* Pagein */
340 { &vop_pageout_desc, (vop_t *)nfs_pageout }, /* Pageout */
341 { &vop_blktooff_desc, (vop_t *)nfs_blktooff }, /* blktooff */
342 { &vop_offtoblk_desc, (vop_t *)nfs_offtoblk }, /* offtoblk */
343 { &vop_cmap_desc, (vop_t *)nfs_cmap }, /* cmap */
344 { NULL, NULL }
345 };
346 struct vnodeopv_desc fifo_nfsv2nodeop_opv_desc =
347 { &fifo_nfsv2nodeop_p, fifo_nfsv2nodeop_entries };
348 #ifdef __FreeBSD__
349 VNODEOP_SET(fifo_nfsv2nodeop_opv_desc);
350 #endif
351
352 static int nfs_commit __P((struct vnode *vp, u_quad_t offset, int cnt,
353 struct ucred *cred, struct proc *procp));
354 static int nfs_mknodrpc __P((struct vnode *dvp, struct vnode **vpp,
355 struct componentname *cnp,
356 struct vattr *vap));
357 static int nfs_removerpc __P((struct vnode *dvp, char *name, int namelen,
358 struct ucred *cred, struct proc *proc));
359 static int nfs_renamerpc __P((struct vnode *fdvp, char *fnameptr,
360 int fnamelen, struct vnode *tdvp,
361 char *tnameptr, int tnamelen,
362 struct ucred *cred, struct proc *proc));
363 static int nfs_renameit __P((struct vnode *sdvp,
364 struct componentname *scnp,
365 struct sillyrename *sp));
366
367 /*
368 * Global variables
369 */
370 extern u_long nfs_true, nfs_false;
371 extern struct nfsstats nfsstats;
372 extern nfstype nfsv3_type[9];
373 struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON];
374 struct nfsmount *nfs_iodmount[NFS_MAXASYNCDAEMON];
375 int nfs_numasync = 0;
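/* Fixed-size portion of a struct dirent, i.e. everything before the name buffer. */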
376 #define DIRHDSIZ (sizeof (struct dirent) - (MAXNAMLEN + 1))
377
378 static int nfsaccess_cache_timeout = NFS_MAXATTRTIMO;
379 /* SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_timeout, CTLFLAG_RW,
380 &nfsaccess_cache_timeout, 0, "NFS ACCESS cache timeout");
381 */
382 #define NFSV3ACCESS_ALL (NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY \
383 | NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE \
384 | NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP)
385
386
387 /*
388 * the following are needed only by nfs_pageout to know how to handle errors
389  * see the nfs_pageout comments for an explanation of the actions.
390  * the errors here are copied from errno.h and errors returned by servers
391  * are expected to match the same numbers here. If not, our actions may be
392 * erroneous.
393 */
394 enum actiontype {NOACTION, DUMP, DUMPANDLOG, RETRY, RETRYWITHSLEEP, SEVER};
395
396 static int errorcount[ELAST+1]; /* better be zeros when initialized */
397
398 static const short errortooutcome[ELAST+1] = {
399 NOACTION,
400 DUMP, /* EPERM 1 Operation not permitted */
401 DUMP, /* ENOENT 2 No such file or directory */
402 DUMPANDLOG, /* ESRCH 3 No such process */
403 RETRY, /* EINTR 4 Interrupted system call */
404 DUMP, /* EIO 5 Input/output error */
405 DUMP, /* ENXIO 6 Device not configured */
406 DUMPANDLOG, /* E2BIG 7 Argument list too long */
407 DUMPANDLOG, /* ENOEXEC 8 Exec format error */
408 DUMPANDLOG, /* EBADF 9 Bad file descriptor */
409 DUMPANDLOG, /* ECHILD 10 No child processes */
410 DUMPANDLOG, /* EDEADLK 11 Resource deadlock avoided - was EAGAIN */
411 RETRY, /* ENOMEM 12 Cannot allocate memory */
412 DUMP, /* EACCES 13 Permission denied */
413 DUMPANDLOG, /* EFAULT 14 Bad address */
414 DUMPANDLOG, /* ENOTBLK 15 POSIX - Block device required */
415 RETRY, /* EBUSY 16 Device busy */
416 DUMP, /* EEXIST 17 File exists */
417 DUMP, /* EXDEV 18 Cross-device link */
418 DUMP, /* ENODEV 19 Operation not supported by device */
419 DUMP, /* ENOTDIR 20 Not a directory */
420 DUMP, /* EISDIR 21 Is a directory */
421 DUMP, /* EINVAL 22 Invalid argument */
422 DUMPANDLOG, /* ENFILE 23 Too many open files in system */
423 DUMPANDLOG, /* EMFILE 24 Too many open files */
424 DUMPANDLOG, /* ENOTTY 25 Inappropriate ioctl for device */
425 DUMPANDLOG, /* ETXTBSY 26 Text file busy - POSIX */
426 DUMP, /* EFBIG 27 File too large */
427 DUMP, /* ENOSPC 28 No space left on device */
428 DUMPANDLOG, /* ESPIPE 29 Illegal seek */
429 DUMP, /* EROFS 30 Read-only file system */
430 DUMP, /* EMLINK 31 Too many links */
431 RETRY, /* EPIPE 32 Broken pipe */
432 /* math software */
433 DUMPANDLOG, /* EDOM 33 Numerical argument out of domain */
434 DUMPANDLOG, /* ERANGE 34 Result too large */
435 RETRY, /* EAGAIN/EWOULDBLOCK 35 Resource temporarily unavailable */
436 DUMPANDLOG, /* EINPROGRESS 36 Operation now in progress */
437 DUMPANDLOG, /* EALREADY 37 Operation already in progress */
438 /* ipc/network software -- argument errors */
439 	DUMPANDLOG,	/* ENOTSOCK	38	Socket operation on non-socket */
440 DUMPANDLOG, /* EDESTADDRREQ 39 Destination address required */
441 DUMPANDLOG, /* EMSGSIZE 40 Message too long */
442 DUMPANDLOG, /* EPROTOTYPE 41 Protocol wrong type for socket */
443 DUMPANDLOG, /* ENOPROTOOPT 42 Protocol not available */
444 DUMPANDLOG, /* EPROTONOSUPPORT 43 Protocol not supported */
445 DUMPANDLOG, /* ESOCKTNOSUPPORT 44 Socket type not supported */
446 DUMPANDLOG, /* ENOTSUP 45 Operation not supported */
447 DUMPANDLOG, /* EPFNOSUPPORT 46 Protocol family not supported */
448 DUMPANDLOG, /* EAFNOSUPPORT 47 Address family not supported by protocol family */
449 DUMPANDLOG, /* EADDRINUSE 48 Address already in use */
450 DUMPANDLOG, /* EADDRNOTAVAIL 49 Can't assign requested address */
451 /* ipc/network software -- operational errors */
452 RETRY, /* ENETDOWN 50 Network is down */
453 RETRY, /* ENETUNREACH 51 Network is unreachable */
454 RETRY, /* ENETRESET 52 Network dropped connection on reset */
455 RETRY, /* ECONNABORTED 53 Software caused connection abort */
456 RETRY, /* ECONNRESET 54 Connection reset by peer */
457 RETRY, /* ENOBUFS 55 No buffer space available */
458 RETRY, /* EISCONN 56 Socket is already connected */
459 RETRY, /* ENOTCONN 57 Socket is not connected */
460 RETRY, /* ESHUTDOWN 58 Can't send after socket shutdown */
461 RETRY, /* ETOOMANYREFS 59 Too many references: can't splice */
462 RETRY, /* ETIMEDOUT 60 Operation timed out */
463 RETRY, /* ECONNREFUSED 61 Connection refused */
464
465 DUMPANDLOG, /* ELOOP 62 Too many levels of symbolic links */
466 DUMP, /* ENAMETOOLONG 63 File name too long */
467 RETRY, /* EHOSTDOWN 64 Host is down */
468 RETRY, /* EHOSTUNREACH 65 No route to host */
469 DUMP, /* ENOTEMPTY 66 Directory not empty */
470 /* quotas & mush */
471 	DUMPANDLOG,	/* EPROCLIM	67	Too many processes */
472 DUMPANDLOG, /* EUSERS 68 Too many users */
473 DUMPANDLOG, /* EDQUOT 69 Disc quota exceeded */
474 /* Network File System */
475 DUMP, /* ESTALE 70 Stale NFS file handle */
476 DUMP, /* EREMOTE 71 Too many levels of remote in path */
477 DUMPANDLOG, /* EBADRPC 72 RPC struct is bad */
478 DUMPANDLOG, /* ERPCMISMATCH 73 RPC version wrong */
479 DUMPANDLOG, /* EPROGUNAVAIL 74 RPC prog. not avail */
480 DUMPANDLOG, /* EPROGMISMATCH 75 Program version wrong */
481 DUMPANDLOG, /* EPROCUNAVAIL 76 Bad procedure for program */
482
483 DUMPANDLOG, /* ENOLCK 77 No locks available */
484 DUMPANDLOG, /* ENOSYS 78 Function not implemented */
485 DUMPANDLOG, /* EFTYPE 79 Inappropriate file type or format */
486 DUMPANDLOG, /* EAUTH 80 Authentication error */
487 DUMPANDLOG, /* ENEEDAUTH 81 Need authenticator */
488 /* Intelligent device errors */
489 DUMPANDLOG, /* EPWROFF 82 Device power is off */
490 DUMPANDLOG, /* EDEVERR 83 Device error, e.g. paper out */
491 DUMPANDLOG, /* EOVERFLOW 84 Value too large to be stored in data type */
492 /* Program loading errors */
493 DUMPANDLOG, /* EBADEXEC 85 Bad executable */
494 DUMPANDLOG, /* EBADARCH 86 Bad CPU type in executable */
495 DUMPANDLOG, /* ESHLIBVERS 87 Shared library version mismatch */
496 DUMPANDLOG, /* EBADMACHO 88 Malformed Macho file */
497 };
498
499
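/*
 * Map an errno returned during pageout to one of the actions above.
 * Anything beyond ELAST is not in the table and is simply dumped.
 */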
500 static short
501 nfs_pageouterrorhandler(error)
502 int error;
503 {
504 if (error > ELAST)
505 return(DUMP);
506 else
507 return(errortooutcome[error]);
508 }
509
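/*
 * Perform an NFSv3 ACCESS RPC for the requested access bits (wmode) and,
 * on success, cache the rights granted by the server along with the
 * requesting uid and a timestamp in the nfsnode, for use by nfs_access().
 */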
510 static int
511 nfs3_access_otw(struct vnode *vp,
512 int wmode,
513 struct proc *p,
514 struct ucred *cred)
515 {
516 const int v3 = 1;
517 u_int32_t *tl;
518 int error = 0, attrflag;
519
520 struct mbuf *mreq, *mrep, *md, *mb, *mb2;
521 caddr_t bpos, dpos, cp2;
522 register int32_t t1, t2;
523 register caddr_t cp;
524 u_int32_t rmode;
525 struct nfsnode *np = VTONFS(vp);
526
527 nfsstats.rpccnt[NFSPROC_ACCESS]++;
528 nfsm_reqhead(vp, NFSPROC_ACCESS, NFSX_FH(v3) + NFSX_UNSIGNED);
529 nfsm_fhtom(vp, v3);
530 nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
531 *tl = txdr_unsigned(wmode);
532 nfsm_request(vp, NFSPROC_ACCESS, p, cred);
533 nfsm_postop_attr(vp, attrflag);
534 if (!error) {
535 nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
536 rmode = fxdr_unsigned(u_int32_t, *tl);
537 np->n_mode = rmode;
538 np->n_modeuid = cred->cr_uid;
539 np->n_modestamp = time_second;
540 }
541 nfsm_reqdone;
542 return error;
543 }
544
545 /*
546 * nfs access vnode op.
547 * For nfs version 2, just return ok. File accesses may fail later.
548 * For nfs version 3, use the access rpc to check accessibility. If file modes
549 * are changed on the server, accesses might still fail later.
550 */
551 static int
552 nfs_access(ap)
553 struct vop_access_args /* {
554 struct vnode *a_vp;
555 int a_mode;
556 struct ucred *a_cred;
557 struct proc *a_p;
558 } */ *ap;
559 {
560 register struct vnode *vp = ap->a_vp;
561 int error = 0;
562 u_long mode, wmode;
563 int v3 = NFS_ISV3(vp);
564 struct nfsnode *np = VTONFS(vp);
565
566 /*
567 * For nfs v3, do an access rpc, otherwise you are stuck emulating
568 * ufs_access() locally using the vattr. This may not be correct,
569 * since the server may apply other access criteria such as
570 * client uid-->server uid mapping that we do not know about, but
571 * this is better than just returning anything that is lying about
572 * in the cache.
573 */
574 if (v3) {
575 if (ap->a_mode & VREAD)
576 mode = NFSV3ACCESS_READ;
577 else
578 mode = 0;
579 if (vp->v_type == VDIR) {
580 if (ap->a_mode & VWRITE)
581 mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND |
582 NFSV3ACCESS_DELETE);
583 if (ap->a_mode & VEXEC)
584 mode |= NFSV3ACCESS_LOOKUP;
585 } else {
586 if (ap->a_mode & VWRITE)
587 mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND);
588 if (ap->a_mode & VEXEC)
589 mode |= NFSV3ACCESS_EXECUTE;
590 }
591 /* XXX safety belt, only make blanket request if caching */
592 if (nfsaccess_cache_timeout > 0) {
593 wmode = NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY |
594 NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE |
595 NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP;
596 } else
597 wmode = mode;
598
599 /*
600 * Does our cached result allow us to give a definite yes to
601 * this request?
602 */
603 if ((time_second < (np->n_modestamp + nfsaccess_cache_timeout)) &&
604 (ap->a_cred->cr_uid == np->n_modeuid) &&
605 ((np->n_mode & mode) == mode)) {
606 /* nfsstats.accesscache_hits++; */
607 } else {
608 /*
609 * Either a no, or a don't know. Go to the wire.
610 */
611 /* nfsstats.accesscache_misses++; */
612 error = nfs3_access_otw(vp, wmode, ap->a_p,ap->a_cred);
613 if (!error) {
614 if ((np->n_mode & mode) != mode)
615 error = EACCES;
616 }
617 }
618 } else
619 return (nfsspec_access(ap)); /* NFSv2 case checks for EROFS here */
620 /*
621 * Disallow write attempts on filesystems mounted read-only;
622 * unless the file is a socket, fifo, or a block or character
623 * device resident on the filesystem.
624 * CSM - moved EROFS check down per NetBSD rev 1.71. So you
625 * get the correct error value with layered filesystems.
626 * EKN - moved the return(error) below this so it does get called.
627 */
628 if (!error && (ap->a_mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) {
629 switch (vp->v_type) {
630 case VREG: case VDIR: case VLNK:
631 error = EROFS;
632 default:
633 break;
634 }
635 }
636 return (error);
637 }
638
639 /*
640 * nfs open vnode op
641 * Check to see if the type is ok
642 * and that deletion is not in progress.
643 * For paged in text files, you will need to flush the page cache
644 * if consistency is lost.
645 */
646 /* ARGSUSED */
647 static int
648 nfs_open(ap)
649 struct vop_open_args /* {
650 struct vnode *a_vp;
651 int a_mode;
652 struct ucred *a_cred;
653 struct proc *a_p;
654 } */ *ap;
655 {
656 register struct vnode *vp = ap->a_vp;
657 struct nfsnode *np = VTONFS(vp);
658 struct nfsmount *nmp = VFSTONFS(vp->v_mount);
659 struct vattr vattr;
660 int error;
661
662 	if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) {
663 		printf("open eacces vtyp=%d\n", vp->v_type);
664 		return (EACCES);
665 	}
666 /*
667 * Get a valid lease. If cached data is stale, flush it.
668 */
669 if (nmp->nm_flag & NFSMNT_NQNFS) {
670 if (NQNFS_CKINVALID(vp, np, ND_READ)) {
671 do {
672 error = nqnfs_getlease(vp, ND_READ, ap->a_cred,
673 ap->a_p);
674 } while (error == NQNFS_EXPIRED);
675 if (error)
676 return (error);
677 if (np->n_lrev != np->n_brev ||
678 (np->n_flag & NQNFSNONCACHE)) {
679 if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred,
680 ap->a_p, 1)) == EINTR)
681 return (error);
682 np->n_brev = np->n_lrev;
683 }
684 }
685 } else {
686 if (np->n_flag & NMODIFIED) {
687 if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred,
688 ap->a_p, 1)) == EINTR)
689 return (error);
690 np->n_attrstamp = 0;
691 if (vp->v_type == VDIR)
692 np->n_direofoffset = 0;
693 error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_p);
694 if (error)
695 return (error);
696 np->n_mtime = vattr.va_mtime.tv_sec;
697 } else {
698 error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_p);
699 if (error)
700 return (error);
701 if (np->n_mtime != vattr.va_mtime.tv_sec) {
702 if (vp->v_type == VDIR)
703 np->n_direofoffset = 0;
704 if ((error = nfs_vinvalbuf(vp, V_SAVE,
705 ap->a_cred, ap->a_p, 1)) == EINTR)
706 return (error);
707 np->n_mtime = vattr.va_mtime.tv_sec;
708 }
709 }
710 }
711 if ((nmp->nm_flag & NFSMNT_NQNFS) == 0)
712 np->n_attrstamp = 0; /* For Open/Close consistency */
713 return (0);
714 }
715
716 /*
717 * nfs close vnode op
718 * What an NFS client should do upon close after writing is a debatable issue.
719 * Most NFS clients push delayed writes to the server upon close, basically for
720 * two reasons:
721 * 1 - So that any write errors may be reported back to the client process
722 * doing the close system call. By far the two most likely errors are
723 * NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure.
724 * 2 - To put a worst case upper bound on cache inconsistency between
725 * multiple clients for the file.
726 * There is also a consistency problem for Version 2 of the protocol w.r.t.
727 * not being able to tell if other clients are writing a file concurrently,
728 * since there is no way of knowing if the changed modify time in the reply
729 * is only due to the write for this client.
730 * (NFS Version 3 provides weak cache consistency data in the reply that
731 * should be sufficient to detect and handle this case.)
732 *
733 * The current code does the following:
734 * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers
735 * for NFS Version 3 - flush dirty buffers to the server but don't invalidate
736 * or commit them (this satisfies 1 and 2 except for the
737 * case where the server crashes after this close but
738 * before the commit RPC, which is felt to be "good
739 * enough". Changing the last argument to nfs_flush() to
740 * a 1 would force a commit operation, if it is felt a
741 * commit is necessary now.
742 * for NQNFS - do nothing now, since 2 is dealt with via leases and
743 * 1 should be dealt with via an fsync() system call for
744 * cases where write errors are important.
745 */
746 /* ARGSUSED */
747 static int
748 nfs_close(ap)
749 struct vop_close_args /* {
750 struct vnodeop_desc *a_desc;
751 struct vnode *a_vp;
752 int a_fflag;
753 struct ucred *a_cred;
754 struct proc *a_p;
755 } */ *ap;
756 {
757 register struct vnode *vp = ap->a_vp;
758 register struct nfsnode *np = VTONFS(vp);
759 int error = 0;
760
761 if (vp->v_type == VREG) {
762 #if DIAGNOSTIC
763 register struct sillyrename *sp = np->n_sillyrename;
764 if (sp)
765 kprintf("nfs_close: %s, dvp=%x, vp=%x, ap=%x, np=%x, sp=%x\n",
766 &sp->s_name[0], (unsigned)(sp->s_dvp), (unsigned)vp,
767 (unsigned)ap, (unsigned)np, (unsigned)sp);
768 #endif
769 if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) == 0 &&
770 (np->n_flag & NMODIFIED)) {
771 if (NFS_ISV3(vp)) {
772 error = nfs_flush(vp, ap->a_cred, MNT_WAIT, ap->a_p, 0);
773 np->n_flag &= ~NMODIFIED;
774 } else
775 error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1);
776 np->n_attrstamp = 0;
777 }
778 if (np->n_flag & NWRITEERR) {
779 np->n_flag &= ~NWRITEERR;
780 error = np->n_error;
781 }
782 }
783 return (error);
784 }
785
786 /*
787 * nfs getattr call from vfs.
788 */
789 static int
790 nfs_getattr(ap)
791 struct vop_getattr_args /* {
792 struct vnode *a_vp;
793 struct vattr *a_vap;
794 struct ucred *a_cred;
795 struct proc *a_p;
796 } */ *ap;
797 {
798 register struct vnode *vp = ap->a_vp;
799 register struct nfsnode *np = VTONFS(vp);
800 register caddr_t cp;
801 register u_long *tl;
802 register int t1, t2;
803 caddr_t bpos, dpos;
804 int error = 0;
805 struct mbuf *mreq, *mrep, *md, *mb, *mb2;
806 int v3 = NFS_ISV3(vp);
807
808 /*
809 * Update local times for special files.
810 */
811 if (np->n_flag & (NACC | NUPD))
812 np->n_flag |= NCHG;
813
814 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 513)) | DBG_FUNC_START,
815 (int)np->n_size, 0, (int)np->n_vattr.va_size, np->n_flag, 0);
816
817 /*
818 * First look in the cache.
819 */
820 if ((error = nfs_getattrcache(vp, ap->a_vap)) == 0) {
821 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 513)) | DBG_FUNC_END,
822 (int)np->n_size, 0, (int)np->n_vattr.va_size, np->n_flag, 0);
823
824 return (0);
825 }
826 if (error != ENOENT)
827 return (error);
828 error = 0;
829
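	/*
	 * For v3, an ACCESS RPC carries post-op attributes, so going to the
	 * wire with ACCESS first primes both the access cache and (often) the
	 * attribute cache, letting us skip a separate GETATTR below.
	 */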
830 if (v3 && nfsaccess_cache_timeout > 0) {
831 /* nfsstats.accesscache_misses++; */
832 		if ((error = nfs3_access_otw(vp, NFSV3ACCESS_ALL, ap->a_p, ap->a_cred)))
833 return (error);
834 if ((error = nfs_getattrcache(vp, ap->a_vap)) == 0)
835 return (0);
836 if (error != ENOENT)
837 return (error);
838 error = 0;
839 }
840
841 nfsstats.rpccnt[NFSPROC_GETATTR]++;
842 nfsm_reqhead(vp, NFSPROC_GETATTR, NFSX_FH(v3));
843 nfsm_fhtom(vp, v3);
844 nfsm_request(vp, NFSPROC_GETATTR, ap->a_p, ap->a_cred);
845 if (!error) {
846 nfsm_loadattr(vp, ap->a_vap);
847 if (np->n_mtime != ap->a_vap->va_mtime.tv_sec) {
848 NFSTRACE(NFSTRC_GA_INV, vp);
849 if (vp->v_type == VDIR)
850 nfs_invaldir(vp);
851 error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred,
852 ap->a_p, 1);
853 if (!error) {
854 NFSTRACE(NFSTRC_GA_INV1, vp);
855 np->n_mtime = ap->a_vap->va_mtime.tv_sec;
856 } else {
857 NFSTRACE(NFSTRC_GA_INV2, error);
858 }
859 }
860 }
861 nfsm_reqdone;
862
863 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 513)) | DBG_FUNC_END,
864 (int)np->n_size, -1, (int)np->n_vattr.va_size, error, 0);
865
866 return (error);
867 }
868
869 /*
870 * nfs setattr call.
871 */
872 static int
873 nfs_setattr(ap)
874 struct vop_setattr_args /* {
875 struct vnodeop_desc *a_desc;
876 struct vnode *a_vp;
877 struct vattr *a_vap;
878 struct ucred *a_cred;
879 struct proc *a_p;
880 } */ *ap;
881 {
882 register struct vnode *vp = ap->a_vp;
883 register struct nfsnode *np = VTONFS(vp);
884 register struct vattr *vap = ap->a_vap;
885 int error = 0;
886 u_quad_t tsize;
887
888 #ifndef nolint
889 tsize = (u_quad_t)0;
890 #endif
891 /*
892 * Disallow write attempts if the filesystem is mounted read-only.
893 */
894 if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
895 vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL ||
896 vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) &&
897 (vp->v_mount->mnt_flag & MNT_RDONLY))
898 return (EROFS);
899 if (vap->va_size != VNOVAL) {
900 switch (vp->v_type) {
901 case VDIR:
902 return (EISDIR);
903 case VCHR:
904 case VBLK:
905 case VSOCK:
906 case VFIFO:
907 if (vap->va_mtime.tv_sec == VNOVAL &&
908 vap->va_atime.tv_sec == VNOVAL &&
909 vap->va_mode == (u_short)VNOVAL &&
910 vap->va_uid == (uid_t)VNOVAL &&
911 vap->va_gid == (gid_t)VNOVAL)
912 return (0);
913 vap->va_size = VNOVAL;
914 break;
915 default:
916 /*
917 * Disallow write attempts if the filesystem is
918 * mounted read-only.
919 */
920 if (vp->v_mount->mnt_flag & MNT_RDONLY)
921 return (EROFS);
922 np->n_flag |= NMODIFIED;
923 tsize = np->n_size;
924
925 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 512)) | DBG_FUNC_START,
926 (int)np->n_size, (int)vap->va_size, (int)np->n_vattr.va_size, np->n_flag, 0);
927
928 if (vap->va_size == 0)
929 error = nfs_vinvalbuf(vp, 0,
930 ap->a_cred, ap->a_p, 1);
931 else
932 error = nfs_vinvalbuf(vp, V_SAVE,
933 ap->a_cred, ap->a_p, 1);
934
935 if (UBCISVALID(vp))
936 ubc_setsize(vp, (off_t)vap->va_size); /* XXX check error */
937
938 if (error) {
939 printf("nfs_setattr: nfs_vinvalbuf %d\n", error);
940
941 #if DIAGNOSTIC
942 kprintf("nfs_setattr: nfs_vinvalbuf %d\n",
943 error);
944 #endif /* DIAGNOSTIC */
945 if (UBCISVALID(vp))
946 ubc_setsize(vp, (off_t)tsize); /* XXX check error */
947
948 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 512)) | DBG_FUNC_END,
949 (int)np->n_size, (int)vap->va_size, (int)np->n_vattr.va_size, -1, 0);
950
951 return (error);
952 }
953 np->n_size = np->n_vattr.va_size = vap->va_size;
954
955 		}
956 } else if ((vap->va_mtime.tv_sec != VNOVAL ||
957 vap->va_atime.tv_sec != VNOVAL) && (np->n_flag & NMODIFIED) &&
958 vp->v_type == VREG &&
959 (error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred,
960 ap->a_p, 1)) == EINTR)
961 return (error);
962
963 error = nfs_setattrrpc(vp, vap, ap->a_cred, ap->a_p);
964
965 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 512)) | DBG_FUNC_END,
966 (int)np->n_size, (int)vap->va_size, (int)np->n_vattr.va_size, error, 0);
967
968 if (error && vap->va_size != VNOVAL) {
969 /* make every effort to resync file size w/ server... */
970 int err = 0; /* preserve "error" for return */
971
972 printf("nfs_setattr: nfs_setattrrpc %d\n", error);
973 #if DIAGNOSTIC
974 kprintf("nfs_setattr: nfs_setattrrpc %d\n", error);
975 #endif /* DIAGNOSTIC */
976 np->n_size = np->n_vattr.va_size = tsize;
977 if (UBCISVALID(vp))
978 ubc_setsize(vp, (off_t)np->n_size); /* XXX check error */
979 vap->va_size = tsize;
980 err = nfs_setattrrpc(vp, vap, ap->a_cred, ap->a_p);
981
982 if (err)
983 printf("nfs_setattr1: nfs_setattrrpc %d\n", err);
984 #if DIAGNOSTIC
985 if (err)
986 kprintf("nfs_setattr nfs_setattrrpc %d\n", err);
987 #endif /* DIAGNOSTIC */
988 }
989 return (error);
990 }
991
992 /*
993 * Do an nfs setattr rpc.
994 */
995 static int
996 nfs_setattrrpc(vp, vap, cred, procp)
997 register struct vnode *vp;
998 register struct vattr *vap;
999 struct ucred *cred;
1000 struct proc *procp;
1001 {
1002 register struct nfsv2_sattr *sp;
1003 register caddr_t cp;
1004 register long t1, t2;
1005 caddr_t bpos, dpos, cp2;
1006 u_long *tl;
1007 int error = 0, wccflag = NFSV3_WCCRATTR;
1008 struct mbuf *mreq, *mrep, *md, *mb, *mb2;
1009 int v3 = NFS_ISV3(vp);
1010
1011 nfsstats.rpccnt[NFSPROC_SETATTR]++;
1012 nfsm_reqhead(vp, NFSPROC_SETATTR, NFSX_FH(v3) + NFSX_SATTR(v3));
1013 nfsm_fhtom(vp, v3);
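	/*
	 * For v3, marshal an sattr3: each attribute is preceded by a boolean
	 * indicating whether it is to be set, and the times may either carry a
	 * client-supplied value or ask the server to use its own clock. The
	 * trailing FALSE is the sattrguard3 (no ctime check requested).
	 */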
1014 if (v3) {
1015 if (vap->va_mode != (u_short)VNOVAL) {
1016 nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED);
1017 *tl++ = nfs_true;
1018 *tl = txdr_unsigned(vap->va_mode);
1019 } else {
1020 nfsm_build(tl, u_long *, NFSX_UNSIGNED);
1021 *tl = nfs_false;
1022 }
1023 if (vap->va_uid != (uid_t)VNOVAL) {
1024 nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED);
1025 *tl++ = nfs_true;
1026 *tl = txdr_unsigned(vap->va_uid);
1027 } else {
1028 nfsm_build(tl, u_long *, NFSX_UNSIGNED);
1029 *tl = nfs_false;
1030 }
1031 if (vap->va_gid != (gid_t)VNOVAL) {
1032 nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED);
1033 *tl++ = nfs_true;
1034 *tl = txdr_unsigned(vap->va_gid);
1035 } else {
1036 nfsm_build(tl, u_long *, NFSX_UNSIGNED);
1037 *tl = nfs_false;
1038 }
1039 if (vap->va_size != VNOVAL) {
1040 nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED);
1041 *tl++ = nfs_true;
1042 txdr_hyper(&vap->va_size, tl);
1043 } else {
1044 nfsm_build(tl, u_long *, NFSX_UNSIGNED);
1045 *tl = nfs_false;
1046 }
1047 if (vap->va_atime.tv_sec != VNOVAL) {
1048 if (vap->va_atime.tv_sec != time.tv_sec) {
1049 nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED);
1050 *tl++ = txdr_unsigned(NFSV3SATTRTIME_TOCLIENT);
1051 txdr_nfsv3time(&vap->va_atime, tl);
1052 } else {
1053 nfsm_build(tl, u_long *, NFSX_UNSIGNED);
1054 *tl = txdr_unsigned(NFSV3SATTRTIME_TOSERVER);
1055 }
1056 } else {
1057 nfsm_build(tl, u_long *, NFSX_UNSIGNED);
1058 *tl = txdr_unsigned(NFSV3SATTRTIME_DONTCHANGE);
1059 }
1060 if (vap->va_mtime.tv_sec != VNOVAL) {
1061 if (vap->va_mtime.tv_sec != time.tv_sec) {
1062 nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED);
1063 *tl++ = txdr_unsigned(NFSV3SATTRTIME_TOCLIENT);
1064 txdr_nfsv3time(&vap->va_mtime, tl);
1065 } else {
1066 nfsm_build(tl, u_long *, NFSX_UNSIGNED);
1067 *tl = txdr_unsigned(NFSV3SATTRTIME_TOSERVER);
1068 }
1069 } else {
1070 nfsm_build(tl, u_long *, NFSX_UNSIGNED);
1071 *tl = txdr_unsigned(NFSV3SATTRTIME_DONTCHANGE);
1072 }
1073 nfsm_build(tl, u_long *, NFSX_UNSIGNED);
1074 *tl = nfs_false;
1075 } else {
1076 nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
1077 if (vap->va_mode == (u_short)VNOVAL)
1078 sp->sa_mode = VNOVAL;
1079 else
1080 sp->sa_mode = vtonfsv2_mode(vp->v_type, vap->va_mode);
1081 if (vap->va_uid == (uid_t)VNOVAL)
1082 sp->sa_uid = VNOVAL;
1083 else
1084 sp->sa_uid = txdr_unsigned(vap->va_uid);
1085 if (vap->va_gid == (gid_t)VNOVAL)
1086 sp->sa_gid = VNOVAL;
1087 else
1088 sp->sa_gid = txdr_unsigned(vap->va_gid);
1089 sp->sa_size = txdr_unsigned(vap->va_size);
1090 txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
1091 txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
1092 }
1093 nfsm_request(vp, NFSPROC_SETATTR, procp, cred);
1094 if (v3) {
1095 nfsm_wcc_data(vp, wccflag);
1096 if ((!wccflag) && (vp->v_type != VBAD)) /* EINVAL set on VBAD vnode */
1097 VTONFS(vp)->n_attrstamp = 0;
1098 } else
1099 nfsm_loadattr(vp, (struct vattr *)0);
1100 nfsm_reqdone;
1101 return (error);
1102 }
1103
1104 /*
1105 * nfs lookup call, one step at a time...
1106 * First look in cache
1107 * If not found, unlock the directory nfsnode and do the rpc
1108 */
1109 static int
1110 nfs_lookup(ap)
1111 struct vop_lookup_args /* {
1112 struct vnodeop_desc *a_desc;
1113 struct vnode *a_dvp;
1114 struct vnode **a_vpp;
1115 struct componentname *a_cnp;
1116 } */ *ap;
1117 {
1118 register struct componentname *cnp = ap->a_cnp;
1119 register struct vnode *dvp = ap->a_dvp;
1120 register struct vnode **vpp = ap->a_vpp;
1121 register int flags = cnp->cn_flags;
1122 register struct vnode *newvp;
1123 register u_long *tl;
1124 register caddr_t cp;
1125 register long t1, t2;
1126 struct nfsmount *nmp;
1127 caddr_t bpos, dpos, cp2;
1128 struct mbuf *mreq, *mrep, *md, *mb, *mb2;
1129 long len;
1130 nfsfh_t *fhp;
1131 struct nfsnode *np;
1132 int lockparent, wantparent, error = 0, attrflag, fhsize;
1133 int v3 = NFS_ISV3(dvp);
1134 struct proc *p = cnp->cn_proc;
1135 int worldbuildworkaround = 1;
1136
1137 if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
1138 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
1139 return (EROFS);
1140 *vpp = NULLVP;
1141 if (dvp->v_type != VDIR)
1142 return (ENOTDIR);
1143 lockparent = flags & LOCKPARENT;
1144 wantparent = flags & (LOCKPARENT|WANTPARENT);
1145 nmp = VFSTONFS(dvp->v_mount);
1146 np = VTONFS(dvp);
1147
1148 if (worldbuildworkaround) {
1149 /*
1150 * Temporary workaround for world builds to not have dvp go
1151 * VBAD on during server calls in this routine. When
1152 * the real ref counting problem is found take this out.
1153 * Note if this was later and before the nfsm_request
1154 * set up, the workaround did not work (NOTE other difference
1155 * was I only put one VREF in that time. Thus it needs
1156 * to be above the cache_lookup branch or with 2 VREFS. Not
1157 * sure which. Can't play with world builds right now to see
1158 * which. VOP_ACCESS could also make it go to server. - EKN
1159 */
1160 VREF(dvp); /* hang on to this dvp - EKN */
1161 VREF(dvp); /* hang on tight - EKN */
1162 }
1163
1164 if ((error = cache_lookup(dvp, vpp, cnp)) && error != ENOENT) {
1165 struct vattr vattr;
1166 int vpid;
1167
1168 if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, p))) {
1169 *vpp = NULLVP;
1170 goto error_return;
1171 }
1172
1173 /* got to check to make sure the vnode didn't go away if access went to server */
1174 if ((*vpp)->v_type == VBAD) {
1175 error = EINVAL;
1176 goto error_return;
1177 }
1178
1179 newvp = *vpp;
1180 vpid = newvp->v_id;
1181 /*
1182 * See the comment starting `Step through' in ufs/ufs_lookup.c
1183 * for an explanation of the locking protocol
1184 */
1185 if (dvp == newvp) {
1186 VREF(newvp);
1187 error = 0;
1188 } else if (flags & ISDOTDOT) {
1189 VOP_UNLOCK(dvp, 0, p);
1190 error = vget(newvp, LK_EXCLUSIVE, p);
1191 if (!error && lockparent && (flags & ISLASTCN))
1192 error = vn_lock(dvp, LK_EXCLUSIVE, p);
1193 } else {
1194 error = vget(newvp, LK_EXCLUSIVE, p);
1195 if (!lockparent || error || !(flags & ISLASTCN))
1196 VOP_UNLOCK(dvp, 0, p);
1197 }
1198 if (!error) {
1199 if (vpid == newvp->v_id) {
1200 if (!VOP_GETATTR(newvp, &vattr, cnp->cn_cred, p)
1201 && vattr.va_ctime.tv_sec == VTONFS(newvp)->n_ctime) {
1202 nfsstats.lookupcache_hits++;
1203 if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
1204 cnp->cn_flags |= SAVENAME;
1205 error = 0; /* ignore any from VOP_GETATTR */
1206 goto error_return;
1207 }
1208 cache_purge(newvp);
1209 }
1210 vput(newvp);
1211 if (lockparent && dvp != newvp && (flags & ISLASTCN))
1212 VOP_UNLOCK(dvp, 0, p);
1213 }
1214 error = vn_lock(dvp, LK_EXCLUSIVE, p);
1215 *vpp = NULLVP;
1216 if (error)
1217 goto error_return;
1218 }
1219
1220 /*
1221 	 * Check that the vnode didn't go away: VOP_GETATTR may have gone to the server,
1222 	 * or code that ran before this point may have blocked and had dvp go VBAD.
1223 */
1224 if (dvp->v_type == VBAD) {
1225 error = EINVAL;
1226 goto error_return;
1227 }
1228
1229 error = 0;
1230 newvp = NULLVP;
1231 nfsstats.lookupcache_misses++;
1232 nfsstats.rpccnt[NFSPROC_LOOKUP]++;
1233 len = cnp->cn_namelen;
1234 nfsm_reqhead(dvp, NFSPROC_LOOKUP,
1235 NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len));
1236 nfsm_fhtom(dvp, v3);
1237 nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN);
1238 	/* nfsm_request for NFSv2 causes a goto nfsmout upon errors */
1239 nfsm_request(dvp, NFSPROC_LOOKUP, cnp->cn_proc, cnp->cn_cred);
1240
1241 if (error) {
1242 nfsm_postop_attr(dvp, attrflag);
1243 m_freem(mrep);
1244 goto nfsmout;
1245 }
1246 nfsm_getfh(fhp, fhsize, v3);
1247
1248 /*
1249 * Handle RENAME case...
1250 */
1251 if (cnp->cn_nameiop == RENAME && wantparent && (flags & ISLASTCN)) {
1252 if (NFS_CMPFH(np, fhp, fhsize)) {
1253 m_freem(mrep);
1254 error = EISDIR;
1255 goto error_return;
1256 }
1257 if ((error = nfs_nget(dvp->v_mount, fhp, fhsize, &np))) {
1258 m_freem(mrep);
1259 goto error_return;
1260 }
1261 newvp = NFSTOV(np);
1262 if (v3) {
1263 nfsm_postop_attr(newvp, attrflag);
1264 nfsm_postop_attr(dvp, attrflag);
1265 } else
1266 nfsm_loadattr(newvp, (struct vattr *)0);
1267 *vpp = newvp;
1268 m_freem(mrep);
1269 cnp->cn_flags |= SAVENAME;
1270 if (!lockparent)
1271 VOP_UNLOCK(dvp, 0, p);
1272 error = 0;
1273 goto error_return;
1274 }
1275
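	/*
	 * Looking up ".." runs against the usual parent-then-child lock order,
	 * so unlock dvp before getting the ".." vnode to avoid deadlock, and
	 * relock it afterwards if the caller asked for a locked parent.
	 */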
1276 if (flags & ISDOTDOT) {
1277 VOP_UNLOCK(dvp, 0, p);
1278 error = nfs_nget(dvp->v_mount, fhp, fhsize, &np);
1279 if (error) {
1280 vn_lock(dvp, LK_EXCLUSIVE + LK_RETRY, p);
1281 goto error_return;
1282 }
1283 newvp = NFSTOV(np);
1284 if (lockparent && (flags & ISLASTCN) &&
1285 (error = vn_lock(dvp, LK_EXCLUSIVE, p))) {
1286 vput(newvp);
1287 goto error_return;
1288 }
1289 } else if (NFS_CMPFH(np, fhp, fhsize)) {
1290 VREF(dvp);
1291 newvp = dvp;
1292 } else {
1293 if ((error = nfs_nget(dvp->v_mount, fhp, fhsize, &np))) {
1294 m_freem(mrep);
1295 goto error_return;
1296 }
1297 if (!lockparent || !(flags & ISLASTCN))
1298 VOP_UNLOCK(dvp, 0, p);
1299 newvp = NFSTOV(np);
1300 }
1301 if (v3) {
1302 nfsm_postop_attr(newvp, attrflag);
1303 nfsm_postop_attr(dvp, attrflag);
1304 } else
1305 nfsm_loadattr(newvp, (struct vattr *)0);
1306 if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
1307 cnp->cn_flags |= SAVENAME;
1308 if ((cnp->cn_flags & MAKEENTRY) &&
1309 (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN))) {
1310 np->n_ctime = np->n_vattr.va_ctime.tv_sec;
1311 cache_enter(dvp, newvp, cnp);
1312 }
1313 *vpp = newvp;
1314 nfsm_reqdone;
1315 if (error) {
1316 if (newvp != NULLVP) {
1317 vrele(newvp);
1318 *vpp = NULLVP;
1319 }
1320 if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) &&
1321 (flags & ISLASTCN) && error == ENOENT) {
1322 if (!lockparent)
1323 VOP_UNLOCK(dvp, 0, p);
1324 if (dvp->v_mount->mnt_flag & MNT_RDONLY)
1325 error = EROFS;
1326 else
1327 error = EJUSTRETURN;
1328 }
1329 if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
1330 cnp->cn_flags |= SAVENAME;
1331 }
1332 error_return:
1333 /*
1334 * These "vreles" set dvp refcounts back to where they were
1335 	 * before we took the 2 extra VREFs to avoid a VBAD vnode on dvp
1336 * during server calls for world builds. Remove when real
1337 * fix is found. - EKN
1338 */
1339 if (worldbuildworkaround) {
1340 vrele(dvp); /* end of hanging on tight to dvp - EKN */
1341 vrele(dvp); /* end of hanging on tight to dvp - EKN */
1342 }
1343
1344 return (error);
1345 }
1346
1347 /*
1348 * nfs read call.
1349 * Just call nfs_bioread() to do the work.
1350 */
1351 static int
1352 nfs_read(ap)
1353 struct vop_read_args /* {
1354 struct vnode *a_vp;
1355 struct uio *a_uio;
1356 int a_ioflag;
1357 struct ucred *a_cred;
1358 } */ *ap;
1359 {
1360 register struct vnode *vp = ap->a_vp;
1361
1362 if (vp->v_type != VREG)
1363 return (EPERM);
1364 return (nfs_bioread(vp, ap->a_uio, ap->a_ioflag, ap->a_cred, 0));
1365 }
1366
1367 /*
1368 * nfs readlink call
1369 */
1370 static int
1371 nfs_readlink(ap)
1372 struct vop_readlink_args /* {
1373 struct vnode *a_vp;
1374 struct uio *a_uio;
1375 struct ucred *a_cred;
1376 } */ *ap;
1377 {
1378 register struct vnode *vp = ap->a_vp;
1379
1380 if (vp->v_type != VLNK)
1381 return (EPERM);
1382 return (nfs_bioread(vp, ap->a_uio, 0, ap->a_cred, 0));
1383 }
1384
1385 /*
1386 * Do a readlink rpc.
1387 * Called by nfs_doio() from below the buffer cache.
1388 */
1389 int
1390 nfs_readlinkrpc(vp, uiop, cred)
1391 register struct vnode *vp;
1392 struct uio *uiop;
1393 struct ucred *cred;
1394 {
1395 register u_long *tl;
1396 register caddr_t cp;
1397 register long t1, t2;
1398 caddr_t bpos, dpos, cp2;
1399 int error = 0, len, attrflag;
1400 struct mbuf *mreq, *mrep, *md, *mb, *mb2;
1401 int v3 = NFS_ISV3(vp);
1402
1403 nfsstats.rpccnt[NFSPROC_READLINK]++;
1404 nfsm_reqhead(vp, NFSPROC_READLINK, NFSX_FH(v3));
1405 nfsm_fhtom(vp, v3);
1406 nfsm_request(vp, NFSPROC_READLINK, uiop->uio_procp, cred);
1407 if (v3)
1408 nfsm_postop_attr(vp, attrflag);
1409 if (!error) {
1410 nfsm_strsiz(len, NFS_MAXPATHLEN);
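		/*
		 * A reply of exactly NFS_MAXPATHLEN looks suspicious (presumably a
		 * server returning its buffer size rather than the string length),
		 * so trim it to the cached symlink size when that is smaller.
		 */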
1411 if (len == NFS_MAXPATHLEN) {
1412 struct nfsnode *np = VTONFS(vp);
1413 #if DIAGNOSTIC
1414 if (!np)
1415 panic("nfs_readlinkrpc: null np");
1416 #endif
1417 if (np->n_size && np->n_size < NFS_MAXPATHLEN)
1418 len = np->n_size;
1419 }
1420 nfsm_mtouio(uiop, len);
1421 }
1422 nfsm_reqdone;
1423 return (error);
1424 }
1425
1426 /*
1427 * nfs read rpc call
1428 * Ditto above
1429 */
1430 int
1431 nfs_readrpc(vp, uiop, cred)
1432 register struct vnode *vp;
1433 struct uio *uiop;
1434 struct ucred *cred;
1435 {
1436 register u_long *tl;
1437 register caddr_t cp;
1438 register long t1, t2;
1439 caddr_t bpos, dpos, cp2;
1440 struct mbuf *mreq, *mrep, *md, *mb, *mb2;
1441 struct nfsmount *nmp;
1442 int error = 0, len, retlen, tsiz, eof, attrflag;
1443 int v3 = NFS_ISV3(vp);
1444
1445 #ifndef nolint
1446 eof = 0;
1447 #endif
1448 nmp = VFSTONFS(vp->v_mount);
1449 tsiz = uiop->uio_resid;
1450 if (((u_int64_t)uiop->uio_offset + (unsigned int)tsiz > 0xffffffff) && !v3)
1451 return (EFBIG);
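	/*
	 * Issue READ RPCs of at most nm_rsize bytes each until the request is
	 * satisfied or the server indicates EOF (v3) or returns a short read (v2).
	 */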
1452 while (tsiz > 0) {
1453 nfsstats.rpccnt[NFSPROC_READ]++;
1454 len = (tsiz > nmp->nm_rsize) ? nmp->nm_rsize : tsiz;
1455 nfsm_reqhead(vp, NFSPROC_READ, NFSX_FH(v3) + NFSX_UNSIGNED * 3);
1456 nfsm_fhtom(vp, v3);
1457 nfsm_build(tl, u_long *, NFSX_UNSIGNED * 3);
1458 if (v3) {
1459 txdr_hyper(&uiop->uio_offset, tl);
1460 *(tl + 2) = txdr_unsigned(len);
1461 } else {
1462 *tl++ = txdr_unsigned(uiop->uio_offset);
1463 *tl++ = txdr_unsigned(len);
1464 *tl = 0;
1465 }
1466 nfsm_request(vp, NFSPROC_READ, uiop->uio_procp, cred);
1467 if (v3) {
1468 nfsm_postop_attr(vp, attrflag);
1469 if (error) {
1470 m_freem(mrep);
1471 goto nfsmout;
1472 }
1473 nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
1474 eof = fxdr_unsigned(int, *(tl + 1));
1475 } else
1476 nfsm_loadattr(vp, (struct vattr *)0);
1477 nfsm_strsiz(retlen, nmp->nm_rsize);
1478 nfsm_mtouio(uiop, retlen);
1479 m_freem(mrep);
1480 tsiz -= retlen;
1481 if (v3) {
1482 if (eof || retlen == 0)
1483 tsiz = 0;
1484 } else if (retlen < len)
1485 tsiz = 0;
1486 }
1487 nfsmout:
1488 return (error);
1489 }
1490
1491 /*
1492 * nfs write call
1493 */
1494 int
1495 nfs_writerpc(vp, uiop, cred, iomode, must_commit)
1496 register struct vnode *vp;
1497 register struct uio *uiop;
1498 struct ucred *cred;
1499 int *iomode, *must_commit;
1500 {
1501 register u_long *tl;
1502 register caddr_t cp;
1503 register int t1, t2, backup;
1504 caddr_t bpos, dpos, cp2;
1505 struct mbuf *mreq, *mrep, *md, *mb, *mb2;
1506 struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1507 int error = 0, len, tsiz, wccflag = NFSV3_WCCRATTR, rlen, commit;
1508 int v3 = NFS_ISV3(vp), committed = NFSV3WRITE_FILESYNC;
1509
1510 #if DIAGNOSTIC
1511 if (uiop->uio_iovcnt != 1)
1512 panic("nfs_writerpc: iovcnt > 1");
1513 #endif
1514 *must_commit = 0;
1515 tsiz = uiop->uio_resid;
1516 if (((u_int64_t)uiop->uio_offset + (unsigned int)tsiz > 0xffffffff) && !v3)
1517 return (EFBIG);
1518 while (tsiz > 0) {
1519 nfsstats.rpccnt[NFSPROC_WRITE]++;
1520 len = (tsiz > nmp->nm_wsize) ? nmp->nm_wsize : tsiz;
1521 nfsm_reqhead(vp, NFSPROC_WRITE,
1522 NFSX_FH(v3) + 5 * NFSX_UNSIGNED + nfsm_rndup(len));
1523 nfsm_fhtom(vp, v3);
1524 if (v3) {
1525 nfsm_build(tl, u_long *, 5 * NFSX_UNSIGNED);
1526 txdr_hyper(&uiop->uio_offset, tl);
1527 tl += 2;
1528 *tl++ = txdr_unsigned(len);
1529 *tl++ = txdr_unsigned(*iomode);
1530 } else {
1531 nfsm_build(tl, u_long *, 4 * NFSX_UNSIGNED);
1532 *++tl = txdr_unsigned(uiop->uio_offset);
1533 tl += 2;
1534 }
1535 *tl = txdr_unsigned(len);
1536 nfsm_uiotom(uiop, len);
1537 nfsm_request(vp, NFSPROC_WRITE, uiop->uio_procp, cred);
1538 if (v3) {
1539 wccflag = NFSV3_WCCCHK;
1540 nfsm_wcc_data(vp, wccflag);
1541 if (!error) {
1542 nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED +
1543 NFSX_V3WRITEVERF);
1544 rlen = fxdr_unsigned(int, *tl++);
1545 if (rlen <= 0) {
1546 error = NFSERR_IO;
1547 break;
1548 } else if (rlen < len) {
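				/*
				 * The server wrote fewer bytes than requested.
				 * Back the uio up by the unwritten amount so the
				 * next pass resends it.
				 */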
1549 backup = len - rlen;
1550 uiop->uio_iov->iov_base -= backup;
1551 uiop->uio_iov->iov_len += backup;
1552 uiop->uio_offset -= backup;
1553 uiop->uio_resid += backup;
1554 len = rlen;
1555 }
1556 commit = fxdr_unsigned(int, *tl++);
1557
1558 /*
1559 				 * Return the lowest commitment level
1560 * obtained by any of the RPCs.
1561 */
1562 if (committed == NFSV3WRITE_FILESYNC)
1563 committed = commit;
1564 else if (committed == NFSV3WRITE_DATASYNC &&
1565 commit == NFSV3WRITE_UNSTABLE)
1566 committed = commit;
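				/*
				 * Check the v3 write verifier: if it differs from the one
				 * we have cached, the server has likely rebooted and may
				 * have lost uncommitted data, so tell the caller a commit
				 * (and possible rewrite) is needed and remember the new
				 * verifier.
				 */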
1567 if ((nmp->nm_flag & NFSMNT_HASWRITEVERF) == 0) {
1568 bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
1569 NFSX_V3WRITEVERF);
1570 nmp->nm_flag |= NFSMNT_HASWRITEVERF;
1571 } else if (bcmp((caddr_t)tl,
1572 (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF)) {
1573 *must_commit = 1;
1574 bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
1575 NFSX_V3WRITEVERF);
1576 }
1577 }
1578 } else
1579 nfsm_loadattr(vp, (struct vattr *)0);
1580 if ((wccflag) && (vp->v_type != VBAD)) /* EINVAL set on VBAD vnode */
1581 VTONFS(vp)->n_mtime = VTONFS(vp)->n_vattr.va_mtime.tv_sec;
1582 m_freem(mrep);
1583 /*
1584 		 * We have seen cases where we end up looping on shutdown and taking down nfs servers.
1585 		 * For V3 error cases there was no way to terminate the loop if len was 0, meaning
1586 		 * nmp->nm_wsize had been trashed. FreeBSD has this fix in it; let's try it.
1587 */
1588 if (error)
1589 break;
1590 tsiz -= len;
1591 }
1592 nfsmout:
1593 /* does it make sense to even say it was committed if we had an error? EKN */
1594 /* okay well just don't on bad vnodes then. EINVAL will be returned on bad vnodes */
1595 if ((vp->v_type != VBAD) && (vp->v_mount->mnt_flag & MNT_ASYNC))
1596 committed = NFSV3WRITE_FILESYNC;
1597 *iomode = committed;
1598 if (error)
1599 uiop->uio_resid = tsiz;
1600 return (error);
1601 }
1602
1603 /*
1604 * nfs mknod rpc
1605 * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the
1606 * mode set to specify the file type and the size field for rdev.
1607 */
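/*
 * For reference, the v2 kludge described above boils down to (sketch only,
 * mirroring the v2 branch below):
 *
 *	sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode);   <- IFMT bits carry the type
 *	sp->sa_size = rdev;                                        <- size field doubles as rdev
 *
 * whereas v3 has a real MKNOD procedure that carries the type explicitly and,
 * for VCHR/VBLK, a separate major/minor pair.
 */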
1608 static int
1609 nfs_mknodrpc(dvp, vpp, cnp, vap)
1610 register struct vnode *dvp;
1611 register struct vnode **vpp;
1612 register struct componentname *cnp;
1613 register struct vattr *vap;
1614 {
1615 register struct nfsv2_sattr *sp;
1616 register struct nfsv3_sattr *sp3;
1617 register u_long *tl;
1618 register caddr_t cp;
1619 register long t1, t2;
1620 struct vnode *newvp = (struct vnode *)0;
1621 struct nfsnode *np = (struct nfsnode *)0;
1622 struct vattr vattr;
1623 char *cp2;
1624 caddr_t bpos, dpos;
1625 int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0;
1626 struct mbuf *mreq, *mrep, *md, *mb, *mb2;
1627 u_long rdev;
1628 int v3 = NFS_ISV3(dvp);
1629
1630 if (vap->va_type == VCHR || vap->va_type == VBLK)
1631 rdev = txdr_unsigned(vap->va_rdev);
1632 else if (vap->va_type == VFIFO || vap->va_type == VSOCK)
1633 rdev = 0xffffffff;
1634 else {
1635 VOP_ABORTOP(dvp, cnp);
1636 vput(dvp);
1637 return (EOPNOTSUPP);
1638 }
1639 if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc))) {
1640 VOP_ABORTOP(dvp, cnp);
1641 vput(dvp);
1642 return (error);
1643 }
1644 nfsstats.rpccnt[NFSPROC_MKNOD]++;
1645 nfsm_reqhead(dvp, NFSPROC_MKNOD, NFSX_FH(v3) + 4 * NFSX_UNSIGNED +
1646 + nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3));
1647 nfsm_fhtom(dvp, v3);
1648 nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
1649 if (v3) {
1650 nfsm_build(tl, u_long *, NFSX_UNSIGNED + NFSX_V3SRVSATTR);
1651 *tl++ = vtonfsv3_type(vap->va_type);
1652 sp3 = (struct nfsv3_sattr *)tl;
1653 nfsm_v3sattr(sp3, vap, cnp->cn_cred->cr_uid, vattr.va_gid);
1654 if (vap->va_type == VCHR || vap->va_type == VBLK) {
1655 nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED);
1656 *tl++ = txdr_unsigned(major(vap->va_rdev));
1657 *tl = txdr_unsigned(minor(vap->va_rdev));
1658 }
1659 } else {
1660 nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
1661 sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode);
1662 sp->sa_uid = txdr_unsigned(cnp->cn_cred->cr_uid);
1663 sp->sa_gid = txdr_unsigned(vattr.va_gid);
1664 sp->sa_size = rdev;
1665 txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
1666 txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
1667 }
1668 nfsm_request(dvp, NFSPROC_MKNOD, cnp->cn_proc, cnp->cn_cred);
1669 if (!error) {
1670 nfsm_mtofh(dvp, newvp, v3, gotvp);
1671 if (!gotvp) {
1672 if (newvp) {
1673 vput(newvp);
1674 newvp = (struct vnode *)0;
1675 }
1676 error = nfs_lookitup(dvp, cnp->cn_nameptr,
1677 cnp->cn_namelen, cnp->cn_cred, cnp->cn_proc, &np);
1678 if (!error)
1679 newvp = NFSTOV(np);
1680 }
1681 }
1682 if (v3)
1683 nfsm_wcc_data(dvp, wccflag);
1684 nfsm_reqdone;
1685 if (error) {
1686 if (newvp)
1687 vput(newvp);
1688 } else {
1689 if (cnp->cn_flags & MAKEENTRY)
1690 cache_enter(dvp, newvp, cnp);
1691 *vpp = newvp;
1692 }
1693 FREE_ZONE(cnp->cn_pnbuf, cnp->cn_pnlen, M_NAMEI);
1694 if (dvp->v_type != VBAD) { /* EINVAL set on VBAD vnode */
1695 VTONFS(dvp)->n_flag |= NMODIFIED;
1696 if (!wccflag)
1697 VTONFS(dvp)->n_attrstamp = 0;
1698 }
1699 vput(dvp);
1700 return (error);
1701 }
1702
1703 /*
1704 * nfs mknod vop
1705 * just call nfs_mknodrpc() to do the work.
1706 */
1707 /* ARGSUSED */
1708 static int
1709 nfs_mknod(ap)
1710 struct vop_mknod_args /* {
1711 struct vnode *a_dvp;
1712 struct vnode **a_vpp;
1713 struct componentname *a_cnp;
1714 struct vattr *a_vap;
1715 } */ *ap;
1716 {
1717 struct vnode *newvp;
1718 int error;
1719
1720 error = nfs_mknodrpc(ap->a_dvp, &newvp, ap->a_cnp, ap->a_vap);
1721 if (!error)
1722 vput(newvp);
1723 return (error);
1724 }
1725
1726 static u_long create_verf;
1727 /*
1728 * nfs file create call
1729 */
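/*
 * For v3, an exclusive create (VA_EXCLUSIVE) is a two step affair: the CREATE
 * rpc carries only an 8 byte verifier (built below from the first configured
 * IP address, when there is one, plus the create_verf counter), and once the
 * file exists a follow-up SETATTR rpc (nfs_setattrrpc) pushes the requested
 * attributes.  If the server answers NFSERR_NOTSUPP the create is retried in
 * UNCHECKED mode.  Roughly:
 *
 *	CREATE(EXCLUSIVE, verf) -> NFSERR_NOTSUPP ?  retry as CREATE(UNCHECKED, sattr)
 *	                        -> success ?         SETATTR(newvp, vap)
 */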
1730 static int
1731 nfs_create(ap)
1732 struct vop_create_args /* {
1733 struct vnode *a_dvp;
1734 struct vnode **a_vpp;
1735 struct componentname *a_cnp;
1736 struct vattr *a_vap;
1737 } */ *ap;
1738 {
1739 register struct vnode *dvp = ap->a_dvp;
1740 register struct vattr *vap = ap->a_vap;
1741 register struct componentname *cnp = ap->a_cnp;
1742 register struct nfsv2_sattr *sp;
1743 register struct nfsv3_sattr *sp3;
1744 register u_long *tl;
1745 register caddr_t cp;
1746 register long t1, t2;
1747 struct nfsnode *np = (struct nfsnode *)0;
1748 struct vnode *newvp = (struct vnode *)0;
1749 caddr_t bpos, dpos, cp2;
1750 int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0, fmode = 0;
1751 struct mbuf *mreq, *mrep, *md, *mb, *mb2;
1752 struct vattr vattr;
1753 int v3 = NFS_ISV3(dvp);
1754
1755 /*
1756 * Oops, not for me..
1757 */
1758 if (vap->va_type == VSOCK)
1759 return (nfs_mknodrpc(dvp, ap->a_vpp, cnp, vap));
1760
1761 if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc))) {
1762 VOP_ABORTOP(dvp, cnp);
1763 vput(dvp);
1764 return (error);
1765 }
1766 if (vap->va_vaflags & VA_EXCLUSIVE)
1767 fmode |= O_EXCL;
1768 again:
1769 nfsstats.rpccnt[NFSPROC_CREATE]++;
1770 nfsm_reqhead(dvp, NFSPROC_CREATE, NFSX_FH(v3) + 2 * NFSX_UNSIGNED +
1771 nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3));
1772 nfsm_fhtom(dvp, v3);
1773 nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
1774 if (v3) {
1775 nfsm_build(tl, u_long *, NFSX_UNSIGNED);
1776 if (fmode & O_EXCL) {
1777 *tl = txdr_unsigned(NFSV3CREATE_EXCLUSIVE);
1778 nfsm_build(tl, u_long *, NFSX_V3CREATEVERF);
1779 if (!TAILQ_EMPTY(&in_ifaddrhead))
1780 *tl++ = IA_SIN(in_ifaddrhead.tqh_first)->sin_addr.s_addr;
1781 else
1782 *tl++ = create_verf;
1783 *tl = ++create_verf;
1784 } else {
1785 *tl = txdr_unsigned(NFSV3CREATE_UNCHECKED);
1786 nfsm_build(tl, u_long *, NFSX_V3SRVSATTR);
1787 sp3 = (struct nfsv3_sattr *)tl;
1788 nfsm_v3sattr(sp3, vap, cnp->cn_cred->cr_uid, vattr.va_gid);
1789 }
1790 } else {
1791 nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
1792 sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode);
1793 sp->sa_uid = txdr_unsigned(cnp->cn_cred->cr_uid);
1794 sp->sa_gid = txdr_unsigned(vattr.va_gid);
1795 sp->sa_size = 0;
1796 txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
1797 txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
1798 }
1799 nfsm_request(dvp, NFSPROC_CREATE, cnp->cn_proc, cnp->cn_cred);
1800 if (!error) {
1801 nfsm_mtofh(dvp, newvp, v3, gotvp);
1802 if (!gotvp) {
1803 if (newvp) {
1804 vput(newvp);
1805 newvp = (struct vnode *)0;
1806 }
1807 error = nfs_lookitup(dvp, cnp->cn_nameptr,
1808 cnp->cn_namelen, cnp->cn_cred, cnp->cn_proc, &np);
1809 if (!error)
1810 newvp = NFSTOV(np);
1811 }
1812 }
1813 if (v3)
1814 nfsm_wcc_data(dvp, wccflag);
1815 nfsm_reqdone;
1816 if (error) {
1817 if (v3 && (fmode & O_EXCL) && error == NFSERR_NOTSUPP) {
1818 fmode &= ~O_EXCL;
1819 goto again;
1820 }
1821 if (newvp)
1822 vput(newvp);
1823 } else if (v3 && (fmode & O_EXCL))
1824 error = nfs_setattrrpc(newvp, vap, cnp->cn_cred, cnp->cn_proc);
1825 if (!error) {
1826 if (cnp->cn_flags & MAKEENTRY)
1827 cache_enter(dvp, newvp, cnp);
1828 *ap->a_vpp = newvp;
1829 }
1830 FREE_ZONE(cnp->cn_pnbuf, cnp->cn_pnlen, M_NAMEI);
1831 if (dvp->v_type != VBAD) { /* EINVAL set on VBAD vnode */
1832 VTONFS(dvp)->n_flag |= NMODIFIED;
1833 if (!wccflag)
1834 VTONFS(dvp)->n_attrstamp = 0;
1835 }
1836 vput(dvp);
1837 return (error);
1838 }
1839
1840 /*
1841 * nfs file remove call
1842 * To try and make nfs semantics closer to ufs semantics, a file that has
1843 * other processes using the vnode is renamed instead of removed and then
1844 * removed later on the last close.
1845 * - If v_usecount > 1
1846 * If a rename is not already in the works
1847 * call nfs_sillyrename() to set it up
1848 * else
1849 * do the remove rpc
1850 */
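/*
 * Note on the v_usecount test below: when the vnode has valid UBC info, UBC
 * itself presumably holds one of the references, so "no other users" shows up
 * as v_usecount == 2 rather than 1; that is what the
 * (UBCISVALID(vp) && (vp->v_usecount == 2)) clause accounts for.
 */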
1851 static int
1852 nfs_remove(ap)
1853 struct vop_remove_args /* {
1854 struct vnodeop_desc *a_desc;
1855 struct vnode * a_dvp;
1856 struct vnode * a_vp;
1857 struct componentname * a_cnp;
1858 } */ *ap;
1859 {
1860 register struct vnode *vp = ap->a_vp;
1861 register struct vnode *dvp = ap->a_dvp;
1862 register struct componentname *cnp = ap->a_cnp;
1863 register struct nfsnode *np = VTONFS(vp);
1864 int error = 0;
1865 struct vattr vattr;
1866 int file_deleted = 0;
1867
1868 #if DIAGNOSTIC
1869 if ((cnp->cn_flags & HASBUF) == 0)
1870 panic("nfs_remove: no name");
1871 if (vp->v_usecount < 1)
1872 panic("nfs_remove: bad v_usecount");
1873 #endif
1874 if (vp->v_usecount == 1 ||
1875 (UBCISVALID(vp)&&(vp->v_usecount==2)) ||
1876 (np->n_sillyrename &&
1877 VOP_GETATTR(vp, &vattr, cnp->cn_cred, cnp->cn_proc) == 0 &&
1878 vattr.va_nlink > 1)) {
1879 /*
1880 * Purge the name cache so that the chance of a lookup for
1881 * the name succeeding while the remove is in progress is
1882 * minimized. Without node locking it can still happen, such
1883 		 * that an I/O op returns ESTALE, but you can get that anyway
1884 		 * if another host removes the file.
1885 */
1886 cache_purge(vp);
1887 /*
1888 * throw away biocache buffers, mainly to avoid
1889 * unnecessary delayed writes later.
1890 */
1891 error = nfs_vinvalbuf(vp, 0, cnp->cn_cred, cnp->cn_proc, 1);
1892 ubc_setsize(vp, (off_t)0);
1893 /* Do the rpc */
1894 if (error != EINTR)
1895 error = nfs_removerpc(dvp, cnp->cn_nameptr,
1896 cnp->cn_namelen, cnp->cn_cred, cnp->cn_proc);
1897 /*
1898 		 * Kludge City: if the first reply to the remove rpc is lost,
1899 		 * the reply to the retransmitted request will be ENOENT
1900 		 * since the file was in fact removed.
1901 		 * Therefore, we cheat and return success.
1902 */
1903 if (error == ENOENT)
1904 error = 0;
1905 file_deleted = 1;
1906 } else if (!np->n_sillyrename) {
1907 error = nfs_sillyrename(dvp, vp, cnp);
1908 }
1909
1910 FREE_ZONE(cnp->cn_pnbuf, cnp->cn_pnlen, M_NAMEI);
1911 np->n_attrstamp = 0;
1912 vput(dvp);
1913
1914 VOP_UNLOCK(vp, 0, cnp->cn_proc);
1915
1916 if (file_deleted)
1917 ubc_uncache(vp);
1918
1919 vrele(vp);
1920
1921 return (error);
1922 }
1923
1924 /*
1925 * nfs file remove rpc called from nfs_inactive
1926 */
1927 int
1928 nfs_removeit(sp)
1929 register struct sillyrename *sp;
1930 {
1931
1932 return (nfs_removerpc(sp->s_dvp, sp->s_name, sp->s_namlen, sp->s_cred,
1933 (struct proc *)0));
1934 }
1935
1936 /*
1937 * Nfs remove rpc, called from nfs_remove() and nfs_removeit().
1938 */
1939 static int
1940 nfs_removerpc(dvp, name, namelen, cred, proc)
1941 register struct vnode *dvp;
1942 char *name;
1943 int namelen;
1944 struct ucred *cred;
1945 struct proc *proc;
1946 {
1947 register u_long *tl;
1948 register caddr_t cp;
1949 register long t1, t2;
1950 caddr_t bpos, dpos, cp2;
1951 int error = 0, wccflag = NFSV3_WCCRATTR;
1952 struct mbuf *mreq, *mrep, *md, *mb, *mb2;
1953 int v3 = NFS_ISV3(dvp);
1954
1955 nfsstats.rpccnt[NFSPROC_REMOVE]++;
1956 nfsm_reqhead(dvp, NFSPROC_REMOVE,
1957 NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(namelen));
1958 nfsm_fhtom(dvp, v3);
1959 nfsm_strtom(name, namelen, NFS_MAXNAMLEN);
1960 nfsm_request(dvp, NFSPROC_REMOVE, proc, cred);
1961 if (v3)
1962 nfsm_wcc_data(dvp, wccflag);
1963 nfsm_reqdone;
1964 if (dvp->v_type != VBAD) { /* EINVAL set on VBAD vnode */
1965 VTONFS(dvp)->n_flag |= NMODIFIED;
1966 if (!wccflag)
1967 VTONFS(dvp)->n_attrstamp = 0;
1968 }
1969 return (error);
1970 }
1971
1972 /*
1973 * nfs file rename call
1974 */
1975 static int
1976 nfs_rename(ap)
1977 struct vop_rename_args /* {
1978 struct vnode *a_fdvp;
1979 struct vnode *a_fvp;
1980 struct componentname *a_fcnp;
1981 struct vnode *a_tdvp;
1982 struct vnode *a_tvp;
1983 struct componentname *a_tcnp;
1984 } */ *ap;
1985 {
1986 register struct vnode *fvp = ap->a_fvp;
1987 register struct vnode *tvp = ap->a_tvp;
1988 register struct vnode *fdvp = ap->a_fdvp;
1989 register struct vnode *tdvp = ap->a_tdvp;
1990 register struct componentname *tcnp = ap->a_tcnp;
1991 register struct componentname *fcnp = ap->a_fcnp;
1992 int error;
1993
1994 #if DIAGNOSTIC
1995 if ((tcnp->cn_flags & HASBUF) == 0 ||
1996 (fcnp->cn_flags & HASBUF) == 0)
1997 panic("nfs_rename: no name");
1998 #endif
1999 /* Check for cross-device rename */
2000 if ((fvp->v_mount != tdvp->v_mount) ||
2001 (tvp && (fvp->v_mount != tvp->v_mount))) {
2002 error = EXDEV;
2003 goto out;
2004 }
2005
2006 /*
2007 * If the tvp exists and is in use, sillyrename it before doing the
2008 * rename of the new file over it.
2009 * XXX Can't sillyrename a directory.
2010 */
2011 if (tvp && (tvp->v_usecount>(UBCISVALID(tvp) ? 2 : 1)) &&
2012 !VTONFS(tvp)->n_sillyrename &&
2013 tvp->v_type != VDIR && !nfs_sillyrename(tdvp, tvp, tcnp)) {
2014 vput(tvp);
2015 tvp = NULL;
2016 }
2017
2018 error = nfs_renamerpc(fdvp, fcnp->cn_nameptr, fcnp->cn_namelen,
2019 tdvp, tcnp->cn_nameptr, tcnp->cn_namelen, tcnp->cn_cred,
2020 tcnp->cn_proc);
2021
2022 if (fvp->v_type == VDIR) {
2023 if (tvp != NULL && tvp->v_type == VDIR)
2024 cache_purge(tdvp);
2025 cache_purge(fdvp);
2026 }
2027 out:
2028 if (tdvp == tvp)
2029 vrele(tdvp);
2030 else
2031 vput(tdvp);
2032 if (tvp)
2033 vput(tvp);
2034 vrele(fdvp);
2035 vrele(fvp);
2036 /*
2037 * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry.
2038 */
2039 if (error == ENOENT)
2040 error = 0;
2041 return (error);
2042 }
2043
2044 /*
2045 * nfs file rename rpc called from nfs_remove() above
2046 */
2047 static int
2048 nfs_renameit(sdvp, scnp, sp)
2049 struct vnode *sdvp;
2050 struct componentname *scnp;
2051 register struct sillyrename *sp;
2052 {
2053 return (nfs_renamerpc(sdvp, scnp->cn_nameptr, scnp->cn_namelen,
2054 sdvp, sp->s_name, sp->s_namlen, scnp->cn_cred, scnp->cn_proc));
2055 }
2056
2057 /*
2058 * Do an nfs rename rpc. Called from nfs_rename() and nfs_renameit().
2059 */
2060 static int
2061 nfs_renamerpc(fdvp, fnameptr, fnamelen, tdvp, tnameptr, tnamelen, cred, proc)
2062 register struct vnode *fdvp;
2063 char *fnameptr;
2064 int fnamelen;
2065 register struct vnode *tdvp;
2066 char *tnameptr;
2067 int tnamelen;
2068 struct ucred *cred;
2069 struct proc *proc;
2070 {
2071 register u_long *tl;
2072 register caddr_t cp;
2073 register long t1, t2;
2074 caddr_t bpos, dpos, cp2;
2075 int error = 0, fwccflag = NFSV3_WCCRATTR, twccflag = NFSV3_WCCRATTR;
2076 struct mbuf *mreq, *mrep, *md, *mb, *mb2;
2077 int v3 = NFS_ISV3(fdvp);
2078
2079 nfsstats.rpccnt[NFSPROC_RENAME]++;
2080 nfsm_reqhead(fdvp, NFSPROC_RENAME,
2081 (NFSX_FH(v3) + NFSX_UNSIGNED)*2 + nfsm_rndup(fnamelen) +
2082 nfsm_rndup(tnamelen));
2083 nfsm_fhtom(fdvp, v3);
2084 nfsm_strtom(fnameptr, fnamelen, NFS_MAXNAMLEN);
2085 nfsm_fhtom(tdvp, v3);
2086 nfsm_strtom(tnameptr, tnamelen, NFS_MAXNAMLEN);
2087 nfsm_request(fdvp, NFSPROC_RENAME, proc, cred);
2088 if (v3) {
2089 nfsm_wcc_data(fdvp, fwccflag);
2090 nfsm_wcc_data(tdvp, twccflag);
2091 }
2092 nfsm_reqdone;
2093 if (fdvp->v_type != VBAD) { /* EINVAL set on VBAD vnode */
2094 VTONFS(fdvp)->n_flag |= NMODIFIED;
2095 if (!fwccflag)
2096 VTONFS(fdvp)->n_attrstamp = 0;
2097 }
2098 if (tdvp->v_type != VBAD) { /* EINVAL set on VBAD vnode */
2099 VTONFS(tdvp)->n_flag |= NMODIFIED;
2100 if (!twccflag)
2101 VTONFS(tdvp)->n_attrstamp = 0;
2102 }
2103 return (error);
2104 }
2105
2106 /*
2107 * nfs hard link create call
2108 */
2109 static int
2110 nfs_link(ap)
2111 struct vop_link_args /* {
2112 struct vnode *a_vp;
2113 struct vnode *a_tdvp;
2114 struct componentname *a_cnp;
2115 } */ *ap;
2116 {
2117 register struct vnode *vp = ap->a_vp;
2118 register struct vnode *tdvp = ap->a_tdvp;
2119 register struct componentname *cnp = ap->a_cnp;
2120 register u_long *tl;
2121 register caddr_t cp;
2122 register long t1, t2;
2123 caddr_t bpos, dpos, cp2;
2124 int error = 0, wccflag = NFSV3_WCCRATTR, attrflag = 0;
2125 struct mbuf *mreq, *mrep, *md, *mb, *mb2;
2126 int v3 = NFS_ISV3(vp);
2127
2128 if (vp->v_mount != tdvp->v_mount) {
2129 VOP_ABORTOP(vp, cnp);
2130 if (tdvp == vp)
2131 vrele(tdvp);
2132 else
2133 vput(tdvp);
2134 return (EXDEV);
2135 }
2136
2137 /*
2138 * Push all writes to the server, so that the attribute cache
2139 * doesn't get "out of sync" with the server.
2140 * XXX There should be a better way!
2141 */
2142 VOP_FSYNC(vp, cnp->cn_cred, MNT_WAIT, cnp->cn_proc);
2143
2144 nfsstats.rpccnt[NFSPROC_LINK]++;
2145 nfsm_reqhead(vp, NFSPROC_LINK,
2146 NFSX_FH(v3)*2 + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen));
2147 nfsm_fhtom(vp, v3);
2148 nfsm_fhtom(tdvp, v3);
2149 nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
2150 nfsm_request(vp, NFSPROC_LINK, cnp->cn_proc, cnp->cn_cred);
2151 if (v3) {
2152 nfsm_postop_attr(vp, attrflag);
2153 nfsm_wcc_data(tdvp, wccflag);
2154 }
2155 nfsm_reqdone;
2156 FREE_ZONE(cnp->cn_pnbuf, cnp->cn_pnlen, M_NAMEI);
2157
2158 VTONFS(tdvp)->n_flag |= NMODIFIED;
2159 if ((!attrflag) && (vp->v_type != VBAD)) /* EINVAL set on VBAD vnode */
2160 VTONFS(vp)->n_attrstamp = 0;
2161 if ((!wccflag) && (tdvp->v_type != VBAD)) /* EINVAL set on VBAD vnode */
2162 VTONFS(tdvp)->n_attrstamp = 0;
2163 vput(tdvp);
2164 /*
2165 * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
2166 */
2167 if (error == EEXIST)
2168 error = 0;
2169 return (error);
2170 }
2171
2172 /*
2173 * nfs symbolic link create call
2174 */
2175 static int
2176 nfs_symlink(ap)
2177 struct vop_symlink_args /* {
2178 struct vnode *a_dvp;
2179 struct vnode **a_vpp;
2180 struct componentname *a_cnp;
2181 struct vattr *a_vap;
2182 char *a_target;
2183 } */ *ap;
2184 {
2185 register struct vnode *dvp = ap->a_dvp;
2186 register struct vattr *vap = ap->a_vap;
2187 register struct componentname *cnp = ap->a_cnp;
2188 register struct nfsv2_sattr *sp;
2189 register struct nfsv3_sattr *sp3;
2190 register u_long *tl;
2191 register caddr_t cp;
2192 register long t1, t2;
2193 caddr_t bpos, dpos, cp2;
2194 int slen, error = 0, wccflag = NFSV3_WCCRATTR, gotvp;
2195 struct mbuf *mreq, *mrep, *md, *mb, *mb2;
2196 struct vnode *newvp = (struct vnode *)0;
2197 int v3 = NFS_ISV3(dvp);
2198
2199 nfsstats.rpccnt[NFSPROC_SYMLINK]++;
2200 slen = strlen(ap->a_target);
2201 nfsm_reqhead(dvp, NFSPROC_SYMLINK, NFSX_FH(v3) + 2*NFSX_UNSIGNED +
2202 nfsm_rndup(cnp->cn_namelen) + nfsm_rndup(slen) + NFSX_SATTR(v3));
2203 nfsm_fhtom(dvp, v3);
2204 nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
2205 if (v3) {
2206 nfsm_build(sp3, struct nfsv3_sattr *, NFSX_V3SRVSATTR);
2207 nfsm_v3sattr(sp3, vap, cnp->cn_cred->cr_uid,
2208 cnp->cn_cred->cr_gid);
2209 }
2210 nfsm_strtom(ap->a_target, slen, NFS_MAXPATHLEN);
2211 if (!v3) {
2212 nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
2213 sp->sa_mode = vtonfsv2_mode(VLNK, vap->va_mode);
2214 sp->sa_uid = txdr_unsigned(cnp->cn_cred->cr_uid);
2215 sp->sa_gid = txdr_unsigned(cnp->cn_cred->cr_gid);
2216 sp->sa_size = -1;
2217 txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
2218 txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
2219 }
2220 nfsm_request(dvp, NFSPROC_SYMLINK, cnp->cn_proc, cnp->cn_cred);
2221 if (v3) {
2222 if (!error)
2223 nfsm_mtofh(dvp, newvp, v3, gotvp);
2224 nfsm_wcc_data(dvp, wccflag);
2225 }
2226 nfsm_reqdone;
2227 if (newvp)
2228 vput(newvp);
2229 FREE_ZONE(cnp->cn_pnbuf, cnp->cn_pnlen, M_NAMEI);
2230 if (dvp->v_type != VBAD) { /* EINVAL set on VBAD vnode */
2231 VTONFS(dvp)->n_flag |= NMODIFIED;
2232 if (!wccflag)
2233 VTONFS(dvp)->n_attrstamp = 0;
2234 }
2235 vput(dvp);
2236 /*
2237 * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
2238 */
2239 if (error == EEXIST)
2240 error = 0;
2241 return (error);
2242 }
2243
2244 /*
2245 * nfs make dir call
2246 */
2247 static int
2248 nfs_mkdir(ap)
2249 struct vop_mkdir_args /* {
2250 struct vnode *a_dvp;
2251 struct vnode **a_vpp;
2252 struct componentname *a_cnp;
2253 struct vattr *a_vap;
2254 } */ *ap;
2255 {
2256 register struct vnode *dvp = ap->a_dvp;
2257 register struct vattr *vap = ap->a_vap;
2258 register struct componentname *cnp = ap->a_cnp;
2259 register struct nfsv2_sattr *sp;
2260 register struct nfsv3_sattr *sp3;
2261 register u_long *tl;
2262 register caddr_t cp;
2263 register long t1, t2;
2264 register int len;
2265 struct nfsnode *np = (struct nfsnode *)0;
2266 struct vnode *newvp = (struct vnode *)0;
2267 caddr_t bpos, dpos, cp2;
2268 int error = 0, wccflag = NFSV3_WCCRATTR;
2269 int gotvp = 0;
2270 struct mbuf *mreq, *mrep, *md, *mb, *mb2;
2271 struct vattr vattr;
2272 int v3 = NFS_ISV3(dvp);
2273
2274 if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc))) {
2275 VOP_ABORTOP(dvp, cnp);
2276 vput(dvp);
2277 return (error);
2278 }
2279 len = cnp->cn_namelen;
2280 nfsstats.rpccnt[NFSPROC_MKDIR]++;
2281 nfsm_reqhead(dvp, NFSPROC_MKDIR,
2282 NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len) + NFSX_SATTR(v3));
2283 nfsm_fhtom(dvp, v3);
2284 nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN);
2285 if (v3) {
2286 nfsm_build(sp3, struct nfsv3_sattr *, NFSX_V3SRVSATTR);
2287 nfsm_v3sattr(sp3, vap, cnp->cn_cred->cr_uid, vattr.va_gid);
2288 } else {
2289 nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
2290 sp->sa_mode = vtonfsv2_mode(VDIR, vap->va_mode);
2291 sp->sa_uid = txdr_unsigned(cnp->cn_cred->cr_uid);
2292 sp->sa_gid = txdr_unsigned(vattr.va_gid);
2293 sp->sa_size = -1;
2294 txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
2295 txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
2296 }
2297 nfsm_request(dvp, NFSPROC_MKDIR, cnp->cn_proc, cnp->cn_cred);
2298 if (!error)
2299 nfsm_mtofh(dvp, newvp, v3, gotvp);
2300 if (v3)
2301 nfsm_wcc_data(dvp, wccflag);
2302 nfsm_reqdone;
2303 if (dvp->v_type != VBAD) { /* EINVAL set on this case */
2304 VTONFS(dvp)->n_flag |= NMODIFIED;
2305 if (!wccflag)
2306 VTONFS(dvp)->n_attrstamp = 0;
2307 }
2308 /*
2309 	 * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry
2310 	 * if we can succeed in looking up the directory.
2311 */
2312 if (error == EEXIST || (!error && !gotvp)) {
2313 if (newvp) {
2314 vrele(newvp);
2315 newvp = (struct vnode *)0;
2316 }
2317 error = nfs_lookitup(dvp, cnp->cn_nameptr, len, cnp->cn_cred,
2318 cnp->cn_proc, &np);
2319 if (!error) {
2320 newvp = NFSTOV(np);
2321 if (newvp->v_type != VDIR)
2322 error = EEXIST;
2323 }
2324 }
2325 if (error) {
2326 if (newvp)
2327 vrele(newvp);
2328 } else
2329 *ap->a_vpp = newvp;
2330 FREE_ZONE(cnp->cn_pnbuf, cnp->cn_pnlen, M_NAMEI);
2331 vput(dvp);
2332 return (error);
2333 }
2334
2335 /*
2336 * nfs remove directory call
2337 */
2338 static int
2339 nfs_rmdir(ap)
2340 struct vop_rmdir_args /* {
2341 struct vnode *a_dvp;
2342 struct vnode *a_vp;
2343 struct componentname *a_cnp;
2344 } */ *ap;
2345 {
2346 register struct vnode *vp = ap->a_vp;
2347 register struct vnode *dvp = ap->a_dvp;
2348 register struct componentname *cnp = ap->a_cnp;
2349 register u_long *tl;
2350 register caddr_t cp;
2351 register long t1, t2;
2352 caddr_t bpos, dpos, cp2;
2353 int error = 0, wccflag = NFSV3_WCCRATTR;
2354 struct mbuf *mreq, *mrep, *md, *mb, *mb2;
2355 int v3 = NFS_ISV3(dvp);
2356
2357 nfsstats.rpccnt[NFSPROC_RMDIR]++;
2358 nfsm_reqhead(dvp, NFSPROC_RMDIR,
2359 NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen));
2360 nfsm_fhtom(dvp, v3);
2361 nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
2362 nfsm_request(dvp, NFSPROC_RMDIR, cnp->cn_proc, cnp->cn_cred);
2363 if (v3)
2364 nfsm_wcc_data(dvp, wccflag);
2365 nfsm_reqdone;
2366 FREE_ZONE(cnp->cn_pnbuf, cnp->cn_pnlen, M_NAMEI);
2367 if (dvp->v_type != VBAD) { /* EINVAL set on this case */
2368 VTONFS(dvp)->n_flag |= NMODIFIED;
2369 if (!wccflag)
2370 VTONFS(dvp)->n_attrstamp = 0;
2371 }
2372 cache_purge(dvp);
2373 cache_purge(vp);
2374 vput(vp);
2375 vput(dvp);
2376 /*
2377 	 * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry.
2378 */
2379 if (error == ENOENT)
2380 error = 0;
2381 return (error);
2382 }
2383
2384 /*
2385 * nfs readdir call
2386 */
2387 static int
2388 nfs_readdir(ap)
2389 struct vop_readdir_args /* {
2390 struct vnode *a_vp;
2391 struct uio *a_uio;
2392 struct ucred *a_cred;
2393 } */ *ap;
2394 {
2395 register struct vnode *vp = ap->a_vp;
2396 register struct nfsnode *np = VTONFS(vp);
2397 register struct uio *uio = ap->a_uio;
2398 int tresid, error;
2399 struct vattr vattr;
2400
2401 if (vp->v_type != VDIR)
2402 return (EPERM);
2403 /*
2404 * First, check for hit on the EOF offset cache
2405 */
2406 if (np->n_direofoffset > 0 && uio->uio_offset >= np->n_direofoffset &&
2407 (np->n_flag & NMODIFIED) == 0) {
2408 if (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) {
2409 if (NQNFS_CKCACHABLE(vp, ND_READ)) {
2410 nfsstats.direofcache_hits++;
2411 return (0);
2412 }
2413 } else if (VOP_GETATTR(vp, &vattr, ap->a_cred, uio->uio_procp) == 0 &&
2414 np->n_mtime == vattr.va_mtime.tv_sec) {
2415 nfsstats.direofcache_hits++;
2416 return (0);
2417 }
2418 }
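	/*
	 * n_direofoffset is the offset at which a previous readdir saw EOF
	 * (it is recorded at the bottom of nfs_readdirrpc/nfs_readdirplusrpc).
	 * The cached EOF is only trusted while the directory is known to be
	 * unchanged: either the NQNFS lease still allows cached reads, or the
	 * server's mtime matches the one saved in the nfsnode.
	 */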
2419
2420 /*
2421 * Call nfs_bioread() to do the real work.
2422 */
2423 tresid = uio->uio_resid;
2424 error = nfs_bioread(vp, uio, 0, ap->a_cred, 0);
2425
2426 if (!error && uio->uio_resid == tresid)
2427 nfsstats.direofcache_misses++;
2428 return (error);
2429 }
2430
2431 /*
2432 * Readdir rpc call.
2433 * Called from below the buffer cache by nfs_doio().
2434 */
2435 int
2436 nfs_readdirrpc(vp, uiop, cred)
2437 struct vnode *vp;
2438 register struct uio *uiop;
2439 struct ucred *cred;
2440
2441 {
2442 register int len, left;
2443 register struct dirent *dp;
2444 register u_long *tl;
2445 register caddr_t cp;
2446 register long t1, t2;
2447 register nfsuint64 *cookiep;
2448 caddr_t bpos, dpos, cp2;
2449 struct mbuf *mreq, *mrep, *md, *mb, *mb2;
2450 nfsuint64 cookie;
2451 struct nfsmount *nmp = VFSTONFS(vp->v_mount);
2452 struct nfsnode *dnp = VTONFS(vp);
2453 u_quad_t fileno;
2454 int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1;
2455 int attrflag;
2456 int v3 = NFS_ISV3(vp);
2457
2458 #ifndef nolint
2459 dp = (struct dirent *)0;
2460 #endif
2461 #if DIAGNOSTIC
2462 if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (NFS_DIRBLKSIZ - 1)) ||
2463 (uiop->uio_resid & (NFS_DIRBLKSIZ - 1)))
2464 panic("nfs_readdirrpc: bad uio");
2465 #endif
2466
2467 /*
2468 * If there is no cookie, assume directory was stale.
2469 */
2470 cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0);
2471 if (cookiep)
2472 cookie = *cookiep;
2473 else
2474 return (NFSERR_BAD_COOKIE);
2475 /*
2476 * Loop around doing readdir rpc's of size nm_readdirsize
2477 * truncated to a multiple of DIRBLKSIZ.
2478 	 * The stopping criterion is EOF or a full buffer.
2479 */
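	/*
	 * For v3 each request carries the 8 byte cookie, the 8 byte cookieverf
	 * and the count (nm_readdirsize); v2 sends just a 4 byte cookie and the
	 * count.  Each reply entry is repacked into a 4.4BSD struct dirent,
	 * roughly:
	 *
	 *	d_fileno = fileid from the wire
	 *	d_namlen = name length
	 *	d_reclen = DIRHDSIZ + nfsm_rndup(len)  (+4 when len is already a
	 *		   multiple of 4, to leave room for the terminating NUL)
	 *	d_type   = DT_UNKNOWN  (plain READDIR carries no type info)
	 *
	 * and the last record in each DIRBLKSIZ block is padded out so every
	 * block is self-contained.
	 */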
2480 while (more_dirs && bigenough) {
2481 nfsstats.rpccnt[NFSPROC_READDIR]++;
2482 nfsm_reqhead(vp, NFSPROC_READDIR, NFSX_FH(v3) +
2483 NFSX_READDIR(v3));
2484 nfsm_fhtom(vp, v3);
2485 if (v3) {
2486 nfsm_build(tl, u_long *, 5 * NFSX_UNSIGNED);
2487 *tl++ = cookie.nfsuquad[0];
2488 *tl++ = cookie.nfsuquad[1];
2489 *tl++ = dnp->n_cookieverf.nfsuquad[0];
2490 *tl++ = dnp->n_cookieverf.nfsuquad[1];
2491 } else {
2492 nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED);
2493 *tl++ = cookie.nfsuquad[0];
2494 }
2495 *tl = txdr_unsigned(nmp->nm_readdirsize);
2496 nfsm_request(vp, NFSPROC_READDIR, uiop->uio_procp, cred);
2497 if (v3) {
2498 nfsm_postop_attr(vp, attrflag);
2499 if (!error) {
2500 nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
2501 dnp->n_cookieverf.nfsuquad[0] = *tl++;
2502 dnp->n_cookieverf.nfsuquad[1] = *tl;
2503 } else {
2504 m_freem(mrep);
2505 goto nfsmout;
2506 }
2507 }
2508 nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
2509 more_dirs = fxdr_unsigned(int, *tl);
2510
2511 /* loop thru the dir entries, doctoring them to 4bsd form */
2512 while (more_dirs && bigenough) {
2513 if (v3) {
2514 nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED);
2515 fxdr_hyper(tl, &fileno);
2516 len = fxdr_unsigned(int, *(tl + 2));
2517 } else {
2518 nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
2519 fileno = fxdr_unsigned(u_quad_t, *tl++);
2520 len = fxdr_unsigned(int, *tl);
2521 }
2522 if (len <= 0 || len > NFS_MAXNAMLEN) {
2523 error = EBADRPC;
2524 m_freem(mrep);
2525 goto nfsmout;
2526 }
2527 tlen = nfsm_rndup(len);
2528 if (tlen == len)
2529 tlen += 4; /* To ensure null termination */
2530 left = DIRBLKSIZ - blksiz;
2531 if ((tlen + DIRHDSIZ) > left) {
2532 dp->d_reclen += left;
2533 uiop->uio_iov->iov_base += left;
2534 uiop->uio_iov->iov_len -= left;
2535 uiop->uio_offset += left;
2536 uiop->uio_resid -= left;
2537 blksiz = 0;
2538 }
2539 if ((tlen + DIRHDSIZ) > uiop->uio_resid)
2540 bigenough = 0;
2541 if (bigenough) {
2542 dp = (struct dirent *)uiop->uio_iov->iov_base;
2543 dp->d_fileno = (int)fileno;
2544 dp->d_namlen = len;
2545 dp->d_reclen = tlen + DIRHDSIZ;
2546 dp->d_type = DT_UNKNOWN;
2547 blksiz += dp->d_reclen;
2548 if (blksiz == DIRBLKSIZ)
2549 blksiz = 0;
2550 uiop->uio_offset += DIRHDSIZ;
2551 uiop->uio_resid -= DIRHDSIZ;
2552 uiop->uio_iov->iov_base += DIRHDSIZ;
2553 uiop->uio_iov->iov_len -= DIRHDSIZ;
2554 nfsm_mtouio(uiop, len);
2555 cp = uiop->uio_iov->iov_base;
2556 tlen -= len;
2557 *cp = '\0'; /* null terminate */
2558 uiop->uio_iov->iov_base += tlen;
2559 uiop->uio_iov->iov_len -= tlen;
2560 uiop->uio_offset += tlen;
2561 uiop->uio_resid -= tlen;
2562 } else
2563 nfsm_adv(nfsm_rndup(len));
2564 if (v3) {
2565 nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED);
2566 } else {
2567 nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
2568 }
2569 if (bigenough) {
2570 cookie.nfsuquad[0] = *tl++;
2571 if (v3)
2572 cookie.nfsuquad[1] = *tl++;
2573 } else if (v3)
2574 tl += 2;
2575 else
2576 tl++;
2577 more_dirs = fxdr_unsigned(int, *tl);
2578 }
2579 /*
2580 * If at end of rpc data, get the eof boolean
2581 */
2582 if (!more_dirs) {
2583 nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
2584 more_dirs = (fxdr_unsigned(int, *tl) == 0);
2585 }
2586 m_freem(mrep);
2587 }
2588 /*
2589 	 * Fill last record, if any, out to a multiple of DIRBLKSIZ
2590 * by increasing d_reclen for the last record.
2591 */
2592 if (blksiz > 0) {
2593 left = DIRBLKSIZ - blksiz;
2594 dp->d_reclen += left;
2595 uiop->uio_iov->iov_base += left;
2596 uiop->uio_iov->iov_len -= left;
2597 uiop->uio_offset += left;
2598 uiop->uio_resid -= left;
2599 }
2600
2601 /*
2602 * We are now either at the end of the directory or have filled the
2603 * block.
2604 */
2605 if (bigenough)
2606 dnp->n_direofoffset = uiop->uio_offset;
2607 else {
2608 if (uiop->uio_resid > 0)
2609 printf("EEK! readdirrpc resid > 0\n");
2610 cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1);
2611 *cookiep = cookie;
2612 }
2613 nfsmout:
2614 return (error);
2615 }
2616
2617 /*
2618 * NFS V3 readdir plus RPC. Used in place of nfs_readdirrpc().
2619 */
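/*
 * READDIRPLUS differs from READDIR in that each entry may also carry post-op
 * attributes and a file handle.  When both are present the entry is turned
 * into an nfsnode (nfs_nget), its attributes are loaded, d_type is derived
 * from the real vnode type, and the name is seeded into the namecache with
 * cache_enter(), which can save a later LOOKUP rpc for the common
 * "readdir, then stat each entry" pattern.
 */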
2620 int
2621 nfs_readdirplusrpc(vp, uiop, cred)
2622 struct vnode *vp;
2623 register struct uio *uiop;
2624 struct ucred *cred;
2625 {
2626 register int len, left;
2627 register struct dirent *dp;
2628 register u_long *tl;
2629 register caddr_t cp;
2630 register long t1, t2;
2631 register struct vnode *newvp;
2632 register nfsuint64 *cookiep;
2633 caddr_t bpos, dpos, cp2, dpossav1, dpossav2;
2634 struct mbuf *mreq, *mrep, *md, *mb, *mb2, *mdsav1, *mdsav2;
2635 struct nameidata nami, *ndp = &nami;
2636 struct componentname *cnp = &ndp->ni_cnd;
2637 nfsuint64 cookie;
2638 struct nfsmount *nmp = VFSTONFS(vp->v_mount);
2639 struct nfsnode *dnp = VTONFS(vp), *np;
2640 nfsfh_t *fhp;
2641 u_quad_t fileno;
2642 int error = 0, tlen, more_dirs = 1, blksiz = 0, doit, bigenough = 1, i;
2643 int attrflag, fhsize;
2644
2645 #ifndef nolint
2646 dp = (struct dirent *)0;
2647 #endif
2648 #if DIAGNOSTIC
2649 if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) ||
2650 (uiop->uio_resid & (DIRBLKSIZ - 1)))
2651 panic("nfs_readdirplusrpc: bad uio");
2652 #endif
2653 ndp->ni_dvp = vp;
2654 newvp = NULLVP;
2655
2656 /*
2657 * If there is no cookie, assume directory was stale.
2658 */
2659 cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0);
2660 if (cookiep)
2661 cookie = *cookiep;
2662 else
2663 return (NFSERR_BAD_COOKIE);
2664 /*
2665 * Loop around doing readdir rpc's of size nm_readdirsize
2666 * truncated to a multiple of DIRBLKSIZ.
2667 	 * The stopping criterion is EOF or a full buffer.
2668 */
2669 while (more_dirs && bigenough) {
2670 nfsstats.rpccnt[NFSPROC_READDIRPLUS]++;
2671 nfsm_reqhead(vp, NFSPROC_READDIRPLUS,
2672 NFSX_FH(1) + 6 * NFSX_UNSIGNED);
2673 nfsm_fhtom(vp, 1);
2674 nfsm_build(tl, u_long *, 6 * NFSX_UNSIGNED);
2675 *tl++ = cookie.nfsuquad[0];
2676 *tl++ = cookie.nfsuquad[1];
2677 *tl++ = dnp->n_cookieverf.nfsuquad[0];
2678 *tl++ = dnp->n_cookieverf.nfsuquad[1];
2679 *tl++ = txdr_unsigned(nmp->nm_readdirsize);
2680 *tl = txdr_unsigned(nmp->nm_rsize);
2681 nfsm_request(vp, NFSPROC_READDIRPLUS, uiop->uio_procp, cred);
2682 nfsm_postop_attr(vp, attrflag);
2683 if (error) {
2684 m_freem(mrep);
2685 goto nfsmout;
2686 }
2687 nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED);
2688 dnp->n_cookieverf.nfsuquad[0] = *tl++;
2689 dnp->n_cookieverf.nfsuquad[1] = *tl++;
2690 more_dirs = fxdr_unsigned(int, *tl);
2691
2692 /* loop thru the dir entries, doctoring them to 4bsd form */
2693 while (more_dirs && bigenough) {
2694 nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED);
2695 fxdr_hyper(tl, &fileno);
2696 len = fxdr_unsigned(int, *(tl + 2));
2697 if (len <= 0 || len > NFS_MAXNAMLEN) {
2698 error = EBADRPC;
2699 m_freem(mrep);
2700 goto nfsmout;
2701 }
2702 tlen = nfsm_rndup(len);
2703 if (tlen == len)
2704 tlen += 4; /* To ensure null termination*/
2705 left = DIRBLKSIZ - blksiz;
2706 if ((tlen + DIRHDSIZ) > left) {
2707 dp->d_reclen += left;
2708 uiop->uio_iov->iov_base += left;
2709 uiop->uio_iov->iov_len -= left;
2710 uiop->uio_offset += left;
2711 uiop->uio_resid -= left;
2712 blksiz = 0;
2713 }
2714 if ((tlen + DIRHDSIZ) > uiop->uio_resid)
2715 bigenough = 0;
2716 if (bigenough) {
2717 dp = (struct dirent *)uiop->uio_iov->iov_base;
2718 dp->d_fileno = (int)fileno;
2719 dp->d_namlen = len;
2720 dp->d_reclen = tlen + DIRHDSIZ;
2721 dp->d_type = DT_UNKNOWN;
2722 blksiz += dp->d_reclen;
2723 if (blksiz == DIRBLKSIZ)
2724 blksiz = 0;
2725 uiop->uio_offset += DIRHDSIZ;
2726 uiop->uio_resid -= DIRHDSIZ;
2727 uiop->uio_iov->iov_base += DIRHDSIZ;
2728 uiop->uio_iov->iov_len -= DIRHDSIZ;
2729 cnp->cn_nameptr = uiop->uio_iov->iov_base;
2730 cnp->cn_namelen = len;
2731 nfsm_mtouio(uiop, len);
2732 cp = uiop->uio_iov->iov_base;
2733 tlen -= len;
2734 *cp = '\0';
2735 uiop->uio_iov->iov_base += tlen;
2736 uiop->uio_iov->iov_len -= tlen;
2737 uiop->uio_offset += tlen;
2738 uiop->uio_resid -= tlen;
2739 } else
2740 nfsm_adv(nfsm_rndup(len));
2741 nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED);
2742 if (bigenough) {
2743 cookie.nfsuquad[0] = *tl++;
2744 cookie.nfsuquad[1] = *tl++;
2745 } else
2746 tl += 2;
2747
2748 /*
2749 * Since the attributes are before the file handle
2750 * (sigh), we must skip over the attributes and then
2751 * come back and get them.
2752 */
2753 attrflag = fxdr_unsigned(int, *tl);
2754 if (attrflag) {
2755 dpossav1 = dpos;
2756 mdsav1 = md;
2757 nfsm_adv(NFSX_V3FATTR);
2758 nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
2759 doit = fxdr_unsigned(int, *tl);
2760 if (doit) {
2761 nfsm_getfh(fhp, fhsize, 1);
2762 if (NFS_CMPFH(dnp, fhp, fhsize)) {
2763 VREF(vp);
2764 newvp = vp;
2765 np = dnp;
2766 } else {
2767 if ((error = nfs_nget(vp->v_mount, fhp,
2768 fhsize, &np)))
2769 doit = 0;
2770 else
2771 newvp = NFSTOV(np);
2772 }
2773 }
2774 if (doit) {
2775 dpossav2 = dpos;
2776 dpos = dpossav1;
2777 mdsav2 = md;
2778 md = mdsav1;
2779 nfsm_loadattr(newvp, (struct vattr *)0);
2780 dpos = dpossav2;
2781 md = mdsav2;
2782 dp->d_type =
2783 IFTODT(VTTOIF(np->n_vattr.va_type));
2784 ndp->ni_vp = newvp;
2785 cnp->cn_hash = 0;
2786 for (cp = cnp->cn_nameptr, i = 1; i <= len;
2787 i++, cp++)
2788 cnp->cn_hash += (unsigned char)*cp * i;
2789 if (cnp->cn_namelen <= NCHNAMLEN)
2790 cache_enter(ndp->ni_dvp, ndp->ni_vp, cnp);
2791 }
2792 } else {
2793 /* Just skip over the file handle */
2794 nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
2795 i = fxdr_unsigned(int, *tl);
2796 nfsm_adv(nfsm_rndup(i));
2797 }
2798 if (newvp != NULLVP) {
2799 vrele(newvp);
2800 newvp = NULLVP;
2801 }
2802 nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
2803 more_dirs = fxdr_unsigned(int, *tl);
2804 }
2805 /*
2806 * If at end of rpc data, get the eof boolean
2807 */
2808 if (!more_dirs) {
2809 nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
2810 more_dirs = (fxdr_unsigned(int, *tl) == 0);
2811 }
2812 m_freem(mrep);
2813 }
2814 /*
2815 	 * Fill last record, if any, out to a multiple of NFS_DIRBLKSIZ
2816 * by increasing d_reclen for the last record.
2817 */
2818 if (blksiz > 0) {
2819 left = DIRBLKSIZ - blksiz;
2820 dp->d_reclen += left;
2821 uiop->uio_iov->iov_base += left;
2822 uiop->uio_iov->iov_len -= left;
2823 uiop->uio_offset += left;
2824 uiop->uio_resid -= left;
2825 }
2826
2827 /*
2828 * We are now either at the end of the directory or have filled the
2829 * block.
2830 */
2831 if (bigenough)
2832 dnp->n_direofoffset = uiop->uio_offset;
2833 else {
2834 if (uiop->uio_resid > 0)
2835 printf("EEK! readdirplusrpc resid > 0\n");
2836 cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1);
2837 *cookiep = cookie;
2838 }
2839 nfsmout:
2840 if (newvp != NULLVP) {
2841 if (newvp == vp)
2842 vrele(newvp);
2843 else
2844 vput(newvp);
2845 newvp = NULLVP;
2846 }
2847 return (error);
2848 }
2849
2850 /*
2851  * Silly rename. To make the stateless NFS filesystem look a little
2852  * more like "ufs", a remove of an active vnode is translated into a rename
2853  * to a funny looking filename that is removed later by nfs_inactive on the
2854  * nfsnode. There is the potential for another process on a different client
2855  * to create the same funny name between the time the nfs_lookitup() check
2856  * fails and the nfs_rename() completes, but...
2857 */
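/*
 * Flow sketch: a name of the form ".nfsA%04x4.4" (built from the caller's pid)
 * is probed with nfs_lookitup(), bumping the fifth character until a name is
 * found that does not already exist; the file is then renamed to it and the
 * sillyrename record is hung off np->n_sillyrename so that nfs_inactive()
 * can remove the renamed file once the last user goes away.
 */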
2858 static int
2859 nfs_sillyrename(dvp, vp, cnp)
2860 struct vnode *dvp, *vp;
2861 struct componentname *cnp;
2862 {
2863 register struct sillyrename *sp;
2864 struct nfsnode *np;
2865 int error;
2866 short pid;
2867 struct ucred *cred;
2868
2869 cache_purge(dvp);
2870 np = VTONFS(vp);
2871 #if DIAGNOSTIC
2872 if (vp->v_type == VDIR)
2873 panic("nfs_sillyrename: dir");
2874 #endif
2875 MALLOC_ZONE(sp, struct sillyrename *,
2876 sizeof (struct sillyrename), M_NFSREQ, M_WAITOK);
2877 sp->s_cred = crdup(cnp->cn_cred);
2878 sp->s_dvp = dvp;
2879 VREF(dvp);
2880
2881 /* Fudge together a funny name */
2882 pid = cnp->cn_proc->p_pid;
2883 sp->s_namlen = sprintf(sp->s_name, ".nfsA%04x4.4", pid);
2884
2885 /* Try lookitups until we get one that isn't there */
2886 while (nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
2887 cnp->cn_proc, (struct nfsnode **)0) == 0) {
2888 sp->s_name[4]++;
2889 if (sp->s_name[4] > 'z') {
2890 error = EINVAL;
2891 goto bad;
2892 }
2893 }
2894 if ((error = nfs_renameit(dvp, cnp, sp)))
2895 goto bad;
2896 error = nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
2897 cnp->cn_proc, &np);
2898 #if DIAGNOSTIC
2899 kprintf("sillyrename: %s, vp=%x, np=%x, dvp=%x\n",
2900 &sp->s_name[0], (unsigned)vp, (unsigned)np, (unsigned)dvp);
2901 #endif
2902 np->n_sillyrename = sp;
2903 return (0);
2904 bad:
2905 vrele(sp->s_dvp);
2906 cred = sp->s_cred;
2907 sp->s_cred = NOCRED;
2908 crfree(cred);
2909 _FREE_ZONE((caddr_t)sp, sizeof (struct sillyrename), M_NFSREQ);
2910 return (error);
2911 }
2912
2913 /*
2914 * Look up a file name and optionally either update the file handle or
2915 * allocate an nfsnode, depending on the value of npp.
2916 * npp == NULL --> just do the lookup
2917 * *npp == NULL --> allocate a new nfsnode and make sure attributes are
2918 * handled too
2919 * *npp != NULL --> update the file handle in the vnode
2920 */
2921 static int
2922 nfs_lookitup(dvp, name, len, cred, procp, npp)
2923 register struct vnode *dvp;
2924 char *name;
2925 int len;
2926 struct ucred *cred;
2927 struct proc *procp;
2928 struct nfsnode **npp;
2929 {
2930 register u_long *tl;
2931 register caddr_t cp;
2932 register long t1, t2;
2933 struct vnode *newvp = (struct vnode *)0;
2934 struct nfsnode *np, *dnp = VTONFS(dvp);
2935 caddr_t bpos, dpos, cp2;
2936 int error = 0, fhlen, attrflag;
2937 struct mbuf *mreq, *mrep, *md, *mb, *mb2;
2938 nfsfh_t *nfhp;
2939 int v3 = NFS_ISV3(dvp);
2940
2941 nfsstats.rpccnt[NFSPROC_LOOKUP]++;
2942 nfsm_reqhead(dvp, NFSPROC_LOOKUP,
2943 NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len));
2944 nfsm_fhtom(dvp, v3);
2945 nfsm_strtom(name, len, NFS_MAXNAMLEN);
2946 nfsm_request(dvp, NFSPROC_LOOKUP, procp, cred);
2947 if (npp && !error) {
2948 nfsm_getfh(nfhp, fhlen, v3);
2949 if (*npp) {
2950 np = *npp;
2951 if (np->n_fhsize > NFS_SMALLFH && fhlen <= NFS_SMALLFH) {
2952 _FREE_ZONE((caddr_t)np->n_fhp,
2953 np->n_fhsize, M_NFSBIGFH);
2954 np->n_fhp = &np->n_fh;
2955 } else if (np->n_fhsize <= NFS_SMALLFH && fhlen>NFS_SMALLFH)
2956 MALLOC_ZONE(np->n_fhp, nfsfh_t *,
2957 fhlen, M_NFSBIGFH, M_WAITOK);
2958 bcopy((caddr_t)nfhp, (caddr_t)np->n_fhp, fhlen);
2959 np->n_fhsize = fhlen;
2960 newvp = NFSTOV(np);
2961 } else if (NFS_CMPFH(dnp, nfhp, fhlen)) {
2962 VREF(dvp);
2963 newvp = dvp;
2964 } else {
2965 error = nfs_nget(dvp->v_mount, nfhp, fhlen, &np);
2966 if (error) {
2967 m_freem(mrep);
2968 return (error);
2969 }
2970 newvp = NFSTOV(np);
2971 }
2972 if (v3) {
2973 nfsm_postop_attr(newvp, attrflag);
2974 if (!attrflag && *npp == NULL) {
2975 m_freem(mrep);
2976 if (newvp == dvp)
2977 vrele(newvp);
2978 else
2979 vput(newvp);
2980 return (ENOENT);
2981 }
2982 } else
2983 nfsm_loadattr(newvp, (struct vattr *)0);
2984 }
2985 nfsm_reqdone;
2986 if (npp && *npp == NULL) {
2987 if (error) {
2988 if (newvp)
2989 if (newvp == dvp)
2990 vrele(newvp);
2991 else
2992 vput(newvp);
2993 } else
2994 *npp = np;
2995 }
2996 return (error);
2997 }
2998
2999 /*
3000 * Nfs Version 3 commit rpc
3001 */
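/*
 * COMMIT asks the server to flush the byte range [offset, offset + cnt) to
 * stable storage.  The reply carries the server's current write verifier; if
 * it differs from the one cached in nm_verf the server has presumably
 * rebooted since our unstable writes were sent, so NFSERR_STALEWRITEVERF is
 * returned and callers (e.g. nfs_flush) know the data must be written again
 * rather than treated as committed.
 */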
3002 static int
3003 nfs_commit(vp, offset, cnt, cred, procp)
3004 register struct vnode *vp;
3005 u_quad_t offset;
3006 int cnt;
3007 struct ucred *cred;
3008 struct proc *procp;
3009 {
3010 register caddr_t cp;
3011 register u_long *tl;
3012 register int t1, t2;
3013 register struct nfsmount *nmp = VFSTONFS(vp->v_mount);
3014 caddr_t bpos, dpos, cp2;
3015 int error = 0, wccflag = NFSV3_WCCRATTR;
3016 struct mbuf *mreq, *mrep, *md, *mb, *mb2;
3017
3018 if ((nmp->nm_flag & NFSMNT_HASWRITEVERF) == 0)
3019 return (0);
3020 nfsstats.rpccnt[NFSPROC_COMMIT]++;
3021 nfsm_reqhead(vp, NFSPROC_COMMIT, NFSX_FH(1));
3022 nfsm_fhtom(vp, 1);
3023 nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED);
3024 txdr_hyper(&offset, tl);
3025 tl += 2;
3026 *tl = txdr_unsigned(cnt);
3027 nfsm_request(vp, NFSPROC_COMMIT, procp, cred);
3028 nfsm_wcc_data(vp, wccflag);
3029 if (!error) {
3030 nfsm_dissect(tl, u_long *, NFSX_V3WRITEVERF);
3031 if (bcmp((caddr_t)nmp->nm_verf, (caddr_t)tl,
3032 NFSX_V3WRITEVERF)) {
3033 bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
3034 NFSX_V3WRITEVERF);
3035 error = NFSERR_STALEWRITEVERF;
3036 }
3037 }
3038 nfsm_reqdone;
3039 return (error);
3040 }
3041
3042 /*
3043 * Kludge City..
3044 * - make nfs_bmap() essentially a no-op that does no translation
3045 * - do nfs_strategy() by doing I/O with nfs_readrpc/nfs_writerpc
3046  * (Maybe I could use the process's page mapping, but I was concerned that
3047  * Kernel Write might not be enabled, and also figured copyout() would do
3048  * a lot more work than bcopy(), and also it currently happens in the
3049  * context of the swapper process (2).)
3050 */
3051 static int
3052 nfs_bmap(ap)
3053 struct vop_bmap_args /* {
3054 struct vnode *a_vp;
3055 daddr_t a_bn;
3056 struct vnode **a_vpp;
3057 daddr_t *a_bnp;
3058 int *a_runp;
3059 int *a_runb;
3060 } */ *ap;
3061 {
3062 register struct vnode *vp = ap->a_vp;
3063 int devBlockSize = DEV_BSIZE;
3064
3065 if (ap->a_vpp != NULL)
3066 *ap->a_vpp = vp;
3067 if (ap->a_bnp != NULL)
3068 *ap->a_bnp = ap->a_bn * btodb(vp->v_mount->mnt_stat.f_iosize,
3069 devBlockSize);
3070 if (ap->a_runp != NULL)
3071 *ap->a_runp = 0;
3072 #ifdef notyet
3073 if (ap->a_runb != NULL)
3074 *ap->a_runb = 0;
3075 #endif
3076 return (0);
3077 }
3078
3079 /*
3080 * Strategy routine.
3081 * For async requests when nfsiod(s) are running, queue the request by
3082  * calling nfs_asyncio(), otherwise just call nfs_doio() to do the
3083 * request.
3084 */
3085 static int
3086 nfs_strategy(ap)
3087 struct vop_strategy_args *ap;
3088 {
3089 register struct buf *bp = ap->a_bp;
3090 struct ucred *cr;
3091 struct proc *p;
3092 int error = 0;
3093
3094 if (ISSET(bp->b_flags, B_PHYS))
3095 panic("nfs_strategy: physio");
3096 if (ISSET(bp->b_flags, B_ASYNC))
3097 p = (struct proc *)0;
3098 else
3099 p = current_proc(); /* XXX */
3100 if (ISSET(bp->b_flags, B_READ))
3101 cr = bp->b_rcred;
3102 else
3103 cr = bp->b_wcred;
3104 /*
3105 	 * If the op is asynchronous and an i/o daemon is waiting,
3106 	 * queue the request, wake it up and wait for completion;
3107 	 * otherwise just do it ourselves.
3108 */
3109 if (!ISSET(bp->b_flags, B_ASYNC) || nfs_asyncio(bp, NOCRED))
3110 error = nfs_doio(bp, cr, p);
3111 return (error);
3112 }
3113
3114 /*
3115 * Mmap a file
3116 *
3117 * NB Currently unsupported.
3118 */
3119 /* ARGSUSED */
3120 static int
3121 nfs_mmap(ap)
3122 struct vop_mmap_args /* {
3123 struct vnode *a_vp;
3124 int a_fflags;
3125 struct ucred *a_cred;
3126 struct proc *a_p;
3127 } */ *ap;
3128 {
3129
3130 return (EINVAL);
3131 }
3132
3133 /*
3134 * fsync vnode op. Just call nfs_flush() with commit == 1.
3135 */
3136 /* ARGSUSED */
3137 static int
3138 nfs_fsync(ap)
3139 struct vop_fsync_args /* {
3140 struct vnodeop_desc *a_desc;
3141 struct vnode * a_vp;
3142 struct ucred * a_cred;
3143 int a_waitfor;
3144 struct proc * a_p;
3145 } */ *ap;
3146 {
3147
3148 return (nfs_flush(ap->a_vp, ap->a_cred, ap->a_waitfor, ap->a_p, 1));
3149 }
3150
3151 /*
3152 * Flush all the blocks associated with a vnode.
3153 * Walk through the buffer pool and push any dirty pages
3154 * associated with the vnode.
3155 */
3156 static int
3157 nfs_flush(vp, cred, waitfor, p, commit)
3158 register struct vnode *vp;
3159 struct ucred *cred;
3160 int waitfor;
3161 struct proc *p;
3162 int commit;
3163 {
3164 register struct nfsnode *np = VTONFS(vp);
3165 register struct buf *bp;
3166 register int i;
3167 struct buf *nbp;
3168 struct nfsmount *nmp = VFSTONFS(vp->v_mount);
3169 int s, error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos, err;
3170 int passone = 1;
3171 u_quad_t off, endoff, toff;
3172 struct ucred* wcred = NULL;
3173 struct buf **bvec = NULL;
3174 kern_return_t kret;
3175 upl_t *upls = NULL;
3176
3177
3178 #ifndef NFS_COMMITBVECSIZ
3179 #define NFS_COMMITBVECSIZ 20
3180 #endif
3181 struct buf *bvec_on_stack[NFS_COMMITBVECSIZ];
3182 struct upl_t *upls_on_stack[NFS_COMMITBVECSIZ];
3183 int bvecsize = 0, bveccount, buplpos;
3184
3185 if (nmp->nm_flag & NFSMNT_INT)
3186 slpflag = PCATCH;
3187 if (!commit)
3188 passone = 0;
3189
3190 /*
3191 * A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the
3192 	 * server, but has not been committed to stable storage on the server
3193 * yet. On the first pass, the byte range is worked out and the commit
3194 * rpc is done. On the second pass, nfs_writebp() is called to do the
3195 * job.
3196 */
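	/*
	 * In slightly more detail (sketch): the commit pass collects the
	 * B_DELWRI|B_NEEDCOMMIT buffers into bvec[], works out the covering
	 * byte range [off, endoff) and issues COMMIT - a single rpc when all
	 * of the buffers share the same write cred, otherwise one rpc per
	 * buffer.  Buffers whose commit succeeded are finished off with
	 * biodone(); on failure (or a stale write verifier) they are released
	 * still dirty so the write pass below pushes them with VOP_BWRITE().
	 */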
3197 again:
3198 if (vp->v_dirtyblkhd.lh_first)
3199 np->n_flag |= NMODIFIED;
3200 off = (u_quad_t)-1;
3201 endoff = 0;
3202 bvecpos = 0;
3203 buplpos = 0;
3204 if (NFS_ISV3(vp) && commit) {
3205 s = splbio();
3206 /*
3207 * Count up how many buffers waiting for a commit.
3208 		 * Count up how many buffers are waiting for a commit.
3209 bveccount = 0;
3210 for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
3211 nbp = bp->b_vnbufs.le_next;
3212 if ((bp->b_flags & (B_BUSY | B_DELWRI | B_NEEDCOMMIT))
3213 == (B_DELWRI | B_NEEDCOMMIT))
3214 bveccount++;
3215 }
3216 /*
3217 * Allocate space to remember the list of bufs to commit. It is
3218 * important to use M_NOWAIT here to avoid a race with nfs_write.
3219 * If we can't get memory (for whatever reason), we will end up
3220 * committing the buffers one-by-one in the loop below.
3221 */
3222 if (bvec != NULL && bvec != bvec_on_stack)
3223 _FREE(bvec, M_TEMP);
3224 if (upls != NULL && upls != (upl_t *) upls_on_stack)
3225 _FREE(upls, M_TEMP);
3226
3227 bvecsize = NFS_COMMITBVECSIZ;
3228 if (bveccount > NFS_COMMITBVECSIZ) {
3229 MALLOC(bvec, struct buf **,
3230 bveccount * sizeof(struct buf *), M_TEMP, M_NOWAIT);
3231 MALLOC(upls, upl_t *,
3232 bveccount * sizeof(upl_t), M_TEMP, M_NOWAIT);
3233 if ((bvec == NULL) || (upls == NULL)) {
3234 if (bvec)
3235 _FREE(bvec, M_TEMP);
3236 if (upls)
3237 _FREE(upls, M_TEMP);
3238 bvec = bvec_on_stack;
3239 upls = (upl_t *) upls_on_stack;
3240 } else
3241 bvecsize = bveccount;
3242 } else {
3243 bvec = bvec_on_stack;
3244 upls = (upl_t *) upls_on_stack;
3245 }
3246
3247 for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
3248 nbp = bp->b_vnbufs.le_next;
3249 if (bvecpos >= bvecsize)
3250 break;
3251 if ((bp->b_flags & (B_BUSY | B_DELWRI | B_NEEDCOMMIT))
3252 != (B_DELWRI | B_NEEDCOMMIT))
3253 continue;
3254 bremfree(bp);
3255 /*
3256 * Work out if all buffers are using the same cred
3257 * so we can deal with them all with one commit.
3258 */
3259 if (wcred == NULL)
3260 wcred = bp->b_wcred;
3261 else if (wcred != bp->b_wcred)
3262 wcred = NOCRED;
3263 SET(bp->b_flags, (B_BUSY | B_WRITEINPROG));
3264
3265 /*
3266 			 * we need ubc_create_upl so that if the vm decides to
3267 			 * do paging while we are waiting on the commit rpc,
3268 			 * it doesn't pick these pages.
3269 */
3270 if (!ISSET(bp->b_flags, B_PAGELIST)) {
3271 kret = ubc_create_upl(vp,
3272 ubc_blktooff(vp, bp->b_lblkno),
3273 bp->b_bufsize,
3274 &(upls[buplpos]),
3275 NULL,
3276 UPL_PRECIOUS);
3277 if (kret != KERN_SUCCESS)
3278 panic("nfs_getcacheblk: get pagelists failed with (%d)", kret);
3279
3280 #ifdef UBC_DEBUG
3281 upl_ubc_alias_set(upls[buplpos], ioaddr, 1);
3282 #endif /* UBC_DEBUG */
3283 buplpos++; /* not same as bvecpos if upl existed already */
3284 }
3285
3286 /*
3287 * A list of these buffers is kept so that the
3288 * second loop knows which buffers have actually
3289 * been committed. This is necessary, since there
3290 * may be a race between the commit rpc and new
3291 * uncommitted writes on the file.
3292 */
3293 bvec[bvecpos++] = bp;
3294 toff = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
3295 bp->b_dirtyoff;
3296 if (toff < off)
3297 off = toff;
3298 toff += (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff);
3299 if (toff > endoff)
3300 endoff = toff;
3301 }
3302 splx(s);
3303 }
3304 if (bvecpos > 0) {
3305 /*
3306 * Commit data on the server, as required.
3307 * If all bufs are using the same wcred, then use that with
3308 * one call for all of them, otherwise commit each one
3309 * separately.
3310 */
3311 if (wcred != NOCRED)
3312 retv = nfs_commit(vp, off, (int)(endoff - off),
3313 wcred, p);
3314 else {
3315 retv = 0;
3316 for (i = 0; i < bvecpos; i++) {
3317 off_t off, size;
3318 bp = bvec[i];
3319 off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
3320 bp->b_dirtyoff;
3321 size = (u_quad_t)(bp->b_dirtyend
3322 - bp->b_dirtyoff);
3323 retv = nfs_commit(vp, off, (int)size,
3324 bp->b_wcred, p);
3325 if (retv) break;
3326 }
3327 }
3328
3329 if (retv == NFSERR_STALEWRITEVERF)
3330 nfs_clearcommit(vp->v_mount);
3331
3332 for (i = 0; i < buplpos; i++) {
3333 /*
3334 * Before the VOP_BWRITE and biodone(ASYNC)/brelse, we have to undo
3335 			 * holding the vm page or we will deadlock on another vm_fault_list_request.
3336 * Here's a convenient place to put it.
3337 * Better if we could hold it by setting the PAGELIST flag and kernel_upl_map
3338 * as does nfs_writebp. Then normal biodones and brelse will clean it up and
3339 * we can avoid this abort. For now make minimal changes.
3340 */
3341 err = ubc_upl_abort(upls[i], NULL);
3342 if (err)
3343 printf("nfs_flush: kernel_upl_abort %d\n", err);
3344 }
3345
3346
3347 /*
3348 * Now, either mark the blocks I/O done or mark the
3349 * blocks dirty, depending on whether the commit
3350 * succeeded.
3351 */
3352 for (i = 0; i < bvecpos; i++) {
3353
3354 bp = bvec[i];
3355 CLR(bp->b_flags, (B_NEEDCOMMIT | B_WRITEINPROG));
3356 if (retv) {
3357 brelse(bp);
3358 } else {
3359 vp->v_numoutput++;
3360 SET(bp->b_flags, B_ASYNC);
3361 s = splbio();
3362 CLR(bp->b_flags, (B_READ|B_DONE|B_ERROR|B_DELWRI));
3363 bp->b_dirtyoff = bp->b_dirtyend = 0;
3364 reassignbuf(bp, vp);
3365 splx(s);
3366 biodone(bp);
3367 }
3368 }
3369
3370 }
3371
3372 /*
3373 * Start/do any write(s) that are required.
3374 * There is a window here where B_BUSY protects the buffer. The vm pages have been
3375 * freed up, yet B_BUSY is set. Don't think you will hit any busy/incore problems while
3376 * we sleep, but not absolutely sure. Keep an eye on it. Otherwise we will have to hold
3377 	 * the vm page across this lock. - EKN
3378 */
3379 loop:
3380 if (current_thread_aborted()) {
3381 error = EINTR;
3382 goto done;
3383 }
3384 s = splbio();
3385 for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
3386 nbp = bp->b_vnbufs.le_next;
3387 if (ISSET(bp->b_flags, B_BUSY)) {
3388 if (waitfor != MNT_WAIT || passone)
3389 continue;
3390 SET(bp->b_flags, B_WANTED);
3391 error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1),
3392 "nfsfsync", slptimeo);
3393 splx(s);
3394 if (error) {
3395 if (nfs_sigintr(nmp, (struct nfsreq *)0, p)) {
3396 error = EINTR;
3397 goto done;
3398 }
3399 if (slpflag == PCATCH) {
3400 slpflag = 0;
3401 slptimeo = 2 * hz;
3402 }
3403 }
3404 goto loop;
3405 }
3406 if (!ISSET(bp->b_flags, B_DELWRI))
3407 panic("nfs_fsync: not dirty");
3408 if ((passone || !commit) && ISSET(bp->b_flags, B_NEEDCOMMIT))
3409 continue;
3410 bremfree(bp);
3411 if (passone || !commit)
3412 SET(bp->b_flags, (B_BUSY|B_ASYNC));
3413 else
3414 SET(bp->b_flags, (B_BUSY|B_ASYNC|B_WRITEINPROG|B_NEEDCOMMIT));
3415
3416 splx(s);
3417 VOP_BWRITE(bp);
3418 goto loop;
3419 }
3420 splx(s);
3421 if (passone) {
3422 passone = 0;
3423 goto again;
3424 }
3425 if (waitfor == MNT_WAIT) {
3426 while (vp->v_numoutput) {
3427 vp->v_flag |= VBWAIT;
3428 error = tsleep((caddr_t)&vp->v_numoutput,
3429 slpflag | (PRIBIO + 1), "nfsfsync", slptimeo);
3430 if (error) {
3431 if (nfs_sigintr(nmp, (struct nfsreq *)0, p)) {
3432 error = EINTR;
3433 goto done;
3434 }
3435 if (slpflag == PCATCH) {
3436 slpflag = 0;
3437 slptimeo = 2 * hz;
3438 }
3439 }
3440 }
3441 if (vp->v_dirtyblkhd.lh_first && commit) {
3442 goto loop;
3443 }
3444 }
3445 if (np->n_flag & NWRITEERR) {
3446 error = np->n_error;
3447 np->n_flag &= ~NWRITEERR;
3448 }
3449 done:
3450 if (bvec != NULL && bvec != bvec_on_stack)
3451 _FREE(bvec, M_TEMP);
3452 if (upls != NULL && upls != (upl_t *) upls_on_stack)
3453 _FREE(upls, M_TEMP);
3454 return (error);
3455 }
3456
3457 /*
3458 * Return POSIX pathconf information applicable to nfs.
3459 *
3460 * The NFS V2 protocol doesn't support this, so just return EINVAL
3461 * for V2.
3462 */
3463 /* ARGSUSED */
3464 static int
3465 nfs_pathconf(ap)
3466 struct vop_pathconf_args /* {
3467 struct vnode *a_vp;
3468 int a_name;
3469 int *a_retval;
3470 } */ *ap;
3471 {
3472
3473 return (EINVAL);
3474 }
3475
3476 /*
3477 * NFS advisory byte-level locks.
3478 * Currently unsupported.
3479 */
3480 static int
3481 nfs_advlock(ap)
3482 struct vop_advlock_args /* {
3483 struct vnode *a_vp;
3484 caddr_t a_id;
3485 int a_op;
3486 struct flock *a_fl;
3487 int a_flags;
3488 } */ *ap;
3489 {
3490 #ifdef __FreeBSD__
3491 register struct nfsnode *np = VTONFS(ap->a_vp);
3492
3493 /*
3494 * The following kludge is to allow diskless support to work
3495 * until a real NFS lockd is implemented. Basically, just pretend
3496 * that this is a local lock.
3497 */
3498 return (lf_advlock(ap, &(np->n_lockf), np->n_size));
3499 #else
3500 #if DIAGNOSTIC
3501 printf("nfs_advlock: pid %d comm %s\n", current_proc()->p_pid, current_proc()->p_comm);
3502 #endif
3503 return (EOPNOTSUPP);
3504 #endif
3505 }
3506
3507 /*
3508 * Print out the contents of an nfsnode.
3509 */
3510 static int
3511 nfs_print(ap)
3512 struct vop_print_args /* {
3513 struct vnode *a_vp;
3514 } */ *ap;
3515 {
3516 register struct vnode *vp = ap->a_vp;
3517 register struct nfsnode *np = VTONFS(vp);
3518
3519 printf("tag VT_NFS, fileid %ld fsid 0x%lx",
3520 np->n_vattr.va_fileid, np->n_vattr.va_fsid);
3521 if (vp->v_type == VFIFO)
3522 fifo_printinfo(vp);
3523 printf("\n");
3524 return (0);
3525 }
3526
3527 /*
3528 * NFS directory offset lookup.
3529 * Currently unsupported.
3530 */
3531 static int
3532 nfs_blkatoff(ap)
3533 struct vop_blkatoff_args /* {
3534 struct vnode *a_vp;
3535 off_t a_offset;
3536 char **a_res;
3537 struct buf **a_bpp;
3538 } */ *ap;
3539 {
3540
3541 #if DIAGNOSTIC
3542 printf("nfs_blkatoff: unimplemented!!");
3543 #endif
3544 return (EOPNOTSUPP);
3545 }
3546
3547 /*
3548 * NFS flat namespace allocation.
3549 * Currently unsupported.
3550 */
3551 static int
3552 nfs_valloc(ap)
3553 struct vop_valloc_args /* {
3554 struct vnode *a_pvp;
3555 int a_mode;
3556 struct ucred *a_cred;
3557 struct vnode **a_vpp;
3558 } */ *ap;
3559 {
3560
3561 return (EOPNOTSUPP);
3562 }
3563
3564 /*
3565 * NFS flat namespace free.
3566 * Currently unsupported.
3567 */
3568 static int
3569 nfs_vfree(ap)
3570 struct vop_vfree_args /* {
3571 struct vnode *a_pvp;
3572 ino_t a_ino;
3573 int a_mode;
3574 } */ *ap;
3575 {
3576
3577 #if DIAGNOSTIC
3578 printf("nfs_vfree: unimplemented!!\n");
3579 #endif
3580 return (EOPNOTSUPP);
3581 }
3582
3583 /*
3584 * NFS file truncation.
3585 */
3586 static int
3587 nfs_truncate(ap)
3588 struct vop_truncate_args /* {
3589 struct vnode *a_vp;
3590 off_t a_length;
3591 int a_flags;
3592 struct ucred *a_cred;
3593 struct proc *a_p;
3594 } */ *ap;
3595 {
3596
3597 /* Use nfs_setattr */
3598 #if DIAGNOSTIC
3599 printf("nfs_truncate: unimplemented!!\n");
3600 #endif
3601 return (EOPNOTSUPP);
3602 }
3603
3604 /*
3605 * NFS update.
3606 */
3607 static int
3608 nfs_update(ap)
3609 struct vop_update_args /* {
3610 struct vnode *a_vp;
3611 struct timeval *a_ta;
3612 struct timeval *a_tm;
3613 int a_waitfor;
3614 } */ *ap;
3615 {
3616
3617 /* Use nfs_setattr */
3618 #if DIAGNOSTIC
3619 printf("nfs_update: unimplemented!!\n");
3620 #endif
3621 return (EOPNOTSUPP);
3622 }
3623
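/*
 * Async-write handoff machinery: when aio threads are running, nfs_bwrite()
 * queues B_ASYNC buffers on nfs_aio_bufq rather than writing them from the
 * calling thread, and the nfs_aio threads drain the queue via nfs_writebp().
 * nfs_aio_slock protects both the queue and its length count.
 */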
3624 int nfs_aio_threads = 0; /* 1 per nfsd (arbitrary) */
3625 struct slock nfs_aio_slock;
3626 TAILQ_HEAD(bqueues, buf) nfs_aio_bufq;
3627 int nfs_aio_bufq_len = 0; /* diagnostic only */
3628
3629 void
3630 nfs_aio_thread()
3631 { /* see comment below in nfs_bwrite() for some rationale */
3632 struct buf *bp;
3633 boolean_t funnel_state;
3634
3635 funnel_state = thread_funnel_set(kernel_flock, TRUE);
3636 for(;;) {
3637 simple_lock(&nfs_aio_slock);
3638 if ((bp = nfs_aio_bufq.tqh_first)) {
3639 TAILQ_REMOVE(&nfs_aio_bufq, bp, b_freelist);
3640 nfs_aio_bufq_len--;
3641 simple_unlock(&nfs_aio_slock);
3642 nfs_writebp(bp, 1);
3643 } else { /* nothing to do - goodnight */
3644 assert_wait(&nfs_aio_bufq, THREAD_UNINT);
3645 simple_unlock(&nfs_aio_slock);
3646 (void)tsleep((caddr_t)0, PRIBIO+1, "nfs_aio_bufq", 0);
3647 }
3648 }
3649 (void) thread_funnel_set(kernel_flock, FALSE);
3650 }
3651
3652
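/*
 * Start one async-write helper thread.  The first call also initializes
 * the handoff queue and its simple lock.
 */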
3653 void
3654 nfs_aio_thread_init()
3655 {
3656 if (nfs_aio_threads++ == 0) {
3657 simple_lock_init(&nfs_aio_slock);
3658 TAILQ_INIT(&nfs_aio_bufq);
3659 }
3660 kernel_thread(kernel_task, nfs_aio_thread);
3661 }
3662
3663
3664 /*
3665 * Just call nfs_writebp() with the force argument set to 1.
3666 */
3667 static int
3668 nfs_bwrite(ap)
3669 struct vop_bwrite_args /* {
3670 struct vnode *a_bp;
3671 } */ *ap;
3672 {
3673 extern void wakeup_one(caddr_t chan);
3674
3675 /*
3676 * nfs_writebp() issues a synchronous rpc, so when B_ASYNC is set
3677 * we hand the write off to the nfs_aio threads to avoid
3678 * distributed deadlocks. Doing so allows us to complete the
3679 * current request, rather than blocking on a server which may
3680 * be ourself (or blocked on ourself).
3681 *
3682 * Note the loopback deadlocks happened when the thread
3683 * invoking us was nfsd, and also when it was the pagedaemon.
3684 *
3685 * This solution has one known problem. If *ALL* buffers get
3686 * on the nfs_aio queue then no forward progress can be made
3687 * until one of those writes completes. And if the current
3688 * nfs_aio writes-in-progress block due to a non-responsive server, we
3689 * are in a deadlock cycle. Probably the cure is to limit the
3690 * async write concurrency in getnewbuf as in FreeBSD 3.2.
3691 */
3692 if (nfs_aio_threads && ISSET(ap->a_bp->b_flags, B_ASYNC)) {
3693 simple_lock(&nfs_aio_slock);
3694 nfs_aio_bufq_len++;
3695 TAILQ_INSERT_TAIL(&nfs_aio_bufq, ap->a_bp, b_freelist);
3696 simple_unlock(&nfs_aio_slock);
3697 wakeup_one((caddr_t)&nfs_aio_bufq);
3698 return (0);
3699 }
3700 return (nfs_writebp(ap->a_bp, 1));
3701 }
3702
3703 /*
3704 * This is a clone of vn_bwrite(), except that B_WRITEINPROG isn't set unless
3705 * the force flag is one and it also handles the B_NEEDCOMMIT flag.
3706 */
3707 int
3708 nfs_writebp(bp, force)
3709 register struct buf *bp;
3710 int force;
3711 {
3712 int s;
3713 register int oldflags = bp->b_flags, retv = 1;
3714 off_t off;
3715 upl_t upl;
3716 kern_return_t kret;
3717 struct vnode *vp = bp->b_vp;
3718 upl_page_info_t *pl;
3719
3720 if(!ISSET(bp->b_flags, B_BUSY))
3721 panic("nfs_writebp: buffer is not busy???");
3722
3723 s = splbio();
3724 CLR(bp->b_flags, (B_READ|B_DONE|B_ERROR|B_DELWRI));
3725
3726 if (ISSET(oldflags, (B_ASYNC|B_DELWRI))) {
3727 reassignbuf(bp, vp);
3728 }
3729
3730 vp->v_numoutput++;
3731 current_proc()->p_stats->p_ru.ru_oublock++;
3732 splx(s);
3733
3734 /*
3735 * Since the B_BUSY flag is set, we need to lock the page before doing nfs_commit.
3736 * Otherwise we may block and end up with busy incore pages during a vm pageout.
3737 * That is why the page-list setup is done here, before the commit.
3738 */
3739
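/*
 * If the buffer has no page list yet, create a UPL for its pages and
 * map it so the data is addressable for the commit/write below.
 */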
3740 if (!ISSET(bp->b_flags, B_META) && UBCISVALID(vp)) {
3741
3742 if (!ISSET(bp->b_flags, B_PAGELIST)) {
3743 kret = ubc_create_upl(vp,
3744 ubc_blktooff(vp, bp->b_lblkno),
3745 bp->b_bufsize,
3746 &upl,
3747 &pl,
3748 UPL_PRECIOUS);
3749 if (kret != KERN_SUCCESS) {
3750 panic("nfs_writebp: get pagelists failed with (%d)", kret);
3751 }
3752
3753 #ifdef UBC_DEBUG
3754 upl_ubc_alias_set(upl, ioaddr, 2);
3755 #endif /* UBC_DEBUG */
3756
3757 s = splbio();
3758
3759 bp->b_pagelist = upl;
3760 SET(bp->b_flags, B_PAGELIST);
3761 splx(s);
3762
3763 kret = ubc_upl_map(upl, (vm_address_t *)&(bp->b_data));
3764 if (kret != KERN_SUCCESS) {
3765 panic("nfs_writebp: ubc_upl_map() failed with (%d)", kret);
3766 }
3767 if(bp->b_data == 0)
3768 panic("nfs_writebp: upl_map mapped 0");
3769
3770 if (!upl_page_present(pl, 0)) {
3771 /*
3772 * The page may have been paged out, but it is marked
3773 * busy, so no one should be yanking it out from
3774 * underneath the file I/O. A missing page here is
3775 * unexpected, so treat it as fatal.
3776 */
3777 panic("nfs_writebp: nopage");
3778 }
3779 }
3780 }
3781
3782 /*
3783 * If B_NEEDCOMMIT is set, a commit rpc may do the trick. If not,
3784 * an actual write will have to be scheduled via VOP_STRATEGY().
3785 * If B_WRITEINPROG is already set, then push it with a write anyhow.
3786 */
3787 if ((oldflags & (B_NEEDCOMMIT | B_WRITEINPROG)) == B_NEEDCOMMIT) {
3788 off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff;
3789 SET(bp->b_flags, B_WRITEINPROG);
3790 retv = nfs_commit(vp, off, bp->b_dirtyend-bp->b_dirtyoff,
3791 bp->b_wcred, bp->b_proc);
3792 CLR(bp->b_flags, B_WRITEINPROG);
3793 if (!retv) {
3794 bp->b_dirtyoff = bp->b_dirtyend = 0;
3795 CLR(bp->b_flags, B_NEEDCOMMIT);
3796 biodone(bp); /* on B_ASYNC will brelse the buffer */
3797
3798 } else if (retv == NFSERR_STALEWRITEVERF)
3799 nfs_clearcommit(vp->v_mount);
3800 }
3801 if (retv) {
3802 if (force)
3803 SET(bp->b_flags, B_WRITEINPROG);
3804 VOP_STRATEGY(bp);
3805 }
3806
3807 if( (oldflags & B_ASYNC) == 0) {
3808 int rtval = biowait(bp);
3809
3810 if (oldflags & B_DELWRI) {
3811 s = splbio();
3812 reassignbuf(bp, vp);
3813 splx(s);
3814 }
3815 brelse(bp);
3816 return (rtval);
3817 }
3818
3819 return (0);
3820 }
3821
3822 /*
3823 * nfs special file access vnode op.
3824 * Essentially just get vattr and then imitate iaccess() since the device is
3825 * local to the client.
3826 */
3827 static int
3828 nfsspec_access(ap)
3829 struct vop_access_args /* {
3830 struct vnode *a_vp;
3831 int a_mode;
3832 struct ucred *a_cred;
3833 struct proc *a_p;
3834 } */ *ap;
3835 {
3836 register struct vattr *vap;
3837 register gid_t *gp;
3838 register struct ucred *cred = ap->a_cred;
3839 struct vnode *vp = ap->a_vp;
3840 mode_t mode = ap->a_mode;
3841 struct vattr vattr;
3842 register int i;
3843 int error;
3844
3845 /*
3846 * Disallow write attempts on filesystems mounted read-only;
3847 * unless the file is a socket, fifo, or a block or character
3848 * device resident on the filesystem.
3849 */
3850 if ((mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) {
3851 switch (vp->v_type) {
3852 case VREG: case VDIR: case VLNK:
3853 return (EROFS);
3854 }
3855 }
3856 /*
3857 * If you're the super-user,
3858 * you always get access.
3859 */
3860 if (cred->cr_uid == 0)
3861 return (0);
3862 vap = &vattr;
3863 error = VOP_GETATTR(vp, vap, cred, ap->a_p);
3864 if (error)
3865 return (error);
3866 /*
3867 * Access check is based on only one of owner, group, public.
3868 * If not owner, then check group. If not a member of the
3869 * group, then check public access.
3870 */
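/*
 * mode arrives with the owner-position VREAD/VWRITE/VEXEC bits; each
 * ">>= 3" below shifts the request down to the group and then the
 * "other" bits of va_mode.
 */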
3871 if (cred->cr_uid != vap->va_uid) {
3872 mode >>= 3;
3873 gp = cred->cr_groups;
3874 for (i = 0; i < cred->cr_ngroups; i++, gp++)
3875 if (vap->va_gid == *gp)
3876 goto found;
3877 mode >>= 3;
3878 found:
3879 ;
3880 }
3881 error = (vap->va_mode & mode) == mode ? 0 : EACCES;
3882 return (error);
3883 }
3884
3885 /*
3886 * Read wrapper for special devices.
3887 */
3888 static int
3889 nfsspec_read(ap)
3890 struct vop_read_args /* {
3891 struct vnode *a_vp;
3892 struct uio *a_uio;
3893 int a_ioflag;
3894 struct ucred *a_cred;
3895 } */ *ap;
3896 {
3897 register struct nfsnode *np = VTONFS(ap->a_vp);
3898
3899 /*
3900 * Set access flag.
3901 */
3902 np->n_flag |= NACC;
3903 np->n_atim.tv_sec = time.tv_sec;
3904 np->n_atim.tv_nsec = time.tv_usec * 1000;
3905 return (VOCALL(spec_vnodeop_p, VOFFSET(vop_read), ap));
3906 }
3907
3908 /*
3909 * Write wrapper for special devices.
3910 */
3911 static int
3912 nfsspec_write(ap)
3913 struct vop_write_args /* {
3914 struct vnode *a_vp;
3915 struct uio *a_uio;
3916 int a_ioflag;
3917 struct ucred *a_cred;
3918 } */ *ap;
3919 {
3920 register struct nfsnode *np = VTONFS(ap->a_vp);
3921
3922 /*
3923 * Set update flag.
3924 */
3925 np->n_flag |= NUPD;
3926 np->n_mtim.tv_sec = time.tv_sec;
3927 np->n_mtim.tv_nsec = time.tv_usec * 1000;
3928 return (VOCALL(spec_vnodeop_p, VOFFSET(vop_write), ap));
3929 }
3930
3931 /*
3932 * Close wrapper for special devices.
3933 *
3934 * Update the times on the nfsnode then do device close.
3935 */
3936 static int
3937 nfsspec_close(ap)
3938 struct vop_close_args /* {
3939 struct vnode *a_vp;
3940 int a_fflag;
3941 struct ucred *a_cred;
3942 struct proc *a_p;
3943 } */ *ap;
3944 {
3945 register struct vnode *vp = ap->a_vp;
3946 register struct nfsnode *np = VTONFS(vp);
3947 struct vattr vattr;
3948
3949 if (np->n_flag & (NACC | NUPD)) {
3950 np->n_flag |= NCHG;
3951 if (vp->v_usecount == 1 &&
3952 (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
3953 VATTR_NULL(&vattr);
3954 if (np->n_flag & NACC)
3955 vattr.va_atime = np->n_atim;
3956 if (np->n_flag & NUPD)
3957 vattr.va_mtime = np->n_mtim;
3958 (void)VOP_SETATTR(vp, &vattr, ap->a_cred, ap->a_p);
3959 }
3960 }
3961 return (VOCALL(spec_vnodeop_p, VOFFSET(vop_close), ap));
3962 }
3963
3964 /*
3965 * Read wrapper for fifos.
3966 */
3967 static int
3968 nfsfifo_read(ap)
3969 struct vop_read_args /* {
3970 struct vnode *a_vp;
3971 struct uio *a_uio;
3972 int a_ioflag;
3973 struct ucred *a_cred;
3974 } */ *ap;
3975 {
3976 extern vop_t **fifo_vnodeop_p;
3977 register struct nfsnode *np = VTONFS(ap->a_vp);
3978
3979 /*
3980 * Set access flag.
3981 */
3982 np->n_flag |= NACC;
3983 np->n_atim.tv_sec = time.tv_sec;
3984 np->n_atim.tv_nsec = time.tv_usec * 1000;
3985 return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_read), ap));
3986 }
3987
3988 /*
3989 * Write wrapper for fifos.
3990 */
3991 static int
3992 nfsfifo_write(ap)
3993 struct vop_write_args /* {
3994 struct vnode *a_vp;
3995 struct uio *a_uio;
3996 int a_ioflag;
3997 struct ucred *a_cred;
3998 } */ *ap;
3999 {
4000 extern vop_t **fifo_vnodeop_p;
4001 register struct nfsnode *np = VTONFS(ap->a_vp);
4002
4003 /*
4004 * Set update flag.
4005 */
4006 np->n_flag |= NUPD;
4007 np->n_mtim.tv_sec = time.tv_sec;
4008 np->n_mtim.tv_nsec = time.tv_usec * 1000;
4009 return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_write), ap));
4010 }
4011
4012 /*
4013 * Close wrapper for fifos.
4014 *
4015 * Update the times on the nfsnode then do fifo close.
4016 */
4017 static int
4018 nfsfifo_close(ap)
4019 struct vop_close_args /* {
4020 struct vnode *a_vp;
4021 int a_fflag;
4022 struct ucred *a_cred;
4023 struct proc *a_p;
4024 } */ *ap;
4025 {
4026 register struct vnode *vp = ap->a_vp;
4027 register struct nfsnode *np = VTONFS(vp);
4028 struct vattr vattr;
4029 extern vop_t **fifo_vnodeop_p;
4030
4031 if (np->n_flag & (NACC | NUPD)) {
4032 if (np->n_flag & NACC) {
4033 np->n_atim.tv_sec = time.tv_sec;
4034 np->n_atim.tv_nsec = time.tv_usec * 1000;
4035 }
4036 if (np->n_flag & NUPD) {
4037 np->n_mtim.tv_sec = time.tv_sec;
4038 np->n_mtim.tv_nsec = time.tv_usec * 1000;
4039 }
4040 np->n_flag |= NCHG;
4041 if (vp->v_usecount == 1 &&
4042 (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
4043 VATTR_NULL(&vattr);
4044 if (np->n_flag & NACC)
4045 vattr.va_atime = np->n_atim;
4046 if (np->n_flag & NUPD)
4047 vattr.va_mtime = np->n_mtim;
4048 (void)VOP_SETATTR(vp, &vattr, ap->a_cred, ap->a_p);
4049 }
4050 }
4051 return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_close), ap));
4052 }
4053
4054 static int
4055 nfs_ioctl(ap)
4056 struct vop_ioctl_args *ap;
4057 {
4058
4059 /*
4060 * XXX this entry was once bogusly enoioctl(), which returned this (ENOTTY).
4061 * Probably we should return ENODEV.
4062 */
4063 return (ENOTTY);
4064 }
4065
4066 static int
4067 nfs_select(ap)
4068 struct vop_select_args *ap;
4069 {
4070
4071 /*
4072 * This entry was once bogusly seltrue(), which returns 1. Is this right?
4073 */
4074 return (1);
4075 }
4076
4077 /* XXX Eliminate use of struct bp here */
4078 /*
4079 * Vnode op for pagein using getblk_pages
4080 * derived from nfs_bioread()
4081 * No read aheads are started from pagein operation
4082 */
4083 static int
4084 nfs_pagein(ap)
4085 struct vop_pagein_args /* {
4086 struct vnode *a_vp,
4087 upl_t a_pl,
4088 vm_offset_t a_pl_offset,
4089 off_t a_f_offset,
4090 size_t a_size,
4091 struct ucred *a_cred,
4092 int a_flags
4093 } */ *ap;
4094 {
4095 register struct vnode *vp = ap->a_vp;
4096 upl_t pl = ap->a_pl;
4097 size_t size = ap->a_size;
4098 off_t f_offset = ap->a_f_offset;
4099 vm_offset_t pl_offset = ap->a_pl_offset;
4100 int flags = ap->a_flags;
4101 struct ucred *cred;
4102 register struct nfsnode *np = VTONFS(vp);
4103 register int biosize;
4104 register int xsize;
4105 struct vattr vattr;
4106 struct proc *p = current_proc();
4107 struct nfsmount *nmp = VFSTONFS(vp->v_mount);
4108 int error = 0;
4109 vm_offset_t ioaddr;
4110 struct uio auio;
4111 struct iovec aiov;
4112 struct uio * uio = &auio;
4113 int nocommit = flags & UPL_NOCOMMIT;
4114
4115 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 322)) | DBG_FUNC_NONE,
4116 (int)f_offset, size, pl, pl_offset, 0);
4117
4118 if (UBCINVALID(vp)) {
4119 #if DIAGNOSTIC
4120 panic("nfs_pagein: invalid vp");
4121 #endif /* DIAGNOSTIC */
4122 return (EPERM);
4123 }
4124
4125 UBCINFOCHECK("nfs_pagein", vp);
4126 if(pl == (upl_t)NULL) {
4127 panic("nfs_pagein: no upl");
4128 }
4129
4130 cred = ubc_getcred(vp);
4131 if (cred == NOCRED)
4132 cred = ap->a_cred;
4133
4134 if (size <= 0)
4135 return (EINVAL);
4136
4137 if (f_offset < 0 || f_offset >= np->n_size
4138 || (f_offset & PAGE_MASK_64)) {
4139 if (!nocommit)
4140 ubc_upl_abort_range(pl, pl_offset, size,
4141 UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY);
4142 return (EINVAL);
4143 }
4144
4145 auio.uio_iov = &aiov;
4146 auio.uio_iovcnt = 1;
4147 auio.uio_offset = f_offset;
4148 auio.uio_segflg = UIO_SYSSPACE;
4149 auio.uio_rw = UIO_READ;
4150 auio.uio_procp = NULL;
4151
4152
4153 if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3)
4154 (void)nfs_fsinfo(nmp, vp, cred, p);
4155 biosize = min(vp->v_mount->mnt_stat.f_iosize, size);
4156
4157 if (biosize & PAGE_MASK)
4158 panic("nfs_pagein(%x): biosize not page aligned", biosize);
4159
4160 #if 0 /* Why bother? */
4161 /* DO NOT BOTHER WITH "approximately maintained cache consistency" */
4162 /* Does not make sense in paging paths -- Umesh*/
4163 /*
4164 * For nfs, cache consistency can only be maintained approximately.
4165 * Although RFC1094 does not specify the criteria, the following is
4166 * believed to be compatible with the reference port.
4167 * For nqnfs, full cache consistency is maintained within the loop.
4168 * For nfs:
4169 * If the file's modify time on the server has changed since the
4170 * last read rpc or you have written to the file,
4171 * you may have lost data cache consistency with the
4172 * server, so flush all of the file's data out of the cache.
4173 * Then force a getattr rpc to ensure that you have up to date
4174 * attributes.
4175 * NB: This implies that cache data can be read when up to
4176 * NFS_ATTRTIMEO seconds out of date. If you find that you need current
4177 * attributes this could be forced by setting n_attrstamp to 0 before
4178 * the VOP_GETATTR() call.
4179 */
4180 if ((nmp->nm_flag & NFSMNT_NQNFS) == 0) {
4181 if (np->n_flag & NMODIFIED) {
4182 np->n_attrstamp = 0;
4183 error = VOP_GETATTR(vp, &vattr, cred, p);
4184 if (error) {
4185 if (!nocommit)
4186 ubc_upl_abort_range(pl, pl_offset, size,
4187 UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY);
4188 return (error);
4189 }
4190 np->n_mtime = vattr.va_mtime.tv_sec;
4191 } else {
4192 error = VOP_GETATTR(vp, &vattr, cred, p);
4193 if (error){
4194 if (!nocommit)
4195 ubc_upl_abort_range(pl, pl_offset, size,
4196 UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY);
4197 return (error);
4198 }
4199 if (np->n_mtime != vattr.va_mtime.tv_sec) {
4200 error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
4201 if (error){
4202 if (!nocommit)
4203 ubc_upl_abort_range(pl, pl_offset, size,
4204 UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY);
4205 return (error);
4206 }
4207 np->n_mtime = vattr.va_mtime.tv_sec;
4208 }
4209 }
4210 }
4211 #endif /* 0 - Why bother? */
4212
4213 ubc_upl_map(pl, &ioaddr);
4214 ioaddr += pl_offset;
4215 xsize = size;
4216
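/*
 * Read the request in biosize chunks directly into the mapped UPL.
 * A short read (a hole in the file, or EOF) is zero-filled below before
 * the pages are committed back to the VM page cache.
 */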
4217 do {
4218 uio->uio_resid = min(biosize, xsize);
4219 aiov.iov_len = uio->uio_resid;
4220 aiov.iov_base = (caddr_t)ioaddr;
4221
4222 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 322)) | DBG_FUNC_NONE,
4223 (int)uio->uio_offset, uio->uio_resid, ioaddr, xsize, 0);
4224
4225 #warning nfs_pagein does not support NQNFS yet.
4226 #if 0 /* why bother? */
4227 /* NO RESOURCES TO FIX NQNFS CASE */
4228 /* We need to deal with this later -- Umesh */
4229 /*
4230 * Get a valid lease. If cached data is stale, flush it.
4231 */
4232 if (nmp->nm_flag & NFSMNT_NQNFS) {
4233 if (NQNFS_CKINVALID(vp, np, ND_READ)) {
4234 do {
4235 error = nqnfs_getlease(vp, ND_READ, cred, p);
4236 } while (error == NQNFS_EXPIRED);
4237 if (error){
4238 ubc_upl_unmap(pl);
4239 if (!nocommit)
4240 ubc_upl_abort_range(pl, pl_offset, size,
4241 UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY);
4242
4243 return (error);
4244 }
4245 if (np->n_lrev != np->n_brev ||
4246 (np->n_flag & NQNFSNONCACHE)) {
4247 error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
4248 if (error) {
4249 ubc_upl_unmap(pl);
4250 if (!nocommit)
4251 ubc_upl_abort_range(pl, pl_offset, size,
4252 UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY);
4253 return (error);
4254 }
4255 np->n_brev = np->n_lrev;
4256 }
4257 }
4258 }
4259 #endif /* 0 - why bother? */
4260
4261 if (np->n_flag & NQNFSNONCACHE) {
4262 error = nfs_readrpc(vp, uio, cred);
4263 ubc_upl_unmap(pl);
4264
4265 if (!nocommit) {
4266 if(error)
4267 ubc_upl_abort_range(pl, pl_offset, size,
4268 UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY);
4269 else
4270 ubc_upl_commit_range(pl, pl_offset, size,
4271 UPL_COMMIT_CLEAR_DIRTY | UPL_COMMIT_FREE_ON_EMPTY);
4272 }
4273 return (error);
4274 }
4275
4276 /*
4277 * With UBC we get here only when the file data is not in the VM
4278 * page cache, so go ahead and read in.
4279 */
4280 #ifdef UBC_DEBUG
4281 upl_ubc_alias_set(pl, ioaddr, 2);
4282 #endif /* UBC_DEBUG */
4283 nfsstats.pageins++;
4284 error = nfs_readrpc(vp, uio, cred);
4285
4286 if (!error) {
4287 int zoff;
4288 int zcnt;
4289
4290 if (uio->uio_resid) {
4291 /*
4292 * If uio_resid > 0, there is a hole in the file and
4293 * no writes after the hole have been pushed to
4294 * the server yet... or we're at the EOF
4295 * Just zero fill the rest of the valid area.
4296 */
4297 zcnt = uio->uio_resid;
4298 zoff = biosize - zcnt;
4299 bzero((char *)ioaddr + zoff, zcnt);
4300
4301 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 324)) | DBG_FUNC_NONE,
4302 (int)uio->uio_offset, zoff, zcnt, ioaddr, 0);
4303
4304 uio->uio_offset += zcnt;
4305 }
4306 ioaddr += biosize;
4307 xsize -= biosize;
4308 } else
4309 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 322)) | DBG_FUNC_NONE,
4310 (int)uio->uio_offset, uio->uio_resid, error, -1, 0);
4311
4312 if (p && (vp->v_flag & VTEXT) &&
4313 (((nmp->nm_flag & NFSMNT_NQNFS) &&
4314 NQNFS_CKINVALID(vp, np, ND_READ) &&
4315 np->n_lrev != np->n_brev) ||
4316 (!(nmp->nm_flag & NFSMNT_NQNFS) &&
4317 np->n_mtime != np->n_vattr.va_mtime.tv_sec))) {
4318 uprintf("Process killed due to text file modification\n");
4319 psignal(p, SIGKILL);
4320 p->p_flag |= P_NOSWAP;
4321 }
4322
4323 } while (error == 0 && xsize > 0);
4324
4325 ubc_upl_unmap(pl);
4326
4327 if (!nocommit) {
4328 if (error)
4329 ubc_upl_abort_range(pl, pl_offset, size,
4330 UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY);
4331 else
4332 ubc_upl_commit_range(pl, pl_offset, size,
4333 UPL_COMMIT_CLEAR_DIRTY | UPL_COMMIT_FREE_ON_EMPTY);
4334 }
4335
4336 return (error);
4337 }
4338
4339
4340
4341 /*
4342 * Vnode op for pageout using UPL
4343 * Derived from nfs_write()
4344 * File size changes are not permitted in pageout.
4345 */
4346 static int
4347 nfs_pageout(ap)
4348 struct vop_pageout_args /* {
4349 struct vnode *a_vp,
4350 upl_t a_pl,
4351 vm_offset_t a_pl_offset,
4352 off_t a_f_offset,
4353 size_t a_size,
4354 struct ucred *a_cred,
4355 int a_flags
4356 } */ *ap;
4357 {
4358 register struct vnode *vp = ap->a_vp;
4359 upl_t pl = ap->a_pl;
4360 size_t size = ap->a_size;
4361 off_t f_offset = ap->a_f_offset;
4362 vm_offset_t pl_offset = ap->a_pl_offset;
4363 int flags = ap->a_flags;
4364 int ioflag = ap->a_flags;
4365 register int biosize;
4366 struct proc *p = current_proc();
4367 struct nfsnode *np = VTONFS(vp);
4368 register struct ucred *cred;
4369 struct buf *bp;
4370 struct nfsmount *nmp = VFSTONFS(vp->v_mount);
4371 daddr_t lbn;
4372 int bufsize;
4373 int n = 0, on, error = 0, iomode, must_commit, s;
4374 off_t off;
4375 vm_offset_t ioaddr;
4376 struct uio auio;
4377 struct iovec aiov;
4378 struct uio * uio = &auio;
4379 int nocommit = flags & UPL_NOCOMMIT;
4380 int iosize;
4381 int pgsize;
4382
4383 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 323)) | DBG_FUNC_NONE,
4384 (int)f_offset, size, pl, pl_offset, 0);
4385
4386 if (UBCINVALID(vp)) {
4387 #if DIAGNOSTIC
4388 panic("nfs_pageout: invalid vnode");
4389 #endif
4390 return (EIO);
4391 }
4392 UBCINFOCHECK("nfs_pageout", vp);
4393
4394 if (size <= 0)
4395 return (EINVAL);
4396
4397 if (pl == (upl_t)NULL) {
4398 panic("nfs_pageout: no upl");
4399 }
4400
4401 /*
4402 * Use the mount's f_iosize (capped at the request size), not nm_wsize,
4403 * so that all buffer cache blocks will be the same size within a
4404 * filesystem. nfs_writerpc will still use nm_wsize when sizing the rpc's.
4405 */
4406 biosize = min(vp->v_mount->mnt_stat.f_iosize, size);
4407
4408 if (biosize & PAGE_MASK)
4409 panic("nfs_pageout(%x): biosize not page aligned", biosize);
4410
4411
4412 /*
4413 * Check to see whether the buffer is incore
4414 * If incore and not busy invalidate it from the cache
4415 * we should not find it BUSY, since we always do a
4416 * vm_fault_list_request in 'getblk' before returning
4417 * which would block on the page busy status
4418 */
4419 lbn = f_offset / PAGE_SIZE; /* to match the size getblk uses */
4420
4421 for (iosize = size; iosize > 0; iosize -= PAGE_SIZE, lbn++) {
4422
4423 s = splbio();
4424 if ((bp = incore(vp, lbn))) {
4425 if (ISSET(bp->b_flags, B_BUSY)) {
4426 /* don't panic on incore; just tell vm we are busy */
4427 (void) ubc_upl_abort(pl, 0);
4428 return(EBUSY);
4429 }
4430
4431 bremfree(bp);
4432 SET(bp->b_flags, (B_BUSY | B_INVAL));
4433 brelse(bp);
4434 }
4435 splx(s);
4436 }
4437
4438 cred = ubc_getcred(vp);
4439 if (cred == NOCRED)
4440 cred = ap->a_cred;
4441
4442 if (np->n_flag & NWRITEERR) {
4443 np->n_flag &= ~NWRITEERR;
4444 if (!nocommit)
4445 ubc_upl_abort_range(pl, pl_offset, size, UPL_ABORT_FREE_ON_EMPTY);
4446 return (np->n_error);
4447 }
4448 if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3)
4449 (void)nfs_fsinfo(nmp, vp, cred, p);
4450
4451 if (f_offset < 0 || f_offset >= np->n_size ||
4452 (f_offset & PAGE_MASK_64) || (size & PAGE_MASK)) {
4453 if (!nocommit)
4454 ubc_upl_abort_range(pl, pl_offset, size, UPL_ABORT_FREE_ON_EMPTY);
4455 return (EINVAL);
4456 }
4457
4458 ubc_upl_map(pl, &ioaddr);
4459
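/*
 * Pageout must not extend the file, so the write is clipped to the
 * current EOF; whole pages past the end are simply aborted back to VM
 * below rather than written.
 */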
4460 if ((f_offset + size) > np->n_size)
4461 iosize = np->n_size - f_offset;
4462 else
4463 iosize = size;
4464
4465 pgsize = (iosize + (PAGE_SIZE - 1)) & ~PAGE_MASK;
4466
4467 if (size > pgsize) {
4468 if (!nocommit)
4469 ubc_upl_abort_range(pl, pl_offset + pgsize, size - pgsize,
4470 UPL_ABORT_FREE_ON_EMPTY);
4471 }
4472 auio.uio_iov = &aiov;
4473 auio.uio_iovcnt = 1;
4474 auio.uio_offset = f_offset;
4475 auio.uio_segflg = UIO_SYSSPACE;
4476 auio.uio_rw = UIO_WRITE;
4477 auio.uio_resid = iosize;
4478 auio.uio_procp = NULL;
4479
4480 aiov.iov_len = iosize;
4481 aiov.iov_base = (caddr_t)ioaddr + pl_offset;
4482
4483 /*
4484 * check for partial page and clear the
4485 * contents past end of the file before
4486 * releasing it in the VM page cache
4487 */
4488 if ((f_offset < np->n_size) && (f_offset + size) > np->n_size) {
4489 size_t io = np->n_size - f_offset;
4490
4491 bzero((caddr_t)(ioaddr + pl_offset + io), size - io);
4492
4493 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 321)) | DBG_FUNC_NONE,
4494 (int)np->n_size, (int)f_offset, (int)f_offset + io, size - io, 0);
4495 }
4496
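/*
 * Push the mapped pages to the server with synchronous (FILESYNC) writes;
 * the UPL range is committed or aborted in the cleanup code below based
 * on the outcome.
 */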
4497 do {
4498
4499 #warning nfs_pageout does not support NQNFS yet.
4500 #if 0 /* why bother? */
4501 /* NO RESOURCES TO FIX NQNFS CASE */
4502 /* We need to deal with this later -- Umesh */
4503
4504 /*
4505 * Check for a valid write lease.
4506 */
4507 if ((nmp->nm_flag & NFSMNT_NQNFS) &&
4508 NQNFS_CKINVALID(vp, np, ND_WRITE)) {
4509 do {
4510 error = nqnfs_getlease(vp, ND_WRITE, cred, p);
4511 } while (error == NQNFS_EXPIRED);
4512 if (error) {
4513 ubc_upl_unmap(pl);
4514 if (!nocommit)
4515 ubc_upl_abort_range(pl, pl_offset, size,
4516 UPL_ABORT_FREE_ON_EMPTY);
4517 return (error);
4518 }
4519 if (np->n_lrev != np->n_brev ||
4520 (np->n_flag & NQNFSNONCACHE)) {
4521 error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
4522 if (error) {
4523 ubc_upl_unmap(pl);
4524 if (!nocommit)
4525 ubc_upl_abort_range(pl, pl_offset, size,
4526 UPL_ABORT_FREE_ON_EMPTY);
4527 return (error);
4528 }
4529 np->n_brev = np->n_lrev;
4530 }
4531 }
4532 #endif /* 0 - why bother? */
4533
4534 if ((np->n_flag & NQNFSNONCACHE) && uio->uio_iovcnt == 1) {
4535 iomode = NFSV3WRITE_FILESYNC;
4536 error = nfs_writerpc(vp, uio, cred, &iomode, &must_commit);
4537 if (must_commit)
4538 nfs_clearcommit(vp->v_mount);
4539 ubc_upl_unmap(pl);
4540
4541 /* copied from non-nqnfs case below. see there for comments */
4542 if (!nocommit) {
4543 if (error) {
4544 int abortflags;
4545 short action = nfs_pageouterrorhandler(error);
4546
4547 switch (action) {
4548 case DUMP:
4549 abortflags = UPL_ABORT_DUMP_PAGES|UPL_ABORT_FREE_ON_EMPTY;
4550 break;
4551 case DUMPANDLOG:
4552 abortflags = UPL_ABORT_DUMP_PAGES|UPL_ABORT_FREE_ON_EMPTY;
4553 if ((error <= ELAST) && (errorcount[error] % 100 == 0))
4554 printf("nfs_pageout: unexpected error %d. dumping vm page\n", error);
4555 errorcount[error]++;
4556 break;
4557 case RETRY:
4558 abortflags = UPL_ABORT_FREE_ON_EMPTY;
4559 break;
4560 case RETRYWITHSLEEP:
4561 abortflags = UPL_ABORT_FREE_ON_EMPTY;
4562 (void) tsleep(&lbolt, PSOCK, "nfspageout", 0); /* pri unused. PSOCK for placeholder. */
4563 break;
4564 case SEVER: /* not implemented */
4565 default:
4566 printf("nfs_pageout: action %d not expected\n", action);
4567 break;
4568 }
4569
4570 ubc_upl_abort_range(pl, pl_offset, size, abortflags);
4571 /* return error in all cases above */
4572
4573 } else
4574 ubc_upl_commit_range(pl,
4575 pl_offset, size,
4576 UPL_COMMIT_CLEAR_DIRTY | UPL_COMMIT_FREE_ON_EMPTY);
4577 }
4578 return (error); /* note this early return */
4579 }
4580
4581 nfsstats.pageouts++;
4582 lbn = uio->uio_offset / biosize;
4583 on = uio->uio_offset & (biosize-1);
4584 n = min((unsigned)(biosize - on), uio->uio_resid);
4585 again:
4586 bufsize = biosize;
4587 #if 0
4588 if ((lbn + 1) * biosize > np->n_size) {
4589 bufsize = np->n_size - lbn * biosize;
4590 bufsize = (bufsize + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
4591 }
4592 #endif
4593 vp->v_numoutput++;
4594
4595 np->n_flag |= NMODIFIED;
4596
4597 #if 0 /* why bother? */
4598 /* NO RESOURCES TO FIX NQNFS CASE */
4599 /* We need to deal with this later -- Umesh */
4600 /*
4601 * Check for valid write lease and get one as required.
4602 * In case getblk() and/or bwrite() delayed us.
4603 */
4604 if ((nmp->nm_flag & NFSMNT_NQNFS) &&
4605 NQNFS_CKINVALID(vp, np, ND_WRITE)) {
4606 do {
4607 error = nqnfs_getlease(vp, ND_WRITE, cred, p);
4608 } while (error == NQNFS_EXPIRED);
4609 if (error)
4610 goto cleanup;
4611
4612 if (np->n_lrev != np->n_brev ||
4613 (np->n_flag & NQNFSNONCACHE)) {
4614 error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
4615 if (error) {
4616 ubc_upl_unmap(pl);
4617 if (!nocommit)
4618 ubc_upl_abort_range(pl, pl_offset, size,
4619 UPL_ABORT_FREE_ON_EMPTY);
4620
4621 return (error);
4622 }
4623 np->n_brev = np->n_lrev;
4624 goto again;
4625 }
4626 }
4627 #endif /* 0 - why bother? */
4628
4629 iomode = NFSV3WRITE_FILESYNC;
4630 error = nfs_writerpc(vp, uio, cred, &iomode, &must_commit);
4631 if (must_commit)
4632 nfs_clearcommit(vp->v_mount);
4633 vp->v_numoutput--;
4634
4635 if (error)
4636 goto cleanup;
4637
4638 if (n > 0) {
4639 uio->uio_resid -= n;
4640 uio->uio_offset += n;
4641 uio->uio_iov->iov_base += n;
4642 uio->uio_iov->iov_len -= n;
4643 }
4644 } while (uio->uio_resid > 0 && n > 0);
4645
4646 cleanup:
4647 ubc_upl_unmap(pl);
4648 /*
4649 * We've had several different solutions on what to do when the pageout
4650 * gets an error. If we don't handle it, and return an error to the
4651 * caller, vm, it will retry. This can end in endless looping
4652 * between vm and here doing retries of the same page. Doing a dump
4653 * back to vm will get the page out of vm's knowledge and we lose whatever
4654 * data existed. This is risky, but in some cases necessary. For
4655 * example, the initial fix here was to do that for ESTALE. In that case
4656 * the server is telling us that the file is no longer the same. We
4657 * would not want to keep paging out to that. We also saw some 151
4658 * errors from an Auspex server, and NFSv3 can return errors higher than
4659 * ELAST. Those, along with known NFS server errors, we will "dump" from vm.
4660 * Errors we don't expect to occur, we dump and log for further
4661 * analysis. Errors that could be transient, such as networking ones,
4662 * we let vm "retry". Lastly, errors that we retry but that have the
4663 * potential to storm the network, we "retrywithsleep". "sever" will be
4664 * used in the future to dump all pages of the object for cases like ESTALE.
4665 * All this is the basis for the states returned and first guesses on
4666 * error handling. Tweaking is expected as more statistics are gathered.
4667 * Note, in the long run we may need another, more robust solution:
4668 * some kind of persistent store for when vm can neither dump nor keep
4669 * retrying, but that would be a larger architectural change.
4670 */
4671
4672 if (!nocommit) { /* otherwise stacked file system has to handle this */
4673 if (error) {
4674 int abortflags;
4675 short action = nfs_pageouterrorhandler(error);
4676
4677 switch (action) {
4678 case DUMP:
4679 abortflags = UPL_ABORT_DUMP_PAGES|UPL_ABORT_FREE_ON_EMPTY;
4680 break;
4681 case DUMPANDLOG:
4682 abortflags = UPL_ABORT_DUMP_PAGES|UPL_ABORT_FREE_ON_EMPTY;
4683 if ((error <= ELAST) && (errorcount[error] % 100 == 0))
4684 printf("nfs_pageout: unexpected error %d. dumping vm page\n", error);
4685 errorcount[error]++;
4686 break;
4687 case RETRY:
4688 abortflags = UPL_ABORT_FREE_ON_EMPTY;
4689 break;
4690 case RETRYWITHSLEEP:
4691 abortflags = UPL_ABORT_FREE_ON_EMPTY;
4692 (void) tsleep(&lbolt, PSOCK, "nfspageout", 0); /* pri unused. PSOCK for placeholder. */
4693 break;
4694 case SEVER: /* not implemented */
4695 default:
4696 printf("nfs_pageout: action %d not expected\n", action);
4697 break;
4698 }
4699
4700 ubc_upl_abort_range(pl, pl_offset, size, abortflags);
4701 /* return error in all cases above */
4702
4703 } else
4704 ubc_upl_commit_range(pl, pl_offset, pgsize,
4705 UPL_COMMIT_CLEAR_DIRTY | UPL_COMMIT_FREE_ON_EMPTY);
4706 }
4707 return (error);
4708 }
4709
4710 /* Blktooff derives file offset given a logical block number */
4711 static int
4712 nfs_blktooff(ap)
4713 struct vop_blktooff_args /* {
4714 struct vnode *a_vp;
4715 daddr_t a_lblkno;
4716 off_t *a_offset;
4717 } */ *ap;
4718 {
4719 int biosize;
4720 register struct vnode *vp = ap->a_vp;
4721
4722 biosize = min(vp->v_mount->mnt_stat.f_iosize, PAGE_SIZE); /* nfs_bio.c */
4723
4724 *ap->a_offset = (off_t)(ap->a_lblkno * biosize);
4725
4726 return (0);
4727 }
4728
4729 /* Offtoblk derives logical block number given a file offset */
4730 static int
4731 nfs_offtoblk(ap)
4732 struct vop_offtoblk_args /* {
4733 struct vnode *a_vp;
4734 off_t a_offset;
4735 daddr_t *a_lblkno;
4736 } */ *ap;
4737 {
4738 int biosize;
4739 register struct vnode *vp = ap->a_vp;
4740
4741 biosize = min(vp->v_mount->mnt_stat.f_iosize, PAGE_SIZE); /* nfs_bio.c */
4742
4743 *ap->a_lblkno = (daddr_t)(ap->a_offset / biosize);
4744
4745 return (0);
4746 }
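
/*
 * An NFS client has no underlying device-block mapping for file data,
 * so cmap is unsupported.
 */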
4747 static int
4748 nfs_cmap(ap)
4749 struct vop_cmap_args /* {
4750 struct vnode *a_vp;
4751 off_t a_offset;
4752 size_t a_size;
4753 daddr_t *a_bpn;
4754 size_t *a_run;
4755 void *a_poff;
4756 } */ *ap;
4757 {
4758 return (EOPNOTSUPP);
4759 }