]> git.saurik.com Git - apple/xnu.git/blame - bsd/nfs/nfs_vnops.c
xnu-344.tar.gz
[apple/xnu.git] / bsd / nfs / nfs_vnops.c
CommitLineData
1c79356b
A
1/*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
23/*
24 * Copyright (c) 1989, 1993
25 * The Regents of the University of California. All rights reserved.
26 *
27 * This code is derived from software contributed to Berkeley by
28 * Rick Macklem at The University of Guelph.
29 *
30 * Redistribution and use in source and binary forms, with or without
31 * modification, are permitted provided that the following conditions
32 * are met:
33 * 1. Redistributions of source code must retain the above copyright
34 * notice, this list of conditions and the following disclaimer.
35 * 2. Redistributions in binary form must reproduce the above copyright
36 * notice, this list of conditions and the following disclaimer in the
37 * documentation and/or other materials provided with the distribution.
38 * 3. All advertising materials mentioning features or use of this software
39 * must display the following acknowledgement:
40 * This product includes software developed by the University of
41 * California, Berkeley and its contributors.
42 * 4. Neither the name of the University nor the names of its contributors
43 * may be used to endorse or promote products derived from this software
44 * without specific prior written permission.
45 *
46 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
47 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
48 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
49 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
50 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
51 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
52 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
53 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
54 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
55 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
56 * SUCH DAMAGE.
57 *
58 * @(#)nfs_vnops.c 8.16 (Berkeley) 5/27/95
59 * FreeBSD-Id: nfs_vnops.c,v 1.72 1997/11/07 09:20:48 phk Exp $
60 */
61
62
63/*
64 * vnode op calls for Sun NFS version 2 and 3
65 */
1c79356b
A
66#include <sys/param.h>
67#include <sys/kernel.h>
68#include <sys/systm.h>
69#include <sys/resourcevar.h>
70#include <sys/proc.h>
71#include <sys/mount.h>
72#include <sys/buf.h>
73#include <sys/malloc.h>
74#include <sys/mbuf.h>
75#include <sys/conf.h>
76#include <sys/namei.h>
77#include <sys/vnode.h>
78#include <sys/dirent.h>
79#include <sys/fcntl.h>
80#include <sys/lockf.h>
81#include <sys/ubc.h>
82
83#include <ufs/ufs/dir.h>
84#include <vfs/vfs_support.h>
85
86#include <sys/vm.h>
87#include <machine/spl.h>
88#include <vm/vm_pageout.h>
89
90#include <sys/time.h>
91#include <kern/clock.h>
92
93#include <miscfs/fifofs/fifo.h>
94#include <miscfs/specfs/specdev.h>
95
96#include <nfs/rpcv2.h>
97#include <nfs/nfsproto.h>
98#include <nfs/nfs.h>
99#include <nfs/nfsnode.h>
100#include <nfs/nfsmount.h>
101#include <nfs/xdr_subs.h>
102#include <nfs/nfsm_subs.h>
103#include <nfs/nqnfs.h>
104
105#include <net/if.h>
106#include <netinet/in.h>
107#include <netinet/in_var.h>
1c79356b
A
108#include <vm/vm_kern.h>
109
9bccf70c
A
110#include <kern/task.h>
111#include <kern/sched_prim.h>
112
1c79356b
A
113#include <sys/kdebug.h>
114
fa4905b1
A
/*
 * Trace helpers for this file.  Each wrapper hands KERNEL_DEBUG one record
 * whose code is FSDBG_CODE(DBG_FSRW, A) OR-ed with a function qualifier:
 * DBG_FUNC_NONE for FSDBG, DBG_FUNC_START for FSDBG_TOP and DBG_FUNC_END
 * for FSDBG_BOT.  B..E are cast to int and passed as the first four data
 * words; the fifth data word is always 0.
 */
#define FSDBG_EMIT(QUAL, A, B, C, D, E) \
	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, (A))) | QUAL, \
		     (int)(B), (int)(C), (int)(D), (int)(E), 0)

#define FSDBG(A, B, C, D, E) \
	FSDBG_EMIT(DBG_FUNC_NONE, A, B, C, D, E)
#define FSDBG_TOP(A, B, C, D, E) \
	FSDBG_EMIT(DBG_FUNC_START, A, B, C, D, E)
#define FSDBG_BOT(A, B, C, D, E) \
	FSDBG_EMIT(DBG_FUNC_END, A, B, C, D, E)
124
1c79356b
A
/* Local boolean constants. */
125#define TRUE 1
126#define FALSE 0
127
/*
 * Forward declarations for the file-local vnode operations that the
 * operation tables in this file refer to.  Each vop handler takes a single
 * pointer to the matching vop_*_args structure and returns an int.
 */
128static int nfsspec_read __P((struct vop_read_args *));
129static int nfsspec_write __P((struct vop_write_args *));
130static int nfsfifo_read __P((struct vop_read_args *));
131static int nfsfifo_write __P((struct vop_write_args *));
132static int nfsspec_close __P((struct vop_close_args *));
133static int nfsfifo_close __P((struct vop_close_args *));
/* nfs_poll is only an alias for vop_nopoll; it has no NFS-specific body. */
134#define nfs_poll vop_nopoll
135static int nfs_ioctl __P((struct vop_ioctl_args *));
136static int nfs_select __P((struct vop_select_args *));
/* Internal helpers (not vop entry points): take explicit vnode/cred/proc. */
137static int nfs_flush __P((struct vnode *,struct ucred *,int,struct proc *,int));
138static int nfs_setattrrpc __P((struct vnode *,struct vattr *,struct ucred *,struct proc *));
139static int nfs_lookup __P((struct vop_lookup_args *));
140static int nfs_create __P((struct vop_create_args *));
141static int nfs_mknod __P((struct vop_mknod_args *));
142static int nfs_open __P((struct vop_open_args *));
143static int nfs_close __P((struct vop_close_args *));
144static int nfs_access __P((struct vop_access_args *));
145static int nfs_getattr __P((struct vop_getattr_args *));
146static int nfs_setattr __P((struct vop_setattr_args *));
147static int nfs_read __P((struct vop_read_args *));
148static int nfs_mmap __P((struct vop_mmap_args *));
149static int nfs_fsync __P((struct vop_fsync_args *));
150static int nfs_remove __P((struct vop_remove_args *));
151static int nfs_link __P((struct vop_link_args *));
152static int nfs_rename __P((struct vop_rename_args *));
153static int nfs_mkdir __P((struct vop_mkdir_args *));
154static int nfs_rmdir __P((struct vop_rmdir_args *));
155static int nfs_symlink __P((struct vop_symlink_args *));
156static int nfs_readdir __P((struct vop_readdir_args *));
157static int nfs_bmap __P((struct vop_bmap_args *));
158static int nfs_strategy __P((struct vop_strategy_args *));
/* Internal helpers used by the lookup/remove paths (not vop entry points). */
159static int nfs_lookitup __P((struct vnode *,char *,int,struct ucred *,struct proc *,struct nfsnode **));
160static int nfs_sillyrename __P((struct vnode *,struct vnode *,struct componentname *));
161static int nfsspec_access __P((struct vop_access_args *));
162static int nfs_readlink __P((struct vop_readlink_args *));
163static int nfs_print __P((struct vop_print_args *));
164static int nfs_pathconf __P((struct vop_pathconf_args *));
165static int nfs_advlock __P((struct vop_advlock_args *));
166static int nfs_blkatoff __P((struct vop_blkatoff_args *));
167static int nfs_bwrite __P((struct vop_bwrite_args *));
168static int nfs_valloc __P((struct vop_valloc_args *));
169static int nfs_vfree __P((struct vop_vfree_args *));
170static int nfs_truncate __P((struct vop_truncate_args *));
171static int nfs_update __P((struct vop_update_args *));
172static int nfs_pagein __P((struct vop_pagein_args *));
173static int nfs_pageout __P((struct vop_pageout_args *));
174static int nfs_blktooff __P((struct vop_blktooff_args *));
175static int nfs_offtoblk __P((struct vop_offtoblk_args *));
176static int nfs_cmap __P((struct vop_cmap_args *));
177
178/*
179 * Global vfs data structures for nfs
180 */
/*
 * Operation table for ordinary NFS vnodes.  Every row pairs a vnode-operation
 * descriptor with the routine installed for it; the first row installs
 * vn_default_error for vop_default_desc and the { NULL, NULL } row ends the
 * list.  Copyfile is the one row that names an err_* routine (err_copyfile).
 * nfsv2_vnodeop_opv_desc bundles the table with the address of
 * nfsv2_vnodeop_p (presumably the vector built from it at VFS init --
 * confirm against the vnodeopv_desc consumer).
 */
181vop_t **nfsv2_vnodeop_p;
182static struct vnodeopv_entry_desc nfsv2_vnodeop_entries[] = {
183 { &vop_default_desc, (vop_t *)vn_default_error },
184 { &vop_lookup_desc, (vop_t *)nfs_lookup }, /* lookup */
185 { &vop_create_desc, (vop_t *)nfs_create }, /* create */
186 { &vop_mknod_desc, (vop_t *)nfs_mknod }, /* mknod */
187 { &vop_open_desc, (vop_t *)nfs_open }, /* open */
188 { &vop_close_desc, (vop_t *)nfs_close }, /* close */
189 { &vop_access_desc, (vop_t *)nfs_access }, /* access */
190 { &vop_getattr_desc, (vop_t *)nfs_getattr }, /* getattr */
191 { &vop_setattr_desc, (vop_t *)nfs_setattr }, /* setattr */
192 { &vop_read_desc, (vop_t *)nfs_read }, /* read */
193 { &vop_write_desc, (vop_t *)nfs_write }, /* write */
194 { &vop_lease_desc, (vop_t *)nfs_lease_check }, /* lease */
195 { &vop_ioctl_desc, (vop_t *)nfs_ioctl }, /* ioctl */
196 { &vop_select_desc, (vop_t *)nfs_select }, /* select */
197 { &vop_revoke_desc, (vop_t *)nfs_revoke }, /* revoke */
198 { &vop_mmap_desc, (vop_t *)nfs_mmap }, /* mmap */
199 { &vop_fsync_desc, (vop_t *)nfs_fsync }, /* fsync */
200 { &vop_seek_desc, (vop_t *)nfs_seek }, /* seek */
201 { &vop_remove_desc, (vop_t *)nfs_remove }, /* remove */
202 { &vop_link_desc, (vop_t *)nfs_link }, /* link */
203 { &vop_rename_desc, (vop_t *)nfs_rename }, /* rename */
204 { &vop_mkdir_desc, (vop_t *)nfs_mkdir }, /* mkdir */
205 { &vop_rmdir_desc, (vop_t *)nfs_rmdir }, /* rmdir */
206 { &vop_symlink_desc, (vop_t *)nfs_symlink }, /* symlink */
207 { &vop_readdir_desc, (vop_t *)nfs_readdir }, /* readdir */
208 { &vop_readlink_desc, (vop_t *)nfs_readlink }, /* readlink */
209 { &vop_abortop_desc, (vop_t *)nfs_abortop }, /* abortop */
210 { &vop_inactive_desc, (vop_t *)nfs_inactive }, /* inactive */
211 { &vop_reclaim_desc, (vop_t *)nfs_reclaim }, /* reclaim */
212 { &vop_lock_desc, (vop_t *)nfs_lock }, /* lock */
213 { &vop_unlock_desc, (vop_t *)nfs_unlock }, /* unlock */
214 { &vop_bmap_desc, (vop_t *)nfs_bmap }, /* bmap */
215 { &vop_strategy_desc, (vop_t *)nfs_strategy }, /* strategy */
216 { &vop_print_desc, (vop_t *)nfs_print }, /* print */
217 { &vop_islocked_desc, (vop_t *)nfs_islocked }, /* islocked */
218 { &vop_pathconf_desc, (vop_t *)nfs_pathconf }, /* pathconf */
219 { &vop_advlock_desc, (vop_t *)nfs_advlock }, /* advlock */
220 { &vop_blkatoff_desc, (vop_t *)nfs_blkatoff }, /* blkatoff */
221 { &vop_valloc_desc, (vop_t *)nfs_valloc }, /* valloc */
222 { &vop_reallocblks_desc, (vop_t *)nfs_reallocblks }, /* reallocblks */
223 { &vop_vfree_desc, (vop_t *)nfs_vfree }, /* vfree */
224 { &vop_truncate_desc, (vop_t *)nfs_truncate }, /* truncate */
225 { &vop_update_desc, (vop_t *)nfs_update }, /* update */
226 { &vop_bwrite_desc, (vop_t *)nfs_bwrite }, /* bwrite */
227 { &vop_pagein_desc, (vop_t *)nfs_pagein }, /* Pagein */
228 { &vop_pageout_desc, (vop_t *)nfs_pageout }, /* Pageout */
229 { &vop_copyfile_desc, (vop_t *)err_copyfile }, /* Copyfile */
230 { &vop_blktooff_desc, (vop_t *)nfs_blktooff }, /* blktooff */
231 { &vop_offtoblk_desc, (vop_t *)nfs_offtoblk }, /* offtoblk */
232 { &vop_cmap_desc, (vop_t *)nfs_cmap }, /* cmap */
233 { NULL, NULL }
234};
235struct vnodeopv_desc nfsv2_vnodeop_opv_desc =
236 { &nfsv2_vnodeop_p, nfsv2_vnodeop_entries };
/* Registration macro is only compiled when __FreeBSD__ is defined. */
237#ifdef __FreeBSD__
238VNODEOP_SET(nfsv2_vnodeop_opv_desc);
239#endif
240
241/*
242 * Special device vnode ops
243 */
/*
 * Operation table for special-device vnodes that live on an NFS mount.
 * Most rows install spec_* routines; close, access, read and write use the
 * nfsspec_* wrappers, bwrite uses vn_bwrite, and the attribute, fsync,
 * inactive/reclaim, lock/unlock, print, islocked, update, paging and
 * block-mapping rows use the nfs_* routines.  The { NULL, NULL } row ends
 * the list.  spec_nfsv2nodeop_opv_desc bundles the table with the address of
 * spec_nfsv2nodeop_p.
 */
244vop_t **spec_nfsv2nodeop_p;
245static struct vnodeopv_entry_desc spec_nfsv2nodeop_entries[] = {
246 { &vop_default_desc, (vop_t *)vn_default_error },
247 { &vop_lookup_desc, (vop_t *)spec_lookup }, /* lookup */
248 { &vop_create_desc, (vop_t *)spec_create }, /* create */
249 { &vop_mknod_desc, (vop_t *)spec_mknod }, /* mknod */
250 { &vop_open_desc, (vop_t *)spec_open }, /* open */
251 { &vop_close_desc, (vop_t *)nfsspec_close }, /* close */
252 { &vop_access_desc, (vop_t *)nfsspec_access }, /* access */
253 { &vop_getattr_desc, (vop_t *)nfs_getattr }, /* getattr */
254 { &vop_setattr_desc, (vop_t *)nfs_setattr }, /* setattr */
255 { &vop_read_desc, (vop_t *)nfsspec_read }, /* read */
256 { &vop_write_desc, (vop_t *)nfsspec_write }, /* write */
257 { &vop_lease_desc, (vop_t *)spec_lease_check }, /* lease */
258 { &vop_ioctl_desc, (vop_t *)spec_ioctl }, /* ioctl */
259 { &vop_select_desc, (vop_t *)spec_select }, /* select */
260 { &vop_revoke_desc, (vop_t *)spec_revoke }, /* revoke */
261 { &vop_mmap_desc, (vop_t *)spec_mmap }, /* mmap */
262 { &vop_fsync_desc, (vop_t *)nfs_fsync }, /* fsync */
263 { &vop_seek_desc, (vop_t *)spec_seek }, /* seek */
264 { &vop_remove_desc, (vop_t *)spec_remove }, /* remove */
265 { &vop_link_desc, (vop_t *)spec_link }, /* link */
266 { &vop_rename_desc, (vop_t *)spec_rename }, /* rename */
267 { &vop_mkdir_desc, (vop_t *)spec_mkdir }, /* mkdir */
268 { &vop_rmdir_desc, (vop_t *)spec_rmdir }, /* rmdir */
269 { &vop_symlink_desc, (vop_t *)spec_symlink }, /* symlink */
270 { &vop_readdir_desc, (vop_t *)spec_readdir }, /* readdir */
271 { &vop_readlink_desc, (vop_t *)spec_readlink }, /* readlink */
272 { &vop_abortop_desc, (vop_t *)spec_abortop }, /* abortop */
273 { &vop_inactive_desc, (vop_t *)nfs_inactive }, /* inactive */
274 { &vop_reclaim_desc, (vop_t *)nfs_reclaim }, /* reclaim */
275 { &vop_lock_desc, (vop_t *)nfs_lock }, /* lock */
276 { &vop_unlock_desc, (vop_t *)nfs_unlock }, /* unlock */
277 { &vop_bmap_desc, (vop_t *)spec_bmap }, /* bmap */
278 { &vop_strategy_desc, (vop_t *)spec_strategy }, /* strategy */
279 { &vop_print_desc, (vop_t *)nfs_print }, /* print */
280 { &vop_islocked_desc, (vop_t *)nfs_islocked }, /* islocked */
281 { &vop_pathconf_desc, (vop_t *)spec_pathconf }, /* pathconf */
282 { &vop_advlock_desc, (vop_t *)spec_advlock }, /* advlock */
283 { &vop_blkatoff_desc, (vop_t *)spec_blkatoff }, /* blkatoff */
284 { &vop_valloc_desc, (vop_t *)spec_valloc }, /* valloc */
285 { &vop_reallocblks_desc, (vop_t *)spec_reallocblks }, /* reallocblks */
286 { &vop_vfree_desc, (vop_t *)spec_vfree }, /* vfree */
287 { &vop_truncate_desc, (vop_t *)spec_truncate }, /* truncate */
288 { &vop_update_desc, (vop_t *)nfs_update }, /* update */
289 { &vop_bwrite_desc, (vop_t *)vn_bwrite }, /* bwrite */
290 { &vop_devblocksize_desc, (vop_t *)spec_devblocksize }, /* devblocksize */
291 { &vop_pagein_desc, (vop_t *)nfs_pagein }, /* Pagein */
292 { &vop_pageout_desc, (vop_t *)nfs_pageout }, /* Pageout */
293 { &vop_blktooff_desc, (vop_t *)nfs_blktooff }, /* blktooff */
294 { &vop_offtoblk_desc, (vop_t *)nfs_offtoblk }, /* offtoblk */
295 { &vop_cmap_desc, (vop_t *)nfs_cmap }, /* cmap */
296 { NULL, NULL }
297};
298struct vnodeopv_desc spec_nfsv2nodeop_opv_desc =
299 { &spec_nfsv2nodeop_p, spec_nfsv2nodeop_entries };
/* Registration macro is only compiled when __FreeBSD__ is defined. */
300#ifdef __FreeBSD__
301VNODEOP_SET(spec_nfsv2nodeop_opv_desc);
302#endif
303
/*
 * Operation table for fifo vnodes that live on an NFS mount.  Most rows
 * install fifo_* routines; close, read and write use the nfsfifo_* wrappers,
 * access uses nfsspec_access, bwrite uses vn_bwrite, and the attribute,
 * fsync, inactive/reclaim, lock/unlock, print, islocked, update, paging and
 * block-mapping rows use the nfs_* routines.  The { NULL, NULL } row ends
 * the list.  fifo_nfsv2nodeop_opv_desc bundles the table with the address of
 * fifo_nfsv2nodeop_p.
 */
304vop_t **fifo_nfsv2nodeop_p;
305static struct vnodeopv_entry_desc fifo_nfsv2nodeop_entries[] = {
306 { &vop_default_desc, (vop_t *)vn_default_error },
307 { &vop_lookup_desc, (vop_t *)fifo_lookup }, /* lookup */
308 { &vop_create_desc, (vop_t *)fifo_create }, /* create */
309 { &vop_mknod_desc, (vop_t *)fifo_mknod }, /* mknod */
310 { &vop_open_desc, (vop_t *)fifo_open }, /* open */
311 { &vop_close_desc, (vop_t *)nfsfifo_close }, /* close */
312 { &vop_access_desc, (vop_t *)nfsspec_access }, /* access */
313 { &vop_getattr_desc, (vop_t *)nfs_getattr }, /* getattr */
314 { &vop_setattr_desc, (vop_t *)nfs_setattr }, /* setattr */
315 { &vop_read_desc, (vop_t *)nfsfifo_read }, /* read */
316 { &vop_write_desc, (vop_t *)nfsfifo_write }, /* write */
317 { &vop_lease_desc, (vop_t *)fifo_lease_check }, /* lease */
318 { &vop_ioctl_desc, (vop_t *)fifo_ioctl }, /* ioctl */
319 { &vop_select_desc, (vop_t *)fifo_select }, /* select */
320 { &vop_revoke_desc, (vop_t *)fifo_revoke }, /* revoke */
321 { &vop_mmap_desc, (vop_t *)fifo_mmap }, /* mmap */
322 { &vop_fsync_desc, (vop_t *)nfs_fsync }, /* fsync */
323 { &vop_seek_desc, (vop_t *)fifo_seek }, /* seek */
324 { &vop_remove_desc, (vop_t *)fifo_remove }, /* remove */
325 { &vop_link_desc, (vop_t *)fifo_link }, /* link */
326 { &vop_rename_desc, (vop_t *)fifo_rename }, /* rename */
327 { &vop_mkdir_desc, (vop_t *)fifo_mkdir }, /* mkdir */
328 { &vop_rmdir_desc, (vop_t *)fifo_rmdir }, /* rmdir */
329 { &vop_symlink_desc, (vop_t *)fifo_symlink }, /* symlink */
330 { &vop_readdir_desc, (vop_t *)fifo_readdir }, /* readdir */
331 { &vop_readlink_desc, (vop_t *)fifo_readlink }, /* readlink */
332 { &vop_abortop_desc, (vop_t *)fifo_abortop }, /* abortop */
333 { &vop_inactive_desc, (vop_t *)nfs_inactive }, /* inactive */
334 { &vop_reclaim_desc, (vop_t *)nfs_reclaim }, /* reclaim */
335 { &vop_lock_desc, (vop_t *)nfs_lock }, /* lock */
336 { &vop_unlock_desc, (vop_t *)nfs_unlock }, /* unlock */
337 { &vop_bmap_desc, (vop_t *)fifo_bmap }, /* bmap */
0b4e3aa0 338 { &vop_strategy_desc, (vop_t *)fifo_strategy }, /* strategy */
1c79356b
A
339 { &vop_print_desc, (vop_t *)nfs_print }, /* print */
340 { &vop_islocked_desc, (vop_t *)nfs_islocked }, /* islocked */
341 { &vop_pathconf_desc, (vop_t *)fifo_pathconf }, /* pathconf */
342 { &vop_advlock_desc, (vop_t *)fifo_advlock }, /* advlock */
343 { &vop_blkatoff_desc, (vop_t *)fifo_blkatoff }, /* blkatoff */
344 { &vop_valloc_desc, (vop_t *)fifo_valloc }, /* valloc */
345 { &vop_reallocblks_desc, (vop_t *)fifo_reallocblks }, /* reallocblks */
346 { &vop_vfree_desc, (vop_t *)fifo_vfree }, /* vfree */
347 { &vop_truncate_desc, (vop_t *)fifo_truncate }, /* truncate */
348 { &vop_update_desc, (vop_t *)nfs_update }, /* update */
349 { &vop_bwrite_desc, (vop_t *)vn_bwrite }, /* bwrite */
350 { &vop_pagein_desc, (vop_t *)nfs_pagein }, /* Pagein */
351 { &vop_pageout_desc, (vop_t *)nfs_pageout }, /* Pageout */
352 { &vop_blktooff_desc, (vop_t *)nfs_blktooff }, /* blktooff */
353 { &vop_offtoblk_desc, (vop_t *)nfs_offtoblk }, /* offtoblk */
354 { &vop_cmap_desc, (vop_t *)nfs_cmap }, /* cmap */
355 { NULL, NULL }
356};
357struct vnodeopv_desc fifo_nfsv2nodeop_opv_desc =
358 { &fifo_nfsv2nodeop_p, fifo_nfsv2nodeop_entries };
/* Registration macro is only compiled when __FreeBSD__ is defined. */
359#ifdef __FreeBSD__
360VNODEOP_SET(fifo_nfsv2nodeop_opv_desc);
361#endif
362
/*
 * Forward declarations for file-local helpers that are not vop entry points:
 * they take explicit vnode, name, credential and process arguments rather
 * than a vop_*_args structure.  All return an int.
 */
363static int nfs_commit __P((struct vnode *vp, u_quad_t offset, int cnt,
364 struct ucred *cred, struct proc *procp));
365static int nfs_mknodrpc __P((struct vnode *dvp, struct vnode **vpp,
366 struct componentname *cnp,
367 struct vattr *vap));
368static int nfs_removerpc __P((struct vnode *dvp, char *name, int namelen,
369 struct ucred *cred, struct proc *proc));
370static int nfs_renamerpc __P((struct vnode *fdvp, char *fnameptr,
371 int fnamelen, struct vnode *tdvp,
372 char *tnameptr, int tnamelen,
373 struct ucred *cred, struct proc *proc));
374static int nfs_renameit __P((struct vnode *sdvp,
375 struct componentname *scnp,
376 struct sillyrename *sp));
377
378/*
379 * Global variables
380 */
/* Defined in other NFS source files; only referenced here. */
381extern u_long nfs_true, nfs_false;
382extern struct nfsstats nfsstats;
383extern nfstype nfsv3_type[9];
/*
 * Per-slot arrays sized by NFS_MAXASYNCDAEMON, plus a counter that starts
 * at 0.  (Presumably one slot per async I/O daemon -- their users are not in
 * the part of the file visible here; confirm.)
 */
384struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON];
385struct nfsmount *nfs_iodmount[NFS_MAXASYNCDAEMON];
386int nfs_numasync = 0;
/* sizeof(struct dirent) with the (MAXNAMLEN + 1)-byte name area subtracted. */
387#define DIRHDSIZ (sizeof (struct dirent) - (MAXNAMLEN + 1))
388
/*
 * Added to an nfsnode's n_modestamp and compared with time_second by
 * nfs_access() to decide whether a cached ACCESS reply may still be used.
 * When it is not greater than 0, nfs_access() also asks the server only for
 * the bits it needs instead of the full set.  Initialised to
 * NFS_MAXATTRTIMO; the sysctl that would make it tunable is commented out.
 */
389static int nfsaccess_cache_timeout = NFS_MAXATTRTIMO;
390/* SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_timeout, CTLFLAG_RW,
391 &nfsaccess_cache_timeout, 0, "NFS ACCESS cache timeout");
392*/
/* Union of the six NFSV3ACCESS_* bits used in this file. */
393#define NFSV3ACCESS_ALL (NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY \
394 | NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE \
395 | NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP)
396
0b4e3aa0
A
397
398/*
399 * the following are needed only by nfs_pageout to know how to handle errors
400 * see nfs_pageout comments on explanation of actions.
401 * the errors here are copied from errno.h and errors returned by servers
402 * are expected to match the same numbers here. If not, our actions may be
403 * erroneous.
404 */
/* The possible outcomes a table slot can name; NOACTION is 0. */
405enum actiontype {NOACTION, DUMP, DUMPANDLOG, RETRY, RETRYWITHSLEEP, SEVER};
406
/* One counter per errno value; its users are outside the code visible here. */
407static int errorcount[ELAST+1]; /* better be zeros when initialized */
408
/*
 * errortooutcome[e] is the actiontype chosen for errno value e.  The
 * initializers are positional, so the order of the rows must follow the
 * numeric errno values quoted in the per-row comments (slot 0 is "no error").
 */
409static const short errortooutcome[ELAST+1] = {
410 NOACTION,
411 DUMP, /* EPERM 1 Operation not permitted */
412 DUMP, /* ENOENT 2 No such file or directory */
413 DUMPANDLOG, /* ESRCH 3 No such process */
414 RETRY, /* EINTR 4 Interrupted system call */
415 DUMP, /* EIO 5 Input/output error */
416 DUMP, /* ENXIO 6 Device not configured */
417 DUMPANDLOG, /* E2BIG 7 Argument list too long */
418 DUMPANDLOG, /* ENOEXEC 8 Exec format error */
419 DUMPANDLOG, /* EBADF 9 Bad file descriptor */
420 DUMPANDLOG, /* ECHILD 10 No child processes */
421 DUMPANDLOG, /* EDEADLK 11 Resource deadlock avoided - was EAGAIN */
422 RETRY, /* ENOMEM 12 Cannot allocate memory */
423 DUMP, /* EACCES 13 Permission denied */
424 DUMPANDLOG, /* EFAULT 14 Bad address */
425 DUMPANDLOG, /* ENOTBLK 15 POSIX - Block device required */
426 RETRY, /* EBUSY 16 Device busy */
427 DUMP, /* EEXIST 17 File exists */
428 DUMP, /* EXDEV 18 Cross-device link */
429 DUMP, /* ENODEV 19 Operation not supported by device */
430 DUMP, /* ENOTDIR 20 Not a directory */
431 DUMP, /* EISDIR 21 Is a directory */
432 DUMP, /* EINVAL 22 Invalid argument */
433 DUMPANDLOG, /* ENFILE 23 Too many open files in system */
434 DUMPANDLOG, /* EMFILE 24 Too many open files */
435 DUMPANDLOG, /* ENOTTY 25 Inappropriate ioctl for device */
436 DUMPANDLOG, /* ETXTBSY 26 Text file busy - POSIX */
437 DUMP, /* EFBIG 27 File too large */
438 DUMP, /* ENOSPC 28 No space left on device */
439 DUMPANDLOG, /* ESPIPE 29 Illegal seek */
440 DUMP, /* EROFS 30 Read-only file system */
441 DUMP, /* EMLINK 31 Too many links */
442 RETRY, /* EPIPE 32 Broken pipe */
443 /* math software */
444 DUMPANDLOG, /* EDOM 33 Numerical argument out of domain */
445 DUMPANDLOG, /* ERANGE 34 Result too large */
446 RETRY, /* EAGAIN/EWOULDBLOCK 35 Resource temporarily unavailable */
447 DUMPANDLOG, /* EINPROGRESS 36 Operation now in progress */
448 DUMPANDLOG, /* EALREADY 37 Operation already in progress */
449 /* ipc/network software -- argument errors */
450 DUMPANDLOG, /* ENOTSOCK 38 Socket operation on non-socket */
451 DUMPANDLOG, /* EDESTADDRREQ 39 Destination address required */
452 DUMPANDLOG, /* EMSGSIZE 40 Message too long */
453 DUMPANDLOG, /* EPROTOTYPE 41 Protocol wrong type for socket */
454 DUMPANDLOG, /* ENOPROTOOPT 42 Protocol not available */
455 DUMPANDLOG, /* EPROTONOSUPPORT 43 Protocol not supported */
456 DUMPANDLOG, /* ESOCKTNOSUPPORT 44 Socket type not supported */
457 DUMPANDLOG, /* ENOTSUP 45 Operation not supported */
458 DUMPANDLOG, /* EPFNOSUPPORT 46 Protocol family not supported */
459 DUMPANDLOG, /* EAFNOSUPPORT 47 Address family not supported by protocol family */
460 DUMPANDLOG, /* EADDRINUSE 48 Address already in use */
461 DUMPANDLOG, /* EADDRNOTAVAIL 49 Can't assign requested address */
462 /* ipc/network software -- operational errors */
463 RETRY, /* ENETDOWN 50 Network is down */
464 RETRY, /* ENETUNREACH 51 Network is unreachable */
465 RETRY, /* ENETRESET 52 Network dropped connection on reset */
466 RETRY, /* ECONNABORTED 53 Software caused connection abort */
467 RETRY, /* ECONNRESET 54 Connection reset by peer */
468 RETRY, /* ENOBUFS 55 No buffer space available */
469 RETRY, /* EISCONN 56 Socket is already connected */
470 RETRY, /* ENOTCONN 57 Socket is not connected */
471 RETRY, /* ESHUTDOWN 58 Can't send after socket shutdown */
472 RETRY, /* ETOOMANYREFS 59 Too many references: can't splice */
473 RETRY, /* ETIMEDOUT 60 Operation timed out */
474 RETRY, /* ECONNREFUSED 61 Connection refused */
475
476 DUMPANDLOG, /* ELOOP 62 Too many levels of symbolic links */
477 DUMP, /* ENAMETOOLONG 63 File name too long */
478 RETRY, /* EHOSTDOWN 64 Host is down */
479 RETRY, /* EHOSTUNREACH 65 No route to host */
480 DUMP, /* ENOTEMPTY 66 Directory not empty */
481 /* quotas & mush */
482 DUMPANDLOG, /* EPROCLIM 67 Too many processes */
483 DUMPANDLOG, /* EUSERS 68 Too many users */
484 DUMPANDLOG, /* EDQUOT 69 Disc quota exceeded */
485 /* Network File System */
486 DUMP, /* ESTALE 70 Stale NFS file handle */
487 DUMP, /* EREMOTE 71 Too many levels of remote in path */
488 DUMPANDLOG, /* EBADRPC 72 RPC struct is bad */
489 DUMPANDLOG, /* ERPCMISMATCH 73 RPC version wrong */
490 DUMPANDLOG, /* EPROGUNAVAIL 74 RPC prog. not avail */
491 DUMPANDLOG, /* EPROGMISMATCH 75 Program version wrong */
492 DUMPANDLOG, /* EPROCUNAVAIL 76 Bad procedure for program */
493
494 DUMPANDLOG, /* ENOLCK 77 No locks available */
495 DUMPANDLOG, /* ENOSYS 78 Function not implemented */
496 DUMPANDLOG, /* EFTYPE 79 Inappropriate file type or format */
497 DUMPANDLOG, /* EAUTH 80 Authentication error */
498 DUMPANDLOG, /* ENEEDAUTH 81 Need authenticator */
499 /* Intelligent device errors */
500 DUMPANDLOG, /* EPWROFF 82 Device power is off */
501 DUMPANDLOG, /* EDEVERR 83 Device error, e.g. paper out */
502 DUMPANDLOG, /* EOVERFLOW 84 Value too large to be stored in data type */
503 /* Program loading errors */
504 DUMPANDLOG, /* EBADEXEC 85 Bad executable */
505 DUMPANDLOG, /* EBADARCH 86 Bad CPU type in executable */
506 DUMPANDLOG, /* ESHLIBVERS 87 Shared library version mismatch */
507 DUMPANDLOG, /* EBADMACHO 88 Malformed Macho file */
508};
509
510
511static short
512nfs_pageouterrorhandler(error)
513 int error;
514{
515 if (error > ELAST)
516 return(DUMP);
517 else
518 return(errortooutcome[error]);
519}
1c79356b
A
520
/*
 * nfs3_access_otw
 *
 * Send one NFSv3 ACCESS request for vp and cache the answer in its nfsnode.
 * ("otw" presumably stands for "over the wire"; the caller's comment says
 * "Go to the wire".)
 *
 *	vp	vnode being checked
 *	wmode	NFSV3ACCESS_* bit mask placed in the request
 *	p, cred	process and credentials handed to nfsm_request
 *
 * When no error is pending after the reply's attributes are processed, the
 * mask decoded from the reply is stored in np->n_mode together with the
 * requesting uid (n_modeuid) and the current time_second (n_modestamp).
 * Returns the value of `error' (0 on success).
 */
521static int
522nfs3_access_otw(struct vnode *vp,
0b4e3aa0
A
523 int wmode,
524 struct proc *p,
525 struct ucred *cred)
1c79356b 526{
0b4e3aa0
A
/* This routine always speaks version 3, so the v3 flag is a constant. */
527 const int v3 = 1;
528 u_int32_t *tl;
529 int error = 0, attrflag;
530
/*
 * NOTE(review): mreq..cp are never referenced directly below; presumably the
 * nfsm_* macros use them by name -- confirm in nfsm_subs.h before renaming.
 */
531 struct mbuf *mreq, *mrep, *md, *mb, *mb2;
532 caddr_t bpos, dpos, cp2;
533 register int32_t t1, t2;
534 register caddr_t cp;
535 u_int32_t rmode;
536 struct nfsnode *np = VTONFS(vp);
fa4905b1 537 u_int64_t xid;
0b4e3aa0
A
538
/* Count the call, then build the request: file handle + one unsigned word. */
539 nfsstats.rpccnt[NFSPROC_ACCESS]++;
540 nfsm_reqhead(vp, NFSPROC_ACCESS, NFSX_FH(v3) + NFSX_UNSIGNED);
541 nfsm_fhtom(vp, v3);
542 nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
/* The word just reserved carries the requested access mask in XDR form. */
543 *tl = txdr_unsigned(wmode);
fa4905b1
A
544 nfsm_request(vp, NFSPROC_ACCESS, p, cred, &xid);
545 nfsm_postop_attr(vp, attrflag, &xid);
0b4e3aa0
A
546 if (!error) {
/* Decode the reply's access mask and remember who asked and when. */
547 nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
548 rmode = fxdr_unsigned(u_int32_t, *tl);
549 np->n_mode = rmode;
550 np->n_modeuid = cred->cr_uid;
551 np->n_modestamp = time_second;
fa4905b1 552 }
0b4e3aa0
A
/* NOTE(review): nfsm_reqdone presumably releases the reply -- confirm. */
553 nfsm_reqdone;
554 return error;
1c79356b
A
555}
556
557/*
558 * nfs access vnode op.
559 * For nfs version 2, just return ok. File accesses may fail later.
560 * For nfs version 3, use the access rpc to check accessibility. If file modes
561 * are changed on the server, accesses might still fail later.
562 */
563static int
564nfs_access(ap)
565 struct vop_access_args /* {
566 struct vnode *a_vp;
567 int a_mode;
568 struct ucred *a_cred;
569 struct proc *a_p;
570 } */ *ap;
571{
572 register struct vnode *vp = ap->a_vp;
0b4e3aa0
A
573 int error = 0;
574 u_long mode, wmode;
1c79356b 575 int v3 = NFS_ISV3(vp);
0b4e3aa0 576 struct nfsnode *np = VTONFS(vp);
1c79356b
A
577
578 /*
579 * For nfs v3, do an access rpc, otherwise you are stuck emulating
580 * ufs_access() locally using the vattr. This may not be correct,
581 * since the server may apply other access criteria such as
582 * client uid-->server uid mapping that we do not know about, but
583 * this is better than just returning anything that is lying about
584 * in the cache.
585 */
586 if (v3) {
587 if (ap->a_mode & VREAD)
588 mode = NFSV3ACCESS_READ;
589 else
590 mode = 0;
591 if (vp->v_type == VDIR) {
592 if (ap->a_mode & VWRITE)
fa4905b1
A
593 mode |= NFSV3ACCESS_MODIFY |
594 NFSV3ACCESS_EXTEND | NFSV3ACCESS_DELETE;
1c79356b
A
595 if (ap->a_mode & VEXEC)
596 mode |= NFSV3ACCESS_LOOKUP;
597 } else {
598 if (ap->a_mode & VWRITE)
fa4905b1 599 mode |= NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND;
1c79356b
A
600 if (ap->a_mode & VEXEC)
601 mode |= NFSV3ACCESS_EXECUTE;
602 }
0b4e3aa0
A
603 /* XXX safety belt, only make blanket request if caching */
604 if (nfsaccess_cache_timeout > 0) {
605 wmode = NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY |
fa4905b1
A
606 NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE |
607 NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP;
0b4e3aa0
A
608 } else
609 wmode = mode;
1c79356b 610
0b4e3aa0
A
611 /*
612 * Does our cached result allow us to give a definite yes to
613 * this request?
614 */
fa4905b1
A
615 if (time_second < np->n_modestamp + nfsaccess_cache_timeout &&
616 ap->a_cred->cr_uid == np->n_modeuid &&
617 (np->n_mode & mode) == mode) {
0b4e3aa0
A
618 /* nfsstats.accesscache_hits++; */
619 } else {
620 /*
621 * Either a no, or a don't know. Go to the wire.
622 */
623 /* nfsstats.accesscache_misses++; */
624 error = nfs3_access_otw(vp, wmode, ap->a_p,ap->a_cred);
625 if (!error) {
626 if ((np->n_mode & mode) != mode)
627 error = EACCES;
fa4905b1
A
628 }
629 }
1c79356b 630 } else
0b4e3aa0 631 return (nfsspec_access(ap)); /* NFSv2 case checks for EROFS here */
1c79356b
A
632 /*
633 * Disallow write attempts on filesystems mounted read-only;
634 * unless the file is a socket, fifo, or a block or character
635 * device resident on the filesystem.
0b4e3aa0
A
636 * CSM - moved EROFS check down per NetBSD rev 1.71. So you
637 * get the correct error value with layered filesystems.
638 * EKN - moved the return(error) below this so it does get called.
1c79356b
A
639 */
640 if (!error && (ap->a_mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) {
641 switch (vp->v_type) {
0b4e3aa0
A
642 case VREG: case VDIR: case VLNK:
643 error = EROFS;
644 default:
645 break;
1c79356b 646 }
fa4905b1 647 }
0b4e3aa0 648 return (error);
1c79356b
A
649}
650
651/*
652 * nfs open vnode op
653 * Check to see if the type is ok
654 * and that deletion is not in progress.
655 * For paged in text files, you will need to flush the page cache
656 * if consistency is lost.
657 */
658/* ARGSUSED */
fa4905b1 659
1c79356b
A
660static int
661nfs_open(ap)
662 struct vop_open_args /* {
663 struct vnode *a_vp;
664 int a_mode;
665 struct ucred *a_cred;
666 struct proc *a_p;
667 } */ *ap;
668{
669 register struct vnode *vp = ap->a_vp;
670 struct nfsnode *np = VTONFS(vp);
671 struct nfsmount *nmp = VFSTONFS(vp->v_mount);
672 struct vattr vattr;
673 int error;
674
fa4905b1 675 if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) {
1c79356b 676 return (EACCES);
fa4905b1 677 }
1c79356b
A
678 /*
679 * Get a valid lease. If cached data is stale, flush it.
680 */
681 if (nmp->nm_flag & NFSMNT_NQNFS) {
682 if (NQNFS_CKINVALID(vp, np, ND_READ)) {
683 do {
684 error = nqnfs_getlease(vp, ND_READ, ap->a_cred,
685 ap->a_p);
686 } while (error == NQNFS_EXPIRED);
687 if (error)
688 return (error);
689 if (np->n_lrev != np->n_brev ||
690 (np->n_flag & NQNFSNONCACHE)) {
691 if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred,
692 ap->a_p, 1)) == EINTR)
693 return (error);
694 np->n_brev = np->n_lrev;
695 }
696 }
697 } else {
698 if (np->n_flag & NMODIFIED) {
699 if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred,
700 ap->a_p, 1)) == EINTR)
701 return (error);
702 np->n_attrstamp = 0;
703 if (vp->v_type == VDIR)
704 np->n_direofoffset = 0;
705 error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_p);
706 if (error)
707 return (error);
708 np->n_mtime = vattr.va_mtime.tv_sec;
709 } else {
710 error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_p);
711 if (error)
712 return (error);
713 if (np->n_mtime != vattr.va_mtime.tv_sec) {
714 if (vp->v_type == VDIR)
715 np->n_direofoffset = 0;
716 if ((error = nfs_vinvalbuf(vp, V_SAVE,
717 ap->a_cred, ap->a_p, 1)) == EINTR)
718 return (error);
719 np->n_mtime = vattr.va_mtime.tv_sec;
720 }
721 }
722 }
723 if ((nmp->nm_flag & NFSMNT_NQNFS) == 0)
724 np->n_attrstamp = 0; /* For Open/Close consistency */
725 return (0);
726}
727
728/*
729 * nfs close vnode op
730 * What an NFS client should do upon close after writing is a debatable issue.
731 * Most NFS clients push delayed writes to the server upon close, basically for
732 * two reasons:
733 * 1 - So that any write errors may be reported back to the client process
734 * doing the close system call. By far the two most likely errors are
735 * NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure.
736 * 2 - To put a worst case upper bound on cache inconsistency between
737 * multiple clients for the file.
738 * There is also a consistency problem for Version 2 of the protocol w.r.t.
739 * not being able to tell if other clients are writing a file concurrently,
740 * since there is no way of knowing if the changed modify time in the reply
741 * is only due to the write for this client.
742 * (NFS Version 3 provides weak cache consistency data in the reply that
743 * should be sufficient to detect and handle this case.)
744 *
745 * The current code does the following:
746 * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers
747 * for NFS Version 3 - flush dirty buffers to the server but don't invalidate
748 * them (this satisfies 1 and 2). The last argument to
749 * nfs_flush() is a 1 here, which forces a commit
750 * operation as part of the close. Passing a 0 instead
751 * would leave the data uncommitted, exposed to the case
752 * where the server crashes after this close but before
753 * the commit RPC.
754 * for NQNFS - do nothing now, since 2 is dealt with via leases and
755 * 1 should be dealt with via an fsync() system call for
756 * cases where write errors are important.
757 */
758/* ARGSUSED */
759static int
760nfs_close(ap)
761 struct vop_close_args /* {
762 struct vnodeop_desc *a_desc;
763 struct vnode *a_vp;
764 int a_fflag;
765 struct ucred *a_cred;
766 struct proc *a_p;
767 } */ *ap;
768{
769 register struct vnode *vp = ap->a_vp;
770 register struct nfsnode *np = VTONFS(vp);
771 int error = 0;
772
773 if (vp->v_type == VREG) {
774#if DIAGNOSTIC
775 register struct sillyrename *sp = np->n_sillyrename;
776 if (sp)
777 kprintf("nfs_close: %s, dvp=%x, vp=%x, ap=%x, np=%x, sp=%x\n",
778 &sp->s_name[0], (unsigned)(sp->s_dvp), (unsigned)vp,
779 (unsigned)ap, (unsigned)np, (unsigned)sp);
780#endif
781 if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) == 0 &&
782 (np->n_flag & NMODIFIED)) {
783 if (NFS_ISV3(vp)) {
fa4905b1
A
784 error = nfs_flush(vp, ap->a_cred, MNT_WAIT, ap->a_p, 1);
785 /*
786 * We cannot clear the NMODIFIED bit in np->n_flag due to
9bccf70c 787 * potential races with other processes
fa4905b1
A
788 * NMODIFIED is a hint
789 */
790 /* np->n_flag &= ~NMODIFIED; */
1c79356b
A
791 } else
792 error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1);
793 np->n_attrstamp = 0;
794 }
795 if (np->n_flag & NWRITEERR) {
796 np->n_flag &= ~NWRITEERR;
797 error = np->n_error;
798 }
799 }
800 return (error);
801}
802
803/*
804 * nfs getattr call from vfs.
805 */
806static int
807nfs_getattr(ap)
808 struct vop_getattr_args /* {
809 struct vnode *a_vp;
810 struct vattr *a_vap;
811 struct ucred *a_cred;
812 struct proc *a_p;
813 } */ *ap;
814{
815 register struct vnode *vp = ap->a_vp;
816 register struct nfsnode *np = VTONFS(vp);
817 register caddr_t cp;
818 register u_long *tl;
819 register int t1, t2;
820 caddr_t bpos, dpos;
821 int error = 0;
822 struct mbuf *mreq, *mrep, *md, *mb, *mb2;
823 int v3 = NFS_ISV3(vp);
fa4905b1
A
824 u_int64_t xid;
825 int avoidfloods;
1c79356b 826
fa4905b1 827 FSDBG_TOP(513, np->n_size, np, np->n_vattr.va_size, np->n_flag);
1c79356b
A
828 /*
829 * Update local times for special files.
830 */
831 if (np->n_flag & (NACC | NUPD))
832 np->n_flag |= NCHG;
1c79356b
A
833 /*
834 * First look in the cache.
835 */
836 if ((error = nfs_getattrcache(vp, ap->a_vap)) == 0) {
fa4905b1 837 FSDBG_BOT(513, np->n_size, 0, np->n_vattr.va_size, np->n_flag);
1c79356b
A
838 return (0);
839 }
fa4905b1
A
840 if (error != ENOENT) {
841 FSDBG_BOT(513, np->n_size, error, np->n_vattr.va_size,
842 np->n_flag);
1c79356b 843 return (error);
fa4905b1 844 }
1c79356b 845 error = 0;
fa4905b1 846
1c79356b
A
847 if (v3 && nfsaccess_cache_timeout > 0) {
848 /* nfsstats.accesscache_misses++; */
fa4905b1
A
849 if (error = nfs3_access_otw(vp, NFSV3ACCESS_ALL, ap->a_p,
850 ap->a_cred))
851 return (error);
1c79356b
A
852 if ((error = nfs_getattrcache(vp, ap->a_vap)) == 0)
853 return (0);
854 if (error != ENOENT)
855 return (error);
856 error = 0;
857 }
fa4905b1
A
858 avoidfloods = 0;
859tryagain:
1c79356b
A
860 nfsstats.rpccnt[NFSPROC_GETATTR]++;
861 nfsm_reqhead(vp, NFSPROC_GETATTR, NFSX_FH(v3));
862 nfsm_fhtom(vp, v3);
fa4905b1 863 nfsm_request(vp, NFSPROC_GETATTR, ap->a_p, ap->a_cred, &xid);
1c79356b 864 if (!error) {
fa4905b1
A
865 nfsm_loadattr(vp, ap->a_vap, &xid);
866 if (!xid) { /* out-of-order rpc - attributes were dropped */
867 m_freem(mrep);
868 FSDBG(513, -1, np, np->n_xid << 32, np->n_xid);
869 if (avoidfloods++ < 100)
870 goto tryagain;
871 /*
872 * avoidfloods>1 is bizarre. at 100 pull the plug
873 */
874 panic("nfs_getattr: getattr flood\n");
875 }
1c79356b 876 if (np->n_mtime != ap->a_vap->va_mtime.tv_sec) {
fa4905b1 877 FSDBG(513, -1, np, -1, vp);
1c79356b
A
878 if (vp->v_type == VDIR)
879 nfs_invaldir(vp);
880 error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred,
881 ap->a_p, 1);
fa4905b1
A
882 FSDBG(513, -1, np, -2, error);
883 if (!error)
1c79356b 884 np->n_mtime = ap->a_vap->va_mtime.tv_sec;
1c79356b
A
885 }
886 }
887 nfsm_reqdone;
888
fa4905b1 889 FSDBG_BOT(513, np->n_size, -1, np->n_vattr.va_size, error);
1c79356b
A
890 return (error);
891}
892
893/*
894 * nfs setattr call.
895 */
896static int
897nfs_setattr(ap)
898 struct vop_setattr_args /* {
899 struct vnodeop_desc *a_desc;
900 struct vnode *a_vp;
901 struct vattr *a_vap;
902 struct ucred *a_cred;
903 struct proc *a_p;
904 } */ *ap;
905{
906 register struct vnode *vp = ap->a_vp;
907 register struct nfsnode *np = VTONFS(vp);
908 register struct vattr *vap = ap->a_vap;
909 int error = 0;
910 u_quad_t tsize;
911
912#ifndef nolint
913 tsize = (u_quad_t)0;
914#endif
fa4905b1
A
915
916#ifdef XXX /* enable this code soon! (but test it first) */
917 /*
918 * Setting of flags is not supported.
919 */
920 if (vap->va_flags != VNOVAL)
921 return (EOPNOTSUPP);
922#endif
923
1c79356b
A
924 /*
925 * Disallow write attempts if the filesystem is mounted read-only.
926 */
927 if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
928 vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL ||
929 vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) &&
930 (vp->v_mount->mnt_flag & MNT_RDONLY))
931 return (EROFS);
932 if (vap->va_size != VNOVAL) {
933 switch (vp->v_type) {
934 case VDIR:
935 return (EISDIR);
936 case VCHR:
937 case VBLK:
938 case VSOCK:
939 case VFIFO:
940 if (vap->va_mtime.tv_sec == VNOVAL &&
941 vap->va_atime.tv_sec == VNOVAL &&
942 vap->va_mode == (u_short)VNOVAL &&
943 vap->va_uid == (uid_t)VNOVAL &&
944 vap->va_gid == (gid_t)VNOVAL)
945 return (0);
946 vap->va_size = VNOVAL;
947 break;
948 default:
949 /*
950 * Disallow write attempts if the filesystem is
951 * mounted read-only.
952 */
953 if (vp->v_mount->mnt_flag & MNT_RDONLY)
954 return (EROFS);
fa4905b1
A
955 FSDBG_TOP(512, np->n_size, vap->va_size,
956 np->n_vattr.va_size, np->n_flag);
957 if (np->n_flag & NMODIFIED) {
958 if (vap->va_size == 0)
959 error = nfs_vinvalbuf(vp, 0,
960 ap->a_cred, ap->a_p, 1);
961 else
962 error = nfs_vinvalbuf(vp, V_SAVE,
963 ap->a_cred, ap->a_p, 1);
964 if (error) {
965 printf("nfs_setattr: nfs_vinvalbuf %d\n", error);
966 FSDBG_BOT(512, np->n_size, vap->va_size,
967 np->n_vattr.va_size, -1);
968 return (error);
969 }
970 } else if (np->n_size > vap->va_size) { /* shrinking? */
971 daddr_t obn, bn;
972 int biosize;
973 struct buf *bp;
974
975 biosize = min(vp->v_mount->mnt_stat.f_iosize,
976 PAGE_SIZE);
977 obn = (np->n_size - 1) / biosize;
978 bn = vap->va_size / biosize;
979 for ( ; obn >= bn; obn--)
980 if (incore(vp, obn)) {
981 bp = getblk(vp, obn, biosize, 0,
982 0, BLK_READ);
983 FSDBG(512, bp, bp->b_flags,
984 0, obn);
985 SET(bp->b_flags, B_INVAL);
986 brelse(bp);
987 }
1c79356b 988 }
fa4905b1 989 tsize = np->n_size;
1c79356b 990 np->n_size = np->n_vattr.va_size = vap->va_size;
fa4905b1 991 ubc_setsize(vp, (off_t)vap->va_size); /* XXX */
1c79356b
A
992 };
993 } else if ((vap->va_mtime.tv_sec != VNOVAL ||
fa4905b1
A
994 vap->va_atime.tv_sec != VNOVAL) &&
995 (np->n_flag & NMODIFIED) && vp->v_type == VREG &&
1c79356b
A
996 (error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred,
997 ap->a_p, 1)) == EINTR)
fa4905b1 998 return (error);
1c79356b 999 error = nfs_setattrrpc(vp, vap, ap->a_cred, ap->a_p);
fa4905b1 1000 FSDBG_BOT(512, np->n_size, vap->va_size, np->n_vattr.va_size, error);
1c79356b
A
1001 if (error && vap->va_size != VNOVAL) {
1002 /* make every effort to resync file size w/ server... */
1003 int err = 0; /* preserve "error" for return */
1004
1005 printf("nfs_setattr: nfs_setattrrpc %d\n", error);
1c79356b 1006 np->n_size = np->n_vattr.va_size = tsize;
fa4905b1 1007 ubc_setsize(vp, (off_t)np->n_size); /* XXX check error */
1c79356b
A
1008 vap->va_size = tsize;
1009 err = nfs_setattrrpc(vp, vap, ap->a_cred, ap->a_p);
1c79356b
A
1010 if (err)
1011 printf("nfs_setattr1: nfs_setattrrpc %d\n", err);
1c79356b
A
1012 }
1013 return (error);
1014}
1015
1016/*
1017 * Do an nfs setattr rpc.
1018 */
/*
 * Builds one NFSPROC_SETATTR request for vp from the fields of vap, sends
 * it on behalf of procp/cred, and processes the reply.  Fields of vap that
 * equal VNOVAL are encoded as "do not change".  Returns the value left in
 * "error" (0 on success).
 *
 * NOTE(review): the nfsm_* helpers are macros defined elsewhere; they use
 * the locals declared here by name and nfsm_reqdone appears to supply the
 * common exit path - confirm against their definitions before renaming
 * anything.
 */
1019static int
1020nfs_setattrrpc(vp, vap, cred, procp)
1021 register struct vnode *vp;
1022 register struct vattr *vap;
1023 struct ucred *cred;
1024 struct proc *procp;
1025{
1026 register struct nfsv2_sattr *sp;
1027 register caddr_t cp;
1028 register long t1, t2;
1029 caddr_t bpos, dpos, cp2;
1030 u_long *tl;
1031 int error = 0, wccflag = NFSV3_WCCRATTR;
1032 struct mbuf *mreq, *mrep, *md, *mb, *mb2;
1033 int v3 = NFS_ISV3(vp);
fa4905b1 1034 u_int64_t xid;
1c79356b
A
1035
1036 nfsstats.rpccnt[NFSPROC_SETATTR]++;
1037 nfsm_reqhead(vp, NFSPROC_SETATTR, NFSX_FH(v3) + NFSX_SATTR(v3));
1038 nfsm_fhtom(vp, v3);
/*
 * v3 encoding: mode, uid, gid and size are each preceded by a boolean
 * word (nfs_true followed by the value, or nfs_false alone).
 */
1039 if (v3) {
1040 if (vap->va_mode != (u_short)VNOVAL) {
1041 nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED);
1042 *tl++ = nfs_true;
1043 *tl = txdr_unsigned(vap->va_mode);
1044 } else {
1045 nfsm_build(tl, u_long *, NFSX_UNSIGNED);
1046 *tl = nfs_false;
1047 }
1048 if (vap->va_uid != (uid_t)VNOVAL) {
1049 nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED);
1050 *tl++ = nfs_true;
1051 *tl = txdr_unsigned(vap->va_uid);
1052 } else {
1053 nfsm_build(tl, u_long *, NFSX_UNSIGNED);
1054 *tl = nfs_false;
1055 }
1056 if (vap->va_gid != (gid_t)VNOVAL) {
1057 nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED);
1058 *tl++ = nfs_true;
1059 *tl = txdr_unsigned(vap->va_gid);
1060 } else {
1061 nfsm_build(tl, u_long *, NFSX_UNSIGNED);
1062 *tl = nfs_false;
1063 }
1064 if (vap->va_size != VNOVAL) {
1065 nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED);
1066 *tl++ = nfs_true;
1067 txdr_hyper(&vap->va_size, tl);
1068 } else {
1069 nfsm_build(tl, u_long *, NFSX_UNSIGNED);
1070 *tl = nfs_false;
1071 }
/*
 * Access time: DONTCHANGE when unset; TOSERVER when the requested second
 * equals the current time.tv_sec; otherwise TOCLIENT followed by the
 * explicit time value.
 */
1072 if (vap->va_atime.tv_sec != VNOVAL) {
1073 if (vap->va_atime.tv_sec != time.tv_sec) {
1074 nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED);
1075 *tl++ = txdr_unsigned(NFSV3SATTRTIME_TOCLIENT);
1076 txdr_nfsv3time(&vap->va_atime, tl);
1077 } else {
1078 nfsm_build(tl, u_long *, NFSX_UNSIGNED);
1079 *tl = txdr_unsigned(NFSV3SATTRTIME_TOSERVER);
1080 }
1081 } else {
1082 nfsm_build(tl, u_long *, NFSX_UNSIGNED);
1083 *tl = txdr_unsigned(NFSV3SATTRTIME_DONTCHANGE);
1084 }
/* Modify time: same three-way encoding as the access time above. */
1085 if (vap->va_mtime.tv_sec != VNOVAL) {
1086 if (vap->va_mtime.tv_sec != time.tv_sec) {
1087 nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED);
1088 *tl++ = txdr_unsigned(NFSV3SATTRTIME_TOCLIENT);
1089 txdr_nfsv3time(&vap->va_mtime, tl);
1090 } else {
1091 nfsm_build(tl, u_long *, NFSX_UNSIGNED);
1092 *tl = txdr_unsigned(NFSV3SATTRTIME_TOSERVER);
1093 }
1094 } else {
1095 nfsm_build(tl, u_long *, NFSX_UNSIGNED);
1096 *tl = txdr_unsigned(NFSV3SATTRTIME_DONTCHANGE);
1097 }
/*
 * One trailing nfs_false word closes the v3 arguments.
 * NOTE(review): presumably the SETATTR3 guard ("check ctime") flag -
 * confirm against RFC 1813.
 */
1098 nfsm_build(tl, u_long *, NFSX_UNSIGNED);
1099 *tl = nfs_false;
1100 } else {
/*
 * v2 encoding: a fixed-size nfsv2_sattr in which unset mode/uid/gid are
 * stored as VNOVAL; size and both times are always written.
 */
1101 nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
1102 if (vap->va_mode == (u_short)VNOVAL)
1103 sp->sa_mode = VNOVAL;
1104 else
1105 sp->sa_mode = vtonfsv2_mode(vp->v_type, vap->va_mode);
1106 if (vap->va_uid == (uid_t)VNOVAL)
1107 sp->sa_uid = VNOVAL;
1108 else
1109 sp->sa_uid = txdr_unsigned(vap->va_uid);
1110 if (vap->va_gid == (gid_t)VNOVAL)
1111 sp->sa_gid = VNOVAL;
1112 else
1113 sp->sa_gid = txdr_unsigned(vap->va_gid);
1114 sp->sa_size = txdr_unsigned(vap->va_size);
1115 txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
1116 txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
1117 }
fa4905b1 1118 nfsm_request(vp, NFSPROC_SETATTR, procp, cred, &xid);
/*
 * Reply: v3 parses the wcc data and, if wccflag comes back zero on a vnode
 * that has not gone VBAD, resets n_attrstamp; v2 loads the returned
 * attributes into the node.
 */
1c79356b 1119 if (v3) {
fa4905b1
A
1120 nfsm_wcc_data(vp, wccflag, &xid);
1121 if (!wccflag && vp->v_type != VBAD) /* EINVAL on VBAD node */
1122 VTONFS(vp)->n_attrstamp = 0;
1c79356b 1123 } else
fa4905b1 1124 nfsm_loadattr(vp, (struct vattr *)0, &xid);
1c79356b
A
1125 nfsm_reqdone;
1126 return (error);
1127}
1128
1129/*
1130 * nfs lookup call, one step at a time...
1131 * First look in cache
1132 * If not found, unlock the directory nfsnode and do the rpc
1133 */
1134static int
1135nfs_lookup(ap)
1136 struct vop_lookup_args /* {
1137 struct vnodeop_desc *a_desc;
1138 struct vnode *a_dvp;
1139 struct vnode **a_vpp;
1140 struct componentname *a_cnp;
1141 } */ *ap;
1142{
1143 register struct componentname *cnp = ap->a_cnp;
1144 register struct vnode *dvp = ap->a_dvp;
1145 register struct vnode **vpp = ap->a_vpp;
1146 register int flags = cnp->cn_flags;
1147 register struct vnode *newvp;
1148 register u_long *tl;
1149 register caddr_t cp;
1150 register long t1, t2;
1151 struct nfsmount *nmp;
1152 caddr_t bpos, dpos, cp2;
1153 struct mbuf *mreq, *mrep, *md, *mb, *mb2;
1154 long len;
1155 nfsfh_t *fhp;
1156 struct nfsnode *np;
1157 int lockparent, wantparent, error = 0, attrflag, fhsize;
1158 int v3 = NFS_ISV3(dvp);
1159 struct proc *p = cnp->cn_proc;
0b4e3aa0 1160 int worldbuildworkaround = 1;
fa4905b1 1161 u_int64_t xid;
1c79356b
A
1162
1163 if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
1164 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
1165 return (EROFS);
1166 *vpp = NULLVP;
1167 if (dvp->v_type != VDIR)
1168 return (ENOTDIR);
1169 lockparent = flags & LOCKPARENT;
1170 wantparent = flags & (LOCKPARENT|WANTPARENT);
1171 nmp = VFSTONFS(dvp->v_mount);
1172 np = VTONFS(dvp);
fa4905b1 1173
0b4e3aa0
A
1174 if (worldbuildworkaround) {
1175 /*
1176 * Temporary workaround for world builds to not have dvp go
1177 * VBAD on during server calls in this routine. When
1178 * the real ref counting problem is found take this out.
1179 * Note if this was later and before the nfsm_request
1180 * set up, the workaround did not work (NOTE other difference
1181 * was I only put one VREF in that time. Thus it needs
1182 * to be above the cache_lookup branch or with 2 VREFS. Not
1183 * sure which. Can't play with world builds right now to see
1184 * which. VOP_ACCESS could also make it go to server. - EKN
1185 */
1186 VREF(dvp); /* hang on to this dvp - EKN */
1187 VREF(dvp); /* hang on tight - EKN */
1188 }
1c79356b
A
1189
1190 if ((error = cache_lookup(dvp, vpp, cnp)) && error != ENOENT) {
1191 struct vattr vattr;
1192 int vpid;
1193
1194 if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, p))) {
1195 *vpp = NULLVP;
0b4e3aa0
A
1196 goto error_return;
1197 }
fa4905b1 1198
0b4e3aa0
A
1199 /* got to check to make sure the vnode didn't go away if access went to server */
1200 if ((*vpp)->v_type == VBAD) {
1201 error = EINVAL;
1202 goto error_return;
1203 }
1c79356b
A
1204
1205 newvp = *vpp;
1206 vpid = newvp->v_id;
1207 /*
1208 * See the comment starting `Step through' in ufs/ufs_lookup.c
1209 * for an explanation of the locking protocol
1210 */
1211 if (dvp == newvp) {
1212 VREF(newvp);
1213 error = 0;
1214 } else if (flags & ISDOTDOT) {
1215 VOP_UNLOCK(dvp, 0, p);
1216 error = vget(newvp, LK_EXCLUSIVE, p);
1217 if (!error && lockparent && (flags & ISLASTCN))
1218 error = vn_lock(dvp, LK_EXCLUSIVE, p);
1219 } else {
1220 error = vget(newvp, LK_EXCLUSIVE, p);
1221 if (!lockparent || error || !(flags & ISLASTCN))
1222 VOP_UNLOCK(dvp, 0, p);
1223 }
1224 if (!error) {
1225 if (vpid == newvp->v_id) {
1226 if (!VOP_GETATTR(newvp, &vattr, cnp->cn_cred, p)
0b4e3aa0
A
1227 && vattr.va_ctime.tv_sec == VTONFS(newvp)->n_ctime) {
1228 nfsstats.lookupcache_hits++;
1229 if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
1230 cnp->cn_flags |= SAVENAME;
1231 error = 0; /* ignore any from VOP_GETATTR */
1232 goto error_return;
1233 }
1234 cache_purge(newvp);
1235 }
1c79356b
A
1236 vput(newvp);
1237 if (lockparent && dvp != newvp && (flags & ISLASTCN))
1238 VOP_UNLOCK(dvp, 0, p);
1239 }
1240 error = vn_lock(dvp, LK_EXCLUSIVE, p);
1241 *vpp = NULLVP;
0b4e3aa0
A
1242 if (error)
1243 goto error_return;
1c79356b 1244 }
fa4905b1 1245
1c79356b 1246 /*
0b4e3aa0 1247 * Got to check to make sure the vnode didn't go away if VOP_GETATTR went to server
1c79356b 1248 * or callers prior to this blocked and had it go VBAD.
0b4e3aa0
A
1249 */
1250 if (dvp->v_type == VBAD) {
1251 error = EINVAL;
1252 goto error_return;
1253 }
1c79356b
A
1254
1255 error = 0;
1256 newvp = NULLVP;
1257 nfsstats.lookupcache_misses++;
1258 nfsstats.rpccnt[NFSPROC_LOOKUP]++;
1259 len = cnp->cn_namelen;
1260 nfsm_reqhead(dvp, NFSPROC_LOOKUP,
1261 NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len));
1262 nfsm_fhtom(dvp, v3);
1263 nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN);
0b4e3aa0 1264 /* nfsm_request for NFSv2 causes you to goto to nfsmout upon errors */
fa4905b1 1265 nfsm_request(dvp, NFSPROC_LOOKUP, cnp->cn_proc, cnp->cn_cred, &xid);
1c79356b
A
1266
1267 if (error) {
fa4905b1 1268 nfsm_postop_attr(dvp, attrflag, &xid);
1c79356b
A
1269 m_freem(mrep);
1270 goto nfsmout;
1271 }
1272 nfsm_getfh(fhp, fhsize, v3);
1273
1274 /*
1275 * Handle RENAME case...
1276 */
1277 if (cnp->cn_nameiop == RENAME && wantparent && (flags & ISLASTCN)) {
1278 if (NFS_CMPFH(np, fhp, fhsize)) {
1279 m_freem(mrep);
0b4e3aa0
A
1280 error = EISDIR;
1281 goto error_return;
1c79356b
A
1282 }
1283 if ((error = nfs_nget(dvp->v_mount, fhp, fhsize, &np))) {
1284 m_freem(mrep);
0b4e3aa0 1285 goto error_return;
1c79356b
A
1286 }
1287 newvp = NFSTOV(np);
1288 if (v3) {
fa4905b1
A
1289 u_int64_t dxid = xid;
1290
1291 nfsm_postop_attr(newvp, attrflag, &xid);
1292 nfsm_postop_attr(dvp, attrflag, &dxid);
1c79356b 1293 } else
fa4905b1 1294 nfsm_loadattr(newvp, (struct vattr *)0, &xid);
1c79356b
A
1295 *vpp = newvp;
1296 m_freem(mrep);
1297 cnp->cn_flags |= SAVENAME;
1298 if (!lockparent)
1299 VOP_UNLOCK(dvp, 0, p);
0b4e3aa0
A
1300 error = 0;
1301 goto error_return;
1c79356b
A
1302 }
1303
1304 if (flags & ISDOTDOT) {
1305 VOP_UNLOCK(dvp, 0, p);
1306 error = nfs_nget(dvp->v_mount, fhp, fhsize, &np);
1307 if (error) {
1308 vn_lock(dvp, LK_EXCLUSIVE + LK_RETRY, p);
0b4e3aa0 1309 goto error_return;
1c79356b
A
1310 }
1311 newvp = NFSTOV(np);
1312 if (lockparent && (flags & ISLASTCN) &&
1313 (error = vn_lock(dvp, LK_EXCLUSIVE, p))) {
1314 vput(newvp);
0b4e3aa0 1315 goto error_return;
1c79356b
A
1316 }
1317 } else if (NFS_CMPFH(np, fhp, fhsize)) {
1318 VREF(dvp);
1319 newvp = dvp;
1320 } else {
1321 if ((error = nfs_nget(dvp->v_mount, fhp, fhsize, &np))) {
1322 m_freem(mrep);
0b4e3aa0 1323 goto error_return;
1c79356b
A
1324 }
1325 if (!lockparent || !(flags & ISLASTCN))
1326 VOP_UNLOCK(dvp, 0, p);
1327 newvp = NFSTOV(np);
1328 }
1329 if (v3) {
fa4905b1
A
1330 u_int64_t dxid = xid;
1331
1332 nfsm_postop_attr(newvp, attrflag, &xid);
1333 nfsm_postop_attr(dvp, attrflag, &dxid);
1c79356b 1334 } else
fa4905b1 1335 nfsm_loadattr(newvp, (struct vattr *)0, &xid);
1c79356b
A
1336 if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
1337 cnp->cn_flags |= SAVENAME;
1338 if ((cnp->cn_flags & MAKEENTRY) &&
1339 (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN))) {
1340 np->n_ctime = np->n_vattr.va_ctime.tv_sec;
1341 cache_enter(dvp, newvp, cnp);
1342 }
1343 *vpp = newvp;
1344 nfsm_reqdone;
1345 if (error) {
1346 if (newvp != NULLVP) {
1347 vrele(newvp);
1348 *vpp = NULLVP;
1349 }
1350 if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) &&
1351 (flags & ISLASTCN) && error == ENOENT) {
1352 if (!lockparent)
1353 VOP_UNLOCK(dvp, 0, p);
1354 if (dvp->v_mount->mnt_flag & MNT_RDONLY)
1355 error = EROFS;
1356 else
1357 error = EJUSTRETURN;
1358 }
1359 if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
1360 cnp->cn_flags |= SAVENAME;
1361 }
0b4e3aa0
A
1362error_return:
1363 /*
1364 * These "vreles" set dvp refcounts back to where they were
1365 * before we took extra 2 VREFS to avoid VBAD vnode on dvp
1366 * during server calls for world builds. Remove when real
1367 * fix is found. - EKN
1368 */
1369 if (worldbuildworkaround) {
1370 vrele(dvp); /* end of hanging on tight to dvp - EKN */
1371 vrele(dvp); /* end of hanging on tight to dvp - EKN */
1372 }
1373
1c79356b
A
1374 return (error);
1375}
1376
1377/*
1378 * nfs read call.
1379 * Just call nfs_bioread() to do the work.
1380 */
1381static int
1382nfs_read(ap)
1383 struct vop_read_args /* {
1384 struct vnode *a_vp;
1385 struct uio *a_uio;
1386 int a_ioflag;
1387 struct ucred *a_cred;
1388 } */ *ap;
1389{
1390 register struct vnode *vp = ap->a_vp;
1391
1392 if (vp->v_type != VREG)
1393 return (EPERM);
1394 return (nfs_bioread(vp, ap->a_uio, ap->a_ioflag, ap->a_cred, 0));
1395}
1396
fa4905b1 1397
1c79356b
A
1398/*
1399 * nfs readlink call
1400 */
1401static int
1402nfs_readlink(ap)
1403 struct vop_readlink_args /* {
1404 struct vnode *a_vp;
1405 struct uio *a_uio;
1406 struct ucred *a_cred;
1407 } */ *ap;
1408{
1409 register struct vnode *vp = ap->a_vp;
1410
1411 if (vp->v_type != VLNK)
1412 return (EPERM);
1413 return (nfs_bioread(vp, ap->a_uio, 0, ap->a_cred, 0));
1414}
1415
1416/*
1417 * Do a readlink rpc.
1418 * Called by nfs_doio() from below the buffer cache.
1419 */
/*
 * Sends one NFSPROC_READLINK request for vp and copies the returned link
 * text into uiop.  Returns the value left in "error" (0 on success).
 *
 * NOTE(review): the nfsm_* helpers are macros defined elsewhere that work
 * on the locals declared here; nfsm_reqdone appears to provide the shared
 * exit path - confirm against their definitions.
 */
1420int
1421nfs_readlinkrpc(vp, uiop, cred)
1422 register struct vnode *vp;
1423 struct uio *uiop;
1424 struct ucred *cred;
1425{
1426 register u_long *tl;
1427 register caddr_t cp;
1428 register long t1, t2;
1429 caddr_t bpos, dpos, cp2;
1430 int error = 0, len, attrflag;
1431 struct mbuf *mreq, *mrep, *md, *mb, *mb2;
1432 int v3 = NFS_ISV3(vp);
fa4905b1 1433 u_int64_t xid;
1c79356b
A
1434
1435 nfsstats.rpccnt[NFSPROC_READLINK]++;
1436 nfsm_reqhead(vp, NFSPROC_READLINK, NFSX_FH(v3));
1437 nfsm_fhtom(vp, v3);
fa4905b1 1438 nfsm_request(vp, NFSPROC_READLINK, uiop->uio_procp, cred, &xid);
/* v3 replies carry post-op attributes ahead of the link data. */
1c79356b 1439 if (v3)
fa4905b1 1440 nfsm_postop_attr(vp, attrflag, &xid);
1c79356b
A
1441 if (!error) {
1442 nfsm_strsiz(len, NFS_MAXPATHLEN);
fa4905b1
A
/*
 * A reported length of exactly NFS_MAXPATHLEN is replaced by the node's
 * cached n_size when that size is non-zero and smaller.
 * NOTE(review): the motivation (presumably a server that over-reports the
 * length) is not visible here - confirm.
 */
1443 if (len == NFS_MAXPATHLEN) {
1444 struct nfsnode *np = VTONFS(vp);
1c79356b
A
1445#if DIAGNOSTIC
1446 if (!np)
1447 panic("nfs_readlinkrpc: null np");
1448#endif
1449 if (np->n_size && np->n_size < NFS_MAXPATHLEN)
1450 len = np->n_size;
1451 }
1452 nfsm_mtouio(uiop, len);
1453 }
1454 nfsm_reqdone;
1455 return (error);
1456}
1457
1458/*
1459 * nfs read rpc call
1460 * Ditto above
1461 */
/*
 * Reads uiop->uio_resid bytes from vp starting at uiop->uio_offset by
 * issuing NFSPROC_READ requests of at most nm_rsize bytes each and copying
 * each reply into uiop.  Returns the value left in "error" (0 on success),
 * or EFBIG when a non-v3 mount is asked to read past a 32-bit offset.
 *
 * NOTE(review): the nfsm_* helpers are macros defined elsewhere that work
 * on the locals declared here and may jump to the nfsmout label on
 * failure - confirm against their definitions.
 */
1462int
1463nfs_readrpc(vp, uiop, cred)
1464 register struct vnode *vp;
1465 struct uio *uiop;
1466 struct ucred *cred;
1467{
1468 register u_long *tl;
1469 register caddr_t cp;
1470 register long t1, t2;
1471 caddr_t bpos, dpos, cp2;
1472 struct mbuf *mreq, *mrep, *md, *mb, *mb2;
1473 struct nfsmount *nmp;
1474 int error = 0, len, retlen, tsiz, eof, attrflag;
1475 int v3 = NFS_ISV3(vp);
fa4905b1 1476 u_int64_t xid;
1c79356b
A
1477
1478#ifndef nolint
1479 eof = 0;
1480#endif
1481 nmp = VFSTONFS(vp->v_mount);
1482 tsiz = uiop->uio_resid;
fa4905b1
A
/* Non-v3 requests carry a 32-bit offset, so the whole range must fit. */
1483 if (((u_int64_t)uiop->uio_offset + (unsigned int)tsiz > 0xffffffff) &&
1484 !v3)
1c79356b
A
1485 return (EFBIG);
/* tsiz counts the bytes still wanted; one rpc per loop iteration. */
1486 while (tsiz > 0) {
1487 nfsstats.rpccnt[NFSPROC_READ]++;
1488 len = (tsiz > nmp->nm_rsize) ? nmp->nm_rsize : tsiz;
1489 nfsm_reqhead(vp, NFSPROC_READ, NFSX_FH(v3) + NFSX_UNSIGNED * 3);
1490 nfsm_fhtom(vp, v3);
1491 nfsm_build(tl, u_long *, NFSX_UNSIGNED * 3);
/*
 * Three argument words either way: v3 uses two for the 64-bit offset and
 * one for the count; otherwise offset, count and a zero word.
 */
1492 if (v3) {
1493 txdr_hyper(&uiop->uio_offset, tl);
1494 *(tl + 2) = txdr_unsigned(len);
1495 } else {
1496 *tl++ = txdr_unsigned(uiop->uio_offset);
1497 *tl++ = txdr_unsigned(len);
1498 *tl = 0;
1499 }
fa4905b1 1500 nfsm_request(vp, NFSPROC_READ, uiop->uio_procp, cred, &xid);
1c79356b 1501 if (v3) {
fa4905b1 1502 nfsm_postop_attr(vp, attrflag, &xid);
1c79356b
A
1503 if (error) {
1504 m_freem(mrep);
1505 goto nfsmout;
1506 }
1507 nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
1508 eof = fxdr_unsigned(int, *(tl + 1));
1509 } else
fa4905b1 1510 nfsm_loadattr(vp, (struct vattr *)0, &xid);
1c79356b
A
1511 nfsm_strsiz(retlen, nmp->nm_rsize);
1512 nfsm_mtouio(uiop, retlen);
1513 m_freem(mrep);
1514 tsiz -= retlen;
/*
 * Stop early: v3 on the eof flag or an empty reply, otherwise on a reply
 * shorter than what was requested.
 */
1515 if (v3) {
1516 if (eof || retlen == 0)
1517 tsiz = 0;
1518 } else if (retlen < len)
1519 tsiz = 0;
1520 }
1521nfsmout:
1522 return (error);
1523}
1524
1525/*
1526 * nfs write call
1527 */
1528int
1529nfs_writerpc(vp, uiop, cred, iomode, must_commit)
1530 register struct vnode *vp;
1531 register struct uio *uiop;
1532 struct ucred *cred;
1533 int *iomode, *must_commit;
1534{
1535 register u_long *tl;
1536 register caddr_t cp;
1537 register int t1, t2, backup;
1538 caddr_t bpos, dpos, cp2;
1539 struct mbuf *mreq, *mrep, *md, *mb, *mb2;
1540 struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1541 int error = 0, len, tsiz, wccflag = NFSV3_WCCRATTR, rlen, commit;
1542 int v3 = NFS_ISV3(vp), committed = NFSV3WRITE_FILESYNC;
fa4905b1 1543 u_int64_t xid;
1c79356b
A
1544
1545#if DIAGNOSTIC
1546 if (uiop->uio_iovcnt != 1)
1547 panic("nfs_writerpc: iovcnt > 1");
1548#endif
1549 *must_commit = 0;
1550 tsiz = uiop->uio_resid;
1551 if (((u_int64_t)uiop->uio_offset + (unsigned int)tsiz > 0xffffffff) && !v3)
1552 return (EFBIG);
1553 while (tsiz > 0) {
1554 nfsstats.rpccnt[NFSPROC_WRITE]++;
1555 len = (tsiz > nmp->nm_wsize) ? nmp->nm_wsize : tsiz;
1556 nfsm_reqhead(vp, NFSPROC_WRITE,
1557 NFSX_FH(v3) + 5 * NFSX_UNSIGNED + nfsm_rndup(len));
1558 nfsm_fhtom(vp, v3);
1559 if (v3) {
1560 nfsm_build(tl, u_long *, 5 * NFSX_UNSIGNED);
1561 txdr_hyper(&uiop->uio_offset, tl);
1562 tl += 2;
1563 *tl++ = txdr_unsigned(len);
1564 *tl++ = txdr_unsigned(*iomode);
1565 } else {
1566 nfsm_build(tl, u_long *, 4 * NFSX_UNSIGNED);
1567 *++tl = txdr_unsigned(uiop->uio_offset);
1568 tl += 2;
1569 }
1570 *tl = txdr_unsigned(len);
1571 nfsm_uiotom(uiop, len);
fa4905b1 1572 nfsm_request(vp, NFSPROC_WRITE, uiop->uio_procp, cred, &xid);
1c79356b
A
1573 if (v3) {
1574 wccflag = NFSV3_WCCCHK;
fa4905b1 1575 nfsm_wcc_data(vp, wccflag, &xid);
1c79356b
A
1576 if (!error) {
1577 nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED +
1578 NFSX_V3WRITEVERF);
1579 rlen = fxdr_unsigned(int, *tl++);
1580 if (rlen <= 0) {
1581 error = NFSERR_IO;
1582 break;
1583 } else if (rlen < len) {
1584 backup = len - rlen;
1585 uiop->uio_iov->iov_base -= backup;
1586 uiop->uio_iov->iov_len += backup;
1587 uiop->uio_offset -= backup;
1588 uiop->uio_resid += backup;
1589 len = rlen;
1590 }
1591 commit = fxdr_unsigned(int, *tl++);
1592
1593 /*
1594 * Return the lowest committment level
1595 * obtained by any of the RPCs.
1596 */
1597 if (committed == NFSV3WRITE_FILESYNC)
1598 committed = commit;
1599 else if (committed == NFSV3WRITE_DATASYNC &&
1600 commit == NFSV3WRITE_UNSTABLE)
1601 committed = commit;
1602 if ((nmp->nm_flag & NFSMNT_HASWRITEVERF) == 0) {
1603 bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
1604 NFSX_V3WRITEVERF);
1605 nmp->nm_flag |= NFSMNT_HASWRITEVERF;
1606 } else if (bcmp((caddr_t)tl,
1607 (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF)) {
1608 *must_commit = 1;
1609 bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
1610 NFSX_V3WRITEVERF);
1611 }
1612 }
1613 } else
fa4905b1
A
1614 nfsm_loadattr(vp, (struct vattr *)0, &xid);
1615
1616 if (wccflag && vp->v_type != VBAD) /* EINVAL set on VBAD node */
1c79356b
A
1617 VTONFS(vp)->n_mtime = VTONFS(vp)->n_vattr.va_mtime.tv_sec;
1618 m_freem(mrep);
fa4905b1
A
1619 /*
1620 * we seem to have a case where we end up looping on shutdown
1621 * and taking down nfs servers. For V3, error cases, there is
1622 * no way to terminate loop, if the len was 0, meaning,
1623 * nmp->nm_wsize was trashed. FreeBSD has this fix in it.
1624 * Let's try it.
1625 */
1626 if (error)
1627 break;
1628 tsiz -= len;
1c79356b
A
1629 }
1630nfsmout:
fa4905b1
A
1631 /* EKN
1632 * does it make sense to even say it was committed if we had an error?
1633 * okay well just don't on bad vnodes then. EINVAL will be
1634 * returned on bad vnodes
1635 */
1636 if (vp->v_type != VBAD && (vp->v_mount->mnt_flag & MNT_ASYNC))
1c79356b
A
1637 committed = NFSV3WRITE_FILESYNC;
1638 *iomode = committed;
1639 if (error)
1640 uiop->uio_resid = tsiz;
1641 return (error);
1642}
1643
1644/*
1645 * nfs mknod rpc
1646 * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the
1647 * mode set to specify the file type and the size field for rdev.
1648 */
/*
 * Creates a special file named by cnp in directory dvp with attributes
 * vap.  On success the new vnode is stored through vpp.  On every path
 * dvp is released with vput() before returning.  Returns 0 or an errno
 * value; EOPNOTSUPP for types other than VCHR, VBLK, VFIFO and VSOCK.
 *
 * NOTE(review): the nfsm_* helpers are macros defined elsewhere that work
 * on the locals declared here; nfsm_reqdone appears to provide the shared
 * exit path - confirm against their definitions.
 */
1649static int
1650nfs_mknodrpc(dvp, vpp, cnp, vap)
1651 register struct vnode *dvp;
1652 register struct vnode **vpp;
1653 register struct componentname *cnp;
1654 register struct vattr *vap;
1655{
1656 register struct nfsv2_sattr *sp;
1657 register struct nfsv3_sattr *sp3;
1658 register u_long *tl;
1659 register caddr_t cp;
1660 register long t1, t2;
1661 struct vnode *newvp = (struct vnode *)0;
1662 struct nfsnode *np = (struct nfsnode *)0;
1663 struct vattr vattr;
1664 char *cp2;
1665 caddr_t bpos, dpos;
1666 int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0;
1667 struct mbuf *mreq, *mrep, *md, *mb, *mb2;
1668 u_long rdev;
fa4905b1 1669 u_int64_t xid;
1c79356b
A
1670 int v3 = NFS_ISV3(dvp);
1671
/*
 * rdev is what the non-v3 request sends in the size field: the device
 * number for VCHR/VBLK, all ones for VFIFO/VSOCK.
 */
1672 if (vap->va_type == VCHR || vap->va_type == VBLK)
1673 rdev = txdr_unsigned(vap->va_rdev);
1674 else if (vap->va_type == VFIFO || vap->va_type == VSOCK)
1675 rdev = 0xffffffff;
1676 else {
1677 VOP_ABORTOP(dvp, cnp);
1678 vput(dvp);
1679 return (EOPNOTSUPP);
1680 }
/* The directory's attributes supply the gid given to the new node. */
1681 if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc))) {
1682 VOP_ABORTOP(dvp, cnp);
1683 vput(dvp);
1684 return (error);
1685 }
1686 nfsstats.rpccnt[NFSPROC_MKNOD]++;
1687 nfsm_reqhead(dvp, NFSPROC_MKNOD, NFSX_FH(v3) + 4 * NFSX_UNSIGNED +
1688 + nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3));
1689 nfsm_fhtom(dvp, v3);
1690 nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
1691 if (v3) {
1692 nfsm_build(tl, u_long *, NFSX_UNSIGNED + NFSX_V3SRVSATTR);
1693 *tl++ = vtonfsv3_type(vap->va_type);
1694 sp3 = (struct nfsv3_sattr *)tl;
1695 nfsm_v3sattr(sp3, vap, cnp->cn_cred->cr_uid, vattr.va_gid);
/* Device nodes additionally carry major and minor numbers. */
1696 if (vap->va_type == VCHR || vap->va_type == VBLK) {
1697 nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED);
1698 *tl++ = txdr_unsigned(major(vap->va_rdev));
1699 *tl = txdr_unsigned(minor(vap->va_rdev));
1700 }
1701 } else {
1702 nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
1703 sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode);
1704 sp->sa_uid = txdr_unsigned(cnp->cn_cred->cr_uid);
1705 sp->sa_gid = txdr_unsigned(vattr.va_gid);
1706 sp->sa_size = rdev;
1707 txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
1708 txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
1709 }
fa4905b1 1710 nfsm_request(dvp, NFSPROC_MKNOD, cnp->cn_proc, cnp->cn_cred, &xid);
1c79356b 1711 if (!error) {
fa4905b1 1712 nfsm_mtofh(dvp, newvp, v3, gotvp, &xid);
1c79356b
A
/*
 * If gotvp is still zero after parsing the reply, drop any vnode that was
 * produced and look the new name up explicitly.
 */
1713 if (!gotvp) {
1714 if (newvp) {
1715 vput(newvp);
1716 newvp = (struct vnode *)0;
1717 }
1718 error = nfs_lookitup(dvp, cnp->cn_nameptr,
1719 cnp->cn_namelen, cnp->cn_cred, cnp->cn_proc, &np);
1720 if (!error)
1721 newvp = NFSTOV(np);
1722 }
1723 }
1724 if (v3)
fa4905b1 1725 nfsm_wcc_data(dvp, wccflag, &xid);
1c79356b
A
1726 nfsm_reqdone;
/* On failure release the new vnode; on success publish it to the caller. */
1727 if (error) {
1728 if (newvp)
1729 vput(newvp);
1730 } else {
1731 if (cnp->cn_flags & MAKEENTRY)
1732 cache_enter(dvp, newvp, cnp);
1733 *vpp = newvp;
1734 }
1735 FREE_ZONE(cnp->cn_pnbuf, cnp->cn_pnlen, M_NAMEI);
fa4905b1
A
/*
 * Flag the directory as modified and, when wccflag came back zero, reset
 * its attribute cache timestamp.
 */
1736 if (dvp->v_type != VBAD) { /* EINVAL set on VBAD vnode */
1737 VTONFS(dvp)->n_flag |= NMODIFIED;
1738 if (!wccflag)
1739 VTONFS(dvp)->n_attrstamp = 0;
1740 }
1c79356b
A
1741 vput(dvp);
1742 return (error);
1743}
1744
1745/*
1746 * nfs mknod vop
1747 * just call nfs_mknodrpc() to do the work.
1748 */
1749/* ARGSUSED */
1750static int
1751nfs_mknod(ap)
1752 struct vop_mknod_args /* {
1753 struct vnode *a_dvp;
1754 struct vnode **a_vpp;
1755 struct componentname *a_cnp;
1756 struct vattr *a_vap;
1757 } */ *ap;
1758{
1759 struct vnode *newvp;
1760 int error;
1761
1762 error = nfs_mknodrpc(ap->a_dvp, &newvp, ap->a_cnp, ap->a_vap);
fa4905b1 1763 if (!error && newvp)
1c79356b 1764 vput(newvp);
fa4905b1 1765 *ap->a_vpp = 0;
1c79356b
A
1766 return (error);
1767}
1768
1769static u_long create_verf;
1770/*
1771 * nfs file create call
1772 */
1773static int
1774nfs_create(ap)
1775 struct vop_create_args /* {
1776 struct vnode *a_dvp;
1777 struct vnode **a_vpp;
1778 struct componentname *a_cnp;
1779 struct vattr *a_vap;
1780 } */ *ap;
1781{
1782 register struct vnode *dvp = ap->a_dvp;
1783 register struct vattr *vap = ap->a_vap;
1784 register struct componentname *cnp = ap->a_cnp;
1785 register struct nfsv2_sattr *sp;
1786 register struct nfsv3_sattr *sp3;
1787 register u_long *tl;
1788 register caddr_t cp;
1789 register long t1, t2;
1790 struct nfsnode *np = (struct nfsnode *)0;
1791 struct vnode *newvp = (struct vnode *)0;
1792 caddr_t bpos, dpos, cp2;
1793 int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0, fmode = 0;
1794 struct mbuf *mreq, *mrep, *md, *mb, *mb2;
1795 struct vattr vattr;
1796 int v3 = NFS_ISV3(dvp);
fa4905b1 1797 u_int64_t xid;
1c79356b
A
1798
1799 /*
1800 * Oops, not for me..
1801 */
1802 if (vap->va_type == VSOCK)
1803 return (nfs_mknodrpc(dvp, ap->a_vpp, cnp, vap));
1804
1805 if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc))) {
1806 VOP_ABORTOP(dvp, cnp);
1807 vput(dvp);
1808 return (error);
1809 }
1810 if (vap->va_vaflags & VA_EXCLUSIVE)
1811 fmode |= O_EXCL;
1812again:
1813 nfsstats.rpccnt[NFSPROC_CREATE]++;
1814 nfsm_reqhead(dvp, NFSPROC_CREATE, NFSX_FH(v3) + 2 * NFSX_UNSIGNED +
1815 nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3));
1816 nfsm_fhtom(dvp, v3);
1817 nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
1818 if (v3) {
1819 nfsm_build(tl, u_long *, NFSX_UNSIGNED);
1820 if (fmode & O_EXCL) {
1821 *tl = txdr_unsigned(NFSV3CREATE_EXCLUSIVE);
1822 nfsm_build(tl, u_long *, NFSX_V3CREATEVERF);
1823 if (!TAILQ_EMPTY(&in_ifaddrhead))
1824 *tl++ = IA_SIN(in_ifaddrhead.tqh_first)->sin_addr.s_addr;
1825 else
1826 *tl++ = create_verf;
1827 *tl = ++create_verf;
1828 } else {
1829 *tl = txdr_unsigned(NFSV3CREATE_UNCHECKED);
1830 nfsm_build(tl, u_long *, NFSX_V3SRVSATTR);
1831 sp3 = (struct nfsv3_sattr *)tl;
1832 nfsm_v3sattr(sp3, vap, cnp->cn_cred->cr_uid, vattr.va_gid);
1833 }
1834 } else {
1835 nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
1836 sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode);
1837 sp->sa_uid = txdr_unsigned(cnp->cn_cred->cr_uid);
1838 sp->sa_gid = txdr_unsigned(vattr.va_gid);
1839 sp->sa_size = 0;
1840 txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
1841 txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
1842 }
fa4905b1 1843 nfsm_request(dvp, NFSPROC_CREATE, cnp->cn_proc, cnp->cn_cred, &xid);
1c79356b 1844 if (!error) {
fa4905b1 1845 nfsm_mtofh(dvp, newvp, v3, gotvp, &xid);
1c79356b
A
1846 if (!gotvp) {
1847 if (newvp) {
1848 vput(newvp);
1849 newvp = (struct vnode *)0;
1850 }
1851 error = nfs_lookitup(dvp, cnp->cn_nameptr,
1852 cnp->cn_namelen, cnp->cn_cred, cnp->cn_proc, &np);
1853 if (!error)
1854 newvp = NFSTOV(np);
1855 }
1856 }
1857 if (v3)
fa4905b1 1858 nfsm_wcc_data(dvp, wccflag, &xid);
1c79356b
A
1859 nfsm_reqdone;
1860 if (error) {
1861 if (v3 && (fmode & O_EXCL) && error == NFSERR_NOTSUPP) {
1862 fmode &= ~O_EXCL;
1863 goto again;
1864 }
1865 if (newvp)
1866 vput(newvp);
1867 } else if (v3 && (fmode & O_EXCL))
1868 error = nfs_setattrrpc(newvp, vap, cnp->cn_cred, cnp->cn_proc);
1869 if (!error) {
1870 if (cnp->cn_flags & MAKEENTRY)
1871 cache_enter(dvp, newvp, cnp);
1872 *ap->a_vpp = newvp;
1873 }
1874 FREE_ZONE(cnp->cn_pnbuf, cnp->cn_pnlen, M_NAMEI);
1875 if (dvp->v_type != VBAD) { /* EINVAL set on VBAD vnode */
1876 VTONFS(dvp)->n_flag |= NMODIFIED;
1877 if (!wccflag)
1878 VTONFS(dvp)->n_attrstamp = 0;
1879 }
1880 vput(dvp);
1881 return (error);
1882}
1883
1884/*
1885 * nfs file remove call
1886 * To try and make nfs semantics closer to ufs semantics, a file that has
1887 * other processes using the vnode is renamed instead of removed and then
1888 * removed later on the last close.
1889 * - If v_usecount > 1
1890 * If a rename is not already in the works
1891 * call nfs_sillyrename() to set it up
1892 * else
1893 * do the remove rpc
1894 */
1895static int
1896nfs_remove(ap)
1897 struct vop_remove_args /* {
1898 struct vnodeop_desc *a_desc;
1899 struct vnode * a_dvp;
1900 struct vnode * a_vp;
1901 struct componentname * a_cnp;
1902 } */ *ap;
1903{
1904 register struct vnode *vp = ap->a_vp;
1905 register struct vnode *dvp = ap->a_dvp;
1906 register struct componentname *cnp = ap->a_cnp;
1907 register struct nfsnode *np = VTONFS(vp);
9bccf70c 1908 int error = 0, gofree = 0;
1c79356b 1909 struct vattr vattr;
1c79356b
A
1910
1911#if DIAGNOSTIC
1912 if ((cnp->cn_flags & HASBUF) == 0)
1913 panic("nfs_remove: no name");
1914 if (vp->v_usecount < 1)
1915 panic("nfs_remove: bad v_usecount");
1916#endif
9bccf70c
A
1917
1918 if (UBCISVALID(vp)) {
1919 /* regular files */
1920 if (UBCINFOEXISTS(vp))
1921 gofree = (ubc_isinuse(vp, 1)) ? 0 : 1;
1922 else {
1923 /* dead or dying vnode.With vnode locking panic instead of error */
1924 FREE_ZONE(cnp->cn_pnbuf, cnp->cn_pnlen, M_NAMEI);
1925 vput(dvp);
1926 vput(vp);
1927 return (EIO);
1928 }
1929 } else {
1930 /* UBC not in play */
1931 if (vp->v_usecount == 1)
1932 gofree = 1;
1933 }
1934 if (gofree || (np->n_sillyrename &&
1935 VOP_GETATTR(vp, &vattr, cnp->cn_cred, cnp->cn_proc) == 0 &&
1936 vattr.va_nlink > 1)) {
1c79356b
A
1937 /*
1938 * Purge the name cache so that the chance of a lookup for
1939 * the name succeeding while the remove is in progress is
1940 * minimized. Without node locking it can still happen, such
1941 * that an I/O op returns ESTALE, but since you get this if
1942 * another host removes the file..
1943 */
1944 cache_purge(vp);
1945 /*
1946 * throw away biocache buffers, mainly to avoid
1947 * unnecessary delayed writes later.
1948 */
1949 error = nfs_vinvalbuf(vp, 0, cnp->cn_cred, cnp->cn_proc, 1);
fa4905b1
A
1950 np->n_size = 0;
1951 ubc_setsize(vp, (off_t)0); /* XXX check error */
1c79356b
A
1952 /* Do the rpc */
1953 if (error != EINTR)
1954 error = nfs_removerpc(dvp, cnp->cn_nameptr,
1955 cnp->cn_namelen, cnp->cn_cred, cnp->cn_proc);
1956 /*
1957 * Kludge City: If the first reply to the remove rpc is lost..
1958 * the reply to the retransmitted request will be ENOENT
1959 * since the file was in fact removed
1960 * Therefore, we cheat and return success.
1961 */
1962 if (error == ENOENT)
1963 error = 0;
1c79356b
A
1964 } else if (!np->n_sillyrename) {
1965 error = nfs_sillyrename(dvp, vp, cnp);
1966 }
1967
1968 FREE_ZONE(cnp->cn_pnbuf, cnp->cn_pnlen, M_NAMEI);
1969 np->n_attrstamp = 0;
1970 vput(dvp);
1971
0b4e3aa0 1972 VOP_UNLOCK(vp, 0, cnp->cn_proc);
9bccf70c 1973 ubc_uncache(vp);
0b4e3aa0 1974 vrele(vp);
1c79356b
A
1975
1976 return (error);
1977}
1978
1979/*
1980 * nfs file remove rpc called from nfs_inactive
1981 */
1982int
1983nfs_removeit(sp)
1984 register struct sillyrename *sp;
1985{
1986
1987 return (nfs_removerpc(sp->s_dvp, sp->s_name, sp->s_namlen, sp->s_cred,
1988 (struct proc *)0));
1989}
1990
1991/*
1992 * Nfs remove rpc, called from nfs_remove() and nfs_removeit().
1993 */
1994static int
1995nfs_removerpc(dvp, name, namelen, cred, proc)
1996 register struct vnode *dvp;
1997 char *name;
1998 int namelen;
1999 struct ucred *cred;
2000 struct proc *proc;
2001{
2002 register u_long *tl;
2003 register caddr_t cp;
2004 register long t1, t2;
2005 caddr_t bpos, dpos, cp2;
2006 int error = 0, wccflag = NFSV3_WCCRATTR;
2007 struct mbuf *mreq, *mrep, *md, *mb, *mb2;
2008 int v3 = NFS_ISV3(dvp);
fa4905b1 2009 u_int64_t xid;
1c79356b
A
2010
2011 nfsstats.rpccnt[NFSPROC_REMOVE]++;
2012 nfsm_reqhead(dvp, NFSPROC_REMOVE,
2013 NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(namelen));
2014 nfsm_fhtom(dvp, v3);
2015 nfsm_strtom(name, namelen, NFS_MAXNAMLEN);
fa4905b1 2016 nfsm_request(dvp, NFSPROC_REMOVE, proc, cred, &xid);
1c79356b 2017 if (v3)
fa4905b1 2018 nfsm_wcc_data(dvp, wccflag, &xid);
1c79356b 2019 nfsm_reqdone;
0b4e3aa0
A
2020 if (dvp->v_type != VBAD) { /* EINVAL set on VBAD vnode */
2021 VTONFS(dvp)->n_flag |= NMODIFIED;
2022 if (!wccflag)
2023 VTONFS(dvp)->n_attrstamp = 0;
fa4905b1 2024 }
1c79356b
A
2025 return (error);
2026}
2027
2028/*
2029 * nfs file rename call
2030 */
2031static int
2032nfs_rename(ap)
2033 struct vop_rename_args /* {
2034 struct vnode *a_fdvp;
2035 struct vnode *a_fvp;
2036 struct componentname *a_fcnp;
2037 struct vnode *a_tdvp;
2038 struct vnode *a_tvp;
2039 struct componentname *a_tcnp;
2040 } */ *ap;
2041{
2042 register struct vnode *fvp = ap->a_fvp;
2043 register struct vnode *tvp = ap->a_tvp;
2044 register struct vnode *fdvp = ap->a_fdvp;
2045 register struct vnode *tdvp = ap->a_tdvp;
2046 register struct componentname *tcnp = ap->a_tcnp;
2047 register struct componentname *fcnp = ap->a_fcnp;
9bccf70c 2048 int error, purged=0, inuse=0;
1c79356b
A
2049
2050#if DIAGNOSTIC
2051 if ((tcnp->cn_flags & HASBUF) == 0 ||
2052 (fcnp->cn_flags & HASBUF) == 0)
2053 panic("nfs_rename: no name");
2054#endif
2055 /* Check for cross-device rename */
2056 if ((fvp->v_mount != tdvp->v_mount) ||
2057 (tvp && (fvp->v_mount != tvp->v_mount))) {
2058 error = EXDEV;
9bccf70c
A
2059 if (tvp)
2060 VOP_UNLOCK(tvp, 0, tcnp->cn_proc);
1c79356b
A
2061 goto out;
2062 }
2063
2064 /*
2065 * If the tvp exists and is in use, sillyrename it before doing the
2066 * rename of the new file over it.
2067 * XXX Can't sillyrename a directory.
9bccf70c
A
2068 * Don't sillyrename if source and target are same vnode (hard
2069 * links or case-variants)
1c79356b 2070 */
9bccf70c
A
2071 if (tvp && tvp != fvp) {
2072 if (UBCISVALID(tvp)) {
2073 /* regular files */
2074 if (UBCINFOEXISTS(tvp))
2075 inuse = (ubc_isinuse(tvp, 1)) ? 1 : 0;
2076 else {
2077 /* dead or dying vnode.With vnode locking panic instead of error */
2078 error = EIO;
2079 VOP_UNLOCK(tvp, 0, tcnp->cn_proc);
2080 goto out;
2081 }
2082 } else {
2083 /* UBC not in play */
2084 if (tvp->v_usecount > 1)
2085 inuse = 1;
2086 }
2087 }
2088 if (inuse && !VTONFS(tvp)->n_sillyrename && tvp->v_type != VDIR) {
2089 if (error = nfs_sillyrename(tdvp, tvp, tcnp)) {
2090 /* sillyrename failed. Instead of pressing on, return error */
2091 goto out; /* should not be ENOENT. */
2092 } else {
2093 /* sillyrename succeeded.*/
2094 VOP_UNLOCK(tvp, 0, tcnp->cn_proc);
2095 ubc_uncache(tvp); /* get the nfs turd file to disappear */
2096 vrele(tvp);
2097 tvp = NULL;
2098 }
1c79356b
A
2099 }
2100
2101 error = nfs_renamerpc(fdvp, fcnp->cn_nameptr, fcnp->cn_namelen,
2102 tdvp, tcnp->cn_nameptr, tcnp->cn_namelen, tcnp->cn_cred,
2103 tcnp->cn_proc);
2104
2105 if (fvp->v_type == VDIR) {
9bccf70c 2106 if (tvp != NULL && tvp->v_type == VDIR) {
1c79356b 2107 cache_purge(tdvp);
9bccf70c
A
2108 if (tvp == tdvp)
2109 purged = 1;
2110 }
1c79356b
A
2111 cache_purge(fdvp);
2112 }
9bccf70c
A
2113
2114 cache_purge(fvp);
2115 if (tvp) {
2116 if (!purged)
2117 cache_purge(tvp);
2118 VOP_UNLOCK(tvp, 0, tcnp->cn_proc);
2119 ubc_uncache(tvp); /* get the nfs turd file to disappear */
2120 }
2121
1c79356b
A
2122out:
2123 if (tdvp == tvp)
2124 vrele(tdvp);
2125 else
2126 vput(tdvp);
2127 if (tvp)
9bccf70c 2128 vrele(tvp); /* already unlocked */
1c79356b
A
2129 vrele(fdvp);
2130 vrele(fvp);
2131 /*
2132 * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry.
2133 */
2134 if (error == ENOENT)
2135 error = 0;
2136 return (error);
2137}
2138
2139/*
2140 * nfs file rename rpc called from nfs_remove() above
2141 */
2142static int
2143nfs_renameit(sdvp, scnp, sp)
2144 struct vnode *sdvp;
2145 struct componentname *scnp;
2146 register struct sillyrename *sp;
2147{
2148 return (nfs_renamerpc(sdvp, scnp->cn_nameptr, scnp->cn_namelen,
2149 sdvp, sp->s_name, sp->s_namlen, scnp->cn_cred, scnp->cn_proc));
2150}
2151
2152/*
2153 * Do an nfs rename rpc. Called from nfs_rename() and nfs_renameit().
2154 */
2155static int
2156nfs_renamerpc(fdvp, fnameptr, fnamelen, tdvp, tnameptr, tnamelen, cred, proc)
2157 register struct vnode *fdvp;
2158 char *fnameptr;
2159 int fnamelen;
2160 register struct vnode *tdvp;
2161 char *tnameptr;
2162 int tnamelen;
2163 struct ucred *cred;
2164 struct proc *proc;
2165{
2166 register u_long *tl;
2167 register caddr_t cp;
2168 register long t1, t2;
2169 caddr_t bpos, dpos, cp2;
2170 int error = 0, fwccflag = NFSV3_WCCRATTR, twccflag = NFSV3_WCCRATTR;
2171 struct mbuf *mreq, *mrep, *md, *mb, *mb2;
2172 int v3 = NFS_ISV3(fdvp);
fa4905b1 2173 u_int64_t xid;
1c79356b
A
2174
2175 nfsstats.rpccnt[NFSPROC_RENAME]++;
2176 nfsm_reqhead(fdvp, NFSPROC_RENAME,
fa4905b1
A
2177 (NFSX_FH(v3) + NFSX_UNSIGNED)*2 + nfsm_rndup(fnamelen) +
2178 nfsm_rndup(tnamelen));
1c79356b
A
2179 nfsm_fhtom(fdvp, v3);
2180 nfsm_strtom(fnameptr, fnamelen, NFS_MAXNAMLEN);
2181 nfsm_fhtom(tdvp, v3);
2182 nfsm_strtom(tnameptr, tnamelen, NFS_MAXNAMLEN);
fa4905b1 2183 nfsm_request(fdvp, NFSPROC_RENAME, proc, cred, &xid);
1c79356b 2184 if (v3) {
fa4905b1
A
2185 u_int64_t txid = xid;
2186
2187 nfsm_wcc_data(fdvp, fwccflag, &xid);
2188 nfsm_wcc_data(tdvp, twccflag, &txid);
1c79356b
A
2189 }
2190 nfsm_reqdone;
fa4905b1
A
2191 if (fdvp->v_type != VBAD) { /* EINVAL set on VBAD vnode */
2192 VTONFS(fdvp)->n_flag |= NMODIFIED;
2193 if (!fwccflag)
2194 VTONFS(fdvp)->n_attrstamp = 0;
2195 }
2196 if (tdvp->v_type != VBAD) { /* EINVAL set on VBAD vnode */
2197 VTONFS(tdvp)->n_flag |= NMODIFIED;
2198 if (!twccflag)
2199 VTONFS(tdvp)->n_attrstamp = 0;
1c79356b
A
2200 }
2201 return (error);
2202}
2203
2204/*
2205 * nfs hard link create call
2206 */
2207static int
2208nfs_link(ap)
2209 struct vop_link_args /* {
2210 struct vnode *a_vp;
2211 struct vnode *a_tdvp;
2212 struct componentname *a_cnp;
2213 } */ *ap;
2214{
2215 register struct vnode *vp = ap->a_vp;
2216 register struct vnode *tdvp = ap->a_tdvp;
2217 register struct componentname *cnp = ap->a_cnp;
2218 register u_long *tl;
2219 register caddr_t cp;
2220 register long t1, t2;
2221 caddr_t bpos, dpos, cp2;
2222 int error = 0, wccflag = NFSV3_WCCRATTR, attrflag = 0;
2223 struct mbuf *mreq, *mrep, *md, *mb, *mb2;
2224 int v3 = NFS_ISV3(vp);
fa4905b1 2225 u_int64_t xid;
1c79356b
A
2226
2227 if (vp->v_mount != tdvp->v_mount) {
2228 VOP_ABORTOP(vp, cnp);
2229 if (tdvp == vp)
2230 vrele(tdvp);
2231 else
2232 vput(tdvp);
2233 return (EXDEV);
2234 }
2235
2236 /*
2237 * Push all writes to the server, so that the attribute cache
2238 * doesn't get "out of sync" with the server.
2239 * XXX There should be a better way!
2240 */
2241 VOP_FSYNC(vp, cnp->cn_cred, MNT_WAIT, cnp->cn_proc);
2242
2243 nfsstats.rpccnt[NFSPROC_LINK]++;
2244 nfsm_reqhead(vp, NFSPROC_LINK,
2245 NFSX_FH(v3)*2 + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen));
2246 nfsm_fhtom(vp, v3);
2247 nfsm_fhtom(tdvp, v3);
2248 nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
fa4905b1 2249 nfsm_request(vp, NFSPROC_LINK, cnp->cn_proc, cnp->cn_cred, &xid);
1c79356b 2250 if (v3) {
fa4905b1
A
2251 u_int64_t txid = xid;
2252
2253 nfsm_postop_attr(vp, attrflag, &xid);
2254 nfsm_wcc_data(tdvp, wccflag, &txid);
1c79356b
A
2255 }
2256 nfsm_reqdone;
2257 FREE_ZONE(cnp->cn_pnbuf, cnp->cn_pnlen, M_NAMEI);
2258
2259 VTONFS(tdvp)->n_flag |= NMODIFIED;
fa4905b1 2260 if (!attrflag && vp->v_type != VBAD) /* EINVAL set on VBAD vnode */
1c79356b 2261 VTONFS(vp)->n_attrstamp = 0;
fa4905b1 2262 if (!wccflag && tdvp->v_type != VBAD) /* EINVAL set on VBAD vnode */
1c79356b
A
2263 VTONFS(tdvp)->n_attrstamp = 0;
2264 vput(tdvp);
2265 /*
2266 * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
2267 */
2268 if (error == EEXIST)
2269 error = 0;
2270 return (error);
2271}
2272
2273/*
2274 * nfs symbolic link create call
2275 */
2276static int
2277nfs_symlink(ap)
2278 struct vop_symlink_args /* {
2279 struct vnode *a_dvp;
2280 struct vnode **a_vpp;
2281 struct componentname *a_cnp;
2282 struct vattr *a_vap;
2283 char *a_target;
2284 } */ *ap;
2285{
2286 register struct vnode *dvp = ap->a_dvp;
2287 register struct vattr *vap = ap->a_vap;
2288 register struct componentname *cnp = ap->a_cnp;
2289 register struct nfsv2_sattr *sp;
2290 register struct nfsv3_sattr *sp3;
2291 register u_long *tl;
2292 register caddr_t cp;
2293 register long t1, t2;
2294 caddr_t bpos, dpos, cp2;
2295 int slen, error = 0, wccflag = NFSV3_WCCRATTR, gotvp;
2296 struct mbuf *mreq, *mrep, *md, *mb, *mb2;
2297 struct vnode *newvp = (struct vnode *)0;
2298 int v3 = NFS_ISV3(dvp);
fa4905b1 2299 u_int64_t xid;
1c79356b
A
2300
2301 nfsstats.rpccnt[NFSPROC_SYMLINK]++;
2302 slen = strlen(ap->a_target);
2303 nfsm_reqhead(dvp, NFSPROC_SYMLINK, NFSX_FH(v3) + 2*NFSX_UNSIGNED +
2304 nfsm_rndup(cnp->cn_namelen) + nfsm_rndup(slen) + NFSX_SATTR(v3));
2305 nfsm_fhtom(dvp, v3);
2306 nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
2307 if (v3) {
2308 nfsm_build(sp3, struct nfsv3_sattr *, NFSX_V3SRVSATTR);
2309 nfsm_v3sattr(sp3, vap, cnp->cn_cred->cr_uid,
2310 cnp->cn_cred->cr_gid);
2311 }
2312 nfsm_strtom(ap->a_target, slen, NFS_MAXPATHLEN);
2313 if (!v3) {
2314 nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
2315 sp->sa_mode = vtonfsv2_mode(VLNK, vap->va_mode);
2316 sp->sa_uid = txdr_unsigned(cnp->cn_cred->cr_uid);
2317 sp->sa_gid = txdr_unsigned(cnp->cn_cred->cr_gid);
2318 sp->sa_size = -1;
2319 txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
2320 txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
2321 }
fa4905b1 2322 nfsm_request(dvp, NFSPROC_SYMLINK, cnp->cn_proc, cnp->cn_cred, &xid);
1c79356b 2323 if (v3) {
fa4905b1
A
2324 u_int64_t dxid = xid;
2325
1c79356b 2326 if (!error)
fa4905b1
A
2327 nfsm_mtofh(dvp, newvp, v3, gotvp, &xid);
2328 nfsm_wcc_data(dvp, wccflag, &dxid);
1c79356b
A
2329 }
2330 nfsm_reqdone;
2331 if (newvp)
2332 vput(newvp);
2333 FREE_ZONE(cnp->cn_pnbuf, cnp->cn_pnlen, M_NAMEI);
fa4905b1
A
2334 if (dvp->v_type != VBAD) { /* EINVAL set on VBAD vnode */
2335 VTONFS(dvp)->n_flag |= NMODIFIED;
2336 if (!wccflag)
2337 VTONFS(dvp)->n_attrstamp = 0;
2338 }
1c79356b
A
2339 vput(dvp);
2340 /*
2341 * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
2342 */
2343 if (error == EEXIST)
2344 error = 0;
2345 return (error);
2346}
2347
2348/*
2349 * nfs make dir call
2350 */
2351static int
2352nfs_mkdir(ap)
2353 struct vop_mkdir_args /* {
2354 struct vnode *a_dvp;
2355 struct vnode **a_vpp;
2356 struct componentname *a_cnp;
2357 struct vattr *a_vap;
2358 } */ *ap;
2359{
2360 register struct vnode *dvp = ap->a_dvp;
2361 register struct vattr *vap = ap->a_vap;
2362 register struct componentname *cnp = ap->a_cnp;
2363 register struct nfsv2_sattr *sp;
2364 register struct nfsv3_sattr *sp3;
2365 register u_long *tl;
2366 register caddr_t cp;
2367 register long t1, t2;
2368 register int len;
2369 struct nfsnode *np = (struct nfsnode *)0;
2370 struct vnode *newvp = (struct vnode *)0;
2371 caddr_t bpos, dpos, cp2;
2372 int error = 0, wccflag = NFSV3_WCCRATTR;
2373 int gotvp = 0;
2374 struct mbuf *mreq, *mrep, *md, *mb, *mb2;
2375 struct vattr vattr;
2376 int v3 = NFS_ISV3(dvp);
fa4905b1 2377 u_int64_t xid, dxid;
1c79356b
A
2378
2379 if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc))) {
2380 VOP_ABORTOP(dvp, cnp);
2381 vput(dvp);
2382 return (error);
2383 }
2384 len = cnp->cn_namelen;
2385 nfsstats.rpccnt[NFSPROC_MKDIR]++;
2386 nfsm_reqhead(dvp, NFSPROC_MKDIR,
2387 NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len) + NFSX_SATTR(v3));
2388 nfsm_fhtom(dvp, v3);
2389 nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN);
2390 if (v3) {
2391 nfsm_build(sp3, struct nfsv3_sattr *, NFSX_V3SRVSATTR);
2392 nfsm_v3sattr(sp3, vap, cnp->cn_cred->cr_uid, vattr.va_gid);
2393 } else {
2394 nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
2395 sp->sa_mode = vtonfsv2_mode(VDIR, vap->va_mode);
2396 sp->sa_uid = txdr_unsigned(cnp->cn_cred->cr_uid);
2397 sp->sa_gid = txdr_unsigned(vattr.va_gid);
2398 sp->sa_size = -1;
2399 txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
2400 txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
2401 }
fa4905b1
A
2402 nfsm_request(dvp, NFSPROC_MKDIR, cnp->cn_proc, cnp->cn_cred, &xid);
2403 dxid = xid;
1c79356b 2404 if (!error)
fa4905b1 2405 nfsm_mtofh(dvp, newvp, v3, gotvp, &xid);
1c79356b 2406 if (v3)
fa4905b1 2407 nfsm_wcc_data(dvp, wccflag, &dxid);
1c79356b 2408 nfsm_reqdone;
0b4e3aa0
A
2409 if (dvp->v_type != VBAD) { /* EINVAL set on this case */
2410 VTONFS(dvp)->n_flag |= NMODIFIED;
2411 if (!wccflag)
2412 VTONFS(dvp)->n_attrstamp = 0;
fa4905b1 2413 }
1c79356b
A
2414 /*
2415 * Kludge: Map EEXIST => 0 assuming that you have a reply to a retry
2416 * if we can succeed in looking up the directory.
2417 */
2418 if (error == EEXIST || (!error && !gotvp)) {
2419 if (newvp) {
2420 vrele(newvp);
2421 newvp = (struct vnode *)0;
2422 }
2423 error = nfs_lookitup(dvp, cnp->cn_nameptr, len, cnp->cn_cred,
fa4905b1 2424 cnp->cn_proc, &np);
1c79356b
A
2425 if (!error) {
2426 newvp = NFSTOV(np);
2427 if (newvp->v_type != VDIR)
2428 error = EEXIST;
2429 }
2430 }
2431 if (error) {
2432 if (newvp)
2433 vrele(newvp);
2434 } else
2435 *ap->a_vpp = newvp;
2436 FREE_ZONE(cnp->cn_pnbuf, cnp->cn_pnlen, M_NAMEI);
2437 vput(dvp);
2438 return (error);
2439}
2440
2441/*
2442 * nfs remove directory call
2443 */
2444static int
2445nfs_rmdir(ap)
2446 struct vop_rmdir_args /* {
2447 struct vnode *a_dvp;
2448 struct vnode *a_vp;
2449 struct componentname *a_cnp;
2450 } */ *ap;
2451{
2452 register struct vnode *vp = ap->a_vp;
2453 register struct vnode *dvp = ap->a_dvp;
2454 register struct componentname *cnp = ap->a_cnp;
2455 register u_long *tl;
2456 register caddr_t cp;
2457 register long t1, t2;
2458 caddr_t bpos, dpos, cp2;
2459 int error = 0, wccflag = NFSV3_WCCRATTR;
2460 struct mbuf *mreq, *mrep, *md, *mb, *mb2;
2461 int v3 = NFS_ISV3(dvp);
fa4905b1 2462 u_int64_t xid;
1c79356b
A
2463
2464 nfsstats.rpccnt[NFSPROC_RMDIR]++;
2465 nfsm_reqhead(dvp, NFSPROC_RMDIR,
2466 NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen));
2467 nfsm_fhtom(dvp, v3);
2468 nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
fa4905b1 2469 nfsm_request(dvp, NFSPROC_RMDIR, cnp->cn_proc, cnp->cn_cred, &xid);
1c79356b 2470 if (v3)
fa4905b1 2471 nfsm_wcc_data(dvp, wccflag, &xid);
1c79356b
A
2472 nfsm_reqdone;
2473 FREE_ZONE(cnp->cn_pnbuf, cnp->cn_pnlen, M_NAMEI);
fa4905b1
A
2474 if (dvp->v_type != VBAD) { /* EINVAL set on this case */
2475 VTONFS(dvp)->n_flag |= NMODIFIED;
2476 if (!wccflag)
2477 VTONFS(dvp)->n_attrstamp = 0;
2478 }
1c79356b
A
2479 cache_purge(dvp);
2480 cache_purge(vp);
2481 vput(vp);
2482 vput(dvp);
2483 /*
2484 * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry.
2485 */
2486 if (error == ENOENT)
2487 error = 0;
2488 return (error);
2489}
2490
2491/*
2492 * nfs readdir call
2493 */
2494static int
2495nfs_readdir(ap)
2496 struct vop_readdir_args /* {
2497 struct vnode *a_vp;
2498 struct uio *a_uio;
2499 struct ucred *a_cred;
2500 } */ *ap;
2501{
2502 register struct vnode *vp = ap->a_vp;
2503 register struct nfsnode *np = VTONFS(vp);
2504 register struct uio *uio = ap->a_uio;
2505 int tresid, error;
2506 struct vattr vattr;
2507
2508 if (vp->v_type != VDIR)
2509 return (EPERM);
2510 /*
2511 * First, check for hit on the EOF offset cache
2512 */
2513 if (np->n_direofoffset > 0 && uio->uio_offset >= np->n_direofoffset &&
2514 (np->n_flag & NMODIFIED) == 0) {
2515 if (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) {
2516 if (NQNFS_CKCACHABLE(vp, ND_READ)) {
2517 nfsstats.direofcache_hits++;
2518 return (0);
2519 }
2520 } else if (VOP_GETATTR(vp, &vattr, ap->a_cred, uio->uio_procp) == 0 &&
2521 np->n_mtime == vattr.va_mtime.tv_sec) {
2522 nfsstats.direofcache_hits++;
2523 return (0);
2524 }
2525 }
2526
2527 /*
2528 * Call nfs_bioread() to do the real work.
2529 */
2530 tresid = uio->uio_resid;
2531 error = nfs_bioread(vp, uio, 0, ap->a_cred, 0);
2532
2533 if (!error && uio->uio_resid == tresid)
2534 nfsstats.direofcache_misses++;
2535 return (error);
2536}
2537
2538/*
2539 * Readdir rpc call.
2540 * Called from below the buffer cache by nfs_doio().
2541 */
2542int
2543nfs_readdirrpc(vp, uiop, cred)
2544 struct vnode *vp;
2545 register struct uio *uiop;
2546 struct ucred *cred;
2547
2548{
2549 register int len, left;
2550 register struct dirent *dp;
2551 register u_long *tl;
2552 register caddr_t cp;
2553 register long t1, t2;
2554 register nfsuint64 *cookiep;
2555 caddr_t bpos, dpos, cp2;
2556 struct mbuf *mreq, *mrep, *md, *mb, *mb2;
2557 nfsuint64 cookie;
2558 struct nfsmount *nmp = VFSTONFS(vp->v_mount);
2559 struct nfsnode *dnp = VTONFS(vp);
2560 u_quad_t fileno;
2561 int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1;
2562 int attrflag;
2563 int v3 = NFS_ISV3(vp);
fa4905b1 2564 u_int64_t xid;
1c79356b
A
2565
2566#ifndef nolint
2567 dp = (struct dirent *)0;
2568#endif
2569#if DIAGNOSTIC
2570 if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (NFS_DIRBLKSIZ - 1)) ||
2571 (uiop->uio_resid & (NFS_DIRBLKSIZ - 1)))
2572 panic("nfs_readdirrpc: bad uio");
2573#endif
2574
2575 /*
2576 * If there is no cookie, assume directory was stale.
2577 */
2578 cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0);
2579 if (cookiep)
2580 cookie = *cookiep;
2581 else
2582 return (NFSERR_BAD_COOKIE);
2583 /*
2584 * Loop around doing readdir rpc's of size nm_readdirsize
2585 * truncated to a multiple of DIRBLKSIZ.
2586 * The stopping criteria is EOF or buffer full.
2587 */
2588 while (more_dirs && bigenough) {
2589 nfsstats.rpccnt[NFSPROC_READDIR]++;
2590 nfsm_reqhead(vp, NFSPROC_READDIR, NFSX_FH(v3) +
2591 NFSX_READDIR(v3));
2592 nfsm_fhtom(vp, v3);
2593 if (v3) {
2594 nfsm_build(tl, u_long *, 5 * NFSX_UNSIGNED);
2595 *tl++ = cookie.nfsuquad[0];
2596 *tl++ = cookie.nfsuquad[1];
2597 *tl++ = dnp->n_cookieverf.nfsuquad[0];
2598 *tl++ = dnp->n_cookieverf.nfsuquad[1];
2599 } else {
2600 nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED);
2601 *tl++ = cookie.nfsuquad[0];
2602 }
2603 *tl = txdr_unsigned(nmp->nm_readdirsize);
fa4905b1 2604 nfsm_request(vp, NFSPROC_READDIR, uiop->uio_procp, cred, &xid);
1c79356b 2605 if (v3) {
fa4905b1 2606 nfsm_postop_attr(vp, attrflag, &xid);
1c79356b
A
2607 if (!error) {
2608 nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
2609 dnp->n_cookieverf.nfsuquad[0] = *tl++;
2610 dnp->n_cookieverf.nfsuquad[1] = *tl;
2611 } else {
2612 m_freem(mrep);
2613 goto nfsmout;
2614 }
2615 }
2616 nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
2617 more_dirs = fxdr_unsigned(int, *tl);
2618
2619 /* loop thru the dir entries, doctoring them to 4bsd form */
2620 while (more_dirs && bigenough) {
2621 if (v3) {
2622 nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED);
2623 fxdr_hyper(tl, &fileno);
2624 len = fxdr_unsigned(int, *(tl + 2));
2625 } else {
2626 nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
2627 fileno = fxdr_unsigned(u_quad_t, *tl++);
2628 len = fxdr_unsigned(int, *tl);
2629 }
2630 if (len <= 0 || len > NFS_MAXNAMLEN) {
2631 error = EBADRPC;
2632 m_freem(mrep);
2633 goto nfsmout;
2634 }
2635 tlen = nfsm_rndup(len);
2636 if (tlen == len)
2637 tlen += 4; /* To ensure null termination */
2638 left = DIRBLKSIZ - blksiz;
2639 if ((tlen + DIRHDSIZ) > left) {
2640 dp->d_reclen += left;
2641 uiop->uio_iov->iov_base += left;
2642 uiop->uio_iov->iov_len -= left;
2643 uiop->uio_offset += left;
2644 uiop->uio_resid -= left;
2645 blksiz = 0;
2646 }
2647 if ((tlen + DIRHDSIZ) > uiop->uio_resid)
2648 bigenough = 0;
2649 if (bigenough) {
2650 dp = (struct dirent *)uiop->uio_iov->iov_base;
2651 dp->d_fileno = (int)fileno;
2652 dp->d_namlen = len;
2653 dp->d_reclen = tlen + DIRHDSIZ;
2654 dp->d_type = DT_UNKNOWN;
2655 blksiz += dp->d_reclen;
2656 if (blksiz == DIRBLKSIZ)
2657 blksiz = 0;
2658 uiop->uio_offset += DIRHDSIZ;
2659 uiop->uio_resid -= DIRHDSIZ;
2660 uiop->uio_iov->iov_base += DIRHDSIZ;
2661 uiop->uio_iov->iov_len -= DIRHDSIZ;
2662 nfsm_mtouio(uiop, len);
2663 cp = uiop->uio_iov->iov_base;
2664 tlen -= len;
2665 *cp = '\0'; /* null terminate */
2666 uiop->uio_iov->iov_base += tlen;
2667 uiop->uio_iov->iov_len -= tlen;
2668 uiop->uio_offset += tlen;
2669 uiop->uio_resid -= tlen;
2670 } else
2671 nfsm_adv(nfsm_rndup(len));
2672 if (v3) {
2673 nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED);
2674 } else {
2675 nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
2676 }
2677 if (bigenough) {
2678 cookie.nfsuquad[0] = *tl++;
2679 if (v3)
2680 cookie.nfsuquad[1] = *tl++;
2681 } else if (v3)
2682 tl += 2;
2683 else
2684 tl++;
2685 more_dirs = fxdr_unsigned(int, *tl);
2686 }
2687 /*
2688 * If at end of rpc data, get the eof boolean
2689 */
2690 if (!more_dirs) {
2691 nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
2692 more_dirs = (fxdr_unsigned(int, *tl) == 0);
2693 }
2694 m_freem(mrep);
2695 }
2696 /*
2697 * Fill last record, iff any, out to a multiple of DIRBLKSIZ
2698 * by increasing d_reclen for the last record.
2699 */
2700 if (blksiz > 0) {
2701 left = DIRBLKSIZ - blksiz;
2702 dp->d_reclen += left;
2703 uiop->uio_iov->iov_base += left;
2704 uiop->uio_iov->iov_len -= left;
2705 uiop->uio_offset += left;
2706 uiop->uio_resid -= left;
2707 }
2708
2709 /*
2710 * We are now either at the end of the directory or have filled the
2711 * block.
2712 */
2713 if (bigenough)
2714 dnp->n_direofoffset = uiop->uio_offset;
2715 else {
2716 if (uiop->uio_resid > 0)
2717 printf("EEK! readdirrpc resid > 0\n");
2718 cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1);
2719 *cookiep = cookie;
2720 }
2721nfsmout:
2722 return (error);
2723}
2724
2725/*
2726 * NFS V3 readdir plus RPC. Used in place of nfs_readdirrpc().
2727 */
2728int
2729nfs_readdirplusrpc(vp, uiop, cred)
2730 struct vnode *vp;
2731 register struct uio *uiop;
2732 struct ucred *cred;
2733{
2734 register int len, left;
2735 register struct dirent *dp;
2736 register u_long *tl;
2737 register caddr_t cp;
2738 register long t1, t2;
2739 register struct vnode *newvp;
2740 register nfsuint64 *cookiep;
2741 caddr_t bpos, dpos, cp2, dpossav1, dpossav2;
2742 struct mbuf *mreq, *mrep, *md, *mb, *mb2, *mdsav1, *mdsav2;
2743 struct nameidata nami, *ndp = &nami;
2744 struct componentname *cnp = &ndp->ni_cnd;
2745 nfsuint64 cookie;
2746 struct nfsmount *nmp = VFSTONFS(vp->v_mount);
2747 struct nfsnode *dnp = VTONFS(vp), *np;
2748 nfsfh_t *fhp;
2749 u_quad_t fileno;
2750 int error = 0, tlen, more_dirs = 1, blksiz = 0, doit, bigenough = 1, i;
2751 int attrflag, fhsize;
fa4905b1 2752 u_int64_t xid, savexid;
1c79356b
A
2753
2754#ifndef nolint
2755 dp = (struct dirent *)0;
2756#endif
2757#if DIAGNOSTIC
2758 if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) ||
2759 (uiop->uio_resid & (DIRBLKSIZ - 1)))
2760 panic("nfs_readdirplusrpc: bad uio");
2761#endif
2762 ndp->ni_dvp = vp;
2763 newvp = NULLVP;
2764
2765 /*
2766 * If there is no cookie, assume directory was stale.
2767 */
2768 cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0);
2769 if (cookiep)
2770 cookie = *cookiep;
2771 else
2772 return (NFSERR_BAD_COOKIE);
2773 /*
2774 * Loop around doing readdir rpc's of size nm_readdirsize
2775 * truncated to a multiple of DIRBLKSIZ.
2776 * The stopping criteria is EOF or buffer full.
2777 */
2778 while (more_dirs && bigenough) {
2779 nfsstats.rpccnt[NFSPROC_READDIRPLUS]++;
2780 nfsm_reqhead(vp, NFSPROC_READDIRPLUS,
2781 NFSX_FH(1) + 6 * NFSX_UNSIGNED);
2782 nfsm_fhtom(vp, 1);
2783 nfsm_build(tl, u_long *, 6 * NFSX_UNSIGNED);
2784 *tl++ = cookie.nfsuquad[0];
2785 *tl++ = cookie.nfsuquad[1];
2786 *tl++ = dnp->n_cookieverf.nfsuquad[0];
2787 *tl++ = dnp->n_cookieverf.nfsuquad[1];
2788 *tl++ = txdr_unsigned(nmp->nm_readdirsize);
2789 *tl = txdr_unsigned(nmp->nm_rsize);
fa4905b1
A
2790 nfsm_request(vp, NFSPROC_READDIRPLUS, uiop->uio_procp, cred,
2791 &xid);
2792 savexid = xid;
2793 nfsm_postop_attr(vp, attrflag, &xid);
1c79356b
A
2794 if (error) {
2795 m_freem(mrep);
2796 goto nfsmout;
2797 }
2798 nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED);
2799 dnp->n_cookieverf.nfsuquad[0] = *tl++;
2800 dnp->n_cookieverf.nfsuquad[1] = *tl++;
2801 more_dirs = fxdr_unsigned(int, *tl);
2802
2803 /* loop thru the dir entries, doctoring them to 4bsd form */
2804 while (more_dirs && bigenough) {
2805 nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED);
2806 fxdr_hyper(tl, &fileno);
2807 len = fxdr_unsigned(int, *(tl + 2));
2808 if (len <= 0 || len > NFS_MAXNAMLEN) {
2809 error = EBADRPC;
2810 m_freem(mrep);
2811 goto nfsmout;
2812 }
2813 tlen = nfsm_rndup(len);
2814 if (tlen == len)
2815 tlen += 4; /* To ensure null termination*/
2816 left = DIRBLKSIZ - blksiz;
2817 if ((tlen + DIRHDSIZ) > left) {
2818 dp->d_reclen += left;
2819 uiop->uio_iov->iov_base += left;
2820 uiop->uio_iov->iov_len -= left;
2821 uiop->uio_offset += left;
2822 uiop->uio_resid -= left;
2823 blksiz = 0;
2824 }
2825 if ((tlen + DIRHDSIZ) > uiop->uio_resid)
2826 bigenough = 0;
2827 if (bigenough) {
2828 dp = (struct dirent *)uiop->uio_iov->iov_base;
2829 dp->d_fileno = (int)fileno;
2830 dp->d_namlen = len;
2831 dp->d_reclen = tlen + DIRHDSIZ;
2832 dp->d_type = DT_UNKNOWN;
2833 blksiz += dp->d_reclen;
2834 if (blksiz == DIRBLKSIZ)
2835 blksiz = 0;
2836 uiop->uio_offset += DIRHDSIZ;
2837 uiop->uio_resid -= DIRHDSIZ;
2838 uiop->uio_iov->iov_base += DIRHDSIZ;
2839 uiop->uio_iov->iov_len -= DIRHDSIZ;
2840 cnp->cn_nameptr = uiop->uio_iov->iov_base;
2841 cnp->cn_namelen = len;
2842 nfsm_mtouio(uiop, len);
2843 cp = uiop->uio_iov->iov_base;
2844 tlen -= len;
2845 *cp = '\0';
2846 uiop->uio_iov->iov_base += tlen;
2847 uiop->uio_iov->iov_len -= tlen;
2848 uiop->uio_offset += tlen;
2849 uiop->uio_resid -= tlen;
2850 } else
2851 nfsm_adv(nfsm_rndup(len));
2852 nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED);
2853 if (bigenough) {
2854 cookie.nfsuquad[0] = *tl++;
2855 cookie.nfsuquad[1] = *tl++;
2856 } else
2857 tl += 2;
2858
2859 /*
2860 * Since the attributes are before the file handle
2861 * (sigh), we must skip over the attributes and then
2862 * come back and get them.
2863 */
2864 attrflag = fxdr_unsigned(int, *tl);
2865 if (attrflag) {
2866 dpossav1 = dpos;
2867 mdsav1 = md;
2868 nfsm_adv(NFSX_V3FATTR);
2869 nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
2870 doit = fxdr_unsigned(int, *tl);
2871 if (doit) {
2872 nfsm_getfh(fhp, fhsize, 1);
2873 if (NFS_CMPFH(dnp, fhp, fhsize)) {
2874 VREF(vp);
2875 newvp = vp;
2876 np = dnp;
2877 } else {
2878 if ((error = nfs_nget(vp->v_mount, fhp,
2879 fhsize, &np)))
2880 doit = 0;
2881 else
2882 newvp = NFSTOV(np);
2883 }
2884 }
2885 if (doit) {
2886 dpossav2 = dpos;
2887 dpos = dpossav1;
2888 mdsav2 = md;
2889 md = mdsav1;
fa4905b1
A
2890 xid = savexid;
2891 nfsm_loadattr(newvp, (struct vattr *)0, &xid);
1c79356b
A
2892 dpos = dpossav2;
2893 md = mdsav2;
2894 dp->d_type =
2895 IFTODT(VTTOIF(np->n_vattr.va_type));
2896 ndp->ni_vp = newvp;
2897 cnp->cn_hash = 0;
2898 for (cp = cnp->cn_nameptr, i = 1; i <= len;
2899 i++, cp++)
2900 cnp->cn_hash += (unsigned char)*cp * i;
2901 if (cnp->cn_namelen <= NCHNAMLEN)
2902 cache_enter(ndp->ni_dvp, ndp->ni_vp, cnp);
2903 }
2904 } else {
2905 /* Just skip over the file handle */
2906 nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
2907 i = fxdr_unsigned(int, *tl);
2908 nfsm_adv(nfsm_rndup(i));
2909 }
2910 if (newvp != NULLVP) {
2911 vrele(newvp);
2912 newvp = NULLVP;
2913 }
2914 nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
2915 more_dirs = fxdr_unsigned(int, *tl);
2916 }
2917 /*
2918 * If at end of rpc data, get the eof boolean
2919 */
2920 if (!more_dirs) {
2921 nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
2922 more_dirs = (fxdr_unsigned(int, *tl) == 0);
2923 }
2924 m_freem(mrep);
2925 }
2926 /*
2927 * Fill last record, iff any, out to a multiple of NFS_DIRBLKSIZ
2928 * by increasing d_reclen for the last record.
2929 */
2930 if (blksiz > 0) {
2931 left = DIRBLKSIZ - blksiz;
2932 dp->d_reclen += left;
2933 uiop->uio_iov->iov_base += left;
2934 uiop->uio_iov->iov_len -= left;
2935 uiop->uio_offset += left;
2936 uiop->uio_resid -= left;
2937 }
2938
2939 /*
2940 * We are now either at the end of the directory or have filled the
2941 * block.
2942 */
2943 if (bigenough)
2944 dnp->n_direofoffset = uiop->uio_offset;
2945 else {
2946 if (uiop->uio_resid > 0)
2947 printf("EEK! readdirplusrpc resid > 0\n");
2948 cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1);
2949 *cookiep = cookie;
2950 }
2951nfsmout:
2952 if (newvp != NULLVP) {
2953 if (newvp == vp)
2954 vrele(newvp);
2955 else
2956 vput(newvp);
2957 newvp = NULLVP;
2958 }
2959 return (error);
2960}
2961
2962/*
2963 * Silly rename. To make the NFS filesystem that is stateless look a little
2964 * more like the "ufs" a remove of an active vnode is translated to a rename
2965 * to a funny looking filename that is removed by nfs_inactive on the
2966 * nfsnode. There is the potential for another process on a different client
2967 * to create the same funny name between the nfs_lookitup() fails and the
2968 * nfs_rename() completes, but...
2969 */
2970static int
2971nfs_sillyrename(dvp, vp, cnp)
2972 struct vnode *dvp, *vp;
2973 struct componentname *cnp;
2974{
2975 register struct sillyrename *sp;
2976 struct nfsnode *np;
2977 int error;
2978 short pid;
2979 struct ucred *cred;
2980
2981 cache_purge(dvp);
2982 np = VTONFS(vp);
2983#if DIAGNOSTIC
2984 if (vp->v_type == VDIR)
2985 panic("nfs_sillyrename: dir");
2986#endif
2987 MALLOC_ZONE(sp, struct sillyrename *,
2988 sizeof (struct sillyrename), M_NFSREQ, M_WAITOK);
2989 sp->s_cred = crdup(cnp->cn_cred);
2990 sp->s_dvp = dvp;
2991 VREF(dvp);
2992
2993 /* Fudge together a funny name */
2994 pid = cnp->cn_proc->p_pid;
2995 sp->s_namlen = sprintf(sp->s_name, ".nfsA%04x4.4", pid);
2996
2997 /* Try lookitups until we get one that isn't there */
2998 while (nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
2999 cnp->cn_proc, (struct nfsnode **)0) == 0) {
3000 sp->s_name[4]++;
3001 if (sp->s_name[4] > 'z') {
3002 error = EINVAL;
3003 goto bad;
3004 }
3005 }
3006 if ((error = nfs_renameit(dvp, cnp, sp)))
3007 goto bad;
3008 error = nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
3009 cnp->cn_proc, &np);
3010#if DIAGNOSTIC
3011 kprintf("sillyrename: %s, vp=%x, np=%x, dvp=%x\n",
3012 &sp->s_name[0], (unsigned)vp, (unsigned)np, (unsigned)dvp);
3013#endif
3014 np->n_sillyrename = sp;
3015 return (0);
3016bad:
3017 vrele(sp->s_dvp);
3018 cred = sp->s_cred;
3019 sp->s_cred = NOCRED;
3020 crfree(cred);
3021 _FREE_ZONE((caddr_t)sp, sizeof (struct sillyrename), M_NFSREQ);
3022 return (error);
3023}
3024
3025/*
3026 * Look up a file name and optionally either update the file handle or
3027 * allocate an nfsnode, depending on the value of npp.
3028 * npp == NULL --> just do the lookup
3029 * *npp == NULL --> allocate a new nfsnode and make sure attributes are
3030 * handled too
3031 * *npp != NULL --> update the file handle in the vnode
3032 */
3033static int
3034nfs_lookitup(dvp, name, len, cred, procp, npp)
3035 register struct vnode *dvp;
3036 char *name;
3037 int len;
3038 struct ucred *cred;
3039 struct proc *procp;
3040 struct nfsnode **npp;
3041{
3042 register u_long *tl;
3043 register caddr_t cp;
3044 register long t1, t2;
3045 struct vnode *newvp = (struct vnode *)0;
3046 struct nfsnode *np, *dnp = VTONFS(dvp);
3047 caddr_t bpos, dpos, cp2;
3048 int error = 0, fhlen, attrflag;
3049 struct mbuf *mreq, *mrep, *md, *mb, *mb2;
3050 nfsfh_t *nfhp;
3051 int v3 = NFS_ISV3(dvp);
fa4905b1 3052 u_int64_t xid;
1c79356b
A
3053
3054 nfsstats.rpccnt[NFSPROC_LOOKUP]++;
3055 nfsm_reqhead(dvp, NFSPROC_LOOKUP,
3056 NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len));
3057 nfsm_fhtom(dvp, v3);
3058 nfsm_strtom(name, len, NFS_MAXNAMLEN);
fa4905b1 3059 nfsm_request(dvp, NFSPROC_LOOKUP, procp, cred, &xid);
1c79356b
A
3060 if (npp && !error) {
3061 nfsm_getfh(nfhp, fhlen, v3);
3062 if (*npp) {
3063 np = *npp;
3064 if (np->n_fhsize > NFS_SMALLFH && fhlen <= NFS_SMALLFH) {
3065 _FREE_ZONE((caddr_t)np->n_fhp,
3066 np->n_fhsize, M_NFSBIGFH);
3067 np->n_fhp = &np->n_fh;
3068 } else if (np->n_fhsize <= NFS_SMALLFH && fhlen>NFS_SMALLFH)
3069 MALLOC_ZONE(np->n_fhp, nfsfh_t *,
3070 fhlen, M_NFSBIGFH, M_WAITOK);
3071 bcopy((caddr_t)nfhp, (caddr_t)np->n_fhp, fhlen);
3072 np->n_fhsize = fhlen;
3073 newvp = NFSTOV(np);
3074 } else if (NFS_CMPFH(dnp, nfhp, fhlen)) {
3075 VREF(dvp);
3076 newvp = dvp;
3077 } else {
3078 error = nfs_nget(dvp->v_mount, nfhp, fhlen, &np);
3079 if (error) {
3080 m_freem(mrep);
3081 return (error);
3082 }
3083 newvp = NFSTOV(np);
3084 }
3085 if (v3) {
fa4905b1 3086 nfsm_postop_attr(newvp, attrflag, &xid);
1c79356b
A
3087 if (!attrflag && *npp == NULL) {
3088 m_freem(mrep);
3089 if (newvp == dvp)
3090 vrele(newvp);
3091 else
3092 vput(newvp);
3093 return (ENOENT);
3094 }
3095 } else
fa4905b1 3096 nfsm_loadattr(newvp, (struct vattr *)0, &xid);
1c79356b
A
3097 }
3098 nfsm_reqdone;
3099 if (npp && *npp == NULL) {
3100 if (error) {
3101 if (newvp)
3102 if (newvp == dvp)
3103 vrele(newvp);
3104 else
3105 vput(newvp);
3106 } else
3107 *npp = np;
3108 }
3109 return (error);
3110}
3111
3112/*
3113 * Nfs Version 3 commit rpc
 *
 * Sends a COMMIT request carrying the 64-bit offset and the 32-bit count
 * cnt for the file behind vp, using cred and procp for the rpc.
 *
 * Returns 0 without talking to the server when the mount does not have
 * NFSMNT_HASWRITEVERF set.  Otherwise returns the rpc error, or
 * NFSERR_STALEWRITEVERF when the verifier in the reply differs from the
 * one cached in nmp->nm_verf (the cached copy is replaced in that case).
 *
 * NOTE(review): the nfsm_* macros use the locals declared below (tl, cp,
 * t1, t2, bpos, dpos, cp2, mreq, mrep, md, mb, mb2, error) implicitly and
 * presumably jump to a label provided by nfsm_reqdone on failure - confirm
 * against the macro definitions, which are not visible here.
3114 */
3115static int
3116nfs_commit(vp, offset, cnt, cred, procp)
3117 register struct vnode *vp;
3118 u_quad_t offset;
3119 int cnt;
3120 struct ucred *cred;
3121 struct proc *procp;
3122{
3123 register caddr_t cp;
3124 register u_long *tl;
3125 register int t1, t2;
3126 register struct nfsmount *nmp = VFSTONFS(vp->v_mount);
3127 caddr_t bpos, dpos, cp2;
3128 int error = 0, wccflag = NFSV3_WCCRATTR;
3129 struct mbuf *mreq, *mrep, *md, *mb, *mb2;
fa4905b1 3130 u_int64_t xid;
1c79356b 3131
fa4905b1 3132 FSDBG(521, vp, offset, cnt, nmp->nm_flag);
1c79356b
A
 /* No write verifier recorded for this mount: nothing to commit. */
3133 if ((nmp->nm_flag & NFSMNT_HASWRITEVERF) == 0)
3134 return (0);
3135 nfsstats.rpccnt[NFSPROC_COMMIT]++;
3136 nfsm_reqhead(vp, NFSPROC_COMMIT, NFSX_FH(1));
3137 nfsm_fhtom(vp, 1);
 /* Request arguments: two words of offset followed by one word of count. */
3138 nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED);
3139 txdr_hyper(&offset, tl);
3140 tl += 2;
3141 *tl = txdr_unsigned(cnt);
fa4905b1
A
3142 nfsm_request(vp, NFSPROC_COMMIT, procp, cred, &xid);
3143 nfsm_wcc_data(vp, wccflag, &xid);
1c79356b
A
3144 if (!error) {
 /* Compare the verifier in the reply with the cached one. */
3145 nfsm_dissect(tl, u_long *, NFSX_V3WRITEVERF);
3146 if (bcmp((caddr_t)nmp->nm_verf, (caddr_t)tl,
fa4905b1 3147 NFSX_V3WRITEVERF)) {
1c79356b
A
3148 bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
3149 NFSX_V3WRITEVERF);
3150 error = NFSERR_STALEWRITEVERF;
3151 }
3152 }
3153 nfsm_reqdone;
3154 return (error);
3155}
3156
3157/*
3158 * Kludge City..
3159 * - make nfs_bmap() essentially a no-op that does no translation
3160 * - do nfs_strategy() by doing I/O with nfs_readrpc/nfs_writerpc
3161 * (Maybe I could use the process's page mapping, but I was concerned that
3162 * Kernel Write might not be enabled and also figured copyout() would do
3163 * a lot more work than bcopy() and also it currently happens in the
3164 * context of the swapper process (2).
3165 */
3166static int
3167nfs_bmap(ap)
3168 struct vop_bmap_args /* {
3169 struct vnode *a_vp;
3170 daddr_t a_bn;
3171 struct vnode **a_vpp;
3172 daddr_t *a_bnp;
3173 int *a_runp;
3174 int *a_runb;
3175 } */ *ap;
3176{
3177 register struct vnode *vp = ap->a_vp;
3178 int devBlockSize = DEV_BSIZE;
3179
3180 if (ap->a_vpp != NULL)
3181 *ap->a_vpp = vp;
3182 if (ap->a_bnp != NULL)
3183 *ap->a_bnp = ap->a_bn * btodb(vp->v_mount->mnt_stat.f_iosize,
3184 devBlockSize);
3185 if (ap->a_runp != NULL)
3186 *ap->a_runp = 0;
3187#ifdef notyet
3188 if (ap->a_runb != NULL)
3189 *ap->a_runb = 0;
3190#endif
3191 return (0);
3192}
3193
3194/*
3195 * Strategy routine.
3196 * For async requests when nfsiod(s) are running, queue the request by
3197 * calling nfs_asyncio(), otherwise just all nfs_doio() to do the
3198 * request.
3199 */
3200static int
3201nfs_strategy(ap)
3202 struct vop_strategy_args *ap;
3203{
3204 register struct buf *bp = ap->a_bp;
3205 struct ucred *cr;
3206 struct proc *p;
3207 int error = 0;
3208
3209 if (ISSET(bp->b_flags, B_PHYS))
3210 panic("nfs_strategy: physio");
3211 if (ISSET(bp->b_flags, B_ASYNC))
3212 p = (struct proc *)0;
3213 else
3214 p = current_proc(); /* XXX */
3215 if (ISSET(bp->b_flags, B_READ))
3216 cr = bp->b_rcred;
3217 else
3218 cr = bp->b_wcred;
3219 /*
3220 * If the op is asynchronous and an i/o daemon is waiting
3221 * queue the request, wake it up and wait for completion
3222 * otherwise just do it ourselves.
3223 */
3224 if (!ISSET(bp->b_flags, B_ASYNC) || nfs_asyncio(bp, NOCRED))
3225 error = nfs_doio(bp, cr, p);
3226 return (error);
3227}
3228
3229/*
3230 * Mmap a file
3231 *
3232 * NB Currently unsupported.
3233 */
3234/* ARGSUSED */
3235static int
3236nfs_mmap(ap)
3237 struct vop_mmap_args /* {
3238 struct vnode *a_vp;
3239 int a_fflags;
3240 struct ucred *a_cred;
3241 struct proc *a_p;
3242 } */ *ap;
3243{
3244
3245 return (EINVAL);
3246}
3247
3248/*
3249 * fsync vnode op. Just call nfs_flush() with commit == 1.
3250 */
3251/* ARGSUSED */
3252static int
3253nfs_fsync(ap)
3254 struct vop_fsync_args /* {
3255 struct vnodeop_desc *a_desc;
3256 struct vnode * a_vp;
3257 struct ucred * a_cred;
3258 int a_waitfor;
3259 struct proc * a_p;
3260 } */ *ap;
3261{
1c79356b
A
3262 return (nfs_flush(ap->a_vp, ap->a_cred, ap->a_waitfor, ap->a_p, 1));
3263}
3264
3265/*
3266 * Flush all the blocks associated with a vnode.
3267 * Walk through the buffer pool and push any dirty pages
3268 * associated with the vnode.
 *
 * vp      - vnode whose dirty buffer list (v_dirtyblkhd) is walked
 * cred    - not referenced in the body of this function
 * waitfor - MNT_WAIT makes the routine sleep on busy buffers and wait
 *           until vp->v_numoutput drops to zero
 * p       - process handed to nfs_commit() and nfs_sigintr()
 * commit  - non-zero requests the NFSv3 commit pass ahead of the write pass
 *
 * Returns 0 on success; otherwise EINTR when interrupted, an error left
 * over from tsleep(), or np->n_error when NWRITEERR was set on the node
 * (the flag is cleared when the error is reported).
3269 */
3270static int
3271nfs_flush(vp, cred, waitfor, p, commit)
3272 register struct vnode *vp;
3273 struct ucred *cred;
3274 int waitfor;
3275 struct proc *p;
3276 int commit;
3277{
3278 register struct nfsnode *np = VTONFS(vp);
3279 register struct buf *bp;
3280 register int i;
3281 struct buf *nbp;
3282 struct nfsmount *nmp = VFSTONFS(vp->v_mount);
3283 int s, error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos, err;
3284 int passone = 1;
3285 u_quad_t off, endoff, toff;
3286 struct ucred* wcred = NULL;
3287 struct buf **bvec = NULL;
1c79356b
A
3288#ifndef NFS_COMMITBVECSIZ
3289#define NFS_COMMITBVECSIZ 20
3290#endif
 /* Fallback vector used when the commit list is small or MALLOC fails. */
3291 struct buf *bvec_on_stack[NFS_COMMITBVECSIZ];
fa4905b1
A
3292 int bvecsize = 0, bveccount;
3293 kern_return_t kret;
3294 upl_t upl;
3295
3296 FSDBG_TOP(517, vp, np, waitfor, commit);
1c79356b
A
3297
 /* Interruptible mounts let the sleeps below catch signals. */
3298 if (nmp->nm_flag & NFSMNT_INT)
3299 slpflag = PCATCH;
 /* Without a commit request there is only the plain write pass. */
3300 if (!commit)
3301 passone = 0;
3302
3303 /*
3304 * A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the
3305 * server, but has not been committed to stable storage on the server
3306 * yet. On the first pass, the byte range is worked out and the commit
3307 * rpc is done. On the second pass, nfs_writebp() is called to do the
3308 * job.
3309 */
3310again:
fa4905b1 3311 FSDBG(518, vp->v_dirtyblkhd.lh_first, np->n_flag, 0, 0);
1c79356b
A
3312 if (vp->v_dirtyblkhd.lh_first)
3313 np->n_flag |= NMODIFIED;
 /* Start with an empty byte range; it grows as buffers are collected. */
3314 off = (u_quad_t)-1;
3315 endoff = 0;
3316 bvecpos = 0;
1c79356b
A
3317 if (NFS_ISV3(vp) && commit) {
3318 s = splbio();
3319 /*
3320 * Count up how many buffers waiting for a commit.
fa4905b1
A
3321 * This is an upper bound - any with dirty pages must be
3322 * written not committed.
1c79356b
A
3323 */
3324 bveccount = 0;
3325 for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
3326 nbp = bp->b_vnbufs.le_next;
3327 if ((bp->b_flags & (B_BUSY | B_DELWRI | B_NEEDCOMMIT))
3328 == (B_DELWRI | B_NEEDCOMMIT))
3329 bveccount++;
fa4905b1 3330 FSDBG(519, bp, bp->b_flags, bveccount, 0);
1c79356b
A
3331 }
3332 /*
3333 * Allocate space to remember the list of bufs to commit. It is
fa4905b1 3334 * important to use M_NOWAIT here to avoid a race with nfs_write
1c79356b
A
3335 * If we can't get memory (for whatever reason), we will end up
3336 * committing the buffers one-by-one in the loop below.
3337 */
 /* Release a heap vector left over from a previous trip through "again". */
0b4e3aa0 3338 if (bvec != NULL && bvec != bvec_on_stack)
fa4905b1 3339 _FREE(bvec, M_TEMP);
0b4e3aa0 3340 if (bveccount > NFS_COMMITBVECSIZ) {
1c79356b 3341 MALLOC(bvec, struct buf **,
fa4905b1
A
3342 bveccount * sizeof(struct buf *), M_TEMP,
3343 M_NOWAIT);
3344 if (bvec == NULL) {
1c79356b 3345 bvec = bvec_on_stack;
fa4905b1 3346 bvecsize = NFS_COMMITBVECSIZ;
1c79356b
A
3347 } else
3348 bvecsize = bveccount;
1c79356b 3349 } else {
1c79356b 3350 bvec = bvec_on_stack;
fa4905b1 3351 bvecsize = NFS_COMMITBVECSIZ;
1c79356b 3352 }
fa4905b1 3353 FSDBG(519, 0, bvecsize, bveccount, 0);
1c79356b
A
3354
 /*
  * Second walk: claim every idle DELWRI|NEEDCOMMIT buffer, up to
  * bvecsize of them, and accumulate the byte range to commit.
  */
3355 for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
3356 nbp = bp->b_vnbufs.le_next;
9bccf70c 3357
fa4905b1
A
3358 FSDBG(520, bp, bp->b_flags, bvecpos, bp->b_bufsize);
3359 FSDBG(520, bp->b_validoff, bp->b_validend,
3360 bp->b_dirtyoff, bp->b_dirtyend);
1c79356b
A
3361 if (bvecpos >= bvecsize)
3362 break;
3363 if ((bp->b_flags & (B_BUSY | B_DELWRI | B_NEEDCOMMIT))
3364 != (B_DELWRI | B_NEEDCOMMIT))
3365 continue;
9bccf70c
A
3366
3367 bremfree(bp);
fa4905b1
A
3368 SET(bp->b_flags, B_BUSY);
3369 /*
3370 * we need a upl to see if the page has been
3371 * dirtied (think mmap) since the unstable write, and
3372 * so to prevent vm from paging during our commit rpc
3373 */
3374 if (ISSET(bp->b_flags, B_PAGELIST)) {
3375 upl = bp->b_pagelist;
3376 } else {
3377 kret = ubc_create_upl(vp, ubc_blktooff(vp, bp->b_lblkno),
3378 bp->b_bufsize, &upl,
3379 NULL, UPL_PRECIOUS);
3380 if (kret != KERN_SUCCESS)
3381 panic("nfs_flush: create upl %d", kret);
3382#ifdef UBC_DEBUG
3383 upl_ubc_alias_set(upl, current_act(), 1);
3384#endif /* UBC_DEBUG */
3385 }
 /*
  * Page 0 dirty again: this buffer has to be rewritten rather than
  * committed, so drop NEEDCOMMIT and release it.  Only a upl created
  * just above (buffer had no B_PAGELIST) is aborted.
  */
3386 if (upl_dirty_page(ubc_upl_pageinfo(upl), 0)) {
3387 if (!ISSET(bp->b_flags, B_PAGELIST)) {
3388 err = ubc_upl_abort(upl, NULL);
3389 if (err)
3390 printf("nfs_flush: upl abort %d\n", err);
3391 }
3392 /*
3393 * Any/all of it may be modified...
3394 */
3395 bp->b_dirtyoff = bp->b_validoff;
3396 bp->b_dirtyend = bp->b_validend;
9bccf70c
A
3397 CLR(bp->b_flags, B_NEEDCOMMIT);
3398 /* blocking calls were made, re-evaluate nbp */
3399 nbp = bp->b_vnbufs.le_next;
3400 brelse(bp); /* XXX may block. Is using nbp ok??? */
fa4905b1
A
3401 continue;
3402 }
 /* Keep a newly created upl attached to the buffer and map its data. */
3403 if (!ISSET(bp->b_flags, B_PAGELIST)) {
3404 bp->b_pagelist = upl;
3405 SET(bp->b_flags, B_PAGELIST);
3406 ubc_upl_map(upl, (vm_address_t *)&bp->b_data);
3407 }
9bccf70c
A
3408
3409 /* blocking calls were made, re-evaluate nbp */
3410 nbp = bp->b_vnbufs.le_next;
3411
1c79356b
A
3412 /*
3413 * Work out if all buffers are using the same cred
3414 * so we can deal with them all with one commit.
3415 */
3416 if (wcred == NULL)
3417 wcred = bp->b_wcred;
3418 else if (wcred != bp->b_wcred)
3419 wcred = NOCRED;
fa4905b1 3420 SET(bp->b_flags, B_WRITEINPROG);
1c79356b
A
3421
3422 /*
3423 * A list of these buffers is kept so that the
3424 * second loop knows which buffers have actually
3425 * been committed. This is necessary, since there
3426 * may be a race between the commit rpc and new
3427 * uncommitted writes on the file.
3428 */
3429 bvec[bvecpos++] = bp;
 /* Widen [off, endoff) so that it covers this buffer's dirty region. */
3430 toff = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
3431 bp->b_dirtyoff;
3432 if (toff < off)
3433 off = toff;
3434 toff += (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff);
3435 if (toff > endoff)
3436 endoff = toff;
3437 }
3438 splx(s);
3439 }
3440 if (bvecpos > 0) {
3441 /*
3442 * Commit data on the server, as required.
3443 * If all bufs are using the same wcred, then use that with
3444 * one call for all of them, otherwise commit each one
3445 * separately.
3446 */
3447 if (wcred != NOCRED)
3448 retv = nfs_commit(vp, off, (int)(endoff - off),
3449 wcred, p);
3450 else {
3451 retv = 0;
3452 for (i = 0; i < bvecpos; i++) {
 /* These locals shadow the function-level "off" within this loop. */
3453 off_t off, size;
3454 bp = bvec[i];
fa4905b1
A
3455 FSDBG(522, bp, bp->b_blkno * DEV_BSIZE,
3456 bp->b_dirtyoff, bp->b_dirtyend);
1c79356b
A
3457 off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
3458 bp->b_dirtyoff;
3459 size = (u_quad_t)(bp->b_dirtyend
3460 - bp->b_dirtyoff);
3461 retv = nfs_commit(vp, off, (int)size,
3462 bp->b_wcred, p);
3463 if (retv) break;
3464 }
3465 }
3466
3467 if (retv == NFSERR_STALEWRITEVERF)
3468 nfs_clearcommit(vp->v_mount);
1c79356b
A
3469
3470 /*
3471 * Now, either mark the blocks I/O done or mark the
3472 * blocks dirty, depending on whether the commit
3473 * succeeded.
3474 */
3475 for (i = 0; i < bvecpos; i++) {
1c79356b 3476 bp = bvec[i];
fa4905b1 3477 FSDBG(523, bp, retv, bp->b_flags, 0);
1c79356b
A
3478 CLR(bp->b_flags, (B_NEEDCOMMIT | B_WRITEINPROG));
3479 if (retv) {
 /* Commit failed: release the buffer; B_DELWRI is left as it was. */
fa4905b1 3480 brelse(bp);
1c79356b 3481 } else {
 /* Commit succeeded: finish the buffer as a completed async write. */
9bccf70c
A
3482 int oldflags = bp->b_flags;
3483
fa4905b1
A
3484 s = splbio();
3485 vp->v_numoutput++;
3486 SET(bp->b_flags, B_ASYNC);
3487 CLR(bp->b_flags,
3488 (B_READ|B_DONE|B_ERROR|B_DELWRI));
9bccf70c
A
3489 if (ISSET(oldflags, B_DELWRI)) {
3490 extern int nbdwrite;
3491 nbdwrite--;
3492 wakeup((caddr_t)&nbdwrite);
3493 }
fa4905b1
A
3494 bp->b_dirtyoff = bp->b_dirtyend = 0;
3495 reassignbuf(bp, vp);
3496 splx(s);
3497 biodone(bp);
1c79356b
A
3498 }
3499 }
3500
3501 }
1c79356b 3502 /*
fa4905b1
A
3503 * Start/do any write(s) that are required. There is a window here
3504 * where B_BUSY protects the buffer. The vm pages have been freed up,
3505 * yet B_BUSY is set. Don't think you will hit any busy/incore problems
3506 * while we sleep, but not absolutely sure. Keep an eye on it. Otherwise
3507 * we will have to hold vm page across this locked. - EKN
1c79356b
A
3508 */
3509loop:
3510 if (current_thread_aborted()) {
3511 error = EINTR;
3512 goto done;
3513 }
3514 s = splbio();
 /* The scan restarts from "loop" after every sleep or write. */
3515 for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
3516 nbp = bp->b_vnbufs.le_next;
3517 if (ISSET(bp->b_flags, B_BUSY)) {
fa4905b1 3518 FSDBG(524, bp, waitfor, passone, bp->b_flags);
1c79356b
A
 /* Busy buffers are skipped unless this is a waiting, final pass. */
3519 if (waitfor != MNT_WAIT || passone)
3520 continue;
3521 SET(bp->b_flags, B_WANTED);
3522 error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1),
fa4905b1 3523 "nfsfsync", slptimeo);
1c79356b
A
3524 splx(s);
3525 if (error) {
fa4905b1
A
3526 if (nfs_sigintr(nmp, (struct nfsreq *)0, p)) {
3527 error = EINTR;
3528 goto done;
3529 }
 /* After one caught wakeup, stop catching and use a 2 * hz timeout. */
3530 if (slpflag == PCATCH) {
3531 slpflag = 0;
3532 slptimeo = 2 * hz;
3533 }
1c79356b
A
3534 }
3535 goto loop;
3536 }
3537 if (!ISSET(bp->b_flags, B_DELWRI))
3538 panic("nfs_fsync: not dirty");
fa4905b1 3539 FSDBG(525, bp, passone, commit, bp->b_flags);
1c79356b
A
 /* Buffers awaiting a commit are left alone on pass one / when !commit. */
3540 if ((passone || !commit) && ISSET(bp->b_flags, B_NEEDCOMMIT))
3541 continue;
3542 bremfree(bp);
3543 if (passone || !commit)
fa4905b1 3544 SET(bp->b_flags, B_BUSY|B_ASYNC);
1c79356b 3545 else
fa4905b1
A
3546 SET(bp->b_flags,
3547 B_BUSY|B_ASYNC|B_WRITEINPROG|B_NEEDCOMMIT);
1c79356b
A
3548 splx(s);
3549 VOP_BWRITE(bp);
3550 goto loop;
3551 }
3552 splx(s);
 /* End of pass one: go back and run the commit pass once more. */
3553 if (passone) {
3554 passone = 0;
3555 goto again;
3556 }
3557 if (waitfor == MNT_WAIT) {
 /* Wait for all outstanding output on the vnode to complete. */
3558 while (vp->v_numoutput) {
3559 vp->v_flag |= VBWAIT;
3560 error = tsleep((caddr_t)&vp->v_numoutput,
3561 slpflag | (PRIBIO + 1), "nfsfsync", slptimeo);
3562 if (error) {
3563 if (nfs_sigintr(nmp, (struct nfsreq *)0, p)) {
3564 error = EINTR;
3565 goto done;
3566 }
3567 if (slpflag == PCATCH) {
3568 slpflag = 0;
3569 slptimeo = 2 * hz;
3570 }
3571 }
3572 }
 /* New dirty buffers showed up while we slept: write them too. */
3573 if (vp->v_dirtyblkhd.lh_first && commit) {
3574 goto loop;
3575 }
3576 }
fa4905b1 3577 FSDBG(526, np->n_flag, np->n_error, 0, 0);
1c79356b
A
 /* Report, then clear, a write error recorded on the node. */
3578 if (np->n_flag & NWRITEERR) {
3579 error = np->n_error;
3580 np->n_flag &= ~NWRITEERR;
3581 }
3582done:
fa4905b1 3583 FSDBG_BOT(517, vp, np, error, 0);
1c79356b
A
3584 if (bvec != NULL && bvec != bvec_on_stack)
3585 _FREE(bvec, M_TEMP);
1c79356b
A
3586 return (error);
3587}
3588
3589/*
3590 * Return POSIX pathconf information applicable to nfs.
3591 *
3592 * The NFS V2 protocol doesn't support this, so just return EINVAL
3593 * for V2.
3594 */
3595/* ARGSUSED */
3596static int
3597nfs_pathconf(ap)
3598 struct vop_pathconf_args /* {
3599 struct vnode *a_vp;
3600 int a_name;
3601 int *a_retval;
3602 } */ *ap;
3603{
3604
3605 return (EINVAL);
3606}
3607
3608/*
3609 * NFS advisory byte-level locks.
3610 * Currently unsupported.
3611 */
3612static int
3613nfs_advlock(ap)
3614 struct vop_advlock_args /* {
3615 struct vnode *a_vp;
3616 caddr_t a_id;
3617 int a_op;
3618 struct flock *a_fl;
3619 int a_flags;
3620 } */ *ap;
3621{
3622#ifdef __FreeBSD__
3623 register struct nfsnode *np = VTONFS(ap->a_vp);
3624
3625 /*
3626 * The following kludge is to allow diskless support to work
3627 * until a real NFS lockd is implemented. Basically, just pretend
3628 * that this is a local lock.
3629 */
3630 return (lf_advlock(ap, &(np->n_lockf), np->n_size));
3631#else
3632#if DIAGNOSTIC
3633 printf("nfs_advlock: pid %d comm %s\n", current_proc()->p_pid, current_proc()->p_comm);
3634#endif
3635 return (EOPNOTSUPP);
3636#endif
3637}
3638
3639/*
3640 * Print out the contents of an nfsnode.
3641 */
3642static int
3643nfs_print(ap)
3644 struct vop_print_args /* {
3645 struct vnode *a_vp;
3646 } */ *ap;
3647{
3648 register struct vnode *vp = ap->a_vp;
3649 register struct nfsnode *np = VTONFS(vp);
3650
3651 printf("tag VT_NFS, fileid %ld fsid 0x%lx",
3652 np->n_vattr.va_fileid, np->n_vattr.va_fsid);
3653 if (vp->v_type == VFIFO)
3654 fifo_printinfo(vp);
3655 printf("\n");
3656 return (0);
3657}
3658
3659/*
3660 * NFS directory offset lookup.
3661 * Currently unsupported.
3662 */
3663static int
3664nfs_blkatoff(ap)
3665 struct vop_blkatoff_args /* {
3666 struct vnode *a_vp;
3667 off_t a_offset;
3668 char **a_res;
3669 struct buf **a_bpp;
3670 } */ *ap;
3671{
3672
3673#if DIAGNOSTIC
3674 printf("nfs_blkatoff: unimplemented!!");
3675#endif
3676 return (EOPNOTSUPP);
3677}
3678
3679/*
3680 * NFS flat namespace allocation.
3681 * Currently unsupported.
3682 */
3683static int
3684nfs_valloc(ap)
3685 struct vop_valloc_args /* {
3686 struct vnode *a_pvp;
3687 int a_mode;
3688 struct ucred *a_cred;
3689 struct vnode **a_vpp;
3690 } */ *ap;
3691{
3692
3693 return (EOPNOTSUPP);
3694}
3695
3696/*
3697 * NFS flat namespace free.
3698 * Currently unsupported.
3699 */
3700static int
3701nfs_vfree(ap)
3702 struct vop_vfree_args /* {
3703 struct vnode *a_pvp;
3704 ino_t a_ino;
3705 int a_mode;
3706 } */ *ap;
3707{
3708
3709#if DIAGNOSTIC
3710 printf("nfs_vfree: unimplemented!!");
3711#endif
3712 return (EOPNOTSUPP);
3713}
3714
3715/*
3716 * NFS file truncation.
3717 */
3718static int
3719nfs_truncate(ap)
3720 struct vop_truncate_args /* {
3721 struct vnode *a_vp;
3722 off_t a_length;
3723 int a_flags;
3724 struct ucred *a_cred;
3725 struct proc *a_p;
3726 } */ *ap;
3727{
3728
3729 /* Use nfs_setattr */
3730#if DIAGNOSTIC
3731 printf("nfs_truncate: unimplemented!!");
3732#endif
3733 return (EOPNOTSUPP);
3734}
3735
3736/*
3737 * NFS update.
3738 */
3739static int
3740nfs_update(ap)
3741 struct vop_update_args /* {
3742 struct vnode *a_vp;
3743 struct timeval *a_ta;
3744 struct timeval *a_tm;
3745 int a_waitfor;
3746 } */ *ap;
3747{
3748
3749 /* Use nfs_setattr */
3750#if DIAGNOSTIC
3751 printf("nfs_update: unimplemented!!");
3752#endif
3753 return (EOPNOTSUPP);
3754}
3755
/*
 * State shared by the nfs_aio worker threads and nfs_bwrite():
 * a count of started threads, the lock guarding the queue, the queue of
 * buffers waiting to be written, and the queue length.
 */
3756int nfs_aio_threads = 0; /* 1 per nfd (arbitrary) */
3757struct slock nfs_aio_slock;
3758TAILQ_HEAD(bqueues, buf) nfs_aio_bufq;
3759int nfs_aio_bufq_len = 0; /* diagnostic only */
3760
3761void
3762nfs_aio_thread()
3763{ /* see comment below in nfs_bwrite() for some rationale */
3764 struct buf *bp;
3765 boolean_t funnel_state;
3766
3767 funnel_state = thread_funnel_set(kernel_flock, TRUE);
3768 for(;;) {
3769 simple_lock(&nfs_aio_slock);
3770 if ((bp = nfs_aio_bufq.tqh_first)) {
3771 TAILQ_REMOVE(&nfs_aio_bufq, bp, b_freelist);
3772 nfs_aio_bufq_len--;
3773 simple_unlock(&nfs_aio_slock);
3774 nfs_writebp(bp, 1);
3775 } else { /* nothing to do - goodnight */
3776 assert_wait(&nfs_aio_bufq, THREAD_UNINT);
3777 simple_unlock(&nfs_aio_slock);
3778 (void)tsleep((caddr_t)0, PRIBIO+1, "nfs_aio_bufq", 0);
3779 }
3780 }
3781 (void) thread_funnel_set(kernel_flock, FALSE);
3782}
3783
3784
3785void
3786nfs_aio_thread_init()
3787{
3788 if (nfs_aio_threads++ == 0) {
3789 simple_lock_init(&nfs_aio_slock);
3790 TAILQ_INIT(&nfs_aio_bufq);
3791 }
3792 kernel_thread(kernel_task, nfs_aio_thread);
3793}
3794
3795
3796/*
3797 * Just call nfs_writebp() with the force argument set to 1.
3798 */
3799static int
3800nfs_bwrite(ap)
3801 struct vop_bwrite_args /* {
3802 struct vnode *a_bp;
3803 } */ *ap;
3804{
3805 extern void wakeup_one(caddr_t chan);
3806
3807 /*
3808 * nfs_writebp will issue a synchronous rpc to if B_ASYNC then
3809 * to avoid distributed deadlocks we handoff the write to the
3810 * nfs_aio threads. Doing so allows us to complete the
3811 * current request, rather than blocking on a server which may
3812 * be ourself (or blocked on ourself).
3813 *
3814 * Note the loopback deadlocks happened when the thread
3815 * invoking us was nfsd, and also when it was the pagedaemon.
3816 *
3817 * This solution has one known problem. If *ALL* buffers get
3818 * on the nfs_aio queue then no forward progress can be made
3819 * until one of those writes complete. And if the current
3820 * nfs_aio writes-in-progress block due to a non-responsive server we
3821 * are in a deadlock circle. Probably the cure is to limit the
3822 * async write concurrency in getnewbuf as in FreeBSD 3.2.
3823 */
3824 if (nfs_aio_threads && ISSET(ap->a_bp->b_flags, B_ASYNC)) {
3825 simple_lock(&nfs_aio_slock);
3826 nfs_aio_bufq_len++;
3827 TAILQ_INSERT_TAIL(&nfs_aio_bufq, ap->a_bp, b_freelist);
3828 simple_unlock(&nfs_aio_slock);
3829 wakeup_one((caddr_t)&nfs_aio_bufq);
3830 return (0);
3831 }
3832 return (nfs_writebp(ap->a_bp, 1));
3833}
3834
3835/*
3836 * This is a clone of vn_bwrite(), except that B_WRITEINPROG isn't set unless
3837 * the force flag is one and it also handles the B_NEEDCOMMIT flag.
 *
 * bp    - buffer to push; must already be B_BUSY (panics otherwise)
 * force - non-zero sets B_WRITEINPROG before the buffer is handed to
 *         VOP_STRATEGY()
 *
 * Returns the result of biowait() for a buffer that was not B_ASYNC on
 * entry (the buffer is released before returning), and 0 for a B_ASYNC
 * buffer.
3838 */
3839int
3840nfs_writebp(bp, force)
3841 register struct buf *bp;
3842 int force;
3843{
3844 int s;
 /* oldflags snapshots b_flags before any bits are cleared below;
  * retv starts non-zero so the write path runs unless a commit succeeds. */
3845 register int oldflags = bp->b_flags, retv = 1;
3846 off_t off;
3847 upl_t upl;
1c79356b
A
3848 kern_return_t kret;
3849 struct vnode *vp = bp->b_vp;
3850 upl_page_info_t *pl;
3851
3852 if(!ISSET(bp->b_flags, B_BUSY))
3853 panic("nfs_writebp: buffer is not busy???");
3854
3855 s = splbio();
3856 CLR(bp->b_flags, (B_READ|B_DONE|B_ERROR|B_DELWRI));
d52fe63f
A
 /* The buffer was a delayed write: one fewer counted in nbdwrite. */
3857 if (ISSET(oldflags, B_DELWRI)) {
3858 extern int nbdwrite;
3859 nbdwrite--;
9bccf70c 3860 wakeup((caddr_t)&nbdwrite);
d52fe63f 3861 }
1c79356b
A
3862
3863 if (ISSET(oldflags, (B_ASYNC|B_DELWRI))) {
3864 reassignbuf(bp, vp);
3865 }
3866
3867 vp->v_numoutput++;
3868 current_proc()->p_stats->p_ru.ru_oublock++;
3869 splx(s);
3870
3871 /*
fa4905b1
A
3872 * Since the B_BUSY flag is set, we need to lock the page before doing
3873 * nfs_commit. Otherwise we may block and get a busy incore pages
3874 * during a vm pageout. Move the existing code up before the commit.
1c79356b 3875 */
fa4905b1
A
 /* Only for non-meta buffers on UBC-valid vnodes lacking a page list. */
3876 if (!ISSET(bp->b_flags, B_META) && UBCISVALID(vp) &&
3877 !ISSET(bp->b_flags, B_PAGELIST)) {
3878 kret = ubc_create_upl(vp, ubc_blktooff(vp, bp->b_lblkno),
3879 bp->b_bufsize, &upl, &pl, UPL_PRECIOUS);
3880 if (kret != KERN_SUCCESS)
3881 panic("nfs_writebp: ubc_create_upl %d", kret);
1c79356b 3882#ifdef UBC_DEBUG
fa4905b1 3883 upl_ubc_alias_set(upl, current_act(), 2);
1c79356b 3884#endif /* UBC_DEBUG */
fa4905b1
A
3885 s = splbio();
3886 bp->b_pagelist = upl;
3887 SET(bp->b_flags, B_PAGELIST);
3888 splx(s);
1c79356b 3889
fa4905b1
A
3890 kret = ubc_upl_map(upl, (vm_address_t *)&(bp->b_data));
3891 if (kret != KERN_SUCCESS)
3892 panic("nfs_writebp: ubc_upl_map %d", kret);
3893 if(bp->b_data == 0)
3894 panic("nfs_writebp: ubc_upl_map mapped 0");
3895 if (!upl_page_present(pl, 0)) /* even more paranoia */
3896 panic("nfs_writebp: nopage");
0b4e3aa0 3897 }
1c79356b
A
3898
3899 /*
3900 * If B_NEEDCOMMIT is set, a commit rpc may do the trick. If not
3901 * an actual write will have to be scheduled via. VOP_STRATEGY().
3902 * If B_WRITEINPROG is already set, then push it with a write anyhow.
3903 */
3904 if ((oldflags & (B_NEEDCOMMIT | B_WRITEINPROG)) == B_NEEDCOMMIT) {
3905 off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff;
3906 SET(bp->b_flags, B_WRITEINPROG);
3907 retv = nfs_commit(vp, off, bp->b_dirtyend-bp->b_dirtyoff,
3908 bp->b_wcred, bp->b_proc);
3909 CLR(bp->b_flags, B_WRITEINPROG);
3910 if (!retv) {
 /* Commit succeeded: nothing is dirty any more, complete the buffer. */
3911 bp->b_dirtyoff = bp->b_dirtyend = 0;
3912 CLR(bp->b_flags, B_NEEDCOMMIT);
3913 biodone(bp); /* on B_ASYNC will brelse the buffer */
3914
3915 } else if (retv == NFSERR_STALEWRITEVERF)
3916 nfs_clearcommit(vp->v_mount);
3917 }
 /* No commit was attempted, or it failed: schedule a real write. */
3918 if (retv) {
3919 if (force)
3920 SET(bp->b_flags, B_WRITEINPROG);
0b4e3aa0 3921 VOP_STRATEGY(bp);
1c79356b
A
3922 }
3923
 /* Synchronous caller: wait for completion and release the buffer here. */
3924 if( (oldflags & B_ASYNC) == 0) {
3925 int rtval = biowait(bp);
3926
3927 if (oldflags & B_DELWRI) {
3928 s = splbio();
3929 reassignbuf(bp, vp);
3930 splx(s);
3931 }
3932 brelse(bp);
3933 return (rtval);
3934 }
3935
3936 return (0);
3937}
3938
3939/*
3940 * nfs special file access vnode op.
3941 * Essentially just get vattr and then imitate iaccess() since the device is
3942 * local to the client.
3943 */
3944static int
3945nfsspec_access(ap)
3946 struct vop_access_args /* {
3947 struct vnode *a_vp;
3948 int a_mode;
3949 struct ucred *a_cred;
3950 struct proc *a_p;
3951 } */ *ap;
3952{
3953 register struct vattr *vap;
3954 register gid_t *gp;
3955 register struct ucred *cred = ap->a_cred;
3956 struct vnode *vp = ap->a_vp;
3957 mode_t mode = ap->a_mode;
3958 struct vattr vattr;
3959 register int i;
3960 int error;
3961
3962 /*
3963 * Disallow write attempts on filesystems mounted read-only;
3964 * unless the file is a socket, fifo, or a block or character
3965 * device resident on the filesystem.
3966 */
3967 if ((mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) {
3968 switch (vp->v_type) {
3969 case VREG: case VDIR: case VLNK:
3970 return (EROFS);
3971 }
3972 }
3973 /*
3974 * If you're the super-user,
3975 * you always get access.
3976 */
3977 if (cred->cr_uid == 0)
3978 return (0);
3979 vap = &vattr;
3980 error = VOP_GETATTR(vp, vap, cred, ap->a_p);
3981 if (error)
3982 return (error);
3983 /*
3984 * Access check is based on only one of owner, group, public.
3985 * If not owner, then check group. If not a member of the
3986 * group, then check public access.
3987 */
3988 if (cred->cr_uid != vap->va_uid) {
3989 mode >>= 3;
3990 gp = cred->cr_groups;
3991 for (i = 0; i < cred->cr_ngroups; i++, gp++)
3992 if (vap->va_gid == *gp)
3993 goto found;
3994 mode >>= 3;
3995found:
3996 ;
3997 }
3998 error = (vap->va_mode & mode) == mode ? 0 : EACCES;
3999 return (error);
4000}
4001
4002/*
4003 * Read wrapper for special devices.
4004 */
4005static int
4006nfsspec_read(ap)
4007 struct vop_read_args /* {
4008 struct vnode *a_vp;
4009 struct uio *a_uio;
4010 int a_ioflag;
4011 struct ucred *a_cred;
4012 } */ *ap;
4013{
4014 register struct nfsnode *np = VTONFS(ap->a_vp);
4015
4016 /*
4017 * Set access flag.
4018 */
4019 np->n_flag |= NACC;
4020 np->n_atim.tv_sec = time.tv_sec;
4021 np->n_atim.tv_nsec = time.tv_usec * 1000;
4022 return (VOCALL(spec_vnodeop_p, VOFFSET(vop_read), ap));
4023}
4024
4025/*
4026 * Write wrapper for special devices.
4027 */
4028static int
4029nfsspec_write(ap)
4030 struct vop_write_args /* {
4031 struct vnode *a_vp;
4032 struct uio *a_uio;
4033 int a_ioflag;
4034 struct ucred *a_cred;
4035 } */ *ap;
4036{
4037 register struct nfsnode *np = VTONFS(ap->a_vp);
4038
4039 /*
4040 * Set update flag.
4041 */
4042 np->n_flag |= NUPD;
4043 np->n_mtim.tv_sec = time.tv_sec;
4044 np->n_mtim.tv_nsec = time.tv_usec * 1000;
4045 return (VOCALL(spec_vnodeop_p, VOFFSET(vop_write), ap));
4046}
4047
4048/*
4049 * Close wrapper for special devices.
4050 *
4051 * Update the times on the nfsnode then do device close.
4052 */
4053static int
4054nfsspec_close(ap)
4055 struct vop_close_args /* {
4056 struct vnode *a_vp;
4057 int a_fflag;
4058 struct ucred *a_cred;
4059 struct proc *a_p;
4060 } */ *ap;
4061{
4062 register struct vnode *vp = ap->a_vp;
4063 register struct nfsnode *np = VTONFS(vp);
4064 struct vattr vattr;
4065
4066 if (np->n_flag & (NACC | NUPD)) {
4067 np->n_flag |= NCHG;
4068 if (vp->v_usecount == 1 &&
4069 (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
4070 VATTR_NULL(&vattr);
4071 if (np->n_flag & NACC)
4072 vattr.va_atime = np->n_atim;
4073 if (np->n_flag & NUPD)
4074 vattr.va_mtime = np->n_mtim;
4075 (void)VOP_SETATTR(vp, &vattr, ap->a_cred, ap->a_p);
4076 }
4077 }
4078 return (VOCALL(spec_vnodeop_p, VOFFSET(vop_close), ap));
4079}
4080
4081/*
4082 * Read wrapper for fifos.
4083 */
4084static int
4085nfsfifo_read(ap)
4086 struct vop_read_args /* {
4087 struct vnode *a_vp;
4088 struct uio *a_uio;
4089 int a_ioflag;
4090 struct ucred *a_cred;
4091 } */ *ap;
4092{
4093 extern vop_t **fifo_vnodeop_p;
4094 register struct nfsnode *np = VTONFS(ap->a_vp);
4095
4096 /*
4097 * Set access flag.
4098 */
4099 np->n_flag |= NACC;
4100 np->n_atim.tv_sec = time.tv_sec;
4101 np->n_atim.tv_nsec = time.tv_usec * 1000;
4102 return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_read), ap));
4103}
4104
4105/*
4106 * Write wrapper for fifos.
4107 */
4108static int
4109nfsfifo_write(ap)
4110 struct vop_write_args /* {
4111 struct vnode *a_vp;
4112 struct uio *a_uio;
4113 int a_ioflag;
4114 struct ucred *a_cred;
4115 } */ *ap;
4116{
4117 extern vop_t **fifo_vnodeop_p;
4118 register struct nfsnode *np = VTONFS(ap->a_vp);
4119
4120 /*
4121 * Set update flag.
4122 */
4123 np->n_flag |= NUPD;
4124 np->n_mtim.tv_sec = time.tv_sec;
4125 np->n_mtim.tv_nsec = time.tv_usec * 1000;
4126 return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_write), ap));
4127}
4128
4129/*
4130 * Close wrapper for fifos.
4131 *
4132 * Update the times on the nfsnode then do fifo close.
4133 */
4134static int
4135nfsfifo_close(ap)
4136 struct vop_close_args /* {
4137 struct vnode *a_vp;
4138 int a_fflag;
4139 struct ucred *a_cred;
4140 struct proc *a_p;
4141 } */ *ap;
4142{
4143 register struct vnode *vp = ap->a_vp;
4144 register struct nfsnode *np = VTONFS(vp);
4145 struct vattr vattr;
4146 extern vop_t **fifo_vnodeop_p;
4147
4148 if (np->n_flag & (NACC | NUPD)) {
4149 if (np->n_flag & NACC) {
4150 np->n_atim.tv_sec = time.tv_sec;
4151 np->n_atim.tv_nsec = time.tv_usec * 1000;
4152 }
4153 if (np->n_flag & NUPD) {
4154 np->n_mtim.tv_sec = time.tv_sec;
4155 np->n_mtim.tv_nsec = time.tv_usec * 1000;
4156 }
4157 np->n_flag |= NCHG;
4158 if (vp->v_usecount == 1 &&
4159 (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
4160 VATTR_NULL(&vattr);
4161 if (np->n_flag & NACC)
4162 vattr.va_atime = np->n_atim;
4163 if (np->n_flag & NUPD)
4164 vattr.va_mtime = np->n_mtim;
4165 (void)VOP_SETATTR(vp, &vattr, ap->a_cred, ap->a_p);
4166 }
4167 }
4168 return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_close), ap));
4169}
4170
4171static int
4172nfs_ioctl(ap)
4173 struct vop_ioctl_args *ap;
4174{
4175
4176 /*
4177 * XXX we were once bogusly enoictl() which returned this (ENOTTY).
4178 * Probably we should return ENODEV.
4179 */
4180 return (ENOTTY);
4181}
4182
/*
 * select vnode op: the argument is ignored and 1 is always returned.
 *
 * This slot used to be (bogusly) seltrue(), which returns 1.  Whether
 * that is the right answer is an open question.
 */
static int
nfs_select(ap)
	struct vop_select_args *ap;
{
	return (1);
}
4193
4194/* XXX Eliminate use of struct bp here */
4195/*
4196 * Vnode op for pagein using getblk_pages
4197 * derived from nfs_bioread()
4198 * No read aheads are started from pagein operation
4199 */
4200static int
4201nfs_pagein(ap)
4202 struct vop_pagein_args /* {
4203 struct vnode *a_vp,
4204 upl_t a_pl,
4205 vm_offset_t a_pl_offset,
4206 off_t a_f_offset,
4207 size_t a_size,
4208 struct ucred *a_cred,
4209 int a_flags
4210 } */ *ap;
4211{
4212 register struct vnode *vp = ap->a_vp;
4213 upl_t pl = ap->a_pl;
4214 size_t size= ap->a_size;
4215 off_t f_offset = ap->a_f_offset;
4216 vm_offset_t pl_offset = ap->a_pl_offset;
4217 int flags = ap->a_flags;
4218 struct ucred *cred;
4219 register struct nfsnode *np = VTONFS(vp);
4220 register int biosize;
9bccf70c 4221 register int iosize;
1c79356b
A
4222 register int xsize;
4223 struct vattr vattr;
4224 struct proc *p = current_proc();
4225 struct nfsmount *nmp = VFSTONFS(vp->v_mount);
4226 int error = 0;
4227 vm_offset_t ioaddr;
4228 struct uio auio;
4229 struct iovec aiov;
4230 struct uio * uio = &auio;
fa4905b1 4231 int nofreeupl = flags & UPL_NOCOMMIT;
1c79356b 4232
fa4905b1
A
4233 FSDBG(322, f_offset, size, pl, pl_offset);
4234 if (pl == (upl_t)NULL)
4235 panic("nfs_pagein: no upl");
1c79356b
A
4236
4237 if (UBCINVALID(vp)) {
fa4905b1
A
4238 printf("nfs_pagein: invalid vnode 0x%x", (int)vp);
4239 if (!nofreeupl)
4240 (void) ubc_upl_abort(pl, NULL);
1c79356b
A
4241 return (EPERM);
4242 }
1c79356b 4243 UBCINFOCHECK("nfs_pagein", vp);
1c79356b 4244
fa4905b1
A
4245 if (size <= 0) {
4246 printf("nfs_pagein: invalid size %d", size);
4247 if (!nofreeupl)
4248 (void) ubc_upl_abort(pl, NULL);
1c79356b 4249 return (EINVAL);
fa4905b1
A
4250 }
4251 if (f_offset < 0 || f_offset >= np->n_size ||
4252 (f_offset & PAGE_MASK_64)) {
4253 if (!nofreeupl)
0b4e3aa0 4254 ubc_upl_abort_range(pl, pl_offset, size,
1c79356b
A
4255 UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY);
4256 return (EINVAL);
4257 }
fa4905b1
A
4258 cred = ubc_getcred(vp);
4259 if (cred == NOCRED)
4260 cred = ap->a_cred;
1c79356b 4261
1c79356b
A
4262 auio.uio_offset = f_offset;
4263 auio.uio_segflg = UIO_SYSSPACE;
4264 auio.uio_rw = UIO_READ;
4265 auio.uio_procp = NULL;
4266
1c79356b
A
4267 if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3)
4268 (void)nfs_fsinfo(nmp, vp, cred, p);
4269 biosize = min(vp->v_mount->mnt_stat.f_iosize, size);
4270
4271 if (biosize & PAGE_MASK)
4272 panic("nfs_pagein(%x): biosize not page aligned", biosize);
4273
0b4e3aa0 4274 ubc_upl_map(pl, &ioaddr);
1c79356b
A
4275 ioaddr += pl_offset;
4276 xsize = size;
4277
4278 do {
9bccf70c
A
4279 iosize = min(biosize, xsize);
4280 uio->uio_resid = iosize;
4281 auio.uio_iov = &aiov;
4282 auio.uio_iovcnt = 1;
4283 aiov.iov_len = iosize;
1c79356b
A
4284 aiov.iov_base = (caddr_t)ioaddr;
4285
fa4905b1
A
4286 FSDBG(322, uio->uio_offset, uio->uio_resid, ioaddr, xsize);
4287#warning our nfs_pagein does not support NQNFS
1c79356b
A
4288 /*
4289 * With UBC we get here only when the file data is not in the VM
4290 * page cache, so go ahead and read in.
4291 */
4292#ifdef UBC_DEBUG
fa4905b1 4293 upl_ubc_alias_set(pl, current_act(), 2);
1c79356b
A
4294#endif /* UBC_DEBUG */
4295 nfsstats.pageins++;
9bccf70c 4296
1c79356b
A
4297 error = nfs_readrpc(vp, uio, cred);
4298
4299 if (!error) {
1c79356b
A
4300 if (uio->uio_resid) {
4301 /*
fa4905b1
A
4302 * If uio_resid > 0, there is a hole in the file
4303 * and no writes after the hole have been pushed
4304 * to the server yet... or we're at the EOF
1c79356b
A
4305 * Just zero fill the rest of the valid area.
4306 */
fa4905b1 4307 int zcnt = uio->uio_resid;
9bccf70c 4308 int zoff = iosize - zcnt;
1c79356b
A
4309 bzero((char *)ioaddr + zoff, zcnt);
4310
fa4905b1 4311 FSDBG(324, uio->uio_offset, zoff, zcnt, ioaddr);
1c79356b
A
4312 uio->uio_offset += zcnt;
4313 }
9bccf70c
A
4314 ioaddr += iosize;
4315 xsize -= iosize;
1c79356b 4316 } else
fa4905b1 4317 FSDBG(322, uio->uio_offset, uio->uio_resid, error, -1);
9bccf70c 4318
1c79356b 4319 if (p && (vp->v_flag & VTEXT) &&
fa4905b1
A
4320 ((nmp->nm_flag & NFSMNT_NQNFS &&
4321 NQNFS_CKINVALID(vp, np, ND_READ) &&
4322 np->n_lrev != np->n_brev) ||
4323 (!(nmp->nm_flag & NFSMNT_NQNFS) &&
4324 np->n_mtime != np->n_vattr.va_mtime.tv_sec))) {
1c79356b
A
4325 uprintf("Process killed due to text file modification\n");
4326 psignal(p, SIGKILL);
4327 p->p_flag |= P_NOSWAP;
4328 }
4329
4330 } while (error == 0 && xsize > 0);
4331
0b4e3aa0 4332 ubc_upl_unmap(pl);
1c79356b 4333
fa4905b1 4334 if (!nofreeupl) {
1c79356b 4335 if (error)
0b4e3aa0 4336 ubc_upl_abort_range(pl, pl_offset, size,
fa4905b1
A
4337 UPL_ABORT_ERROR |
4338 UPL_ABORT_FREE_ON_EMPTY);
1c79356b 4339 else
0b4e3aa0 4340 ubc_upl_commit_range(pl, pl_offset, size,
fa4905b1
A
4341 UPL_COMMIT_CLEAR_DIRTY |
4342 UPL_COMMIT_FREE_ON_EMPTY);
1c79356b 4343 }
1c79356b
A
4344 return (error);
4345}
4346
0b4e3aa0 4347
1c79356b
A
4348/*
4349 * Vnode op for pageout using UPL
4350 * Derived from nfs_write()
4351 * File size changes are not permitted in pageout.
4352 */
4353static int
4354nfs_pageout(ap)
4355 struct vop_pageout_args /* {
4356 struct vnode *a_vp,
4357 upl_t a_pl,
4358 vm_offset_t a_pl_offset,
4359 off_t a_f_offset,
4360 size_t a_size,
4361 struct ucred *a_cred,
4362 int a_flags
4363 } */ *ap;
4364{
4365 register struct vnode *vp = ap->a_vp;
4366 upl_t pl = ap->a_pl;
4367 size_t size= ap->a_size;
4368 off_t f_offset = ap->a_f_offset;
4369 vm_offset_t pl_offset = ap->a_pl_offset;
4370 int flags = ap->a_flags;
4371 int ioflag = ap->a_flags;
4372 register int biosize;
4373 struct proc *p = current_proc();
4374 struct nfsnode *np = VTONFS(vp);
4375 register struct ucred *cred;
4376 struct buf *bp;
4377 struct nfsmount *nmp = VFSTONFS(vp->v_mount);
4378 daddr_t lbn;
1c79356b
A
4379 int n = 0, on, error = 0, iomode, must_commit, s;
4380 off_t off;
4381 vm_offset_t ioaddr;
4382 struct uio auio;
4383 struct iovec aiov;
4384 struct uio * uio = &auio;
fa4905b1 4385 int nofreeupl = flags & UPL_NOCOMMIT;
1c79356b
A
4386 int iosize;
4387 int pgsize;
4388
fa4905b1
A
4389 FSDBG(323, f_offset, size, pl, pl_offset);
4390
4391 if (pl == (upl_t)NULL)
4392 panic("nfs_pageout: no upl");
1c79356b
A
4393
4394 if (UBCINVALID(vp)) {
fa4905b1
A
4395 printf("nfs_pageout: invalid vnode 0x%x", (int)vp);
4396 if (!nofreeupl)
4397 (void) ubc_upl_abort(pl, NULL);
1c79356b
A
4398 return (EIO);
4399 }
4400 UBCINFOCHECK("nfs_pageout", vp);
4401
fa4905b1
A
4402 if (size <= 0) {
4403 printf("nfs_pageout: invalid size %d", size);
4404 if (!nofreeupl)
4405 (void) ubc_upl_abort(pl, NULL);
1c79356b 4406 return (EINVAL);
1c79356b
A
4407 }
4408
4409 /*
4410 * I use nm_rsize, not nm_wsize so that all buffer cache blocks
4411 * will be the same size within a filesystem. nfs_writerpc will
4412 * still use nm_wsize when sizing the rpc's.
4413 */
0b4e3aa0 4414 biosize = min(vp->v_mount->mnt_stat.f_iosize, size);
1c79356b 4415
0b4e3aa0
A
4416 if (biosize & PAGE_MASK)
4417 panic("nfs_pageout(%x): biosize not page aligned", biosize);
1c79356b 4418
1c79356b
A
4419 /*
4420 * Check to see whether the buffer is incore
4421 * If incore and not busy invalidate it from the cache
4422 * we should not find it BUSY, since we always do a
4423 * vm_fault_list_request in 'getblk' before returning
4424 * which would block on the page busy status
4425 */
0b4e3aa0 4426 lbn = f_offset / PAGE_SIZE; /* to match the size getblk uses */
1c79356b
A
4427
4428 for (iosize = size; iosize > 0; iosize -= PAGE_SIZE, lbn++) {
1c79356b
A
4429 s = splbio();
4430 if (bp = incore(vp, lbn)) {
fa4905b1 4431 FSDBG(323, lbn*PAGE_SIZE, 1, bp, bp->b_flags);
1c79356b 4432 if (ISSET(bp->b_flags, B_BUSY)) {
fa4905b1
A
4433 /* no panic. just tell vm we are busy */
4434 if (!nofreeupl)
4435 (void) ubc_upl_abort(pl, NULL);
0b4e3aa0 4436 return(EBUSY);
fa4905b1 4437 }
1c79356b
A
4438 bremfree(bp);
4439 SET(bp->b_flags, (B_BUSY | B_INVAL));
4440 brelse(bp);
4441 }
4442 splx(s);
4443 }
4444
4445 cred = ubc_getcred(vp);
4446 if (cred == NOCRED)
4447 cred = ap->a_cred;
4448
4449 if (np->n_flag & NWRITEERR) {
4450 np->n_flag &= ~NWRITEERR;
fa4905b1
A
4451 if (!nofreeupl)
4452 ubc_upl_abort_range(pl, pl_offset, size,
4453 UPL_ABORT_FREE_ON_EMPTY);
1c79356b
A
4454 return (np->n_error);
4455 }
4456 if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3)
4457 (void)nfs_fsinfo(nmp, vp, cred, p);
4458
4459 if (f_offset < 0 || f_offset >= np->n_size ||
fa4905b1
A
4460 f_offset & PAGE_MASK_64 || size & PAGE_MASK) {
4461 if (!nofreeupl)
4462 ubc_upl_abort_range(pl, pl_offset, size,
4463 UPL_ABORT_FREE_ON_EMPTY);
1c79356b
A
4464 return (EINVAL);
4465 }
4466
0b4e3aa0 4467 ubc_upl_map(pl, &ioaddr);
1c79356b 4468
fa4905b1 4469 if (f_offset + size > np->n_size)
1c79356b
A
4470 iosize = np->n_size - f_offset;
4471 else
4472 iosize = size;
4473
4474 pgsize = (iosize + (PAGE_SIZE - 1)) & ~PAGE_MASK;
4475
4476 if (size > pgsize) {
fa4905b1
A
4477 if (!nofreeupl)
4478 ubc_upl_abort_range(pl, pl_offset + pgsize,
4479 size - pgsize,
4480 UPL_ABORT_FREE_ON_EMPTY);
1c79356b
A
4481 }
4482 auio.uio_iov = &aiov;
4483 auio.uio_iovcnt = 1;
4484 auio.uio_offset = f_offset;
4485 auio.uio_segflg = UIO_SYSSPACE;
4486 auio.uio_rw = UIO_READ;
4487 auio.uio_resid = iosize;
4488 auio.uio_procp = NULL;
4489
4490 aiov.iov_len = iosize;
4491 aiov.iov_base = (caddr_t)ioaddr + pl_offset;
1c79356b
A
4492 /*
4493 * check for partial page and clear the
4494 * contents past end of the file before
4495 * releasing it in the VM page cache
4496 */
fa4905b1 4497 if (f_offset < np->n_size && f_offset + size > np->n_size) {
1c79356b
A
4498 size_t io = np->n_size - f_offset;
4499
4500 bzero((caddr_t)(ioaddr + pl_offset + io), size - io);
4501
fa4905b1 4502 FSDBG(321, np->n_size, f_offset, f_offset + io, size - io);
1c79356b
A
4503 }
4504
4505 do {
fa4905b1 4506#warning our nfs_pageout does not support NQNFS
1c79356b
A
4507 nfsstats.pageouts++;
4508 lbn = uio->uio_offset / biosize;
4509 on = uio->uio_offset & (biosize-1);
4510 n = min((unsigned)(biosize - on), uio->uio_resid);
4511again:
1c79356b 4512#if 0
fa4905b1
A
4513 /* (removed for UBC) */
4514 bufsize = biosize;
1c79356b
A
4515 if ((lbn + 1) * biosize > np->n_size) {
4516 bufsize = np->n_size - lbn * biosize;
4517 bufsize = (bufsize + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
4518 }
4519#endif
4520 vp->v_numoutput++;
fa4905b1 4521 /* NMODIFIED would be set here if doing unstable writes */
1c79356b
A
4522 iomode = NFSV3WRITE_FILESYNC;
4523 error = nfs_writerpc(vp, uio, cred, &iomode, &must_commit);
4524 if (must_commit)
4525 nfs_clearcommit(vp->v_mount);
fa4905b1 4526 vpwakeup(vp);
1c79356b
A
4527
4528 if (error)
4529 goto cleanup;
4530
4531 if (n > 0) {
4532 uio->uio_resid -= n;
4533 uio->uio_offset += n;
4534 uio->uio_iov->iov_base += n;
4535 uio->uio_iov->iov_len -= n;
4536 }
4537 } while (uio->uio_resid > 0 && n > 0);
4538
4539cleanup:
0b4e3aa0
A
4540 ubc_upl_unmap(pl);
4541 /*
4542 * We've had several different solutions on what to do when the pageout
4543 * gets an error. If we don't handle it, and return an error to the
4544 * caller, vm, it will retry . This can end in endless looping
4545 * between vm and here doing retries of the same page. Doing a dump
4546 * back to vm, will get it out of vm's knowledge and we lose whatever
4547 * data existed. This is risky, but in some cases necessary. For
4548 * example, the initial fix here was to do that for ESTALE. In that case
4549 * the server is telling us that the file is no longer the same. We
4550 * would not want to keep paging out to that. We also saw some 151
4551 * errors from Auspex server and NFSv3 can return errors higher than
fa4905b1
A
4552 * ELAST. Those along with NFS known server errors we will "dump" from
4553 * vm. Errors we don't expect to occur, we dump and log for further
0b4e3aa0
A
4554 * analysis. Errors that could be transient, networking ones,
4555 * we let vm "retry". Lastly, errors that we retry, but may have potential
4556 * to storm the network, we "retrywithsleep". "sever" will be used in
4557 * in the future to dump all pages of object for cases like ESTALE.
4558 * All this is the basis for the states returned and first guesses on
4559 * error handling. Tweaking expected as more statistics are gathered.
4560 * Note, in the long run we may need another more robust solution to
4561 * have some kind of persistant store when the vm cannot dump nor keep
fa4905b1 4562 * retrying as a solution, but this would be a file architectural change
0b4e3aa0
A
4563 */
4564
fa4905b1 4565 if (!nofreeupl) { /* otherwise stacked file system has to handle this */
0b4e3aa0
A
4566 if (error) {
4567 int abortflags;
4568 short action = nfs_pageouterrorhandler(error);
4569
4570 switch (action) {
4571 case DUMP:
4572 abortflags = UPL_ABORT_DUMP_PAGES|UPL_ABORT_FREE_ON_EMPTY;
4573 break;
4574 case DUMPANDLOG:
4575 abortflags = UPL_ABORT_DUMP_PAGES|UPL_ABORT_FREE_ON_EMPTY;
fa4905b1
A
4576 if (error <= ELAST &&
4577 (errorcount[error] % 100 == 0))
0b4e3aa0
A
4578 printf("nfs_pageout: unexpected error %d. dumping vm page\n", error);
4579 errorcount[error]++;
4580 break;
4581 case RETRY:
4582 abortflags = UPL_ABORT_FREE_ON_EMPTY;
4583 break;
4584 case RETRYWITHSLEEP:
4585 abortflags = UPL_ABORT_FREE_ON_EMPTY;
fa4905b1
A
4586 /* pri unused. PSOCK for placeholder. */
4587 (void) tsleep(&lbolt, PSOCK,
4588 "nfspageout", 0);
0b4e3aa0
A
4589 break;
4590 case SEVER: /* not implemented */
4591 default:
4592 printf("nfs_pageout: action %d not expected\n", action);
4593 break;
4594 }
4595
4596 ubc_upl_abort_range(pl, pl_offset, size, abortflags);
4597 /* return error in all cases above */
4598
4599 } else
4600 ubc_upl_commit_range(pl, pl_offset, pgsize,
fa4905b1
A
4601 UPL_COMMIT_CLEAR_DIRTY |
4602 UPL_COMMIT_FREE_ON_EMPTY);
1c79356b 4603 }
1c79356b
A
4604 return (error);
4605}
4606
4607/* Blktooff derives file offset given a logical block number */
4608static int
4609nfs_blktooff(ap)
4610 struct vop_blktooff_args /* {
4611 struct vnode *a_vp;
4612 daddr_t a_lblkno;
4613 off_t *a_offset;
4614 } */ *ap;
4615{
4616 int biosize;
4617 register struct vnode *vp = ap->a_vp;
4618
4619 biosize = min(vp->v_mount->mnt_stat.f_iosize, PAGE_SIZE); /* nfs_bio.c */
4620
4621 *ap->a_offset = (off_t)(ap->a_lblkno * biosize);
4622
4623 return (0);
4624}
4625
1c79356b
A
4626static int
4627nfs_offtoblk(ap)
4628 struct vop_offtoblk_args /* {
4629 struct vnode *a_vp;
4630 off_t a_offset;
4631 daddr_t *a_lblkno;
4632 } */ *ap;
4633{
4634 int biosize;
4635 register struct vnode *vp = ap->a_vp;
4636
4637 biosize = min(vp->v_mount->mnt_stat.f_iosize, PAGE_SIZE); /* nfs_bio.c */
4638
4639 *ap->a_lblkno = (daddr_t)(ap->a_offset / biosize);
4640
4641 return (0);
4642}
4643static int
4644nfs_cmap(ap)
4645 struct vop_cmap_args /* {
4646 struct vnode *a_vp;
4647 off_t a_offset;
4648 size_t a_size;
4649 daddr_t *a_bpn;
4650 size_t *a_run;
4651 void *a_poff;
4652 } */ *ap;
4653{
4654 return (EOPNOTSUPP);
4655}