]>
Commit | Line | Data |
---|---|---|
1c79356b | 1 | /* |
91447636 | 2 | * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. |
1c79356b A |
3 | * |
4 | * @APPLE_LICENSE_HEADER_START@ | |
5 | * | |
37839358 A |
6 | * The contents of this file constitute Original Code as defined in and |
7 | * are subject to the Apple Public Source License Version 1.1 (the | |
8 | * "License"). You may not use this file except in compliance with the | |
9 | * License. Please obtain a copy of the License at | |
10 | * http://www.apple.com/publicsource and read it before using this file. | |
1c79356b | 11 | * |
37839358 A |
12 | * This Original Code and all software distributed under the License are |
13 | * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
1c79356b A |
14 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
15 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
37839358 A |
16 | * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the |
17 | * License for the specific language governing rights and limitations | |
18 | * under the License. | |
1c79356b A |
19 | * |
20 | * @APPLE_LICENSE_HEADER_END@ | |
21 | */ | |
22 | /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ | |
23 | /* | |
24 | * Copyright (c) 1989, 1993 | |
25 | * The Regents of the University of California. All rights reserved. | |
26 | * (c) UNIX System Laboratories, Inc. | |
27 | * All or some portions of this file are derived from material licensed | |
28 | * to the University of California by American Telephone and Telegraph | |
29 | * Co. or Unix System Laboratories, Inc. and are reproduced herein with | |
30 | * the permission of UNIX System Laboratories, Inc. | |
31 | * | |
32 | * Redistribution and use in source and binary forms, with or without | |
33 | * modification, are permitted provided that the following conditions | |
34 | * are met: | |
35 | * 1. Redistributions of source code must retain the above copyright | |
36 | * notice, this list of conditions and the following disclaimer. | |
37 | * 2. Redistributions in binary form must reproduce the above copyright | |
38 | * notice, this list of conditions and the following disclaimer in the | |
39 | * documentation and/or other materials provided with the distribution. | |
40 | * 3. All advertising materials mentioning features or use of this software | |
41 | * must display the following acknowledgement: | |
42 | * This product includes software developed by the University of | |
43 | * California, Berkeley and its contributors. | |
44 | * 4. Neither the name of the University nor the names of its contributors | |
45 | * may be used to endorse or promote products derived from this software | |
46 | * without specific prior written permission. | |
47 | * | |
48 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
49 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
50 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
51 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
52 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
53 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
54 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
55 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
56 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
57 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
58 | * SUCH DAMAGE. | |
59 | * | |
60 | * @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95 | |
61 | */ | |
62 | ||
63 | /* | |
64 | * External virtual filesystem routines | |
65 | */ | |
66 | ||
9bccf70c | 67 | #undef DIAGNOSTIC |
1c79356b A |
68 | #define DIAGNOSTIC 1 |
69 | ||
70 | #include <sys/param.h> | |
71 | #include <sys/systm.h> | |
91447636 A |
72 | #include <sys/proc_internal.h> |
73 | #include <sys/kauth.h> | |
74 | #include <sys/mount_internal.h> | |
1c79356b | 75 | #include <sys/time.h> |
91447636 A |
76 | #include <sys/lock.h> |
77 | #include <sys/vnode_internal.h> | |
1c79356b A |
78 | #include <sys/stat.h> |
79 | #include <sys/namei.h> | |
80 | #include <sys/ucred.h> | |
91447636 | 81 | #include <sys/buf_internal.h> |
1c79356b A |
82 | #include <sys/errno.h> |
83 | #include <sys/malloc.h> | |
84 | #include <sys/domain.h> | |
85 | #include <sys/mbuf.h> | |
86 | #include <sys/syslog.h> | |
91447636 | 87 | #include <sys/ubc_internal.h> |
1c79356b A |
88 | #include <sys/vm.h> |
89 | #include <sys/sysctl.h> | |
55e303ae A |
90 | #include <sys/filedesc.h> |
91 | #include <sys/event.h> | |
91447636 A |
92 | #include <sys/kdebug.h> |
93 | #include <sys/kauth.h> | |
94 | #include <sys/user.h> | |
95 | #include <miscfs/fifofs/fifo.h> | |
55e303ae A |
96 | |
97 | #include <string.h> | |
98 | #include <machine/spl.h> | |
99 | ||
1c79356b A |
100 | |
101 | #include <kern/assert.h> | |
102 | ||
103 | #include <miscfs/specfs/specdev.h> | |
104 | ||
0b4e3aa0 A |
105 | #include <mach/mach_types.h> |
106 | #include <mach/memory_object_types.h> | |
107 | ||
91447636 A |
108 | extern lck_grp_t *vnode_lck_grp; |
109 | extern lck_attr_t *vnode_lck_attr; | |
110 | ||
111 | ||
112 | extern lck_mtx_t * mnt_list_mtx_lock; | |
0b4e3aa0 | 113 | |
1c79356b A |
114 | enum vtype iftovt_tab[16] = { |
115 | VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, | |
116 | VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD, | |
117 | }; | |
118 | int vttoif_tab[9] = { | |
119 | 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, | |
120 | S_IFSOCK, S_IFIFO, S_IFMT, | |
121 | }; | |
122 | ||
91447636 A |
123 | extern int ubc_isinuse_locked(vnode_t, int, int); |
124 | extern kern_return_t adjust_vm_object_cache(vm_size_t oval, vm_size_t nval); | |
125 | ||
126 | static void vnode_list_add(vnode_t); | |
127 | static void vnode_list_remove(vnode_t); | |
128 | ||
129 | static errno_t vnode_drain(vnode_t); | |
130 | static void vgone(vnode_t); | |
131 | static void vclean(vnode_t vp, int flag, proc_t p); | |
132 | static void vnode_reclaim_internal(vnode_t, int, int); | |
133 | ||
134 | static void vnode_dropiocount (vnode_t, int); | |
135 | static errno_t vnode_getiocount(vnode_t vp, int locked, int vid, int vflags); | |
136 | static int vget_internal(vnode_t, int, int); | |
137 | ||
138 | static vnode_t checkalias(vnode_t vp, dev_t nvp_rdev); | |
139 | static int vnode_reload(vnode_t); | |
140 | static int vnode_isinuse_locked(vnode_t, int, int); | |
141 | ||
142 | static void insmntque(vnode_t vp, mount_t mp); | |
143 | mount_t mount_list_lookupby_fsid(fsid_t *, int, int); | |
144 | static int mount_getvfscnt(void); | |
145 | static int mount_fillfsids(fsid_t *, int ); | |
146 | static void vnode_iterate_setup(mount_t); | |
147 | static int vnode_umount_preflight(mount_t, vnode_t, int); | |
148 | static int vnode_iterate_prepare(mount_t); | |
149 | static int vnode_iterate_reloadq(mount_t); | |
150 | static void vnode_iterate_clear(mount_t); | |
1c79356b | 151 | |
1c79356b A |
152 | TAILQ_HEAD(freelst, vnode) vnode_free_list; /* vnode free list */ |
153 | TAILQ_HEAD(inactivelst, vnode) vnode_inactive_list; /* vnode inactive list */ | |
154 | struct mntlist mountlist; /* mounted filesystem list */ | |
91447636 | 155 | static int nummounts = 0; |
1c79356b A |
156 | |
157 | #if DIAGNOSTIC | |
158 | #define VLISTCHECK(fun, vp, list) \ | |
159 | if ((vp)->v_freelist.tqe_prev == (struct vnode **)0xdeadb) \ | |
160 | panic("%s: %s vnode not on %slist", (fun), (list), (list)); | |
161 | ||
162 | #define VINACTIVECHECK(fun, vp, expected) \ | |
163 | do { \ | |
164 | int __is_inactive = ISSET((vp)->v_flag, VUINACTIVE); \ | |
165 | if (__is_inactive ^ expected) \ | |
166 | panic("%s: %sinactive vnode, expected %s", (fun), \ | |
167 | __is_inactive? "" : "not ", \ | |
168 | expected? "inactive": "not inactive"); \ | |
169 | } while(0) | |
170 | #else | |
171 | #define VLISTCHECK(fun, vp, list) | |
172 | #define VINACTIVECHECK(fun, vp, expected) | |
173 | #endif /* DIAGNOSTIC */ | |
174 | ||
175 | #define VLISTNONE(vp) \ | |
176 | do { \ | |
177 | (vp)->v_freelist.tqe_next = (struct vnode *)0; \ | |
178 | (vp)->v_freelist.tqe_prev = (struct vnode **)0xdeadb; \ | |
179 | } while(0) | |
180 | ||
181 | #define VONLIST(vp) \ | |
182 | ((vp)->v_freelist.tqe_prev != (struct vnode **)0xdeadb) | |
183 | ||
184 | /* remove a vnode from free vnode list */ | |
185 | #define VREMFREE(fun, vp) \ | |
186 | do { \ | |
187 | VLISTCHECK((fun), (vp), "free"); \ | |
188 | TAILQ_REMOVE(&vnode_free_list, (vp), v_freelist); \ | |
189 | VLISTNONE((vp)); \ | |
190 | freevnodes--; \ | |
191 | } while(0) | |
192 | ||
193 | /* remove a vnode from inactive vnode list */ | |
194 | #define VREMINACTIVE(fun, vp) \ | |
195 | do { \ | |
196 | VLISTCHECK((fun), (vp), "inactive"); \ | |
197 | VINACTIVECHECK((fun), (vp), VUINACTIVE); \ | |
198 | TAILQ_REMOVE(&vnode_inactive_list, (vp), v_freelist); \ | |
199 | CLR((vp)->v_flag, VUINACTIVE); \ | |
200 | VLISTNONE((vp)); \ | |
201 | inactivevnodes--; \ | |
202 | } while(0) | |
203 | ||
1c79356b A |
204 | /* |
205 | * Have to declare first two locks as actual data even if !MACH_SLOCKS, since | |
206 | * a pointers to them get passed around. | |
207 | */ | |
91447636 A |
208 | void * mntvnode_slock; |
209 | void * mntid_slock; | |
210 | void * spechash_slock; | |
1c79356b A |
211 | |
212 | /* | |
213 | * vnodetarget is the amount of vnodes we expect to get back | |
214 | * from the the inactive vnode list and VM object cache. | |
215 | * As vnreclaim() is a mainly cpu bound operation for faster | |
216 | * processers this number could be higher. | |
217 | * Having this number too high introduces longer delays in | |
91447636 | 218 | * the execution of new_vnode(). |
1c79356b A |
219 | */ |
220 | unsigned long vnodetarget; /* target for vnreclaim() */ | |
221 | #define VNODE_FREE_TARGET 20 /* Default value for vnodetarget */ | |
222 | ||
223 | /* | |
224 | * We need quite a few vnodes on the free list to sustain the | |
225 | * rapid stat() the compilation process does, and still benefit from the name | |
226 | * cache. Having too few vnodes on the free list causes serious disk | |
227 | * thrashing as we cycle through them. | |
228 | */ | |
0b4e3aa0 | 229 | #define VNODE_FREE_MIN 300 /* freelist should have at least these many */ |
1c79356b A |
230 | |
231 | /* | |
232 | * We need to get vnodes back from the VM object cache when a certain # | |
233 | * of vnodes are reused from the freelist. This is essential for the | |
234 | * caching to be effective in the namecache and the buffer cache [for the | |
235 | * metadata]. | |
236 | */ | |
237 | #define VNODE_TOOMANY_REUSED (VNODE_FREE_MIN/4) | |
238 | ||
239 | /* | |
240 | * If we have enough vnodes on the freelist we do not want to reclaim | |
241 | * the vnodes from the VM object cache. | |
242 | */ | |
243 | #define VNODE_FREE_ENOUGH (VNODE_FREE_MIN + (VNODE_FREE_MIN/2)) | |
244 | ||
245 | /* | |
246 | * Initialize the vnode management data structures. | |
247 | */ | |
0b4e3aa0 | 248 | __private_extern__ void |
91447636 | 249 | vntblinit(void) |
1c79356b | 250 | { |
1c79356b | 251 | TAILQ_INIT(&vnode_free_list); |
1c79356b | 252 | TAILQ_INIT(&vnode_inactive_list); |
91447636 | 253 | TAILQ_INIT(&mountlist); |
1c79356b A |
254 | |
255 | if (!vnodetarget) | |
256 | vnodetarget = VNODE_FREE_TARGET; | |
257 | ||
258 | /* | |
259 | * Scale the vm_object_cache to accomodate the vnodes | |
260 | * we want to cache | |
261 | */ | |
262 | (void) adjust_vm_object_cache(0, desiredvnodes - VNODE_FREE_MIN); | |
263 | } | |
264 | ||
265 | /* Reset the VM Object Cache with the values passed in */ | |
0b4e3aa0 | 266 | __private_extern__ kern_return_t |
1c79356b A |
267 | reset_vmobjectcache(unsigned int val1, unsigned int val2) |
268 | { | |
269 | vm_size_t oval = val1 - VNODE_FREE_MIN; | |
9bccf70c A |
270 | vm_size_t nval; |
271 | ||
272 | if(val2 < VNODE_FREE_MIN) | |
273 | nval = 0; | |
274 | else | |
275 | nval = val2 - VNODE_FREE_MIN; | |
1c79356b A |
276 | |
277 | return(adjust_vm_object_cache(oval, nval)); | |
278 | } | |
279 | ||
91447636 A |
280 | |
281 | /* the timeout is in 10 msecs */ | |
1c79356b | 282 | int |
91447636 A |
283 | vnode_waitforwrites(vnode_t vp, int output_target, int slpflag, int slptimeout, char *msg) { |
284 | int error = 0; | |
285 | struct timespec ts; | |
1c79356b | 286 | |
91447636 A |
287 | KERNEL_DEBUG(0x3010280 | DBG_FUNC_START, (int)vp, output_target, vp->v_numoutput, 0, 0); |
288 | ||
289 | if (vp->v_numoutput > output_target) { | |
290 | ||
291 | slpflag &= ~PDROP; | |
292 | ||
293 | vnode_lock(vp); | |
294 | ||
295 | while ((vp->v_numoutput > output_target) && error == 0) { | |
296 | if (output_target) | |
297 | vp->v_flag |= VTHROTTLED; | |
298 | else | |
299 | vp->v_flag |= VBWAIT; | |
300 | ts.tv_sec = (slptimeout/100); | |
301 | ts.tv_nsec = (slptimeout % 1000) * 10 * NSEC_PER_USEC * 1000 ; | |
302 | error = msleep((caddr_t)&vp->v_numoutput, &vp->v_lock, (slpflag | (PRIBIO + 1)), msg, &ts); | |
303 | } | |
304 | vnode_unlock(vp); | |
1c79356b | 305 | } |
91447636 A |
306 | KERNEL_DEBUG(0x3010280 | DBG_FUNC_END, (int)vp, output_target, vp->v_numoutput, error, 0); |
307 | ||
308 | return error; | |
1c79356b A |
309 | } |
310 | ||
91447636 | 311 | |
1c79356b | 312 | void |
91447636 A |
313 | vnode_startwrite(vnode_t vp) { |
314 | ||
315 | OSAddAtomic(1, &vp->v_numoutput); | |
316 | } | |
317 | ||
318 | ||
319 | void | |
320 | vnode_writedone(vnode_t vp) | |
1c79356b | 321 | { |
91447636 A |
322 | if (vp) { |
323 | int need_wakeup = 0; | |
324 | ||
325 | OSAddAtomic(-1, &vp->v_numoutput); | |
326 | ||
327 | vnode_lock(vp); | |
1c79356b | 328 | |
91447636 A |
329 | if (vp->v_numoutput < 0) |
330 | panic("vnode_writedone: numoutput < 0"); | |
331 | ||
332 | if ((vp->v_flag & VTHROTTLED) && (vp->v_numoutput < (VNODE_ASYNC_THROTTLE / 3))) { | |
333 | vp->v_flag &= ~VTHROTTLED; | |
334 | need_wakeup = 1; | |
335 | } | |
336 | if ((vp->v_flag & VBWAIT) && (vp->v_numoutput == 0)) { | |
337 | vp->v_flag &= ~VBWAIT; | |
338 | need_wakeup = 1; | |
339 | } | |
340 | vnode_unlock(vp); | |
341 | ||
342 | if (need_wakeup) | |
343 | wakeup((caddr_t)&vp->v_numoutput); | |
344 | } | |
1c79356b A |
345 | } |
346 | ||
91447636 A |
347 | |
348 | ||
1c79356b | 349 | int |
91447636 | 350 | vnode_hasdirtyblks(vnode_t vp) |
1c79356b | 351 | { |
91447636 | 352 | struct cl_writebehind *wbp; |
1c79356b | 353 | |
91447636 A |
354 | /* |
355 | * Not taking the buf_mtxp as there is little | |
356 | * point doing it. Even if the lock is taken the | |
357 | * state can change right after that. If their | |
358 | * needs to be a synchronization, it must be driven | |
359 | * by the caller | |
360 | */ | |
361 | if (vp->v_dirtyblkhd.lh_first) | |
362 | return (1); | |
363 | ||
364 | if (!UBCINFOEXISTS(vp)) | |
365 | return (0); | |
0b4e3aa0 | 366 | |
91447636 A |
367 | wbp = vp->v_ubcinfo->cl_wbehind; |
368 | ||
369 | if (wbp && (wbp->cl_number || wbp->cl_scmap)) | |
370 | return (1); | |
0b4e3aa0 | 371 | |
1c79356b A |
372 | return (0); |
373 | } | |
374 | ||
1c79356b | 375 | int |
91447636 | 376 | vnode_hascleanblks(vnode_t vp) |
1c79356b | 377 | { |
91447636 A |
378 | /* |
379 | * Not taking the buf_mtxp as there is little | |
380 | * point doing it. Even if the lock is taken the | |
381 | * state can change right after that. If their | |
382 | * needs to be a synchronization, it must be driven | |
383 | * by the caller | |
384 | */ | |
385 | if (vp->v_cleanblkhd.lh_first) | |
386 | return (1); | |
387 | return (0); | |
388 | } | |
1c79356b | 389 | |
91447636 A |
390 | void |
391 | vnode_iterate_setup(mount_t mp) | |
392 | { | |
393 | while (mp->mnt_lflag & MNT_LITER) { | |
394 | mp->mnt_lflag |= MNT_LITERWAIT; | |
395 | msleep((caddr_t)mp, &mp->mnt_mlock, PVFS, "vnode_iterate_setup", 0); | |
1c79356b | 396 | } |
91447636 A |
397 | |
398 | mp->mnt_lflag |= MNT_LITER; | |
399 | ||
1c79356b A |
400 | } |
401 | ||
91447636 A |
402 | static int |
403 | vnode_umount_preflight(mount_t mp, vnode_t skipvp, int flags) | |
1c79356b | 404 | { |
91447636 | 405 | vnode_t vp; |
1c79356b | 406 | |
91447636 A |
407 | TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) { |
408 | if (vp->v_type == VDIR) | |
409 | continue; | |
410 | if (vp == skipvp) | |
411 | continue; | |
412 | if ((flags & SKIPSYSTEM) && ((vp->v_flag & VSYSTEM) || | |
413 | (vp->v_flag & VNOFLUSH))) | |
414 | continue; | |
415 | if ((flags & SKIPSWAP) && (vp->v_flag & VSWAP)) | |
416 | continue; | |
417 | if ((flags & WRITECLOSE) && | |
418 | (vp->v_writecount == 0 || vp->v_type != VREG)) | |
419 | continue; | |
420 | /* Look for busy vnode */ | |
421 | if (((vp->v_usecount != 0) && | |
422 | ((vp->v_usecount - vp->v_kusecount) != 0))) | |
423 | return(1); | |
1c79356b | 424 | } |
91447636 A |
425 | |
426 | return(0); | |
1c79356b A |
427 | } |
428 | ||
91447636 A |
429 | /* |
430 | * This routine prepares iteration by moving all the vnodes to worker queue | |
431 | * called with mount lock held | |
1c79356b | 432 | */ |
91447636 A |
433 | int |
434 | vnode_iterate_prepare(mount_t mp) | |
1c79356b | 435 | { |
91447636 | 436 | vnode_t vp; |
1c79356b | 437 | |
91447636 A |
438 | if (TAILQ_EMPTY(&mp->mnt_vnodelist)) { |
439 | /* nothing to do */ | |
440 | return (0); | |
441 | } | |
1c79356b | 442 | |
91447636 A |
443 | vp = TAILQ_FIRST(&mp->mnt_vnodelist); |
444 | vp->v_mntvnodes.tqe_prev = &(mp->mnt_workerqueue.tqh_first); | |
445 | mp->mnt_workerqueue.tqh_first = mp->mnt_vnodelist.tqh_first; | |
446 | mp->mnt_workerqueue.tqh_last = mp->mnt_vnodelist.tqh_last; | |
447 | ||
448 | TAILQ_INIT(&mp->mnt_vnodelist); | |
449 | if (mp->mnt_newvnodes.tqh_first != NULL) | |
450 | panic("vnode_iterate_prepare: newvnode when entering vnode"); | |
451 | TAILQ_INIT(&mp->mnt_newvnodes); | |
452 | ||
453 | return (1); | |
1c79356b A |
454 | } |
455 | ||
91447636 A |
456 | |
457 | /* called with mount lock held */ | |
458 | int | |
459 | vnode_iterate_reloadq(mount_t mp) | |
1c79356b | 460 | { |
91447636 A |
461 | int moved = 0; |
462 | ||
463 | /* add the remaining entries in workerq to the end of mount vnode list */ | |
464 | if (!TAILQ_EMPTY(&mp->mnt_workerqueue)) { | |
465 | struct vnode * mvp; | |
466 | mvp = TAILQ_LAST(&mp->mnt_vnodelist, vnodelst); | |
467 | ||
468 | /* Joining the workerque entities to mount vnode list */ | |
469 | if (mvp) | |
470 | mvp->v_mntvnodes.tqe_next = mp->mnt_workerqueue.tqh_first; | |
471 | else | |
472 | mp->mnt_vnodelist.tqh_first = mp->mnt_workerqueue.tqh_first; | |
473 | mp->mnt_workerqueue.tqh_first->v_mntvnodes.tqe_prev = mp->mnt_vnodelist.tqh_last; | |
474 | mp->mnt_vnodelist.tqh_last = mp->mnt_workerqueue.tqh_last; | |
475 | TAILQ_INIT(&mp->mnt_workerqueue); | |
476 | } | |
477 | ||
478 | /* add the newvnodes to the head of mount vnode list */ | |
479 | if (!TAILQ_EMPTY(&mp->mnt_newvnodes)) { | |
480 | struct vnode * nlvp; | |
481 | nlvp = TAILQ_LAST(&mp->mnt_newvnodes, vnodelst); | |
482 | ||
483 | mp->mnt_newvnodes.tqh_first->v_mntvnodes.tqe_prev = &mp->mnt_vnodelist.tqh_first; | |
484 | nlvp->v_mntvnodes.tqe_next = mp->mnt_vnodelist.tqh_first; | |
485 | if(mp->mnt_vnodelist.tqh_first) | |
486 | mp->mnt_vnodelist.tqh_first->v_mntvnodes.tqe_prev = &nlvp->v_mntvnodes.tqe_next; | |
487 | else | |
488 | mp->mnt_vnodelist.tqh_last = mp->mnt_newvnodes.tqh_last; | |
489 | mp->mnt_vnodelist.tqh_first = mp->mnt_newvnodes.tqh_first; | |
490 | TAILQ_INIT(&mp->mnt_newvnodes); | |
491 | moved = 1; | |
492 | } | |
1c79356b | 493 | |
91447636 | 494 | return(moved); |
1c79356b A |
495 | } |
496 | ||
1c79356b | 497 | |
91447636 A |
498 | void |
499 | vnode_iterate_clear(mount_t mp) | |
500 | { | |
501 | mp->mnt_lflag &= ~MNT_LITER; | |
502 | if (mp->mnt_lflag & MNT_LITERWAIT) { | |
503 | mp->mnt_lflag &= ~MNT_LITERWAIT; | |
504 | wakeup(mp); | |
505 | } | |
506 | } | |
1c79356b | 507 | |
1c79356b | 508 | |
1c79356b | 509 | int |
91447636 A |
510 | vnode_iterate(mp, flags, callout, arg) |
511 | mount_t mp; | |
512 | int flags; | |
513 | int (*callout)(struct vnode *, void *); | |
514 | void * arg; | |
1c79356b | 515 | { |
1c79356b | 516 | struct vnode *vp; |
91447636 A |
517 | int vid, retval; |
518 | int ret = 0; | |
1c79356b | 519 | |
91447636 | 520 | mount_lock(mp); |
1c79356b | 521 | |
91447636 | 522 | vnode_iterate_setup(mp); |
1c79356b | 523 | |
91447636 A |
524 | /* it is returns 0 then there is nothing to do */ |
525 | retval = vnode_iterate_prepare(mp); | |
1c79356b | 526 | |
91447636 A |
527 | if (retval == 0) { |
528 | vnode_iterate_clear(mp); | |
529 | mount_unlock(mp); | |
530 | return(ret); | |
1c79356b | 531 | } |
91447636 A |
532 | |
533 | /* iterate over all the vnodes */ | |
534 | while (!TAILQ_EMPTY(&mp->mnt_workerqueue)) { | |
535 | vp = TAILQ_FIRST(&mp->mnt_workerqueue); | |
536 | TAILQ_REMOVE(&mp->mnt_workerqueue, vp, v_mntvnodes); | |
537 | TAILQ_INSERT_TAIL(&mp->mnt_vnodelist, vp, v_mntvnodes); | |
538 | vid = vp->v_id; | |
539 | if ((vp->v_data == NULL) || (vp->v_type == VNON) || (vp->v_mount != mp)) { | |
540 | continue; | |
541 | } | |
542 | mount_unlock(mp); | |
1c79356b | 543 | |
91447636 A |
544 | if ( vget_internal(vp, vid, (flags | VNODE_NODEAD| VNODE_WITHID | VNODE_NOSUSPEND))) { |
545 | mount_lock(mp); | |
546 | continue; | |
547 | } | |
548 | if (flags & VNODE_RELOAD) { | |
549 | /* | |
550 | * we're reloading the filesystem | |
551 | * cast out any inactive vnodes... | |
552 | */ | |
553 | if (vnode_reload(vp)) { | |
554 | /* vnode will be recycled on the refcount drop */ | |
555 | vnode_put(vp); | |
556 | mount_lock(mp); | |
557 | continue; | |
558 | } | |
559 | } | |
55e303ae | 560 | |
91447636 A |
561 | retval = callout(vp, arg); |
562 | ||
563 | switch (retval) { | |
564 | case VNODE_RETURNED: | |
565 | case VNODE_RETURNED_DONE: | |
566 | vnode_put(vp); | |
567 | if (retval == VNODE_RETURNED_DONE) { | |
568 | mount_lock(mp); | |
569 | ret = 0; | |
570 | goto out; | |
571 | } | |
572 | break; | |
573 | ||
574 | case VNODE_CLAIMED_DONE: | |
575 | mount_lock(mp); | |
576 | ret = 0; | |
577 | goto out; | |
578 | case VNODE_CLAIMED: | |
579 | default: | |
580 | break; | |
581 | } | |
582 | mount_lock(mp); | |
55e303ae | 583 | } |
1c79356b | 584 | |
91447636 A |
585 | out: |
586 | (void)vnode_iterate_reloadq(mp); | |
587 | vnode_iterate_clear(mp); | |
588 | mount_unlock(mp); | |
589 | return (ret); | |
590 | } | |
55e303ae | 591 | |
91447636 A |
592 | void |
593 | mount_lock_renames(mount_t mp) | |
594 | { | |
595 | lck_mtx_lock(&mp->mnt_renamelock); | |
1c79356b A |
596 | } |
597 | ||
1c79356b | 598 | void |
91447636 | 599 | mount_unlock_renames(mount_t mp) |
1c79356b | 600 | { |
91447636 A |
601 | lck_mtx_unlock(&mp->mnt_renamelock); |
602 | } | |
1c79356b | 603 | |
91447636 A |
604 | void |
605 | mount_lock(mount_t mp) | |
606 | { | |
607 | lck_mtx_lock(&mp->mnt_mlock); | |
1c79356b A |
608 | } |
609 | ||
91447636 A |
610 | void |
611 | mount_unlock(mount_t mp) | |
fa4905b1 | 612 | { |
91447636 | 613 | lck_mtx_unlock(&mp->mnt_mlock); |
fa4905b1 A |
614 | } |
615 | ||
91447636 | 616 | |
1c79356b | 617 | void |
91447636 | 618 | mount_ref(mount_t mp, int locked) |
1c79356b | 619 | { |
91447636 A |
620 | if ( !locked) |
621 | mount_lock(mp); | |
622 | ||
623 | mp->mnt_count++; | |
624 | ||
625 | if ( !locked) | |
626 | mount_unlock(mp); | |
1c79356b A |
627 | } |
628 | ||
91447636 A |
629 | |
630 | void | |
631 | mount_drop(mount_t mp, int locked) | |
632 | { | |
633 | if ( !locked) | |
634 | mount_lock(mp); | |
635 | ||
636 | mp->mnt_count--; | |
637 | ||
638 | if (mp->mnt_count == 0 && (mp->mnt_lflag & MNT_LDRAIN)) | |
639 | wakeup(&mp->mnt_lflag); | |
640 | ||
641 | if ( !locked) | |
642 | mount_unlock(mp); | |
643 | } | |
644 | ||
645 | ||
1c79356b | 646 | int |
91447636 | 647 | mount_iterref(mount_t mp, int locked) |
1c79356b | 648 | { |
91447636 | 649 | int retval = 0; |
1c79356b | 650 | |
91447636 A |
651 | if (!locked) |
652 | mount_list_lock(); | |
653 | if (mp->mnt_iterref < 0) { | |
654 | retval = 1; | |
655 | } else { | |
656 | mp->mnt_iterref++; | |
1c79356b | 657 | } |
91447636 A |
658 | if (!locked) |
659 | mount_list_unlock(); | |
660 | return(retval); | |
661 | } | |
1c79356b | 662 | |
91447636 A |
663 | int |
664 | mount_isdrained(mount_t mp, int locked) | |
665 | { | |
666 | int retval; | |
1c79356b | 667 | |
91447636 A |
668 | if (!locked) |
669 | mount_list_lock(); | |
670 | if (mp->mnt_iterref < 0) | |
671 | retval = 1; | |
672 | else | |
673 | retval = 0; | |
674 | if (!locked) | |
675 | mount_list_unlock(); | |
676 | return(retval); | |
677 | } | |
678 | ||
679 | void | |
680 | mount_iterdrop(mount_t mp) | |
681 | { | |
682 | mount_list_lock(); | |
683 | mp->mnt_iterref--; | |
684 | wakeup(&mp->mnt_iterref); | |
685 | mount_list_unlock(); | |
686 | } | |
687 | ||
688 | void | |
689 | mount_iterdrain(mount_t mp) | |
690 | { | |
691 | mount_list_lock(); | |
692 | while (mp->mnt_iterref) | |
693 | msleep((caddr_t)&mp->mnt_iterref, mnt_list_mtx_lock, PVFS, "mount_iterdrain", 0 ); | |
694 | /* mount iterations drained */ | |
695 | mp->mnt_iterref = -1; | |
696 | mount_list_unlock(); | |
697 | } | |
698 | void | |
699 | mount_iterreset(mount_t mp) | |
700 | { | |
701 | mount_list_lock(); | |
702 | if (mp->mnt_iterref == -1) | |
703 | mp->mnt_iterref = 0; | |
704 | mount_list_unlock(); | |
705 | } | |
706 | ||
707 | /* always called with mount lock held */ | |
708 | int | |
709 | mount_refdrain(mount_t mp) | |
710 | { | |
711 | if (mp->mnt_lflag & MNT_LDRAIN) | |
712 | panic("already in drain"); | |
713 | mp->mnt_lflag |= MNT_LDRAIN; | |
714 | ||
715 | while (mp->mnt_count) | |
716 | msleep((caddr_t)&mp->mnt_lflag, &mp->mnt_mlock, PVFS, "mount_drain", 0 ); | |
717 | ||
718 | if (mp->mnt_vnodelist.tqh_first != NULL) | |
719 | panic("mount_refdrain: dangling vnode"); | |
720 | ||
721 | mp->mnt_lflag &= ~MNT_LDRAIN; | |
722 | ||
723 | return(0); | |
724 | } | |
725 | ||
726 | ||
727 | /* | |
728 | * Mark a mount point as busy. Used to synchronize access and to delay | |
729 | * unmounting. | |
730 | */ | |
731 | int | |
732 | vfs_busy(mount_t mp, int flags) | |
733 | { | |
734 | ||
735 | restart: | |
736 | if (mp->mnt_lflag & MNT_LDEAD) | |
737 | return(ENOENT); | |
738 | ||
739 | if (mp->mnt_lflag & MNT_LUNMOUNT) { | |
740 | if (flags & LK_NOWAIT) | |
741 | return (ENOENT); | |
742 | ||
743 | mount_lock(mp); | |
744 | ||
745 | if (mp->mnt_lflag & MNT_LDEAD) { | |
746 | mount_unlock(mp); | |
747 | return(ENOENT); | |
748 | } | |
749 | if (mp->mnt_lflag & MNT_LUNMOUNT) { | |
750 | mp->mnt_lflag |= MNT_LWAIT; | |
1c79356b | 751 | /* |
91447636 A |
752 | * Since all busy locks are shared except the exclusive |
753 | * lock granted when unmounting, the only place that a | |
754 | * wakeup needs to be done is at the release of the | |
755 | * exclusive lock at the end of dounmount. | |
1c79356b | 756 | */ |
91447636 A |
757 | msleep((caddr_t)mp, &mp->mnt_mlock, (PVFS | PDROP), "vfsbusy", 0 ); |
758 | return (ENOENT); | |
1c79356b | 759 | } |
91447636 A |
760 | mount_unlock(mp); |
761 | } | |
762 | ||
763 | lck_rw_lock_shared(&mp->mnt_rwlock); | |
764 | ||
765 | /* | |
766 | * until we are granted the rwlock, it's possible for the mount point to | |
767 | * change state, so reevaluate before granting the vfs_busy | |
768 | */ | |
769 | if (mp->mnt_lflag & (MNT_LDEAD | MNT_LUNMOUNT)) { | |
770 | lck_rw_done(&mp->mnt_rwlock); | |
771 | goto restart; | |
1c79356b | 772 | } |
1c79356b A |
773 | return (0); |
774 | } | |
775 | ||
91447636 A |
776 | /* |
777 | * Free a busy filesystem. | |
778 | */ | |
779 | ||
780 | void | |
781 | vfs_unbusy(mount_t mp) | |
782 | { | |
783 | lck_rw_done(&mp->mnt_rwlock); | |
784 | } | |
785 | ||
786 | ||
787 | ||
788 | static void | |
789 | vfs_rootmountfailed(mount_t mp) { | |
790 | ||
791 | mount_list_lock(); | |
792 | mp->mnt_vtable->vfc_refcount--; | |
793 | mount_list_unlock(); | |
794 | ||
795 | vfs_unbusy(mp); | |
796 | ||
797 | mount_lock_destroy(mp); | |
798 | ||
799 | FREE_ZONE(mp, sizeof(struct mount), M_MOUNT); | |
800 | } | |
801 | ||
802 | /* | |
803 | * Lookup a filesystem type, and if found allocate and initialize | |
804 | * a mount structure for it. | |
805 | * | |
806 | * Devname is usually updated by mount(8) after booting. | |
807 | */ | |
808 | static mount_t | |
809 | vfs_rootmountalloc_internal(struct vfstable *vfsp, const char *devname) | |
810 | { | |
811 | mount_t mp; | |
812 | ||
813 | mp = _MALLOC_ZONE((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); | |
814 | bzero((char *)mp, (u_long)sizeof(struct mount)); | |
815 | ||
816 | /* Initialize the default IO constraints */ | |
817 | mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS; | |
818 | mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32; | |
819 | mp->mnt_maxsegreadsize = mp->mnt_maxreadcnt; | |
820 | mp->mnt_maxsegwritesize = mp->mnt_maxwritecnt; | |
821 | mp->mnt_devblocksize = DEV_BSIZE; | |
822 | ||
823 | mount_lock_init(mp); | |
824 | (void)vfs_busy(mp, LK_NOWAIT); | |
825 | ||
826 | TAILQ_INIT(&mp->mnt_vnodelist); | |
827 | TAILQ_INIT(&mp->mnt_workerqueue); | |
828 | TAILQ_INIT(&mp->mnt_newvnodes); | |
829 | ||
830 | mp->mnt_vtable = vfsp; | |
831 | mp->mnt_op = vfsp->vfc_vfsops; | |
832 | mp->mnt_flag = MNT_RDONLY | MNT_ROOTFS; | |
833 | mp->mnt_vnodecovered = NULLVP; | |
834 | //mp->mnt_stat.f_type = vfsp->vfc_typenum; | |
835 | mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; | |
836 | ||
837 | mount_list_lock(); | |
838 | vfsp->vfc_refcount++; | |
839 | mount_list_unlock(); | |
840 | ||
841 | strncpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSTYPENAMELEN); | |
842 | mp->mnt_vfsstat.f_mntonname[0] = '/'; | |
843 | (void) copystr((char *)devname, mp->mnt_vfsstat.f_mntfromname, MAXPATHLEN - 1, 0); | |
844 | ||
845 | return (mp); | |
846 | } | |
847 | ||
848 | errno_t | |
849 | vfs_rootmountalloc(const char *fstypename, const char *devname, mount_t *mpp) | |
850 | { | |
851 | struct vfstable *vfsp; | |
852 | ||
853 | for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) | |
854 | if (!strcmp(vfsp->vfc_name, fstypename)) | |
855 | break; | |
856 | if (vfsp == NULL) | |
857 | return (ENODEV); | |
858 | ||
859 | *mpp = vfs_rootmountalloc_internal(vfsp, devname); | |
860 | ||
861 | if (*mpp) | |
862 | return (0); | |
863 | ||
864 | return (ENOMEM); | |
865 | } | |
866 | ||
867 | ||
868 | /* | |
869 | * Find an appropriate filesystem to use for the root. If a filesystem | |
870 | * has not been preselected, walk through the list of known filesystems | |
871 | * trying those that have mountroot routines, and try them until one | |
872 | * works or we have tried them all. | |
873 | */ | |
874 | extern int (*mountroot)(void); | |
875 | ||
/*
 * vfs_mountroot:
 *	Mount the root filesystem.  If a netboot-style `mountroot' hook is
 *	installed, defer entirely to it.  Otherwise create a vnode for the
 *	root block device and walk the vfsconf list, trying each filesystem
 *	that supplies a vfc_mountroot routine until one succeeds.
 *
 *	Returns 0 on success, an errno from bdevvp()/the hook, or ENODEV
 *	when no registered filesystem could mount the root device.
 */
int
vfs_mountroot()
{
	struct vfstable *vfsp;
	struct vfs_context context;
	int error;
	mount_t mp;

	if (mountroot != NULL) {
		/*
		 * used for netboot which follows a different set of rules
		 */
		error = (*mountroot)();
		return (error);
	}
	/* materialize a VBLK vnode for rootdev; returns with an iocount held */
	if ((error = bdevvp(rootdev, &rootvp))) {
		printf("vfs_mountroot: can't setup bdevvp\n");
		return (error);
	}
	context.vc_proc = current_proc();
	context.vc_ucred = kauth_cred_get();

	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
		if (vfsp->vfc_mountroot == NULL)
			continue;

		mp = vfs_rootmountalloc_internal(vfsp, "root_device");
		/*
		 * NOTE(review): mp is dereferenced without a NULL check here,
		 * while the sibling vfs_rootmountalloc() maps a NULL return to
		 * ENOMEM -- confirm vfs_rootmountalloc_internal() cannot fail.
		 */
		mp->mnt_devvp = rootvp;

		if ((error = (*vfsp->vfc_mountroot)(mp, rootvp, &context)) == 0) {
			/* root device is now in use by a mounted filesystem */
			mp->mnt_devvp->v_specflags |= SI_MOUNTEDON;

			vfs_unbusy(mp);

			mount_list_add(mp);

			/*
			 * cache the IO attributes for the underlying physical media...
			 * an error return indicates the underlying driver doesn't
			 * support all the queries necessary... however, reasonable
			 * defaults will have been set, so no reason to bail or care
			 */
			vfs_init_io_attributes(rootvp, mp);
			/*
			 * get rid of iocount reference returned
			 * by bdevvp... it will have also taken
			 * a usecount reference which we want to keep
			 */
			vnode_put(rootvp);

			return (0);
		}
		vfs_rootmountfailed(mp);

		/* EINVAL conventionally means "not my filesystem" -- stay quiet */
		if (error != EINVAL)
			printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
	}
	return (ENODEV);
}
935 | ||
936 | /* | |
937 | * Lookup a mount point by filesystem identifier. | |
938 | */ | |
939 | extern mount_t vfs_getvfs_locked(fsid_t *); | |
940 | ||
941 | struct mount * | |
942 | vfs_getvfs(fsid) | |
943 | fsid_t *fsid; | |
944 | { | |
945 | return (mount_list_lookupby_fsid(fsid, 0, 0)); | |
946 | } | |
947 | ||
948 | struct mount * | |
949 | vfs_getvfs_locked(fsid) | |
950 | fsid_t *fsid; | |
951 | { | |
952 | return(mount_list_lookupby_fsid(fsid, 1, 0)); | |
953 | } | |
954 | ||
955 | struct mount * | |
956 | vfs_getvfs_by_mntonname(u_char *path) | |
957 | { | |
958 | mount_t retmp = (mount_t)0; | |
959 | mount_t mp; | |
960 | ||
961 | mount_list_lock(); | |
962 | TAILQ_FOREACH(mp, &mountlist, mnt_list) { | |
963 | if (!strcmp(mp->mnt_vfsstat.f_mntonname, path)) { | |
964 | retmp = mp; | |
965 | goto out; | |
966 | } | |
967 | } | |
968 | out: | |
969 | mount_list_unlock(); | |
970 | return (retmp); | |
971 | } | |
972 | ||
973 | /* generation number for creation of new fsids */ | |
974 | u_short mntid_gen = 0; | |
975 | /* | |
976 | * Get a new unique fsid | |
977 | */ | |
978 | void | |
979 | vfs_getnewfsid(mp) | |
980 | struct mount *mp; | |
981 | { | |
982 | ||
983 | fsid_t tfsid; | |
984 | int mtype; | |
985 | mount_t nmp; | |
986 | ||
987 | mount_list_lock(); | |
988 | ||
989 | /* generate a new fsid */ | |
990 | mtype = mp->mnt_vtable->vfc_typenum; | |
991 | if (++mntid_gen == 0) | |
992 | mntid_gen++; | |
993 | tfsid.val[0] = makedev(nblkdev + mtype, mntid_gen); | |
994 | tfsid.val[1] = mtype; | |
995 | ||
996 | TAILQ_FOREACH(nmp, &mountlist, mnt_list) { | |
997 | while (vfs_getvfs_locked(&tfsid)) { | |
998 | if (++mntid_gen == 0) | |
999 | mntid_gen++; | |
1000 | tfsid.val[0] = makedev(nblkdev + mtype, mntid_gen); | |
1001 | } | |
1002 | } | |
1003 | mp->mnt_vfsstat.f_fsid.val[0] = tfsid.val[0]; | |
1004 | mp->mnt_vfsstat.f_fsid.val[1] = tfsid.val[1]; | |
1005 | mount_list_unlock(); | |
1006 | } | |
1007 | ||
1008 | /* | |
1009 | * Routines having to do with the management of the vnode table. | |
1010 | */ | |
1011 | extern int (**dead_vnodeop_p)(void *); | |
1012 | long numvnodes, freevnodes; | |
1013 | long inactivevnodes; | |
1014 | ||
1015 | ||
1016 | /* | |
1017 | * Move a vnode from one mount queue to another. | |
1018 | */ | |
/*
 * Move a vnode from one mount queue to another.
 *
 * Called with the vnode lock held.  First unhooks vp from its old
 * mount's vnode list (if any), then inserts it at the head of the new
 * mount's list.  Passing mp == NULL just removes vp from its current
 * list.  A vnode on dead_mountp is treated as not being on any list.
 */
static void
insmntque(vnode_t vp, mount_t mp)
{
	mount_t lmp;
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if ( (lmp = vp->v_mount) != NULL && lmp != dead_mountp) {
		/* VNAMED_MOUNT tracks "on a mount vnode list" membership */
		if ((vp->v_lflag & VNAMED_MOUNT) == 0)
			panic("insmntque: vp not in mount vnode list");
		vp->v_lflag &= ~VNAMED_MOUNT;

		mount_lock(lmp);

		mount_drop(lmp, 1);

		/*
		 * Manual TAILQ unlink: vp may be on any one of the three
		 * per-mount queues (vnodelist / newvnodes / workerqueue).
		 * tqe_next == NULL means vp is the tail of whichever queue
		 * holds it, so TAILQ_LAST identifies the owning queue.
		 */
		if (vp->v_mntvnodes.tqe_next == NULL) {
			if (TAILQ_LAST(&lmp->mnt_vnodelist, vnodelst) == vp)
				TAILQ_REMOVE(&lmp->mnt_vnodelist, vp, v_mntvnodes);
			else if (TAILQ_LAST(&lmp->mnt_newvnodes, vnodelst) == vp)
				TAILQ_REMOVE(&lmp->mnt_newvnodes, vp, v_mntvnodes);
			else if (TAILQ_LAST(&lmp->mnt_workerqueue, vnodelst) == vp)
				TAILQ_REMOVE(&lmp->mnt_workerqueue, vp, v_mntvnodes);
		} else {
			/* interior element: splice out without knowing the queue head */
			vp->v_mntvnodes.tqe_next->v_mntvnodes.tqe_prev = vp->v_mntvnodes.tqe_prev;
			*vp->v_mntvnodes.tqe_prev = vp->v_mntvnodes.tqe_next;
		}
		vp->v_mntvnodes.tqe_next = 0;
		vp->v_mntvnodes.tqe_prev = 0;
		mount_unlock(lmp);
		return;
	}

	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) != NULL) {
		mount_lock(mp);
		if ((vp->v_mntvnodes.tqe_next != 0) && (vp->v_mntvnodes.tqe_prev != 0))
			panic("vp already in mount list");
		/* during iteration (MNT_LITER) new arrivals go on a side queue */
		if (mp->mnt_lflag & MNT_LITER)
			TAILQ_INSERT_HEAD(&mp->mnt_newvnodes, vp, v_mntvnodes);
		else
			TAILQ_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
		if (vp->v_lflag & VNAMED_MOUNT)
			panic("insmntque: vp already in mount vnode list");
		/* 0xdeadb is the "not on free list" sentinel for tqe_prev */
		if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb))
			panic("insmntque: vp on the free list\n");
		vp->v_lflag |= VNAMED_MOUNT;
		mount_ref(mp, 1);
		mount_unlock(mp);
	}
}
1072 | ||
1073 | ||
1c79356b A |
1074 | /* |
1075 | * Create a vnode for a block device. | |
1076 | * Used for root filesystem, argdev, and swap areas. | |
1077 | * Also used for memory file system special devices. | |
1078 | */ | |
/*
 * Create a vnode for a block device.
 * Used for root filesystem, argdev, and swap areas.
 * Also used for memory file system special devices.
 *
 * On success *vpp holds a VBLK vnode for `dev' with both an iocount
 * (from vnode_create) and a usecount (from vnode_ref), opened FREAD
 * with clean buffers.  Returns ENODEV for NODEV, or the error from
 * vnode_create; later failures panic rather than unwind.
 */
int
bdevvp(dev_t dev, vnode_t *vpp)
{
	vnode_t nvp;
	int error;
	struct vnode_fsparam vfsp;
	struct vfs_context context;

	if (dev == NODEV) {
		*vpp = NULLVP;
		return (ENODEV);
	}

	context.vc_proc = current_proc();
	context.vc_ucred = FSCRED;

	/* not associated with any mount; backed by the spec vnode ops */
	vfsp.vnfs_mp = (struct mount *)0;
	vfsp.vnfs_vtype = VBLK;
	vfsp.vnfs_str = "bdevvp";
	vfsp.vnfs_dvp = 0;
	vfsp.vnfs_fsnode = 0;
	vfsp.vnfs_cnp = 0;
	vfsp.vnfs_vops = spec_vnodeop_p;
	vfsp.vnfs_rdev = dev;
	vfsp.vnfs_filesize = 0;

	/* keep this vnode out of the name cache */
	vfsp.vnfs_flags = VNFS_NOCACHE | VNFS_CANTCACHE;

	vfsp.vnfs_marksystem = 0;
	vfsp.vnfs_markroot = 0;

	if ( (error = vnode_create(VNCREATE_FLAVOR, VCREATESIZE, &vfsp, &nvp)) ) {
		*vpp = NULLVP;
		return (error);
	}
	/*
	 * The remaining steps panic on failure: a half-initialized root/swap
	 * device vnode is unrecoverable this early (the returns are for form).
	 */
	if ( (error = vnode_ref(nvp)) ) {
		panic("bdevvp failed: vnode_ref");
		return (error);
	}
	if ( (error = VNOP_FSYNC(nvp, MNT_WAIT, &context)) ) {
		panic("bdevvp failed: fsync");
		return (error);
	}
	if ( (error = buf_invalidateblks(nvp, BUF_WRITE_DATA, 0, 0)) ) {
		panic("bdevvp failed: invalidateblks");
		return (error);
	}
	if ( (error = VNOP_OPEN(nvp, FREAD, &context)) ) {
		panic("bdevvp failed: open");
		return (error);
	}
	*vpp = nvp;

	return (0);
}
1134 | ||
1135 | /* | |
1136 | * Check to see if the new vnode represents a special device | |
1137 | * for which we already have a vnode (either because of | |
1138 | * bdevvp() or because of a different vnode representing | |
1139 | * the same block device). If such an alias exists, deallocate | |
1140 | * the existing contents and return the aliased vnode. The | |
1141 | * caller is responsible for filling it with its new contents. | |
1142 | */ | |
91447636 A |
/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device). If such an alias exists, deallocate
 * the existing contents and return the aliased vnode. The
 * caller is responsible for filling it with its new contents.
 *
 * Returns NULLVP after wiring nvp into the spec hash (possibly
 * marking both vnodes VALIASED), or the live alias vp with an
 * iocount held and the vnode lock held.
 */
static vnode_t
checkalias(nvp, nvp_rdev)
	register struct vnode *nvp;
	dev_t nvp_rdev;
{
	struct vnode *vp;
	struct vnode **vpp;
	int vid = 0;

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	SPECHASH_LOCK();

	/* scan the hash chain for a vnode with the same dev and type */
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev == vp->v_rdev && nvp->v_type == vp->v_type) {
			vid = vp->v_id;
			break;
		}
	}
	SPECHASH_UNLOCK();

	if (vp) {
		/* v_id may have changed once the hash lock was dropped;
		 * vnode_getwithvid fails (and we rescan) if vp was recycled */
		if (vnode_getwithvid(vp,vid)) {
			goto loop;
		}
		/*
		 * Termination state is checked in vnode_getwithvid
		 */
		vnode_lock(vp);

		/*
		 * Alias, but not in use, so flush it out.
		 */
		if ((vp->v_iocount == 1) && (vp->v_usecount == 0)) {
			vnode_reclaim_internal(vp, 1, 0);
			vnode_unlock(vp);
			vnode_put(vp);
			goto loop;
		}
	}
	if (vp == NULL || vp->v_tag != VT_NON) {
		/* give nvp its own specinfo and insert it into the hash */
		MALLOC_ZONE(nvp->v_specinfo, struct specinfo *, sizeof(struct specinfo),
			M_SPECINFO, M_WAITOK);
		bzero(nvp->v_specinfo, sizeof(struct specinfo));
		nvp->v_rdev = nvp_rdev;
		nvp->v_specflags = 0;
		nvp->v_speclastr = -1;

		SPECHASH_LOCK();
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		*vpp = nvp;
		SPECHASH_UNLOCK();

		if (vp != NULLVP) {
			/* both vnodes now alias the same device */
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vnode_unlock(vp);
			vnode_put(vp);
		}
		return (NULLVP);
	}
	/* existing, reusable alias: returned locked with an iocount held */
	return (vp);
}
1207 | ||
91447636 | 1208 | |
1c79356b | 1209 | /* |
0b4e3aa0 A |
1210 | * Get a reference on a particular vnode and lock it if requested. |
1211 | * If the vnode was on the inactive list, remove it from the list. | |
1212 | * If the vnode was on the free list, remove it from the list and | |
1213 | * move it to inactive list as needed. | |
1214 | * The vnode lock bit is set if the vnode is being eliminated in | |
1215 | * vgone. The process is awakened when the transition is completed, | |
1216 | * and an error returned to indicate that the vnode is no longer | |
1217 | * usable (possibly having been changed to a new file system type). | |
1c79356b | 1218 | */ |
91447636 A |
/*
 * Take an iocount reference on vp, optionally validated against a
 * caller-supplied vnode id (VNODE_WITHID) so a recycled vnode is
 * rejected.  With VNODE_WRITEABLE, only succeed if the vnode currently
 * has writers (v_writecount != 0); otherwise returns EINVAL.
 * Remaining drain/termination checks happen in vnode_getiocount().
 */
static int
vget_internal(vnode_t vp, int vid, int vflags)
{
	int error = 0;
	u_long vpid;

	vnode_lock(vp);

	if (vflags & VNODE_WITHID)
		vpid = vid;
	else
		vpid = vp->v_id;   // save off the original v_id

	if ((vflags & VNODE_WRITEABLE) && (vp->v_writecount == 0))
		/*
		 * vnode to be returned only if it has writers opened
		 */
		error = EINVAL;
	else
		error = vnode_getiocount(vp, 1, vpid, vflags);

	vnode_unlock(vp);

	return (error);
}
1244 | ||
1c79356b | 1245 | int |
91447636 | 1246 | vnode_ref(vnode_t vp) |
1c79356b | 1247 | { |
1c79356b | 1248 | |
91447636 | 1249 | return (vnode_ref_ext(vp, 0)); |
1c79356b A |
1250 | } |
1251 | ||
1c79356b | 1252 | int |
91447636 | 1253 | vnode_ref_ext(vnode_t vp, int fmode) |
1c79356b | 1254 | { |
91447636 | 1255 | int error = 0; |
1c79356b | 1256 | |
91447636 | 1257 | vnode_lock(vp); |
1c79356b | 1258 | |
91447636 A |
1259 | /* |
1260 | * once all the current call sites have been fixed to insure they have | |
1261 | * taken an iocount, we can toughen this assert up and insist that the | |
1262 | * iocount is non-zero... a non-zero usecount doesn't insure correctness | |
1263 | */ | |
1264 | if (vp->v_iocount <= 0 && vp->v_usecount <= 0) | |
1265 | panic("vnode_ref_ext: vp %x has no valid reference %d, %d", vp, vp->v_iocount, vp->v_usecount); | |
1c79356b | 1266 | |
91447636 A |
1267 | /* |
1268 | * if you are the owner of drain/termination, can acquire usecount | |
1269 | */ | |
1270 | if ((vp->v_lflag & (VL_DRAIN | VL_TERMINATE | VL_DEAD))) { | |
1271 | if (vp->v_owner != current_thread()) { | |
1272 | error = ENOENT; | |
1273 | goto out; | |
1274 | } | |
1275 | } | |
1276 | vp->v_usecount++; | |
1c79356b | 1277 | |
91447636 A |
1278 | if (fmode & FWRITE) { |
1279 | if (++vp->v_writecount <= 0) | |
1280 | panic("vnode_ref_ext: v_writecount"); | |
55e303ae | 1281 | } |
91447636 A |
1282 | if (fmode & O_EVTONLY) { |
1283 | if (++vp->v_kusecount <= 0) | |
1284 | panic("vnode_ref_ext: v_kusecount"); | |
55e303ae | 1285 | } |
91447636 A |
1286 | out: |
1287 | vnode_unlock(vp); | |
1288 | ||
1289 | return (error); | |
55e303ae A |
1290 | } |
1291 | ||
1292 | ||
1c79356b A |
1293 | /* |
1294 | * put the vnode on appropriate free list. | |
91447636 | 1295 | * called with vnode LOCKED |
1c79356b A |
1296 | */ |
/*
 * put the vnode on appropriate free list.
 * called with vnode LOCKED
 *
 * No-op if the vnode is already on a list or still referenced
 * (nonzero usecount or iocount).  Aged (VAGE) and dead (VL_DEAD)
 * vnodes go to the head of the LRU so they are reused first.
 */
static void
vnode_list_add(vnode_t vp)
{

	/*
	 * if it is already on a list or non zero references return
	 */
	if (VONLIST(vp) || (vp->v_usecount != 0) || (vp->v_iocount != 0))
		return;
	vnode_list_lock();

	/*
	 * insert at tail of LRU list or at head if VAGE or VL_DEAD is set
	 */
	if ((vp->v_flag & VAGE) || (vp->v_lflag & VL_DEAD)) {
		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
		/* VAGE is a one-shot hint; consume it now */
		vp->v_flag &= ~VAGE;
	} else {
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	}
	freevnodes++;

	vnode_list_unlock();
}
1321 | ||
1322 | /* | |
91447636 | 1323 | * remove the vnode from appropriate free list. |
1c79356b A |
1324 | */ |
/*
 * remove the vnode from appropriate free list.
 * called with the vnode lock held.
 */
static void
vnode_list_remove(vnode_t vp)
{
	/*
	 * we want to avoid taking the list lock
	 * in the case where we're not on the free
	 * list... this will be true for most
	 * directories and any currently in use files
	 *
	 * we're guaranteed that we can't go from
	 * the not-on-list state to the on-list
	 * state since we hold the vnode lock...
	 * all calls to vnode_list_add are done
	 * under the vnode lock... so we can
	 * check for that condition (the prevalent one)
	 * without taking the list lock
	 */
	if (VONLIST(vp)) {
		vnode_list_lock();
		/*
		 * however, we're not guaranteed that
		 * we won't go from the on-list state
		 * to the non-on-list state until we
		 * hold the vnode_list_lock... this
		 * is due to new_vnode removing vnodes
		 * from the free list under the list_lock
		 * w/o the vnode lock... so we need to
		 * check again whether we're currently
		 * on the free list
		 */
		if (VONLIST(vp)) {
			VREMFREE("vnode_list_remove", vp);
			VLISTNONE(vp);
		}
		vnode_list_unlock();
	}
}
1362 | ||
1363 | ||
1c79356b | 1364 | void |
91447636 | 1365 | vnode_rele(vnode_t vp) |
1c79356b | 1366 | { |
91447636 A |
1367 | vnode_rele_internal(vp, 0, 0, 0); |
1368 | } | |
1c79356b | 1369 | |
1c79356b | 1370 | |
91447636 A |
1371 | void |
1372 | vnode_rele_ext(vnode_t vp, int fmode, int dont_reenter) | |
1373 | { | |
1374 | vnode_rele_internal(vp, fmode, dont_reenter, 0); | |
1c79356b A |
1375 | } |
1376 | ||
91447636 | 1377 | |
1c79356b | 1378 | void |
91447636 | 1379 | vnode_rele_internal(vnode_t vp, int fmode, int dont_reenter, int locked) |
1c79356b | 1380 | { |
91447636 | 1381 | struct vfs_context context; |
55e303ae | 1382 | |
91447636 A |
1383 | if ( !locked) |
1384 | vnode_lock(vp); | |
1c79356b | 1385 | |
91447636 A |
1386 | if (--vp->v_usecount < 0) |
1387 | panic("vnode_rele_ext: vp %x usecount -ve : %d", vp, vp->v_usecount); | |
1388 | ||
1389 | if (fmode & FWRITE) { | |
1390 | if (--vp->v_writecount < 0) | |
1391 | panic("vnode_rele_ext: vp %x writecount -ve : %d", vp, vp->v_writecount); | |
1c79356b | 1392 | } |
91447636 A |
1393 | if (fmode & O_EVTONLY) { |
1394 | if (--vp->v_kusecount < 0) | |
1395 | panic("vnode_rele_ext: vp %x kusecount -ve : %d", vp, vp->v_kusecount); | |
1c79356b | 1396 | } |
91447636 A |
1397 | if ((vp->v_iocount > 0) || (vp->v_usecount > 0)) { |
1398 | /* | |
1399 | * vnode is still busy... if we're the last | |
1400 | * usecount, mark for a future call to VNOP_INACTIVE | |
1401 | * when the iocount finally drops to 0 | |
1402 | */ | |
1403 | if (vp->v_usecount == 0) { | |
1404 | vp->v_lflag |= VL_NEEDINACTIVE; | |
1405 | vp->v_flag &= ~(VNOCACHE_DATA | VRAOFF); | |
1406 | } | |
1407 | if ( !locked) | |
1408 | vnode_unlock(vp); | |
1c79356b A |
1409 | return; |
1410 | } | |
91447636 A |
1411 | vp->v_flag &= ~(VNOCACHE_DATA | VRAOFF); |
1412 | ||
1413 | if ( (vp->v_lflag & (VL_TERMINATE | VL_DEAD)) || dont_reenter) { | |
1414 | /* | |
1415 | * vnode is being cleaned, or | |
1416 | * we've requested that we don't reenter | |
1417 | * the filesystem on this release... in | |
1418 | * this case, we'll mark the vnode aged | |
1419 | * if it's been marked for termination | |
1c79356b | 1420 | */ |
91447636 A |
1421 | if (dont_reenter) { |
1422 | if ( !(vp->v_lflag & (VL_TERMINATE | VL_DEAD | VL_MARKTERM)) ) | |
1423 | vp->v_lflag |= VL_NEEDINACTIVE; | |
1424 | vp->v_flag |= VAGE; | |
1c79356b | 1425 | } |
91447636 A |
1426 | vnode_list_add(vp); |
1427 | if ( !locked) | |
1428 | vnode_unlock(vp); | |
1429 | return; | |
1c79356b | 1430 | } |
91447636 A |
1431 | /* |
1432 | * at this point both the iocount and usecount | |
1433 | * are zero | |
1434 | * pick up an iocount so that we can call | |
1435 | * VNOP_INACTIVE with the vnode lock unheld | |
1436 | */ | |
1437 | vp->v_iocount++; | |
1438 | #ifdef JOE_DEBUG | |
1439 | record_vp(vp, 1); | |
1c79356b | 1440 | #endif |
91447636 A |
1441 | vp->v_lflag &= ~VL_NEEDINACTIVE; |
1442 | vnode_unlock(vp); | |
1c79356b | 1443 | |
91447636 A |
1444 | context.vc_proc = current_proc(); |
1445 | context.vc_ucred = kauth_cred_get(); | |
1446 | VNOP_INACTIVE(vp, &context); | |
1c79356b | 1447 | |
91447636 A |
1448 | vnode_lock(vp); |
1449 | /* | |
1450 | * because we dropped the vnode lock to call VNOP_INACTIVE | |
1451 | * the state of the vnode may have changed... we may have | |
1452 | * picked up an iocount, usecount or the MARKTERM may have | |
1453 | * been set... we need to reevaluate the reference counts | |
1454 | * to determine if we can call vnode_reclaim_internal at | |
1455 | * this point... if the reference counts are up, we'll pick | |
1456 | * up the MARKTERM state when they get subsequently dropped | |
1457 | */ | |
1458 | if ( (vp->v_iocount == 1) && (vp->v_usecount == 0) && | |
1459 | ((vp->v_lflag & (VL_MARKTERM | VL_TERMINATE | VL_DEAD)) == VL_MARKTERM)) { | |
1460 | struct uthread *ut; | |
1c79356b | 1461 | |
91447636 A |
1462 | ut = get_bsdthread_info(current_thread()); |
1463 | ||
1464 | if (ut->uu_defer_reclaims) { | |
1465 | vp->v_defer_reclaimlist = ut->uu_vreclaims; | |
1466 | ut->uu_vreclaims = vp; | |
1467 | goto defer_reclaim; | |
1468 | } | |
1469 | vnode_reclaim_internal(vp, 1, 0); | |
1470 | } | |
1471 | vnode_dropiocount(vp, 1); | |
1472 | vnode_list_add(vp); | |
1473 | defer_reclaim: | |
1474 | if ( !locked) | |
1475 | vnode_unlock(vp); | |
1476 | return; | |
1c79356b A |
1477 | } |
1478 | ||
1479 | /* | |
1480 | * Remove any vnodes in the vnode table belonging to mount point mp. | |
1481 | * | |
1482 | * If MNT_NOFORCE is specified, there should not be any active ones, | |
1483 | * return error if any are found (nb: this is a user error, not a | |
1484 | * system error). If MNT_FORCE is specified, detach any active vnodes | |
1485 | * that are found. | |
1486 | */ | |
1487 | #if DIAGNOSTIC | |
1488 | int busyprt = 0; /* print out busy vnodes */ | |
1489 | #if 0 | |
1490 | struct ctldebug debug1 = { "busyprt", &busyprt }; | |
1491 | #endif /* 0 */ | |
1492 | #endif | |
1493 | ||
/*
 * Remove vnodes belonging to mount point mp (see the block comment
 * above: FORCECLOSE detaches active vnodes; without it, any busy vnode
 * makes the flush fail with EBUSY).  skipvp, if non-NULL, is exempted.
 * Uses the per-mount worker-queue iteration protocol
 * (vnode_iterate_setup/prepare/reloadq/clear) under the mount lock.
 */
int
vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	struct proc *p = current_proc();
	struct vnode *vp;
	int busy = 0;
	int reclaimed = 0;
	int vid, retval;

	mount_lock(mp);
	vnode_iterate_setup(mp);
	/*
	 * On regular unmounts(not forced) do a
	 * quick check for vnodes to be in use. This
	 * preserves the caching of vnodes. automounter
	 * tries unmounting every so often to see whether
	 * it is still busy or not.
	 */
	if ((flags & FORCECLOSE)==0) {
		if (vnode_umount_preflight(mp, skipvp, flags)) {
			vnode_iterate_clear(mp);
			mount_unlock(mp);
			return(EBUSY);
		}
	}
loop:
	/* if it returns 0 then there is nothing to do */
	retval = vnode_iterate_prepare(mp);

	if (retval == 0) {
		vnode_iterate_clear(mp);
		mount_unlock(mp);
		return(retval);
	}

	/* iterate over all the vnodes */
	while (!TAILQ_EMPTY(&mp->mnt_workerqueue)) {
		vp = TAILQ_FIRST(&mp->mnt_workerqueue);
		/* move vp back to the main list as we consume the worker queue */
		TAILQ_REMOVE(&mp->mnt_workerqueue, vp, v_mntvnodes);
		TAILQ_INSERT_TAIL(&mp->mnt_vnodelist, vp, v_mntvnodes);
		if ( (vp->v_mount != mp) || (vp == skipvp)) {
			continue;
		}
		vid = vp->v_id;
		mount_unlock(mp);
		vnode_lock(vp);

		/* vp may have been recycled while the mount lock was dropped */
		if ((vp->v_id != vid) || ((vp->v_lflag & (VL_DEAD | VL_TERMINATE)))) {
			vnode_unlock(vp);
			mount_lock(mp);
			continue;
		}

		/*
		 * If requested, skip over vnodes marked VSYSTEM.
		 * Skip over all vnodes marked VNOFLUSH.
		 */
		if ((flags & SKIPSYSTEM) && ((vp->v_flag & VSYSTEM) ||
		    (vp->v_flag & VNOFLUSH))) {
			vnode_unlock(vp);
			mount_lock(mp);
			continue;
		}
		/*
		 * If requested, skip over vnodes marked VSWAP.
		 */
		if ((flags & SKIPSWAP) && (vp->v_flag & VSWAP)) {
			vnode_unlock(vp);
			mount_lock(mp);
			continue;
		}
		/*
		 * If requested, skip over vnodes marked VROOT.
		 */
		if ((flags & SKIPROOT) && (vp->v_flag & VROOT)) {
			vnode_unlock(vp);
			mount_lock(mp);
			continue;
		}
		/*
		 * If WRITECLOSE is set, only flush out regular file
		 * vnodes open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
			vnode_unlock(vp);
			mount_lock(mp);
			continue;
		}
		/*
		 * If the real usecount is 0, all we need to do is clear
		 * out the vnode data structures and we are done.
		 */
		if (((vp->v_usecount == 0) ||
		    ((vp->v_usecount - vp->v_kusecount) == 0))) {
			vp->v_iocount++;	/* so that drain waits for * other iocounts */
#ifdef JOE_DEBUG
			record_vp(vp, 1);
#endif
			vnode_reclaim_internal(vp, 1, 0);
			vnode_dropiocount(vp, 1);
			vnode_list_add(vp);

			vnode_unlock(vp);
			reclaimed++;
			mount_lock(mp);
			continue;
		}
		/*
		 * If FORCECLOSE is set, forcibly close the vnode.
		 * For block or character devices, revert to an
		 * anonymous device. For all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vp->v_iocount++;	/* so that drain waits * for other iocounts */
#ifdef JOE_DEBUG
				record_vp(vp, 1);
#endif
				vnode_reclaim_internal(vp, 1, 0);
				vnode_dropiocount(vp, 1);
				vnode_list_add(vp);
				vnode_unlock(vp);
			} else {
				/* device vnode: clean it but keep it usable as an
				 * anonymous spec device */
				vclean(vp, 0, p);
				vp->v_lflag &= ~VL_DEAD;
				vp->v_op = spec_vnodeop_p;
				vnode_unlock(vp);
			}
			mount_lock(mp);
			continue;
		}
#if DIAGNOSTIC
		if (busyprt)
			vprint("vflush: busy vnode", vp);
#endif
		vnode_unlock(vp);
		mount_lock(mp);
		busy++;
	}

	/* At this point the worker queue is completed */
	if (busy && ((flags & FORCECLOSE)==0) && reclaimed) {
		/* we reclaimed some vnodes this pass; the busy ones may have
		 * only been busy because of them -- retry from scratch */
		busy = 0;
		reclaimed = 0;
		(void)vnode_iterate_reloadq(mp);
		/* returned with mount lock held */
		goto loop;
	}

	/* if new vnodes were created in between retry the reclaim */
	if ( vnode_iterate_reloadq(mp) != 0) {
		if (!(busy && ((flags & FORCECLOSE)==0)))
			goto loop;
	}
	vnode_iterate_clear(mp);
	mount_unlock(mp);

	if (busy && ((flags & FORCECLOSE)==0))
		return (EBUSY);
	return (0);
}
1659 | ||
91447636 | 1660 | int num_recycledvnodes=0; |
1c79356b A |
1661 | /* |
1662 | * Disassociate the underlying file system from a vnode. | |
91447636 | 1663 | * The vnode lock is held on entry. |
1c79356b A |
1664 | */ |
/*
 * Disassociate the underlying file system from a vnode.
 * The vnode lock is held on entry, is dropped while the filesystem
 * callbacks (VNOP_CLOSE/FSYNC/INACTIVE/RECLAIM) run, and is reacquired
 * and still held on return.  On exit the vnode is VL_DEAD and rewired
 * to the dead filesystem ops.
 */
static void
vclean(vnode_t vp, int flags, proc_t p)
{
	struct vfs_context context;
	int active;
	int need_inactive;
	int already_terminating;
	kauth_cred_t ucred = NULL;

	context.vc_proc = p;
	context.vc_ucred = kauth_cred_get();

	/*
	 * Check to see if the vnode is in use.
	 * If so we have to reference it before we clean it out
	 * so that its count cannot fall to zero and generate a
	 * race against ourselves to recycle it.
	 */
	active = vp->v_usecount;

	/*
	 * just in case we missed sending a needed
	 * VNOP_INACTIVE, we'll do it now
	 */
	need_inactive = (vp->v_lflag & VL_NEEDINACTIVE);

	vp->v_lflag &= ~VL_NEEDINACTIVE;

	/*
	 * Prevent the vnode from being recycled or
	 * brought into use while we clean it out.
	 */
	already_terminating = (vp->v_lflag & VL_TERMINATE);

	vp->v_lflag |= VL_TERMINATE;

	/*
	 * remove the vnode from any mount list
	 * it might be on...
	 */
	insmntque(vp, (struct mount *)0);

	/* detach the cached credential; released after the lock is dropped */
	ucred = vp->v_cred;
	vp->v_cred = NULL;

	vnode_unlock(vp);

	if (ucred)
		kauth_cred_rele(ucred);

	OSAddAtomic(1, &num_recycledvnodes);
	/*
	 * purge from the name cache as early as possible...
	 */
	cache_purge(vp);

	if (active && (flags & DOCLOSE))
		VNOP_CLOSE(vp, IO_NDELAY, &context);

	/*
	 * Clean out any buffers associated with the vnode.
	 */
	if (flags & DOCLOSE) {
#if NFSCLIENT
		if (vp->v_tag == VT_NFS)
			nfs_vinvalbuf(vp, V_SAVE, NOCRED, p, 0);
		else
#endif
		{
			VNOP_FSYNC(vp, MNT_WAIT, &context);
			buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);
		}
		if (UBCINFOEXISTS(vp))
			/*
			 * Clean the pages in VM.
			 */
			(void)ubc_sync_range(vp, (off_t)0, ubc_getsize(vp), UBC_PUSHALL);
	}
	if (UBCINFOEXISTS(vp))
		cluster_release(vp->v_ubcinfo);

	if (active || need_inactive)
		VNOP_INACTIVE(vp, &context);

	/* Destroy ubc named reference */
	ubc_destroy_named(vp);

	/*
	 * Reclaim the vnode.
	 */
	if (VNOP_RECLAIM(vp, &context))
		panic("vclean: cannot reclaim");

	// make sure the name & parent ptrs get cleaned out!
	vnode_update_identity(vp, NULLVP, NULL, 0, 0, VNODE_UPDATE_PARENT | VNODE_UPDATE_NAME);

	vnode_lock(vp);

	/* rewire the vnode to the dead filesystem */
	vp->v_mount = dead_mountp;
	vp->v_op = dead_vnodeop_p;
	vp->v_tag = VT_NON;
	vp->v_data = NULL;

	vp->v_lflag |= VL_DEAD;

	/* if a caller further up the stack set VL_TERMINATE, leave the
	 * flag (and its wakeup) to that owner */
	if (already_terminating == 0) {
		vp->v_lflag &= ~VL_TERMINATE;
		/*
		 * Done with purge, notify sleepers of the grim news.
		 */
		if (vp->v_lflag & VL_TERMWANT) {
			vp->v_lflag &= ~VL_TERMWANT;
			wakeup(&vp->v_lflag);
		}
	}
}
1781 | ||
1782 | /* | |
1783 | * Eliminate all activity associated with the requested vnode | |
1784 | * and with all vnodes aliased to the requested vnode. | |
1785 | */ | |
/*
 * Eliminate all activity associated with the requested vnode
 * and with all vnodes aliased to the requested vnode.
 * Reclaims every alias of vp found on its spec hash chain, then vp
 * itself.  Returns ENOENT if vp is already terminating, else 0.
 */
int
vn_revoke(vnode_t vp, int flags, __unused vfs_context_t a_context)
{
	struct vnode *vq;
	int vid;

#if DIAGNOSTIC
	if ((flags & REVOKEALL) == 0)
		panic("vnop_revoke");
#endif

	if (vp->v_flag & VALIASED) {
		/*
		 * If a vgone (or vclean) is already in progress,
		 * wait until it is done and return.
		 */
		vnode_lock(vp);
		if (vp->v_lflag & VL_TERMINATE) {
			vnode_unlock(vp);
			return(ENOENT);
		}
		vnode_unlock(vp);
		/*
		 * Ensure that vp will not be vgone'd while we
		 * are eliminating its aliases.
		 */
		SPECHASH_LOCK();
		while (vp->v_flag & VALIASED) {
			/* rescan the chain from the top after each reclaim,
			 * since the hash lock is dropped inside the loop */
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type || vp == vq)
					continue;
				vid = vq->v_id;
				SPECHASH_UNLOCK();
				/* vq may have been recycled; if so just rescan */
				if (vnode_getwithvid(vq,vid)){
					SPECHASH_LOCK();
					break;
				}
				vnode_reclaim_internal(vq, 0, 0);
				vnode_put(vq);
				SPECHASH_LOCK();
				break;
			}
		}
		SPECHASH_UNLOCK();
	}
	vnode_reclaim_internal(vp, 0, 0);

	return (0);
}
1836 | ||
1837 | /* | |
1838 | * Recycle an unused vnode to the front of the free list. | |
1839 | * Release the passed interlock if the vnode will be recycled. | |
1840 | */ | |
1841 | int | |
91447636 | 1842 | vnode_recycle(vp) |
1c79356b | 1843 | struct vnode *vp; |
1c79356b | 1844 | { |
91447636 | 1845 | vnode_lock(vp); |
1c79356b | 1846 | |
91447636 A |
1847 | if (vp->v_iocount || vp->v_usecount) { |
1848 | vp->v_lflag |= VL_MARKTERM; | |
1849 | vnode_unlock(vp); | |
1850 | return(0); | |
1851 | } | |
1852 | vnode_reclaim_internal(vp, 1, 0); | |
1853 | vnode_unlock(vp); | |
1854 | ||
1855 | return (1); | |
1c79356b A |
1856 | } |
1857 | ||
91447636 A |
/*
 * Mark a vnode that is being reloaded so it is reclaimed when its
 * current (single) iocount is dropped.  Returns 1 if the vnode was
 * marked, 0 if it is still referenced elsewhere.
 */
static int
vnode_reload(vnode_t vp)
{
	vnode_lock(vp);

	if ((vp->v_iocount > 1) || vp->v_usecount) {
		vnode_unlock(vp);
		return(0);
	}
	if (vp->v_iocount <= 0)
		panic("vnode_reload with no iocount %d", vp->v_iocount);

	/* mark for release when iocount is dropped */
	vp->v_lflag |= VL_MARKTERM;
	vnode_unlock(vp);

	return (1);
}
1876 | ||
91447636 A |
1877 | |
/*
 * Completely disassociate a vnode from its filesystem and, for special
 * devices, unlink it from the spec alias hash chain and release its
 * specinfo.
 */
static void
vgone(vnode_t vp)
{
	struct vnode *vq;
	struct vnode *vx;

	/*
	 * Clean out the filesystem specific data.
	 * vclean also takes care of removing the
	 * vnode from any mount list it might be on
	 */
	vclean(vp, DOCLOSE, current_proc());

	/*
	 * If special device, remove it from special device alias list
	 * if it is on one.
	 */
	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
		SPECHASH_LOCK();
		/* unlink vp from its hash chain */
		if (*vp->v_hashchain == vp) {
			*vp->v_hashchain = vp->v_specnext;
		} else {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_specnext != vp)
					continue;
				vq->v_specnext = vp->v_specnext;
				break;
			}
			if (vq == NULL)
				panic("missing bdev");
		}
		if (vp->v_flag & VALIASED) {
			/*
			 * Find the surviving aliases of this device; if only
			 * one remains after vp goes away, it is no longer
			 * VALIASED.
			 */
			vx = NULL;
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type)
					continue;
				if (vx)
					break;
				vx = vq;
			}
			if (vx == NULL)
				panic("missing alias");
			if (vq == NULL)
				vx->v_flag &= ~VALIASED;
			vp->v_flag &= ~VALIASED;
		}
		SPECHASH_UNLOCK();
		{
			struct specinfo *tmp = vp->v_specinfo;
			vp->v_specinfo = NULL;
			FREE_ZONE((void *)tmp, sizeof(struct specinfo), M_SPECINFO);
		}
	}
}
1933 | ||
/*
 * Lookup a vnode by device number.
 *
 * Returns 1 (with *errorp set by vfs_mountedon()) if an in-use vnode
 * for (dev, type) has a filesystem mounted on it, else 0.
 */
int
check_mountedon(dev_t dev, enum vtype type, int *errorp)
{
	vnode_t	vp;
	int rc = 0;
	int vid;

loop:
	SPECHASH_LOCK();
	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		/*
		 * Drop the hash lock before taking an iocount; the saved
		 * v_id detects the vnode being recycled, in which case we
		 * restart the whole lookup.
		 */
		vid = vp->v_id;
		SPECHASH_UNLOCK();
		if (vnode_getwithvid(vp,vid))
			goto loop;
		vnode_lock(vp);
		if ((vp->v_usecount > 0) || (vp->v_iocount > 1)) {
			vnode_unlock(vp);
			if ((*errorp = vfs_mountedon(vp)) != 0)
				rc = 1;
		} else
			vnode_unlock(vp);
		vnode_put(vp);
		return(rc);
	}
	SPECHASH_UNLOCK();
	return (0);
}
1966 | ||
/*
 * Calculate the total number of references to a special device, summed
 * over all aliases of the device.  Kernel-internal usecounts
 * (v_kusecount) are excluded.  Unreferenced aliases found along the
 * way are reclaimed.
 */
int
vcount(vnode_t vp)
{
	vnode_t vq, vnext;
	int count;
	int vid;

loop:
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount - vp->v_kusecount);

	SPECHASH_LOCK();
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Drop the hash lock around vnode_getwithvid(); if the
		 * vnode got recycled in the window, restart the count.
		 */
		vid = vq->v_id;
		SPECHASH_UNLOCK();

		if (vnode_getwithvid(vq, vid)) {
			goto loop;
		}
		/*
		 * Alias, but not in use, so flush it out.
		 */
		vnode_lock(vq);
		if ((vq->v_usecount == 0) && (vq->v_iocount == 1)  && vq != vp) {
			vnode_reclaim_internal(vq, 1, 0);
			vnode_unlock(vq);
			vnode_put(vq);
			goto loop;
		}
		count += (vq->v_usecount - vq->v_kusecount);
		vnode_unlock(vq);
		vnode_put(vq);

		SPECHASH_LOCK();
	}
	SPECHASH_UNLOCK();

	return (count);
}
2012 | ||
int	prtactive = 0;		/* 1 => print out reclaim of active vnodes */

/*
 * Print out a description of a vnode.
 */
/* printable names, indexed by enum vtype */
static char *typename[] =
   { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };
2020 | ||
2021 | void | |
91447636 | 2022 | vprint(const char *label, struct vnode *vp) |
1c79356b | 2023 | { |
91447636 | 2024 | char sbuf[64]; |
1c79356b A |
2025 | |
2026 | if (label != NULL) | |
2027 | printf("%s: ", label); | |
91447636 A |
2028 | printf("type %s, usecount %d, writecount %d", |
2029 | typename[vp->v_type], vp->v_usecount, vp->v_writecount); | |
2030 | sbuf[0] = '\0'; | |
1c79356b | 2031 | if (vp->v_flag & VROOT) |
91447636 | 2032 | strcat(sbuf, "|VROOT"); |
1c79356b | 2033 | if (vp->v_flag & VTEXT) |
91447636 | 2034 | strcat(sbuf, "|VTEXT"); |
1c79356b | 2035 | if (vp->v_flag & VSYSTEM) |
91447636 | 2036 | strcat(sbuf, "|VSYSTEM"); |
9bccf70c | 2037 | if (vp->v_flag & VNOFLUSH) |
91447636 | 2038 | strcat(sbuf, "|VNOFLUSH"); |
1c79356b | 2039 | if (vp->v_flag & VBWAIT) |
91447636 | 2040 | strcat(sbuf, "|VBWAIT"); |
1c79356b | 2041 | if (vp->v_flag & VALIASED) |
91447636 A |
2042 | strcat(sbuf, "|VALIASED"); |
2043 | if (sbuf[0] != '\0') | |
2044 | printf(" flags (%s)", &sbuf[1]); | |
1c79356b A |
2045 | } |
2046 | ||
1c79356b | 2047 | |
91447636 A |
/*
 * Thin wrapper around build_path().  *len carries the buffer length in
 * and (presumably, per build_path's contract defined elsewhere) the
 * resulting path length out.
 */
int
vn_getpath(struct vnode *vp, char *pathbuf, int *len)
{
	int insize = *len;

	return build_path(vp, pathbuf, insize, len);
}
91447636 A |
2053 | |
2054 | ||
/*
 * Package-extension table installed by set_package_extensions_table():
 * a flat array of 'nexts' slots, each 'max_ext_width' bytes wide,
 * sorted by extension length (see extension_cmp).
 */
static char *extension_table=NULL;
static int nexts;
static int max_ext_width;
1c79356b | 2058 | |
/*
 * qsort() comparator for the package-extension table: orders the
 * NUL-terminated entries by ascending string length.
 */
static int
extension_cmp(void *a, void *b)
{
    size_t len_a, len_b;

    len_a = strlen((char *)a);
    len_b = strlen((char *)b);

    return (int)(len_a - len_b);
}
55e303ae | 2064 | |
55e303ae | 2065 | |
91447636 A |
2066 | // |
2067 | // This is the api LaunchServices uses to inform the kernel | |
2068 | // the list of package extensions to ignore. | |
2069 | // | |
2070 | // Internally we keep the list sorted by the length of the | |
2071 | // the extension (from longest to shortest). We sort the | |
2072 | // list of extensions so that we can speed up our searches | |
2073 | // when comparing file names -- we only compare extensions | |
2074 | // that could possibly fit into the file name, not all of | |
2075 | // them (i.e. a short 8 character name can't have an 8 | |
2076 | // character extension). | |
2077 | // | |
2078 | __private_extern__ int | |
2079 | set_package_extensions_table(void *data, int nentries, int maxwidth) | |
2080 | { | |
2081 | char *new_exts, *ptr; | |
2082 | int error, i, len; | |
2083 | ||
2084 | if (nentries <= 0 || nentries > 1024 || maxwidth <= 0 || maxwidth > 255) { | |
2085 | return EINVAL; | |
2086 | } | |
2087 | ||
2088 | MALLOC(new_exts, char *, nentries * maxwidth, M_TEMP, M_WAITOK); | |
2089 | ||
2090 | error = copyin(CAST_USER_ADDR_T(data), new_exts, nentries * maxwidth); | |
2091 | if (error) { | |
2092 | FREE(new_exts, M_TEMP); | |
2093 | return error; | |
2094 | } | |
2095 | ||
2096 | if (extension_table) { | |
2097 | FREE(extension_table, M_TEMP); | |
2098 | } | |
2099 | extension_table = new_exts; | |
2100 | nexts = nentries; | |
2101 | max_ext_width = maxwidth; | |
2102 | ||
2103 | qsort(extension_table, nexts, maxwidth, extension_cmp); | |
2104 | ||
2105 | return 0; | |
2106 | } | |
2107 | ||
2108 | ||
2109 | __private_extern__ int | |
2110 | is_package_name(char *name, int len) | |
2111 | { | |
2112 | int i, extlen; | |
2113 | char *ptr, *name_ext; | |
2114 | ||
2115 | if (len <= 3) { | |
2116 | return 0; | |
2117 | } | |
2118 | ||
2119 | name_ext = NULL; | |
2120 | for(ptr=name; *ptr != '\0'; ptr++) { | |
2121 | if (*ptr == '.') { | |
2122 | name_ext = ptr; | |
55e303ae | 2123 | } |
91447636 | 2124 | } |
55e303ae | 2125 | |
91447636 A |
2126 | // if there is no "." extension, it can't match |
2127 | if (name_ext == NULL) { | |
2128 | return 0; | |
2129 | } | |
55e303ae | 2130 | |
91447636 A |
2131 | // advance over the "." |
2132 | name_ext++; | |
55e303ae | 2133 | |
91447636 A |
2134 | // now iterate over all the extensions to see if any match |
2135 | ptr = &extension_table[0]; | |
2136 | for(i=0; i < nexts; i++, ptr+=max_ext_width) { | |
2137 | extlen = strlen(ptr); | |
2138 | if (strncasecmp(name_ext, ptr, extlen) == 0 && name_ext[extlen] == '\0') { | |
2139 | // aha, a match! | |
2140 | return 1; | |
55e303ae A |
2141 | } |
2142 | } | |
2143 | ||
91447636 A |
2144 | // if we get here, no extension matched |
2145 | return 0; | |
55e303ae A |
2146 | } |
2147 | ||
91447636 A |
/*
 * Scan an absolute path and report in *component the index (0-based)
 * of the first path component that is a package name, or -1 if none.
 *
 * NOTE: 'path' is modified in place -- each component is temporarily
 * NUL-terminated for the is_package_name() check and the separator is
 * NOT restored afterwards, so callers must treat the buffer as scratch.
 *
 * Returns 0 on success, EINVAL if the path is not absolute or is not
 * NUL-terminated within pathlen.
 */
int
vn_path_package_check(__unused vnode_t vp, char *path, int pathlen, int *component)
{
    char *ptr, *end;
    int comp=0;

    *component = -1;
    if (*path != '/') {
	return EINVAL;
    }

    end = path + 1;
    while(end < path + pathlen && *end != '\0') {
	/* skip any run of '/' separators */
	while(end < path + pathlen && *end == '/' && *end != '\0') {
	    end++;
	}

	ptr = end;

	/* advance to the end of this component */
	while(end < path + pathlen && *end != '/' && *end != '\0') {
	    end++;
	}

	if (end > path + pathlen) {
	    // hmm, string wasn't null terminated
	    return EINVAL;
	}

	*end = '\0';
	if (is_package_name(ptr, end - ptr)) {
	    *component = comp;
	    break;
	}

	end++;
	comp++;
    }

    return 0;
}
55e303ae A |
2188 | |
2189 | ||
1c79356b A |
2190 | /* |
2191 | * Top level filesystem related information gathering. | |
2192 | */ | |
91447636 A |
2193 | extern unsigned int vfs_nummntops; |
2194 | ||
1c79356b | 2195 | int |
91447636 A |
2196 | vfs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp, |
2197 | user_addr_t newp, size_t newlen, struct proc *p) | |
1c79356b | 2198 | { |
91447636 | 2199 | struct vfstable *vfsp; |
55e303ae A |
2200 | int *username; |
2201 | u_int usernamelen; | |
2202 | int error; | |
91447636 | 2203 | struct vfsconf *vfsc; |
1c79356b | 2204 | |
9bccf70c A |
2205 | /* |
2206 | * The VFS_NUMMNTOPS shouldn't be at name[0] since | |
2207 | * is a VFS generic variable. So now we must check | |
2208 | * namelen so we don't end up covering any UFS | |
2209 | * variables (sinc UFS vfc_typenum is 1). | |
2210 | * | |
2211 | * It should have been: | |
2212 | * name[0]: VFS_GENERIC | |
2213 | * name[1]: VFS_NUMMNTOPS | |
2214 | */ | |
2215 | if (namelen == 1 && name[0] == VFS_NUMMNTOPS) { | |
1c79356b A |
2216 | return (sysctl_rdint(oldp, oldlenp, newp, vfs_nummntops)); |
2217 | } | |
2218 | ||
2219 | /* all sysctl names at this level are at least name and field */ | |
2220 | if (namelen < 2) | |
55e303ae | 2221 | return (EISDIR); /* overloaded */ |
1c79356b | 2222 | if (name[0] != VFS_GENERIC) { |
91447636 A |
2223 | struct vfs_context context; |
2224 | ||
1c79356b A |
2225 | for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) |
2226 | if (vfsp->vfc_typenum == name[0]) | |
2227 | break; | |
2228 | if (vfsp == NULL) | |
91447636 A |
2229 | return (ENOTSUP); |
2230 | context.vc_proc = p; | |
2231 | context.vc_ucred = kauth_cred_get(); | |
2232 | ||
1c79356b | 2233 | return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1, |
91447636 | 2234 | oldp, oldlenp, newp, newlen, &context)); |
1c79356b A |
2235 | } |
2236 | switch (name[1]) { | |
2237 | case VFS_MAXTYPENUM: | |
2238 | return (sysctl_rdint(oldp, oldlenp, newp, maxvfsconf)); | |
2239 | case VFS_CONF: | |
2240 | if (namelen < 3) | |
2241 | return (ENOTDIR); /* overloaded */ | |
2242 | for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) | |
2243 | if (vfsp->vfc_typenum == name[2]) | |
2244 | break; | |
2245 | if (vfsp == NULL) | |
91447636 A |
2246 | return (ENOTSUP); |
2247 | vfsc = (struct vfsconf *)vfsp; | |
2248 | if (proc_is64bit(p)) { | |
2249 | struct user_vfsconf usr_vfsc; | |
2250 | usr_vfsc.vfc_vfsops = CAST_USER_ADDR_T(vfsc->vfc_vfsops); | |
2251 | bcopy(vfsc->vfc_name, usr_vfsc.vfc_name, sizeof(usr_vfsc.vfc_name)); | |
2252 | usr_vfsc.vfc_typenum = vfsc->vfc_typenum; | |
2253 | usr_vfsc.vfc_refcount = vfsc->vfc_refcount; | |
2254 | usr_vfsc.vfc_flags = vfsc->vfc_flags; | |
2255 | usr_vfsc.vfc_mountroot = CAST_USER_ADDR_T(vfsc->vfc_mountroot); | |
2256 | usr_vfsc.vfc_next = CAST_USER_ADDR_T(vfsc->vfc_next); | |
2257 | return (sysctl_rdstruct(oldp, oldlenp, newp, &usr_vfsc, | |
2258 | sizeof(usr_vfsc))); | |
2259 | } | |
2260 | else { | |
2261 | return (sysctl_rdstruct(oldp, oldlenp, newp, vfsc, | |
2262 | sizeof(struct vfsconf))); | |
2263 | } | |
2264 | ||
2265 | case VFS_SET_PACKAGE_EXTS: | |
2266 | return set_package_extensions_table((void *)name[1], name[2], name[3]); | |
1c79356b | 2267 | } |
55e303ae A |
2268 | /* |
2269 | * We need to get back into the general MIB, so we need to re-prepend | |
2270 | * CTL_VFS to our name and try userland_sysctl(). | |
2271 | */ | |
2272 | usernamelen = namelen + 1; | |
2273 | MALLOC(username, int *, usernamelen * sizeof(*username), | |
2274 | M_TEMP, M_WAITOK); | |
2275 | bcopy(name, username + 1, namelen * sizeof(*name)); | |
2276 | username[0] = CTL_VFS; | |
91447636 A |
2277 | error = userland_sysctl(p, username, usernamelen, oldp, |
2278 | oldlenp, 1, newp, newlen, oldlenp); | |
55e303ae A |
2279 | FREE(username, M_TEMP); |
2280 | return (error); | |
1c79356b A |
2281 | } |
2282 | ||
int kinfo_vdebug = 1;		/* 1 => log races seen while dumping vnodes */
#define KINFO_VNODESLOP	10
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
 *
 * NOTE: the implementation is currently compiled out (#if 0); the
 * function always returns EINVAL.
 */
/* ARGSUSED */
int
sysctl_vnode(__unused user_addr_t where, __unused size_t *sizep)
{
#if 0
	struct mount *mp, *nmp;
	struct vnode *nvp, *vp;
	char *bp = where, *savebp;
	char *ewhere;
	int error;

#define VPTRSZ	sizeof (struct vnode *)
#define VNODESZ	sizeof (struct vnode)
	if (where == NULL) {
		/* sizing probe: report required space with some slop */
		*sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
		return (0);
	}
	ewhere = where + *sizep;

	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
		savebp = bp;
again:
		TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
			/*
			 * Check that the vp is still associated with
			 * this filesystem.  RACE: could have been
			 * recycled onto the same filesystem.
			 */
			if (vp->v_mount != mp) {
				if (kinfo_vdebug)
					printf("kinfo: vp changed\n");
				bp = savebp;
				goto again;
			}
			if (bp + VPTRSZ + VNODESZ > ewhere) {
				vfs_unbusy(mp);
				*sizep = bp - where;
				return (ENOMEM);
			}
			if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
			   (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ))) {
				vfs_unbusy(mp);
				return (error);
			}
			bp += VPTRSZ + VNODESZ;
		}
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp);
	}

	*sizep = bp - where;
	return (0);
#else
	return(EINVAL);
#endif
}
2349 | ||
2350 | /* | |
2351 | * Check to see if a filesystem is mounted on a block device. | |
2352 | */ | |
2353 | int | |
2354 | vfs_mountedon(vp) | |
2355 | struct vnode *vp; | |
2356 | { | |
2357 | struct vnode *vq; | |
2358 | int error = 0; | |
2359 | ||
91447636 A |
2360 | SPECHASH_LOCK(); |
2361 | if (vp->v_specflags & SI_MOUNTEDON) { | |
2362 | error = EBUSY; | |
2363 | goto out; | |
2364 | } | |
1c79356b | 2365 | if (vp->v_flag & VALIASED) { |
1c79356b A |
2366 | for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { |
2367 | if (vq->v_rdev != vp->v_rdev || | |
2368 | vq->v_type != vp->v_type) | |
2369 | continue; | |
2370 | if (vq->v_specflags & SI_MOUNTEDON) { | |
2371 | error = EBUSY; | |
2372 | break; | |
2373 | } | |
2374 | } | |
1c79356b | 2375 | } |
91447636 A |
2376 | out: |
2377 | SPECHASH_UNLOCK(); | |
1c79356b A |
2378 | return (error); |
2379 | } | |
2380 | ||
/*
 * Unmount all filesystems. The list is traversed in reverse order
 * of mounting to avoid dependencies.
 */
__private_extern__ void
vfs_unmountall()
{
	struct mount *mp;
	struct proc *p = current_proc();
	int error;

	/*
	 * Since this only runs when rebooting, it is not interlocked.
	 */
	mount_list_lock();
	while(!TAILQ_EMPTY(&mountlist)) {
		/* always take the most recently mounted filesystem first */
		mp = TAILQ_LAST(&mountlist, mntlist);
		mount_list_unlock();
		error = dounmount(mp, MNT_FORCE, p);
		if (error) {
			/*
			 * Remove the failed mount from the list so the
			 * loop cannot spin on it forever.
			 */
			mount_list_lock();
			TAILQ_REMOVE(&mountlist, mp, mnt_list);
			printf("unmount of %s failed (", mp->mnt_vfsstat.f_mntonname);
			if (error == EBUSY)
				printf("BUSY)\n");
			else
				printf("%d)\n", error);
			continue;
		}
		mount_list_lock();
	}
	mount_list_unlock();
}
2414 | ||
1c79356b | 2415 | |
91447636 A |
/*
 * This routine is called from vnode_pager_no_senders()
 * which in turn can be called with vnode locked by vnode_uncache()
 * But it could also get called as a result of vm_object_cache_trim().
 * In that case lock state is unknown.
 * AGE the vnode so that it gets recycled quickly.
 */
__private_extern__ void
vnode_pager_vrele(struct vnode *vp)
{
	vnode_lock(vp);

	/* only legal while the vnode is being terminated (reclaimed) */
	if (!ISSET(vp->v_lflag, VL_TERMINATE))
		panic("vnode_pager_vrele: vp not in termination");
	vp->v_lflag &= ~VNAMED_UBC;

	if (UBCINFOEXISTS(vp)) {
		struct ubc_info *uip = vp->v_ubcinfo;

		/* remember the file was mapped before dropping the ubc state */
		if (ISSET(uip->ui_flags, UI_WASMAPPED))
			SET(vp->v_flag, VWASMAPPED);
		vp->v_ubcinfo = UBC_INFO_NULL;

		ubc_info_deallocate(uip);
	} else {
		panic("NO ubcinfo in vnode_pager_vrele");
	}
	vnode_unlock(vp);

	/* wake anyone sleeping on the termination state change */
	wakeup(&vp->v_lflag);
}
2447 | ||
91447636 A |
2448 | |
#include <sys/disk.h>

/*
 * Query the underlying device of a mount point for its I/O geometry
 * (block size, max transfer byte/block counts, segment counts/sizes)
 * and record the results -- clipped to the widths of the mnt_* fields
 * -- in 'mp'.  Also flags mounts that live on the root device
 * (MNTK_ROOTDEV) and on virtual devices (MNTK_VIRTUALDEV).
 */
errno_t
vfs_init_io_attributes(vnode_t devvp, mount_t mp)
{
	int error;
	off_t	readblockcnt;
	off_t	writeblockcnt;
	off_t	readmaxcnt;
	off_t	writemaxcnt;
	off_t	readsegcnt;
	off_t	writesegcnt;
	off_t	readsegsize;
	off_t	writesegsize;
	u_long	blksize;
	u_int64_t temp;
	struct vfs_context context;

	proc_t	p = current_proc();

	context.vc_proc = p;
	context.vc_ucred = kauth_cred_get();

	int isvirtual = 0;
	/*
	 * determine if this mount point exists on the same device as the root
	 * partition... if so, then it comes under the hard throttle control
	 */
	int thisunit = -1;
	/* cached BSD unit of the root device; probed once, first call */
	static int rootunit = -1;

	if (rootunit == -1) {
		if (VNOP_IOCTL(rootvp, DKIOCGETBSDUNIT, (caddr_t)&rootunit, 0, &context))
			rootunit = -1;
		else if (rootvp == devvp)
			mp->mnt_kern_flag |= MNTK_ROOTDEV;
	}
	if (devvp != rootvp && rootunit != -1) {
		if (VNOP_IOCTL(devvp, DKIOCGETBSDUNIT, (caddr_t)&thisunit, 0, &context) == 0) {
			if (thisunit == rootunit)
				mp->mnt_kern_flag |= MNTK_ROOTDEV;
		}
	}
	/*
	 * force the spec device to re-cache
	 * the underlying block size in case
	 * the filesystem overrode the initial value
	 */
	set_fsblocksize(devvp);


	if ((error = VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE,
				(caddr_t)&blksize, 0, &context)))
		return (error);

	mp->mnt_devblocksize = blksize;

	if (VNOP_IOCTL(devvp, DKIOCISVIRTUAL, (caddr_t)&isvirtual, 0, &context) == 0) {
	        if (isvirtual)
		        mp->mnt_kern_flag |= MNTK_VIRTUALDEV;
	}

	/* failures below propagate: a device that answers DKIOCGETBLOCKSIZE
	 * is expected to answer the rest of the geometry ioctls too */
	if ((error = VNOP_IOCTL(devvp, DKIOCGETMAXBLOCKCOUNTREAD,
				(caddr_t)&readblockcnt, 0, &context)))
		return (error);

	if ((error = VNOP_IOCTL(devvp, DKIOCGETMAXBLOCKCOUNTWRITE,
				(caddr_t)&writeblockcnt, 0, &context)))
		return (error);

	if ((error = VNOP_IOCTL(devvp, DKIOCGETMAXBYTECOUNTREAD,
				(caddr_t)&readmaxcnt, 0, &context)))
		return (error);

	if ((error = VNOP_IOCTL(devvp, DKIOCGETMAXBYTECOUNTWRITE,
				(caddr_t)&writemaxcnt, 0, &context)))
		return (error);

	if ((error = VNOP_IOCTL(devvp, DKIOCGETMAXSEGMENTCOUNTREAD,
				(caddr_t)&readsegcnt, 0, &context)))
		return (error);

	if ((error = VNOP_IOCTL(devvp, DKIOCGETMAXSEGMENTCOUNTWRITE,
				(caddr_t)&writesegcnt, 0, &context)))
		return (error);

	if ((error = VNOP_IOCTL(devvp, DKIOCGETMAXSEGMENTBYTECOUNTREAD,
				(caddr_t)&readsegsize, 0, &context)))
		return (error);

	if ((error = VNOP_IOCTL(devvp, DKIOCGETMAXSEGMENTBYTECOUNTWRITE,
				(caddr_t)&writesegsize, 0, &context)))
		return (error);

	/*
	 * Max read transfer: prefer the byte count; fall back to
	 * blocks * blocksize, then MAXPHYS.  Clip to 32 bits.
	 */
	if (readmaxcnt)
	        temp = (readmaxcnt > UINT32_MAX) ? UINT32_MAX : readmaxcnt;
	else {
	        if (readblockcnt) {
		        temp = readblockcnt * blksize;
			temp = (temp > UINT32_MAX) ? UINT32_MAX : temp;
		} else
		        temp = MAXPHYS;
	}
	mp->mnt_maxreadcnt = (u_int32_t)temp;

	/* same policy for the write direction */
	if (writemaxcnt)
	        temp = (writemaxcnt > UINT32_MAX) ? UINT32_MAX : writemaxcnt;
	else {
	        if (writeblockcnt) {
		        temp = writeblockcnt * blksize;
			temp = (temp > UINT32_MAX) ? UINT32_MAX : temp;
		} else
		        temp = MAXPHYS;
	}
	mp->mnt_maxwritecnt = (u_int32_t)temp;

	/* segment counts are 16-bit fields; only set when reported */
	if (readsegcnt) {
	        temp = (readsegcnt > UINT16_MAX) ? UINT16_MAX : readsegcnt;
		mp->mnt_segreadcnt = (u_int16_t)temp;
	}
	if (writesegcnt) {
	        temp = (writesegcnt > UINT16_MAX) ? UINT16_MAX : writesegcnt;
		mp->mnt_segwritecnt = (u_int16_t)temp;
	}
	/* segment sizes default to the max transfer size when unreported */
	if (readsegsize)
	        temp = (readsegsize > UINT32_MAX) ? UINT32_MAX : readsegsize;
	else
	        temp = mp->mnt_maxreadcnt;
	mp->mnt_maxsegreadsize = (u_int32_t)temp;

	if (writesegsize)
	        temp = (writesegsize > UINT32_MAX) ? UINT32_MAX : writesegsize;
	else
	        temp = mp->mnt_maxwritecnt;
	mp->mnt_maxsegwritesize = (u_int32_t)temp;

	return (error);
}
2587 | ||
/* knotes interested in filesystem events (posted by vfs_event_signal) */
static struct klist fs_klist;

/*
 * One-time initialization of the fs event knote list.
 */
void
vfs_event_init(void)
{

	klist_init(&fs_klist);
}
2596 | ||
/*
 * Post a filesystem event to all registered knotes.  The fsid and data
 * arguments are currently unused.
 */
void
vfs_event_signal(__unused fsid_t *fsid, u_int32_t event, __unused intptr_t data)
{

	KNOTE(&fs_klist, event);
}
2603 | ||
/*
 * return the number of mounted filesystems.
 */
static int
sysctl_vfs_getvfscnt(void)
{
	/* thin sysctl-facing wrapper over the locked counter read */
	return(mount_getvfscnt());
}
2612 | ||
0b4e3aa0 | 2613 | |
91447636 A |
/*
 * Snapshot the global mount count under the mount list lock.
 */
static int
mount_getvfscnt(void)
{
	int ret;

	mount_list_lock();
	ret = nummounts;
	mount_list_unlock();
	return (ret);

}
2625 | ||
2626 | ||
2627 | ||
2628 | static int | |
2629 | mount_fillfsids(fsid_t *fsidlst, int count) | |
2630 | { | |
2631 | struct mount *mp; | |
2632 | int actual=0; | |
2633 | ||
2634 | actual = 0; | |
2635 | mount_list_lock(); | |
2636 | TAILQ_FOREACH(mp, &mountlist, mnt_list) { | |
2637 | if (actual <= count) { | |
2638 | fsidlst[actual] = mp->mnt_vfsstat.f_fsid; | |
2639 | actual++; | |
2640 | } | |
2641 | } | |
2642 | mount_list_unlock(); | |
2643 | return (actual); | |
2644 | ||
55e303ae A |
2645 | } |
2646 | ||
/*
 * fill in the array of fsid_t's up to a max of 'count', the actual
 * number filled in will be set in '*actual'.  If there are more fsid_t's
 * than room in fsidlst then ENOMEM will be returned and '*actual' will
 * have the actual count.
 * having *actual filled out even in the error case is depended upon.
 */
static int
sysctl_vfs_getvfslist(fsid_t *fsidlst, int count, int *actual)
{
	struct mount *mp;

	*actual = 0;
	mount_list_lock();
	TAILQ_FOREACH(mp, &mountlist, mnt_list) {
		/* count every mount; only store while there is room */
		(*actual)++;
		if (*actual <= count)
			fsidlst[(*actual) - 1] = mp->mnt_vfsstat.f_fsid;
	}
	mount_list_unlock();
	return (*actual <= count ? 0 : ENOMEM);
}
2669 | ||
/*
 * sysctl handler: copy out the fsid of every mounted filesystem.
 * Read-only; a sizing probe (NULL oldptr) returns the space required.
 */
static int
sysctl_vfs_vfslist SYSCTL_HANDLER_ARGS
{
	int actual, error;
	size_t space;
	fsid_t *fsidlst;

	/* This is a readonly node. */
	if (req->newptr != USER_ADDR_NULL)
		return (EPERM);

	/* they are querying us so just return the space required. */
	if (req->oldptr == USER_ADDR_NULL) {
		req->oldidx = sysctl_vfs_getvfscnt() * sizeof(fsid_t);
		return 0;
	}
again:
	/*
	 * Retrieve an accurate count of the amount of space required to copy
	 * out all the fsids in the system.
	 */
	space = req->oldlen;
	req->oldlen = sysctl_vfs_getvfscnt() * sizeof(fsid_t);

	/* they didn't give us enough space. */
	if (space < req->oldlen)
		return (ENOMEM);

	MALLOC(fsidlst, fsid_t *, req->oldlen, M_TEMP, M_WAITOK);
	error = sysctl_vfs_getvfslist(fsidlst, req->oldlen / sizeof(fsid_t),
	    &actual);
	/*
	 * If we get back ENOMEM, then another mount has been added while we
	 * slept in malloc above.  If this is the case then try again.
	 */
	if (error == ENOMEM) {
		FREE(fsidlst, M_TEMP);
		req->oldlen = space;
		goto again;
	}
	if (error == 0) {
		error = SYSCTL_OUT(req, fsidlst, actual * sizeof(fsid_t));
	}
	FREE(fsidlst, M_TEMP);
	return (error);
}
2716 | ||
/*
 * Do a sysctl by fsid.
 *
 * Copies in a (32- or 64-bit) vfsidctl, looks up the mount it names,
 * gives the filesystem's own vfs_sysctl handler first crack at the
 * request, and otherwise services the generic VFS_CTL_UMOUNT and
 * VFS_CTL_STATFS operations here.  Returns ENOENT if no mount matches
 * the fsid, ENOTSUP for unknown operations.
 */
static int
sysctl_vfs_ctlbyfsid SYSCTL_HANDLER_ARGS
{
	struct vfsidctl vc;
	struct user_vfsidctl user_vc;	/* 64-bit-process variant of vc */
	struct mount *mp;
	struct vfsstatfs *sp;
	struct proc *p;
	int *name;
	int error, flags, namelen;
	struct vfs_context context;
	boolean_t is_64_bit;

	name = arg1;
	namelen = arg2;
	p = req->p;
	context.vc_proc = p;
	context.vc_ucred = kauth_cred_get();
	is_64_bit = proc_is64bit(p);

	/*
	 * Copy in the appropriately-sized control structure and use its
	 * fsid to locate the mount (no iteration reference is taken).
	 */
	if (is_64_bit) {
		error = SYSCTL_IN(req, &user_vc, sizeof(user_vc));
		if (error)
			return (error);
		if (user_vc.vc_vers != VFS_CTL_VERS1)
			return (EINVAL);
		mp = mount_list_lookupby_fsid(&user_vc.vc_fsid, 0, 0);
	}
	else {
		error = SYSCTL_IN(req, &vc, sizeof(vc));
		if (error)
			return (error);
		if (vc.vc_vers != VFS_CTL_VERS1)
			return (EINVAL);
		mp = mount_list_lookupby_fsid(&vc.vc_fsid, 0, 0);
	}
	if (mp == NULL)
		return (ENOENT);
	/* reset so that the fs specific code can fetch it. */
	req->newidx = 0;
	/*
	 * Note if this is a VFS_CTL then we pass the actual sysctl req
	 * in for "oldp" so that the lower layer can DTRT and use the
	 * SYSCTL_IN/OUT routines.
	 */
	if (mp->mnt_op->vfs_sysctl != NULL) {
		if (is_64_bit) {
			/* only forward 64-bit requests to 64-bit-ready filesystems */
			if (vfs_64bitready(mp)) {
				error = mp->mnt_op->vfs_sysctl(name, namelen,
				    CAST_USER_ADDR_T(req),
				    NULL, USER_ADDR_NULL, 0,
				    &context);
			}
			else {
				error = ENOTSUP;
			}
		}
		else {
			error = mp->mnt_op->vfs_sysctl(name, namelen,
			    CAST_USER_ADDR_T(req),
			    NULL, USER_ADDR_NULL, 0,
			    &context);
		}
		/* anything other than "not handled" is a final answer */
		if (error != ENOTSUP)
			return (error);
	}
	switch (name[0]) {
	case VFS_CTL_UMOUNT:
		/* redirect the request's "new" data at the caller's vc_ptr/vc_len */
		req->newidx = 0;
		if (is_64_bit) {
			req->newptr = user_vc.vc_ptr;
			req->newlen = (size_t)user_vc.vc_len;
		}
		else {
			req->newptr = CAST_USER_ADDR_T(vc.vc_ptr);
			req->newlen = vc.vc_len;
		}
		error = SYSCTL_IN(req, &flags, sizeof(flags));
		if (error)
			break;
		error = safedounmount(mp, flags, p);
		break;
	case VFS_CTL_STATFS:
		req->newidx = 0;
		if (is_64_bit) {
			req->newptr = user_vc.vc_ptr;
			req->newlen = (size_t)user_vc.vc_len;
		}
		else {
			req->newptr = CAST_USER_ADDR_T(vc.vc_ptr);
			req->newlen = vc.vc_len;
		}
		error = SYSCTL_IN(req, &flags, sizeof(flags));
		if (error)
			break;
		sp = &mp->mnt_vfsstat;
		/* refresh cached statistics unless the caller asked for no-wait */
		if (((flags & MNT_NOWAIT) == 0 || (flags & MNT_WAIT)) &&
		    (error = vfs_update_vfsstat(mp, &context)))
			return (error);
		if (is_64_bit) {
			struct user_statfs sfs;
			bzero(&sfs, sizeof(sfs));
			sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
			sfs.f_type = mp->mnt_vtable->vfc_typenum;
			sfs.f_bsize = (user_long_t)sp->f_bsize;
			sfs.f_iosize = (user_long_t)sp->f_iosize;
			sfs.f_blocks = (user_long_t)sp->f_blocks;
			sfs.f_bfree = (user_long_t)sp->f_bfree;
			sfs.f_bavail = (user_long_t)sp->f_bavail;
			sfs.f_files = (user_long_t)sp->f_files;
			sfs.f_ffree = (user_long_t)sp->f_ffree;
			sfs.f_fsid = sp->f_fsid;
			sfs.f_owner = sp->f_owner;

			/* size-1 copies into the bzero'd struct guarantee NUL termination */
			strncpy(&sfs.f_fstypename, &sp->f_fstypename, MFSNAMELEN-1);
			strncpy(&sfs.f_mntonname, &sp->f_mntonname, MNAMELEN-1);
			strncpy(&sfs.f_mntfromname, &sp->f_mntfromname, MNAMELEN-1);

			error = SYSCTL_OUT(req, &sfs, sizeof(sfs));
		}
		else {
			struct statfs sfs;
			bzero(&sfs, sizeof(struct statfs));
			sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
			sfs.f_type = mp->mnt_vtable->vfc_typenum;

			/*
			 * It's possible for there to be more than 2^^31 blocks in the filesystem, so we
			 * have to fudge the numbers here in that case.   We inflate the blocksize in order
			 * to reflect the filesystem size as best we can.
			 */
			if (sp->f_blocks > LONG_MAX) {
				int		shift;

				/*
				 * Work out how far we have to shift the block count down to make it fit.
				 * Note that it's possible to have to shift so far that the resulting
				 * blocksize would be unreportably large.  At that point, we will clip
				 * any values that don't fit.
				 *
				 * For safety's sake, we also ensure that f_iosize is never reported as
				 * being smaller than f_bsize.
				 */
				for (shift = 0; shift < 32; shift++) {
					if ((sp->f_blocks >> shift) <= LONG_MAX)
						break;
					if ((sp->f_bsize << (shift + 1)) > LONG_MAX)
						break;
				}
#define __SHIFT_OR_CLIP(x, s)	((((x) >> (s)) > LONG_MAX) ? LONG_MAX : ((x) >> (s)))
				sfs.f_blocks = (long)__SHIFT_OR_CLIP(sp->f_blocks, shift);
				sfs.f_bfree = (long)__SHIFT_OR_CLIP(sp->f_bfree, shift);
				sfs.f_bavail = (long)__SHIFT_OR_CLIP(sp->f_bavail, shift);
#undef __SHIFT_OR_CLIP
				sfs.f_bsize = (long)(sp->f_bsize << shift);
				sfs.f_iosize = lmax(sp->f_iosize, sp->f_bsize);
			} else {
				sfs.f_bsize = (long)sp->f_bsize;
				sfs.f_iosize = (long)sp->f_iosize;
				sfs.f_blocks = (long)sp->f_blocks;
				sfs.f_bfree = (long)sp->f_bfree;
				sfs.f_bavail = (long)sp->f_bavail;
			}
			sfs.f_files = (long)sp->f_files;
			sfs.f_ffree = (long)sp->f_ffree;
			sfs.f_fsid = sp->f_fsid;
			sfs.f_owner = sp->f_owner;

			/* size-1 copies into the bzero'd struct guarantee NUL termination */
			strncpy(&sfs.f_fstypename, &sp->f_fstypename, MFSNAMELEN-1);
			strncpy(&sfs.f_mntonname, &sp->f_mntonname, MNAMELEN-1);
			strncpy(&sfs.f_mntfromname, &sp->f_mntfromname, MNAMELEN-1);

			error = SYSCTL_OUT(req, &sfs, sizeof(sfs));
		}
		break;
	default:
		return (ENOTSUP);
	}
	return (error);
}
2900 | ||
55e303ae A |
static int filt_fsattach(struct knote *kn);
static void filt_fsdetach(struct knote *kn);
static int filt_fsevent(struct knote *kn, long hint);

/* filter ops for EVFILT_FS knotes (not fd-based: first field is 0) */
struct filterops fs_filtops =
	{ 0, filt_fsattach, filt_fsdetach, filt_fsevent };
2907 | ||
2908 | static int | |
2909 | filt_fsattach(struct knote *kn) | |
2910 | { | |
2911 | ||
2912 | kn->kn_flags |= EV_CLEAR; | |
2913 | KNOTE_ATTACH(&fs_klist, kn); | |
2914 | return (0); | |
2915 | } | |
2916 | ||
/* Detach a knote from the global fs klist (reverse of filt_fsattach). */
static void
filt_fsdetach(struct knote *kn)
{

	KNOTE_DETACH(&fs_klist, kn);
}
2923 | ||
2924 | static int | |
2925 | filt_fsevent(struct knote *kn, long hint) | |
2926 | { | |
2927 | ||
2928 | kn->kn_fflags |= hint; | |
2929 | return (kn->kn_fflags != 0); | |
2930 | } | |
2931 | ||
/*
 * sysctl handler that gets/sets the P_NOREMOTEHANG flag on a process.
 *
 * New data (required) is a pid_t: a positive pid sets the flag on that
 * process, a negative pid clears it.  If an old pointer is supplied,
 * the current flag state (0/1) is returned instead and no change is made.
 */
static int
sysctl_vfs_noremotehang SYSCTL_HANDLER_ARGS
{
	int out, error;
	pid_t pid;
	size_t space;
	struct proc *p;

	/* We need a pid. */
	if (req->newptr == USER_ADDR_NULL)
		return (EINVAL);

	error = SYSCTL_IN(req, &pid, sizeof(pid));
	if (error)
		return (error);

	/* the pid's sign encodes set-vs-clear; look up its absolute value */
	p = pfind(pid < 0 ? -pid : pid);
	if (p == NULL)
		return (ESRCH);

	/*
	 * Fetching the value is ok, but we only fetch if the old
	 * pointer is given.
	 */
	if (req->oldptr != USER_ADDR_NULL) {
		out = !((p->p_flag & P_NOREMOTEHANG) == 0);
		error = SYSCTL_OUT(req, &out, sizeof(out));
		return (error);
	}

	/* XXX req->p->p_ucred -> kauth_cred_get() ??? */
	/* cansignal offers us enough security. */
	if (p != req->p && suser(req->p->p_ucred, &req->p->p_acflag) != 0)
		return (EPERM);

	if (pid < 0)
		p->p_flag &= ~P_NOREMOTEHANG;
	else
		p->p_flag |= P_NOREMOTEHANG;

	return (0);
}
2974 | /* the vfs.generic. branch. */ | |
2975 | SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RW, 0, "vfs generic hinge"); | |
/* retrieve a list of mounted filesystem fsid_t */
2977 | SYSCTL_PROC(_vfs_generic, OID_AUTO, vfsidlist, CTLFLAG_RD, | |
2978 | 0, 0, sysctl_vfs_vfslist, "S,fsid", "List of mounted filesystem ids"); | |
2979 | /* perform operations on filesystem via fsid_t */ | |
2980 | SYSCTL_NODE(_vfs_generic, OID_AUTO, ctlbyfsid, CTLFLAG_RW, | |
2981 | sysctl_vfs_ctlbyfsid, "ctlbyfsid"); | |
2982 | SYSCTL_PROC(_vfs_generic, OID_AUTO, noremotehang, CTLFLAG_RW, | |
2983 | 0, 0, sysctl_vfs_noremotehang, "I", "noremotehang"); | |
91447636 A |
2984 | |
2985 | ||
2986 | int num_reusedvnodes=0; | |
55e303ae | 2987 | |
91447636 A |
/*
 * Allocate or recycle a vnode.
 *
 * If the system is under the desiredvnodes limit, a fresh vnode is
 * zone-allocated; otherwise the oldest candidate on the free list is
 * stolen and reclaimed.  Returns 0 with *vpp set, or ENFILE (with
 * *vpp = 0) if no vnode could be obtained after 100 retries.
 */
static int
new_vnode(vnode_t *vpp)
{
	vnode_t	vp;
	int	retries = 0;			/* retry incase of tablefull */
	int	vpid;
	struct timespec ts;

retry:
	vnode_list_lock();

	if ( !TAILQ_EMPTY(&vnode_free_list)) {
		/*
		 * Pick the first vp for possible reuse
		 */
		vp = TAILQ_FIRST(&vnode_free_list);

		/* an already-dead vnode can be stolen without further checks */
		if (vp->v_lflag & VL_DEAD)
			goto steal_this_vp;
	} else
		vp = NULL;

	/*
	 * we're either empty, or the next guy on the
	 * list is a valid vnode... if we're under the
	 * limit, we'll create a new vnode
	 */
	if (numvnodes < desiredvnodes) {
		numvnodes++;
		vnode_list_unlock();
		MALLOC_ZONE(vp, struct vnode *, sizeof *vp, M_VNODE, M_WAITOK);
		bzero((char *)vp, sizeof *vp);
		VLISTNONE(vp);		/* avoid double queue removal */
		lck_mtx_init(&vp->v_lock, vnode_lck_grp, vnode_lck_attr);

		/* seed v_id from the clock so stale-vid checks work from birth */
		nanouptime(&ts);
		vp->v_id = ts.tv_nsec;
		vp->v_flag = VSTANDARD;

		goto done;
	}
	if (vp == NULL) {
		/*
		 * we've reached the system imposed maximum number of vnodes
		 * but there isn't a single one available
		 * wait a bit and then retry... if we can't get a vnode
		 * after 100 retries, then log a complaint
		 */
		if (++retries <= 100) {
			vnode_list_unlock();
			IOSleep(1);
			goto retry;
		}

		vnode_list_unlock();
		tablefull("vnode");
		log(LOG_EMERG, "%d desired, %d numvnodes, "
			"%d free, %d inactive\n",
			desiredvnodes, numvnodes, freevnodes, inactivevnodes);
		*vpp = 0;
		return (ENFILE);
	}
steal_this_vp:
	vpid = vp->v_id;

	VREMFREE("new_vnode", vp);
	VLISTNONE(vp);

	vnode_list_unlock();
	vnode_lock(vp);

	/*
	 * We could wait for the vnode_lock after removing the vp from the freelist
	 * and the vid is bumped only at the very end of reclaim. So it is  possible
	 * that we are looking at a vnode that is being terminated. If so skip it.
	 */
	if ((vpid != vp->v_id) || (vp->v_usecount != 0) || (vp->v_iocount != 0) ||
			VONLIST(vp) || (vp->v_lflag & VL_TERMINATE)) {
		/*
		 * we lost the race between dropping the list lock
		 * and picking up the vnode_lock... someone else
		 * used this vnode and it is now in a new state
		 * so we need to go back and try again
		 */
		vnode_unlock(vp);
		goto retry;
	}
	if ( (vp->v_lflag & (VL_NEEDINACTIVE | VL_MARKTERM)) == VL_NEEDINACTIVE ) {
		/*
		 * we did a vnode_rele_ext that asked for
		 * us not to reenter the filesystem during
		 * the release even though VL_NEEDINACTIVE was
		 * set... we'll do it here by doing a
		 * vnode_get/vnode_put
		 *
		 * pick up an iocount so that we can call
		 * vnode_put and drive the VNOP_INACTIVE...
		 * vnode_put will either leave us off
		 * the freelist if a new ref comes in,
		 * or put us back on the end of the freelist
		 * or recycle us if we were marked for termination...
		 * so we'll just go grab a new candidate
		 */
		vp->v_iocount++;
#ifdef JOE_DEBUG
		record_vp(vp, 1);
#endif
		vnode_put_locked(vp);
		vnode_unlock(vp);
		goto retry;
	}
	OSAddAtomic(1, &num_reusedvnodes);

	/* Checks for anyone racing us for recycle */
	if (vp->v_type != VBAD) {
		if (vp->v_lflag & VL_DEAD)
			panic("new_vnode: the vnode is VL_DEAD but not VBAD");

		(void)vnode_reclaim_internal(vp, 1, 1);

		/* sanity: a reclaimed vnode must be fully disconnected */
		if ((VONLIST(vp)))
			panic("new_vnode: vp on list ");
		if (vp->v_usecount || vp->v_iocount || vp->v_kusecount ||
		    (vp->v_lflag & (VNAMED_UBC | VNAMED_MOUNT | VNAMED_FSHASH)))
			panic("new_vnode: free vnode still referenced\n");
		if ((vp->v_mntvnodes.tqe_prev != 0) && (vp->v_mntvnodes.tqe_next != 0))
			panic("new_vnode: vnode seems to be on mount list ");
		if ( !LIST_EMPTY(&vp->v_nclinks) || !LIST_EMPTY(&vp->v_ncchildren))
			panic("new_vnode: vnode still hooked into the name cache");
	}
	/* release any interim-fsnode-lock state left over from the old identity */
	if (vp->v_unsafefs) {
		lck_mtx_destroy(&vp->v_unsafefs->fsnodelock, vnode_lck_grp);
		FREE_ZONE((void *)vp->v_unsafefs, sizeof(struct unsafe_fsnode), M_UNSAFEFS);
		vp->v_unsafefs = (struct unsafe_fsnode *)NULL;
	}
	/* reset the vnode to a pristine state for its new identity */
	vp->v_lflag = 0;
	vp->v_writecount = 0;
	vp->v_references = 0;
	vp->v_iterblkflags = 0;
	vp->v_flag = VSTANDARD;
	/* vbad vnodes can point to dead_mountp */
	vp->v_mount = 0;
	vp->v_defer_reclaimlist = (vnode_t)0;

	vnode_unlock(vp);
done:
	*vpp = vp;

	return (0);
}
3138 | ||
/* Acquire the per-vnode mutex. */
void
vnode_lock(vnode_t vp)
{
	lck_mtx_lock(&vp->v_lock);
}
3144 | ||
/* Release the per-vnode mutex. */
void
vnode_unlock(vnode_t vp)
{
	lck_mtx_unlock(&vp->v_lock);
}
3150 | ||
3151 | ||
3152 | ||
3153 | int | |
3154 | vnode_get(struct vnode *vp) | |
3155 | { | |
3156 | vnode_lock(vp); | |
3157 | ||
3158 | if ( (vp->v_iocount == 0) && (vp->v_lflag & (VL_TERMINATE | VL_DEAD)) ) { | |
3159 | vnode_unlock(vp); | |
3160 | return(ENOENT); | |
3161 | } | |
3162 | vp->v_iocount++; | |
3163 | #ifdef JOE_DEBUG | |
3164 | record_vp(vp, 1); | |
3165 | #endif | |
3166 | vnode_unlock(vp); | |
3167 | ||
3168 | return(0); | |
3169 | } | |
3170 | ||
/*
 * Take an iocount only if the vnode's identity (vid) still matches,
 * refusing dead vnodes.  Guards against recycled-vnode races.
 */
int
vnode_getwithvid(vnode_t vp, int vid)
{
	return(vget_internal(vp, vid, ( VNODE_NODEAD| VNODE_WITHID)));
}
3176 | ||
/*
 * Take an iocount on a vnode the caller already holds a reference on;
 * no vid check and dead vnodes are allowed.
 */
int
vnode_getwithref(vnode_t vp)
{
	return(vget_internal(vp, 0, 0));
}
3182 | ||
3183 | ||
3184 | int | |
3185 | vnode_put(vnode_t vp) | |
3186 | { | |
3187 | int retval; | |
3188 | ||
3189 | vnode_lock(vp); | |
3190 | retval = vnode_put_locked(vp); | |
3191 | vnode_unlock(vp); | |
3192 | ||
3193 | return(retval); | |
3194 | } | |
3195 | ||
3196 | int | |
3197 | vnode_put_locked(vnode_t vp) | |
3198 | { | |
3199 | struct vfs_context context; | |
3200 | ||
3201 | retry: | |
3202 | if (vp->v_iocount < 1) | |
3203 | panic("vnode_put(%x): iocount < 1", vp); | |
3204 | ||
3205 | if ((vp->v_usecount > 0) || (vp->v_iocount > 1)) { | |
3206 | vnode_dropiocount(vp, 1); | |
3207 | return(0); | |
3208 | } | |
3209 | if ((vp->v_lflag & (VL_MARKTERM | VL_TERMINATE | VL_DEAD | VL_NEEDINACTIVE)) == VL_NEEDINACTIVE) { | |
3210 | ||
3211 | vp->v_lflag &= ~VL_NEEDINACTIVE; | |
3212 | vnode_unlock(vp); | |
3213 | ||
3214 | context.vc_proc = current_proc(); | |
3215 | context.vc_ucred = kauth_cred_get(); | |
3216 | VNOP_INACTIVE(vp, &context); | |
3217 | ||
3218 | vnode_lock(vp); | |
3219 | /* | |
3220 | * because we had to drop the vnode lock before calling | |
3221 | * VNOP_INACTIVE, the state of this vnode may have changed... | |
3222 | * we may pick up both VL_MARTERM and either | |
3223 | * an iocount or a usecount while in the VNOP_INACTIVE call | |
3224 | * we don't want to call vnode_reclaim_internal on a vnode | |
3225 | * that has active references on it... so loop back around | |
3226 | * and reevaluate the state | |
3227 | */ | |
3228 | goto retry; | |
3229 | } | |
3230 | vp->v_lflag &= ~VL_NEEDINACTIVE; | |
3231 | ||
3232 | if ((vp->v_lflag & (VL_MARKTERM | VL_TERMINATE | VL_DEAD)) == VL_MARKTERM) | |
3233 | vnode_reclaim_internal(vp, 1, 0); | |
3234 | ||
3235 | vnode_dropiocount(vp, 1); | |
3236 | vnode_list_add(vp); | |
3237 | ||
3238 | return(0); | |
3239 | } | |
3240 | ||
/* is vnode_t in use by others? (usecount above refcnt, unlocked entry) */
int
vnode_isinuse(vnode_t vp, int refcnt)
{
	return(vnode_isinuse_locked(vp, refcnt, 0));
}
3247 | ||
3248 | ||
3249 | static int | |
3250 | vnode_isinuse_locked(vnode_t vp, int refcnt, int locked) | |
3251 | { | |
3252 | int retval = 0; | |
3253 | ||
3254 | if (!locked) | |
3255 | vnode_lock(vp); | |
3256 | if ((vp->v_type != VREG) && (vp->v_usecount > refcnt)) { | |
3257 | retval = 1; | |
3258 | goto out; | |
3259 | } | |
3260 | if (vp->v_type == VREG) { | |
3261 | retval = ubc_isinuse_locked(vp, refcnt, 1); | |
3262 | } | |
3263 | ||
3264 | out: | |
3265 | if (!locked) | |
3266 | vnode_unlock(vp); | |
3267 | return(retval); | |
3268 | } | |
3269 | ||
3270 | ||
3271 | /* resume vnode_t */ | |
3272 | errno_t | |
3273 | vnode_resume(vnode_t vp) | |
3274 | { | |
3275 | ||
3276 | vnode_lock(vp); | |
3277 | ||
3278 | if (vp->v_owner == current_thread()) { | |
3279 | vp->v_lflag &= ~VL_SUSPENDED; | |
3280 | vp->v_owner = 0; | |
3281 | vnode_unlock(vp); | |
3282 | wakeup(&vp->v_iocount); | |
3283 | } else | |
3284 | vnode_unlock(vp); | |
3285 | ||
3286 | return(0); | |
3287 | } | |
3288 | ||
3289 | static errno_t | |
3290 | vnode_drain(vnode_t vp) | |
3291 | { | |
3292 | ||
3293 | if (vp->v_lflag & VL_DRAIN) { | |
3294 | panic("vnode_drain: recursuve drain"); | |
3295 | return(ENOENT); | |
3296 | } | |
3297 | vp->v_lflag |= VL_DRAIN; | |
3298 | vp->v_owner = current_thread(); | |
3299 | ||
3300 | while (vp->v_iocount > 1) | |
3301 | msleep(&vp->v_iocount, &vp->v_lock, PVFS, "vnode_drain", 0); | |
3302 | return(0); | |
3303 | } | |
3304 | ||
3305 | ||
/*
 * if the number of recent references via vnode_getwithvid or vnode_getwithref
 * exceeds this threshold, then 'UN-AGE' the vnode by removing it from
 * the LRU list if it's currently on it... once the iocount and usecount both drop
 * to 0, it will get put back on the end of the list, effectively making it younger
 * this allows us to keep actively referenced vnodes in the list without having
 * to constantly remove and add to the list each time a vnode w/o a usecount is
 * referenced which costs us taking and dropping a global lock twice.
 */
3315 | #define UNAGE_THRESHHOLD 10 | |
3316 | ||
/*
 * Core iocount-acquisition path shared by the vnode_get* family.
 *
 * Waits out (or fails on) suspended/draining/terminating vnodes
 * according to 'vflags' (VNODE_NODEAD, VNODE_NOSUSPEND), verifies the
 * caller's vid against the vnode's current identity, and bumps the
 * iocount.  'locked' indicates the caller already holds the vnode lock.
 * Returns 0 on success or ENOENT.
 */
errno_t
vnode_getiocount(vnode_t vp, int locked, int vid, int vflags)
{
	int nodead = vflags & VNODE_NODEAD;
	int nosusp = vflags & VNODE_NOSUSPEND;

	if (!locked)
		vnode_lock(vp);

	for (;;) {
		/*
		 * if it is a dead vnode with deadfs
		 */
		if (nodead && (vp->v_lflag & VL_DEAD) && ((vp->v_type == VBAD) || (vp->v_data == 0))) {
			if (!locked)
				vnode_unlock(vp);
			return(ENOENT);
		}
		/*
		 * will return VL_DEAD ones
		 */
		if ((vp->v_lflag & (VL_SUSPENDED | VL_DRAIN | VL_TERMINATE)) == 0 ) {
			break;
		}
		/*
		 * if suspended vnodes are to be failed
		 */
		if (nosusp && (vp->v_lflag & VL_SUSPENDED)) {
			if (!locked)
				vnode_unlock(vp);
			return(ENOENT);
		}
		/*
		 * if you are the owner of drain/suspend/termination , can acquire iocount
		 * check for VL_TERMINATE; it does not set owner
		 */
		if ((vp->v_lflag & (VL_DRAIN | VL_SUSPENDED | VL_TERMINATE)) &&
		    (vp->v_owner == current_thread())) {
			break;
		}
		/* otherwise sleep until termination completes / iocounts drain */
		if (vp->v_lflag & VL_TERMINATE) {
			vp->v_lflag |= VL_TERMWANT;

			msleep(&vp->v_lflag, &vp->v_lock, PVFS, "vnode getiocount", 0);
		} else
			msleep(&vp->v_iocount, &vp->v_lock, PVFS, "vnode_getiocount", 0);
	}
	/* identity changed while we slept: the vnode was recycled */
	if (vid != vp->v_id) {
		if (!locked)
			vnode_unlock(vp);
		return(ENOENT);
	}
	/* 'un-age' heavily re-referenced vnodes off the LRU (see comment above) */
	if (++vp->v_references >= UNAGE_THRESHHOLD) {
		vp->v_references = 0;
		vnode_list_remove(vp);
	}
	vp->v_iocount++;
#ifdef JOE_DEBUG
	record_vp(vp, 1);
#endif
	if (!locked)
		vnode_unlock(vp);
	return(0);
}
3381 | ||
3382 | static void | |
3383 | vnode_dropiocount (vnode_t vp, int locked) | |
3384 | { | |
3385 | if (!locked) | |
3386 | vnode_lock(vp); | |
3387 | if (vp->v_iocount < 1) | |
3388 | panic("vnode_dropiocount(%x): v_iocount < 1", vp); | |
3389 | ||
3390 | vp->v_iocount--; | |
3391 | #ifdef JOE_DEBUG | |
3392 | record_vp(vp, -1); | |
3393 | #endif | |
3394 | if ((vp->v_lflag & (VL_DRAIN | VL_SUSPENDED)) && (vp->v_iocount <= 1)) | |
3395 | wakeup(&vp->v_iocount); | |
3396 | ||
3397 | if (!locked) | |
3398 | vnode_unlock(vp); | |
3399 | } | |
3400 | ||
3401 | ||
/* Reclaim an unlocked vnode (public wrapper for vnode_reclaim_internal). */
void
vnode_reclaim(struct vnode * vp)
{
	vnode_reclaim_internal(vp, 0, 0);
}
3407 | ||
/*
 * Tear down a vnode's current identity.
 *
 * Marks it VL_TERMINATE, drains outstanding iocounts, cleans it via
 * vgone(), bumps v_id so stale vnode_getwithvid callers fail, frees
 * fifo state, and leaves it VBAD.  'locked' indicates the caller holds
 * the vnode lock; 'reuse' suppresses re-insertion on the free list
 * (the caller is about to reuse the vnode).
 */
__private_extern__
void
vnode_reclaim_internal(struct vnode * vp, int locked, int reuse)
{
	int isfifo = 0;

	if (!locked)
		vnode_lock(vp);

	if (vp->v_lflag & VL_TERMINATE) {
		panic("vnode reclaim in progress");
	}
	vp->v_lflag |= VL_TERMINATE;

	if (vnode_drain(vp)) {
		panic("vnode drain failed");
		vnode_unlock(vp);	/* not reached: panic does not return */
		return;
	}
	isfifo = (vp->v_type == VFIFO);

	if (vp->v_type != VBAD)
		vgone(vp);		/* clean and reclaim the vnode */

	/*
	 * give the vnode a new identity so
	 * that vnode_getwithvid will fail
	 * on any stale cache accesses
	 */
	vp->v_id++;
	if (isfifo) {
		struct fifoinfo * fip;

		fip = vp->v_fifoinfo;
		vp->v_fifoinfo = NULL;
		FREE(fip, M_TEMP);
	}

	vp->v_type = VBAD;

	/* sanity: vgone must have fully disconnected this vnode */
	if (vp->v_data)
		panic("vnode_reclaim_internal: cleaned vnode isn't");
	if (vp->v_numoutput)
		panic("vnode_reclaim_internal: Clean vnode has pending I/O's");
	if (UBCINFOEXISTS(vp))
		panic("vnode_reclaim_internal: ubcinfo not cleaned");
	if (vp->v_parent)
		panic("vnode_reclaim_internal: vparent not removed");
	if (vp->v_name)
		panic("vnode_reclaim_internal: vname not removed");

	vp->v_socket = 0;

	vp->v_lflag &= ~VL_TERMINATE;
	vp->v_lflag &= ~VL_DRAIN;
	vp->v_owner = 0;

	/* wake threads parked in vnode_getiocount waiting on termination */
	if (vp->v_lflag & VL_TERMWANT) {
		vp->v_lflag &= ~VL_TERMWANT;
		wakeup(&vp->v_lflag);
	}
	if (!reuse && vp->v_usecount == 0)
		vnode_list_add(vp);
	if (!locked)
		vnode_unlock(vp);
}
3474 | ||
/* USAGE:
 * The following API creates a vnode and associates with it all the parameters
 * specified in the vnode_fsparam structure, returning a vnode handle with a
 * reference.  Device aliasing is handled here, so checkalias is obsoleted by this.
 * vnode_create(int flavor, size_t size, void * param, vnode_t *vp)
 */
/*
 * Create a fully-initialized vnode from a vnode_fsparam description.
 * Handles UBC setup for regular files, device aliasing for VCHR/VBLK,
 * fifo state, mount-list insertion, and name-cache entry.
 * Returns 0 with *vpp set, or an errno (EINVAL on bad flavor/size).
 */
int
vnode_create(int flavor, size_t size, void *data, vnode_t *vpp)
{
	int error;
	int insert = 1;
	vnode_t vp;
	vnode_t nvp;
	vnode_t dvp;
        struct  componentname *cnp;
	struct vnode_fsparam *param = (struct vnode_fsparam *)data;

	if (flavor == VNCREATE_FLAVOR && (size == VCREATESIZE) && param) {
		if ( (error = new_vnode(&vp)) ) {
			return(error);
		} else {
			dvp = param->vnfs_dvp;
			cnp = param->vnfs_cnp;

			vp->v_op = param->vnfs_vops;
			vp->v_type = param->vnfs_vtype;
			vp->v_data = param->vnfs_fsnode;
			vp->v_iocount = 1;		/* returned with one iocount held */

			if (param->vnfs_markroot)
				vp->v_flag |= VROOT;
			if (param->vnfs_marksystem)
				vp->v_flag |= VSYSTEM;
			else if (vp->v_type == VREG) {
				/*
				 * only non SYSTEM vp
				 */
				error = ubc_info_init_withsize(vp, param->vnfs_filesize);
				if (error) {
#ifdef JOE_DEBUG
					record_vp(vp, 1);
#endif
					/* UBC setup failed: mark the vnode dead and give it back */
					vp->v_mount = 0;
					vp->v_op = dead_vnodeop_p;
					vp->v_tag = VT_NON;
					vp->v_data = NULL;
					vp->v_type = VBAD;
					vp->v_lflag |= VL_DEAD;

					vnode_put(vp);
					return(error);
				}
			}
#ifdef JOE_DEBUG
			record_vp(vp, 1);
#endif
			if (vp->v_type == VCHR || vp->v_type == VBLK) {

				if ( (nvp = checkalias(vp, param->vnfs_rdev)) ) {
					/*
					 * if checkalias returns a vnode, it will be locked
					 *
					 * first get rid of the unneeded vnode we acquired
					 */
					vp->v_data = NULL;
					vp->v_op = spec_vnodeop_p;
					vp->v_type = VBAD;
					vp->v_lflag = VL_DEAD;
					vp->v_data = NULL;
					vp->v_tag = VT_NON;
					vnode_put(vp);

					/*
					 * switch to aliased vnode and finish
					 * preparing it
					 */
					vp = nvp;

					vclean(vp, 0, current_proc());
					vp->v_op = param->vnfs_vops;
					vp->v_type = param->vnfs_vtype;
					vp->v_data = param->vnfs_fsnode;
					vp->v_lflag = 0;
					vp->v_mount = NULL;
					insmntque(vp, param->vnfs_mp);
					insert = 0;	/* already on the mount list */
					vnode_unlock(vp);
				}
			}

			if (vp->v_type == VFIFO) {
				struct fifoinfo *fip;

				MALLOC(fip, struct fifoinfo *,
					sizeof(*fip), M_TEMP, M_WAITOK);
				bzero(fip, sizeof(struct fifoinfo ));
				vp->v_fifoinfo = fip;
			}
			/* The file systems usually pass the address of the location
			 * where they store the vnode pointer. When we add the vnode to the
			 * mount point and name cache they are discoverable. So the file system
			 * node will have the connection to vnode setup by then
			 */
			*vpp = vp;

			if (param->vnfs_mp) {
					if (param->vnfs_mp->mnt_kern_flag & MNTK_LOCK_LOCAL)
						vp->v_flag |= VLOCKLOCAL;
				if (insert) {
					/*
					 * enter in mount vnode list
					 */
					insmntque(vp, param->vnfs_mp);
				}
#ifdef INTERIM_FSNODE_LOCK
				if (param->vnfs_mp->mnt_vtable->vfc_threadsafe == 0) {
					MALLOC_ZONE(vp->v_unsafefs, struct unsafe_fsnode *,
						    sizeof(struct unsafe_fsnode), M_UNSAFEFS, M_WAITOK);
					vp->v_unsafefs->fsnode_count = 0;
					vp->v_unsafefs->fsnodeowner  = (void *)NULL;
					lck_mtx_init(&vp->v_unsafefs->fsnodelock, vnode_lck_grp, vnode_lck_attr);
				}
#endif /* INTERIM_FSNODE_LOCK */
			}
			/* hold the parent directory via a usecount while we point at it */
			if (dvp && vnode_ref(dvp) == 0) {
				vp->v_parent = dvp;
			}
			if (cnp) {
				if (dvp && ((param->vnfs_flags & (VNFS_NOCACHE | VNFS_CANTCACHE)) == 0)) {
					/*
					 * enter into name cache
					 * we've got the info to enter it into the name cache now
					 */
					cache_enter(dvp, vp, cnp);
				}
				vp->v_name = vfs_addname(cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, 0);
			}
			if ((param->vnfs_flags & VNFS_CANTCACHE) == 0) {
				/*
				 * this vnode is being created as cacheable in the name cache
				 * this allows us to re-enter it in the cache
				 */
				vp->v_flag |= VNCACHEABLE;
			}
			if ((vp->v_flag & VSYSTEM) && (vp->v_type != VREG))
				panic("incorrect vnode setup");

			return(0);
		}
	}
	return (EINVAL);
}
3627 | ||
/*
 * Mark the vnode as held by the filesystem's hash (VNAMED_FSHASH named
 * reference).  Panics if already marked or if the vnode sits on the
 * free list.  Always returns 0.
 */
int
vnode_addfsref(vnode_t vp)
{
	vnode_lock(vp);
	if (vp->v_lflag & VNAMED_FSHASH)
		panic("add_fsref: vp already has named reference");
	if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb))
		panic("addfsref: vp on the free list\n");
	vp->v_lflag |= VNAMED_FSHASH;
	vnode_unlock(vp);
	return(0);

}
/*
 * Clear the VNAMED_FSHASH named reference set by vnode_addfsref.
 * Panics if the reference is not present.  Always returns 0.
 */
int
vnode_removefsref(vnode_t vp)
{
	vnode_lock(vp);
	if ((vp->v_lflag & VNAMED_FSHASH) == 0)
		panic("remove_fsref: no named reference");
	vp->v_lflag &= ~VNAMED_FSHASH;
	vnode_unlock(vp);
	return(0);

}
3652 | ||
3653 | ||
3654 | int | |
3655 | vfs_iterate(__unused int flags, int (*callout)(mount_t, void *), void *arg) | |
3656 | { | |
3657 | mount_t mp; | |
3658 | int ret = 0; | |
3659 | fsid_t * fsid_list; | |
3660 | int count, actualcount, i; | |
3661 | void * allocmem; | |
3662 | ||
3663 | count = mount_getvfscnt(); | |
3664 | count += 10; | |
3665 | ||
3666 | fsid_list = (fsid_t *)kalloc(count * sizeof(fsid_t)); | |
3667 | allocmem = (void *)fsid_list; | |
3668 | ||
3669 | actualcount = mount_fillfsids(fsid_list, count); | |
3670 | ||
3671 | for (i=0; i< actualcount; i++) { | |
3672 | ||
3673 | /* obtain the mount point with iteration reference */ | |
3674 | mp = mount_list_lookupby_fsid(&fsid_list[i], 0, 1); | |
3675 | ||
3676 | if(mp == (struct mount *)0) | |
3677 | continue; | |
3678 | mount_lock(mp); | |
3679 | if (mp->mnt_lflag & (MNT_LDEAD | MNT_LUNMOUNT)) { | |
3680 | mount_unlock(mp); | |
3681 | mount_iterdrop(mp); | |
3682 | continue; | |
3683 | ||
3684 | } | |
3685 | mount_unlock(mp); | |
3686 | ||
3687 | /* iterate over all the vnodes */ | |
3688 | ret = callout(mp, arg); | |
3689 | ||
3690 | mount_iterdrop(mp); | |
3691 | ||
3692 | switch (ret) { | |
3693 | case VFS_RETURNED: | |
3694 | case VFS_RETURNED_DONE: | |
3695 | if (ret == VFS_RETURNED_DONE) { | |
3696 | ret = 0; | |
3697 | goto out; | |
3698 | } | |
3699 | break; | |
3700 | ||
3701 | case VFS_CLAIMED_DONE: | |
3702 | ret = 0; | |
3703 | goto out; | |
3704 | case VFS_CLAIMED: | |
3705 | default: | |
3706 | break; | |
3707 | } | |
3708 | ret = 0; | |
3709 | } | |
3710 | ||
3711 | out: | |
3712 | kfree(allocmem, (count * sizeof(fsid_t))); | |
3713 | return (ret); | |
3714 | } | |
3715 | ||
3716 | /* | |
3717 | * Update the vfsstatfs structure in the mountpoint. | |
3718 | */ | |
3719 | int | |
3720 | vfs_update_vfsstat(mount_t mp, vfs_context_t ctx) | |
3721 | { | |
3722 | struct vfs_attr va; | |
3723 | int error; | |
3724 | ||
3725 | /* | |
3726 | * Request the attributes we want to propagate into | |
3727 | * the per-mount vfsstat structure. | |
3728 | */ | |
3729 | VFSATTR_INIT(&va); | |
3730 | VFSATTR_WANTED(&va, f_iosize); | |
3731 | VFSATTR_WANTED(&va, f_blocks); | |
3732 | VFSATTR_WANTED(&va, f_bfree); | |
3733 | VFSATTR_WANTED(&va, f_bavail); | |
3734 | VFSATTR_WANTED(&va, f_bused); | |
3735 | VFSATTR_WANTED(&va, f_files); | |
3736 | VFSATTR_WANTED(&va, f_ffree); | |
3737 | VFSATTR_WANTED(&va, f_bsize); | |
3738 | VFSATTR_WANTED(&va, f_fssubtype); | |
3739 | if ((error = vfs_getattr(mp, &va, ctx)) != 0) { | |
3740 | KAUTH_DEBUG("STAT - filesystem returned error %d", error); | |
3741 | return(error); | |
3742 | } | |
3743 | ||
3744 | /* | |
3745 | * Unpack into the per-mount structure. | |
3746 | * | |
3747 | * We only overwrite these fields, which are likely to change: | |
3748 | * f_blocks | |
3749 | * f_bfree | |
3750 | * f_bavail | |
3751 | * f_bused | |
3752 | * f_files | |
3753 | * f_ffree | |
3754 | * | |
3755 | * And these which are not, but which the FS has no other way | |
3756 | * of providing to us: | |
3757 | * f_bsize | |
3758 | * f_iosize | |
3759 | * f_fssubtype | |
3760 | * | |
3761 | */ | |
3762 | if (VFSATTR_IS_SUPPORTED(&va, f_bsize)) { | |
3763 | mp->mnt_vfsstat.f_bsize = va.f_bsize; | |
3764 | } else { | |
3765 | mp->mnt_vfsstat.f_bsize = mp->mnt_devblocksize; /* default from the device block size */ | |
3766 | } | |
3767 | if (VFSATTR_IS_SUPPORTED(&va, f_iosize)) { | |
3768 | mp->mnt_vfsstat.f_iosize = va.f_iosize; | |
3769 | } else { | |
3770 | mp->mnt_vfsstat.f_iosize = 1024 * 1024; /* 1MB sensible I/O size */ | |
3771 | } | |
3772 | if (VFSATTR_IS_SUPPORTED(&va, f_blocks)) | |
3773 | mp->mnt_vfsstat.f_blocks = va.f_blocks; | |
3774 | if (VFSATTR_IS_SUPPORTED(&va, f_bfree)) | |
3775 | mp->mnt_vfsstat.f_bfree = va.f_bfree; | |
3776 | if (VFSATTR_IS_SUPPORTED(&va, f_bavail)) | |
3777 | mp->mnt_vfsstat.f_bavail = va.f_bavail; | |
3778 | if (VFSATTR_IS_SUPPORTED(&va, f_bused)) | |
3779 | mp->mnt_vfsstat.f_bused = va.f_bused; | |
3780 | if (VFSATTR_IS_SUPPORTED(&va, f_files)) | |
3781 | mp->mnt_vfsstat.f_files = va.f_files; | |
3782 | if (VFSATTR_IS_SUPPORTED(&va, f_ffree)) | |
3783 | mp->mnt_vfsstat.f_ffree = va.f_ffree; | |
3784 | ||
3785 | /* this is unlikely to change, but has to be queried for */ | |
3786 | if (VFSATTR_IS_SUPPORTED(&va, f_fssubtype)) | |
3787 | mp->mnt_vfsstat.f_fssubtype = va.f_fssubtype; | |
3788 | ||
3789 | return(0); | |
3790 | } | |
3791 | ||
/*
 * Append a mount point to the global mount list and bump the mount count.
 * The mount list lock serializes all mountlist/nummounts updates.
 */
void
mount_list_add(mount_t mp)
{
	mount_list_lock();
	TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
	nummounts++;
	mount_list_unlock();
}
3800 | ||
/*
 * Remove a mount point from the global mount list and drop the count.
 * The list linkage pointers are zeroed afterwards so that a stale
 * mnt_list cannot be traversed or removed twice by accident.
 */
void
mount_list_remove(mount_t mp)
{
	mount_list_lock();
	TAILQ_REMOVE(&mountlist, mp, mnt_list);
	nummounts--;
	/* poison the linkage now that mp is off the list */
	mp->mnt_list.tqe_next = 0;
	mp->mnt_list.tqe_prev = 0;
	mount_list_unlock();
}
3811 | ||
/*
 * Look up a mount point by its volfs identifier (f_fsid.val[0]).
 *
 * If 'withref' is set, an iteration reference is taken while the mount
 * list lock is held, and after the lock is dropped the mount is busied
 * via vfs_busy(LK_NOWAIT); the iteration reference is then released.
 * On success the caller therefore holds a vfs_busy reference.  Returns
 * NULL if no match is found or either reference cannot be obtained.
 */
mount_t
mount_lookupby_volfsid(int volfs_id, int withref)
{
	mount_t cur_mount = (mount_t)0;
	mount_t mp ;

	mount_list_lock();
	TAILQ_FOREACH(mp, &mountlist, mnt_list) {
		if (validfsnode(mp) && mp->mnt_vfsstat.f_fsid.val[0] == volfs_id) {
			cur_mount = mp;
			if (withref) {
				if (mount_iterref(cur_mount, 1)) {
					/* mount is going away; give up */
					cur_mount = (mount_t)0;
					mount_list_unlock();
					goto out;
				}
			}
			break;
		}
	}
	mount_list_unlock();
	if (withref && (cur_mount != (mount_t)0)) {
		mp = cur_mount;
		/* convert the iteration reference into a busy reference */
		if (vfs_busy(mp, LK_NOWAIT) != 0) {
			cur_mount = (mount_t)0;
		}
		mount_iterdrop(mp);
	}
out:
	return(cur_mount);
}
3843 | ||
3844 | ||
3845 | mount_t | |
3846 | mount_list_lookupby_fsid(fsid, locked, withref) | |
3847 | fsid_t *fsid; | |
3848 | int locked; | |
3849 | int withref; | |
3850 | { | |
3851 | mount_t retmp = (mount_t)0; | |
3852 | mount_t mp; | |
3853 | ||
3854 | if (!locked) | |
3855 | mount_list_lock(); | |
3856 | TAILQ_FOREACH(mp, &mountlist, mnt_list) | |
3857 | if (mp->mnt_vfsstat.f_fsid.val[0] == fsid->val[0] && | |
3858 | mp->mnt_vfsstat.f_fsid.val[1] == fsid->val[1]) { | |
3859 | retmp = mp; | |
3860 | if (withref) { | |
3861 | if (mount_iterref(retmp, 1)) | |
3862 | retmp = (mount_t)0; | |
3863 | } | |
3864 | goto out; | |
3865 | } | |
3866 | out: | |
3867 | if (!locked) | |
3868 | mount_list_unlock(); | |
3869 | return (retmp); | |
3870 | } | |
3871 | ||
3872 | errno_t | |
3873 | vnode_lookup(const char *path, int flags, vnode_t *vpp, vfs_context_t context) | |
3874 | { | |
3875 | struct nameidata nd; | |
3876 | int error; | |
3877 | struct vfs_context context2; | |
3878 | vfs_context_t ctx = context; | |
3879 | u_long ndflags = 0; | |
3880 | ||
3881 | if (context == NULL) { /* XXX technically an error */ | |
3882 | context2.vc_proc = current_proc(); | |
3883 | context2.vc_ucred = kauth_cred_get(); | |
3884 | ctx = &context2; | |
3885 | } | |
3886 | ||
3887 | if (flags & VNODE_LOOKUP_NOFOLLOW) | |
3888 | ndflags = NOFOLLOW; | |
3889 | else | |
3890 | ndflags = FOLLOW; | |
3891 | ||
3892 | if (flags & VNODE_LOOKUP_NOCROSSMOUNT) | |
3893 | ndflags |= NOCROSSMOUNT; | |
3894 | if (flags & VNODE_LOOKUP_DOWHITEOUT) | |
3895 | ndflags |= DOWHITEOUT; | |
3896 | ||
3897 | /* XXX AUDITVNPATH1 needed ? */ | |
3898 | NDINIT(&nd, LOOKUP, ndflags, UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx); | |
3899 | ||
3900 | if ((error = namei(&nd))) | |
3901 | return (error); | |
3902 | *vpp = nd.ni_vp; | |
3903 | nameidone(&nd); | |
3904 | ||
3905 | return (0); | |
3906 | } | |
3907 | ||
3908 | errno_t | |
3909 | vnode_open(const char *path, int fmode, int cmode, int flags, vnode_t *vpp, vfs_context_t context) | |
3910 | { | |
3911 | struct nameidata nd; | |
3912 | int error; | |
3913 | struct vfs_context context2; | |
3914 | vfs_context_t ctx = context; | |
3915 | u_long ndflags = 0; | |
3a60a9f5 | 3916 | int lflags = flags; |
91447636 A |
3917 | |
3918 | if (context == NULL) { /* XXX technically an error */ | |
3919 | context2.vc_proc = current_proc(); | |
3920 | context2.vc_ucred = kauth_cred_get(); | |
3921 | ctx = &context2; | |
3922 | } | |
3923 | ||
3a60a9f5 A |
3924 | if (fmode & O_NOFOLLOW) |
3925 | lflags |= VNODE_LOOKUP_NOFOLLOW; | |
3926 | ||
3927 | if (lflags & VNODE_LOOKUP_NOFOLLOW) | |
91447636 A |
3928 | ndflags = NOFOLLOW; |
3929 | else | |
3930 | ndflags = FOLLOW; | |
3931 | ||
3a60a9f5 | 3932 | if (lflags & VNODE_LOOKUP_NOCROSSMOUNT) |
91447636 | 3933 | ndflags |= NOCROSSMOUNT; |
3a60a9f5 | 3934 | if (lflags & VNODE_LOOKUP_DOWHITEOUT) |
91447636 A |
3935 | ndflags |= DOWHITEOUT; |
3936 | ||
3937 | /* XXX AUDITVNPATH1 needed ? */ | |
3938 | NDINIT(&nd, LOOKUP, ndflags, UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx); | |
3939 | ||
3940 | if ((error = vn_open(&nd, fmode, cmode))) | |
3941 | *vpp = NULL; | |
3942 | else | |
3943 | *vpp = nd.ni_vp; | |
3944 | ||
3945 | return (error); | |
3946 | } | |
3947 | ||
3948 | errno_t | |
3949 | vnode_close(vnode_t vp, int flags, vfs_context_t context) | |
3950 | { | |
3951 | kauth_cred_t cred; | |
3952 | struct proc *p; | |
3953 | int error; | |
3954 | ||
3955 | if (context) { | |
3956 | p = context->vc_proc; | |
3957 | cred = context->vc_ucred; | |
3958 | } else { | |
3959 | p = current_proc(); | |
3960 | cred = kauth_cred_get(); | |
3961 | } | |
3962 | ||
3963 | error = vn_close(vp, flags, cred, p); | |
3964 | vnode_put(vp); | |
3965 | return (error); | |
3966 | } | |
3967 | ||
3968 | errno_t | |
3969 | vnode_size(vnode_t vp, off_t *sizep, vfs_context_t ctx) | |
3970 | { | |
3971 | struct vnode_attr va; | |
3972 | int error; | |
3973 | ||
3974 | VATTR_INIT(&va); | |
3975 | VATTR_WANTED(&va, va_data_size); | |
3976 | error = vnode_getattr(vp, &va, ctx); | |
3977 | if (!error) | |
3978 | *sizep = va.va_data_size; | |
3979 | return(error); | |
3980 | } | |
3981 | ||
3982 | errno_t | |
3983 | vnode_setsize(vnode_t vp, off_t size, int ioflag, vfs_context_t ctx) | |
3984 | { | |
3985 | struct vnode_attr va; | |
3986 | ||
3987 | VATTR_INIT(&va); | |
3988 | VATTR_SET(&va, va_data_size, size); | |
3989 | va.va_vaflags = ioflag & 0xffff; | |
3990 | return(vnode_setattr(vp, &va, ctx)); | |
3991 | } | |
3992 | ||
/*
 * Create a filesystem object of the type in vap->va_type in directory
 * dvp, named by cnp.  On success *vpp holds the new vnode with an
 * iocount which the caller must release.
 *
 * Handles ACL inheritance from the parent directory, defaulting of
 * new-object attributes, dispatch to the appropriate creation VNOP,
 * and fallback attribute setting when the filesystem did not honour
 * all requested attributes.  The caller's vap is restored to its
 * pre-inheritance ACL state before returning.
 */
errno_t
vn_create(vnode_t dvp, vnode_t *vpp, struct componentname *cnp, struct vnode_attr *vap, int flags, vfs_context_t ctx)
{
	kauth_acl_t oacl, nacl;
	int initial_acl;
	errno_t error;
	vnode_t vp = (vnode_t)0;

	error = 0;
	oacl = nacl = NULL;
	initial_acl = 0;

	KAUTH_DEBUG("%p CREATE - '%s'", dvp, cnp->cn_nameptr);

	/*
	 * Handle ACL inheritance.
	 */
	if (!(flags & VN_CREATE_NOINHERIT) && vfs_extendedsecurity(dvp->v_mount)) {
		/* save the original filesec */
		if (VATTR_IS_ACTIVE(vap, va_acl)) {
			initial_acl = 1;
			oacl = vap->va_acl;
		}

		vap->va_acl = NULL;
		/* combine the caller's ACL (oacl) with what dvp inherits into nacl */
		if ((error = kauth_acl_inherit(dvp,
			 oacl,
			 &nacl,
			 vap->va_type == VDIR,
			 ctx)) != 0) {
			KAUTH_DEBUG("%p CREATE - error %d processing inheritance", dvp, error);
			return(error);
		}

		/*
		 * If the generated ACL is NULL, then we can save ourselves some effort
		 * by clearing the active bit.
		 */
		if (nacl == NULL) {
			VATTR_CLEAR_ACTIVE(vap, va_acl);
		} else {
			VATTR_SET(vap, va_acl, nacl);
		}
	}

	/*
	 * Check and default new attributes.
	 * This will set va_uid, va_gid, va_mode and va_create_time at least, if the caller
	 * hasn't supplied them.
	 */
	if ((error = vnode_authattr_new(dvp, vap, flags & VN_CREATE_NOAUTH, ctx)) != 0) {
		KAUTH_DEBUG("%p CREATE - error %d handing/defaulting attributes", dvp, error);
		goto out;
	}


	/*
	 * Create the requested node.
	 */
	switch(vap->va_type) {
	case VREG:
		error = VNOP_CREATE(dvp, vpp, cnp, vap, ctx);
		break;
	case VDIR:
		error = VNOP_MKDIR(dvp, vpp, cnp, vap, ctx);
		break;
	case VSOCK:
	case VFIFO:
	case VBLK:
	case VCHR:
		error = VNOP_MKNOD(dvp, vpp, cnp, vap, ctx);
		break;
	default:
		panic("vnode_create: unknown vtype %d", vap->va_type);
	}
	if (error != 0) {
		KAUTH_DEBUG("%p CREATE - error %d returned by filesystem", dvp, error);
		goto out;
	}

	vp = *vpp;
	/*
	 * If some of the requested attributes weren't handled by the VNOP,
	 * use our fallback code.
	 */
	if (!VATTR_ALL_SUPPORTED(vap) && *vpp) {
		KAUTH_DEBUG(" CREATE - doing fallback with ACL %p", vap->va_acl);
		error = vnode_setattr_fallback(*vpp, vap, ctx);
	}
	/* if the fallback failed, drop our reference and return no vnode */
	if ((error != 0 ) && (vp != (vnode_t)0)) {
		*vpp = (vnode_t) 0;
		vnode_put(vp);
	}

out:
	/*
	 * If the caller supplied a filesec in vap, it has been replaced
	 * now by the post-inheritance copy. We need to put the original back
	 * and free the inherited product.
	 */
	if (initial_acl) {
		VATTR_SET(vap, va_acl, oacl);
	} else {
		VATTR_CLEAR_ACTIVE(vap, va_acl);
	}
	if (nacl != NULL)
		kauth_acl_free(nacl);

	return(error);
}
4103 | ||
4104 | static kauth_scope_t vnode_scope; | |
4105 | static int vnode_authorize_callback(kauth_cred_t credential, __unused void *idata, kauth_action_t action, | |
4106 | uintptr_t arg0, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3); | |
4107 | ||
/*
 * State carried through a single vnode authorization request.
 * The flags/flags_valid pair caches ownership and group-membership
 * answers so each is computed at most once per request (see
 * vauth_file_owner() and friends).
 */
typedef struct _vnode_authorize_context {
	vnode_t		vp;		/* vnode being authorized */
	struct vnode_attr *vap;		/* attributes of vp */
	vnode_t		dvp;		/* parent directory, when relevant */
	struct vnode_attr *dvap;	/* attributes of dvp */
	vfs_context_t	ctx;		/* caller's context */
	int		flags;		/* cached _VAC_* answers */
	int		flags_valid;	/* which bits in 'flags' are valid */
#define _VAC_IS_OWNER		(1<<0)
#define _VAC_IN_GROUP		(1<<1)
#define _VAC_IS_DIR_OWNER	(1<<2)
#define _VAC_IN_DIR_GROUP	(1<<3)
} *vauth_ctx;
4121 | ||
/*
 * Register the KAUTH_SCOPE_VNODE scope with its default listener.
 * Called once during VFS initialization; the returned scope handle is
 * used by vnode_authorize().
 */
void
vnode_authorize_init(void)
{
	vnode_scope = kauth_register_scope(KAUTH_SCOPE_VNODE, vnode_authorize_callback, NULL);
}
4127 | ||
4128 | /* | |
4129 | * Authorize an operation on a vnode. | |
4130 | * | |
4131 | * This is KPI, but here because it needs vnode_scope. | |
4132 | */ | |
int
vnode_authorize(vnode_t vp, vnode_t dvp, kauth_action_t action, vfs_context_t context)
{
	int error, result;

	/*
	 * We can't authorize against a dead vnode; allow all operations through so that
	 * the correct error can be returned.
	 */
	if (vp->v_type == VBAD)
		return(0);

	error = 0;
	/*
	 * Dispatch to the vnode scope listeners; 'error' may be filled in
	 * by a listener with a more specific errno than the scope result.
	 */
	result = kauth_authorize_action(vnode_scope, vfs_context_ucred(context), action,
		   (uintptr_t)context, (uintptr_t)vp, (uintptr_t)dvp, (uintptr_t)&error);
	if (result == EPERM)		/* traditional behaviour */
		result = EACCES;
	/* did the lower layers give a better error return? */
	if ((result != 0) && (error != 0))
		return(error);
	return(result);
}
4155 | ||
4156 | /* | |
4157 | * Test for vnode immutability. | |
4158 | * | |
4159 | * The 'append' flag is set when the authorization request is constrained | |
4160 | * to operations which only request the right to append to a file. | |
4161 | * | |
4162 | * The 'ignore' flag is set when an operation modifying the immutability flags | |
4163 | * is being authorized. We check the system securelevel to determine which | |
4164 | * immutability flags we can ignore. | |
4165 | */ | |
4166 | static int | |
4167 | vnode_immutable(struct vnode_attr *vap, int append, int ignore) | |
4168 | { | |
4169 | int mask; | |
4170 | ||
4171 | /* start with all bits precluding the operation */ | |
4172 | mask = IMMUTABLE | APPEND; | |
4173 | ||
4174 | /* if appending only, remove the append-only bits */ | |
4175 | if (append) | |
4176 | mask &= ~APPEND; | |
4177 | ||
4178 | /* ignore only set when authorizing flags changes */ | |
4179 | if (ignore) { | |
4180 | if (securelevel <= 0) { | |
4181 | /* in insecure state, flags do not inhibit changes */ | |
4182 | mask = 0; | |
4183 | } else { | |
4184 | /* in secure state, user flags don't inhibit */ | |
4185 | mask &= ~(UF_IMMUTABLE | UF_APPEND); | |
4186 | } | |
4187 | } | |
4188 | KAUTH_DEBUG("IMMUTABLE - file flags 0x%x mask 0x%x append = %d ignore = %d", vap->va_flags, mask, append, ignore); | |
4189 | if ((vap->va_flags & mask) != 0) | |
4190 | return(EPERM); | |
4191 | return(0); | |
4192 | } | |
4193 | ||
4194 | static int | |
4195 | vauth_node_owner(struct vnode_attr *vap, kauth_cred_t cred) | |
4196 | { | |
4197 | int result; | |
4198 | ||
4199 | /* default assumption is not-owner */ | |
4200 | result = 0; | |
4201 | ||
4202 | /* | |
4203 | * If the filesystem has given us a UID, we treat this as authoritative. | |
4204 | */ | |
4205 | if (vap && VATTR_IS_SUPPORTED(vap, va_uid)) { | |
4206 | result = (vap->va_uid == kauth_cred_getuid(cred)) ? 1 : 0; | |
4207 | } | |
4208 | /* we could test the owner UUID here if we had a policy for it */ | |
4209 | ||
4210 | return(result); | |
4211 | } | |
4212 | ||
4213 | static int | |
4214 | vauth_node_group(struct vnode_attr *vap, kauth_cred_t cred, int *ismember) | |
4215 | { | |
4216 | int error; | |
4217 | int result; | |
4218 | ||
4219 | error = 0; | |
4220 | result = 0; | |
4221 | ||
4222 | /* the caller is expected to have asked the filesystem for a group at some point */ | |
4223 | if (vap && VATTR_IS_SUPPORTED(vap, va_gid)) { | |
4224 | error = kauth_cred_ismember_gid(cred, vap->va_gid, &result); | |
4225 | } | |
4226 | /* we could test the group UUID here if we had a policy for it */ | |
4227 | ||
4228 | if (!error) | |
4229 | *ismember = result; | |
4230 | return(error); | |
4231 | } | |
4232 | ||
4233 | static int | |
4234 | vauth_file_owner(vauth_ctx vcp) | |
4235 | { | |
4236 | int result; | |
4237 | ||
4238 | if (vcp->flags_valid & _VAC_IS_OWNER) { | |
4239 | result = (vcp->flags & _VAC_IS_OWNER) ? 1 : 0; | |
4240 | } else { | |
4241 | result = vauth_node_owner(vcp->vap, vcp->ctx->vc_ucred); | |
4242 | ||
4243 | /* cache our result */ | |
4244 | vcp->flags_valid |= _VAC_IS_OWNER; | |
4245 | if (result) { | |
4246 | vcp->flags |= _VAC_IS_OWNER; | |
4247 | } else { | |
4248 | vcp->flags &= ~_VAC_IS_OWNER; | |
4249 | } | |
4250 | } | |
4251 | return(result); | |
4252 | } | |
4253 | ||
4254 | static int | |
4255 | vauth_file_ingroup(vauth_ctx vcp, int *ismember) | |
4256 | { | |
4257 | int error; | |
4258 | ||
4259 | if (vcp->flags_valid & _VAC_IN_GROUP) { | |
4260 | *ismember = (vcp->flags & _VAC_IN_GROUP) ? 1 : 0; | |
4261 | error = 0; | |
4262 | } else { | |
4263 | error = vauth_node_group(vcp->vap, vcp->ctx->vc_ucred, ismember); | |
4264 | ||
4265 | if (!error) { | |
4266 | /* cache our result */ | |
4267 | vcp->flags_valid |= _VAC_IN_GROUP; | |
4268 | if (*ismember) { | |
4269 | vcp->flags |= _VAC_IN_GROUP; | |
4270 | } else { | |
4271 | vcp->flags &= ~_VAC_IN_GROUP; | |
4272 | } | |
4273 | } | |
4274 | ||
4275 | } | |
4276 | return(error); | |
4277 | } | |
4278 | ||
4279 | static int | |
4280 | vauth_dir_owner(vauth_ctx vcp) | |
4281 | { | |
4282 | int result; | |
4283 | ||
4284 | if (vcp->flags_valid & _VAC_IS_DIR_OWNER) { | |
4285 | result = (vcp->flags & _VAC_IS_DIR_OWNER) ? 1 : 0; | |
4286 | } else { | |
4287 | result = vauth_node_owner(vcp->dvap, vcp->ctx->vc_ucred); | |
4288 | ||
4289 | /* cache our result */ | |
4290 | vcp->flags_valid |= _VAC_IS_DIR_OWNER; | |
4291 | if (result) { | |
4292 | vcp->flags |= _VAC_IS_DIR_OWNER; | |
4293 | } else { | |
4294 | vcp->flags &= ~_VAC_IS_DIR_OWNER; | |
4295 | } | |
4296 | } | |
4297 | return(result); | |
4298 | } | |
4299 | ||
4300 | static int | |
4301 | vauth_dir_ingroup(vauth_ctx vcp, int *ismember) | |
4302 | { | |
4303 | int error; | |
4304 | ||
4305 | if (vcp->flags_valid & _VAC_IN_DIR_GROUP) { | |
4306 | *ismember = (vcp->flags & _VAC_IN_DIR_GROUP) ? 1 : 0; | |
4307 | error = 0; | |
4308 | } else { | |
4309 | error = vauth_node_group(vcp->dvap, vcp->ctx->vc_ucred, ismember); | |
4310 | ||
4311 | if (!error) { | |
4312 | /* cache our result */ | |
4313 | vcp->flags_valid |= _VAC_IN_DIR_GROUP; | |
4314 | if (*ismember) { | |
4315 | vcp->flags |= _VAC_IN_DIR_GROUP; | |
4316 | } else { | |
4317 | vcp->flags &= ~_VAC_IN_DIR_GROUP; | |
4318 | } | |
4319 | } | |
4320 | } | |
4321 | return(error); | |
4322 | } | |
4323 | ||
4324 | /* | |
4325 | * Test the posix permissions in (vap) to determine whether (credential) | |
4326 | * may perform (action) | |
4327 | */ | |
/*
 * Evaluate the Posix mode bits in the file's (or, if on_dir, the
 * directory's) attributes against the requested VREAD/VWRITE/VEXEC
 * 'action' mask.  Returns 0 when granted, EACCES when denied, or an
 * error from the group-membership lookup.
 *
 * The checks are ordered cheapest-first: the owner/group/world bits
 * are all tested up front so that ownership (cheap) and group
 * membership (expensive) are only resolved when the classes disagree.
 */
static int
vnode_authorize_posix(vauth_ctx vcp, int action, int on_dir)
{
	struct vnode_attr *vap;
	int needed, error, owner_ok, group_ok, world_ok, ismember;
#ifdef KAUTH_DEBUG_ENABLE
	const char *where;
# define _SETWHERE(c) where = c;
#else
# define _SETWHERE(c)
#endif

	/* checking file or directory? */
	if (on_dir) {
		vap = vcp->dvap;
	} else {
		vap = vcp->vap;
	}

	error = 0;

	/*
	 * We want to do as little work here as possible. So first we check
	 * which sets of permissions grant us the access we need, and avoid checking
	 * whether specific permissions grant access when more generic ones would.
	 */

	/* owner permissions */
	needed = 0;
	if (action & VREAD)
		needed |= S_IRUSR;
	if (action & VWRITE)
		needed |= S_IWUSR;
	if (action & VEXEC)
		needed |= S_IXUSR;
	owner_ok = (needed & vap->va_mode) == needed;

	/* group permissions */
	needed = 0;
	if (action & VREAD)
		needed |= S_IRGRP;
	if (action & VWRITE)
		needed |= S_IWGRP;
	if (action & VEXEC)
		needed |= S_IXGRP;
	group_ok = (needed & vap->va_mode) == needed;

	/* world permissions */
	needed = 0;
	if (action & VREAD)
		needed |= S_IROTH;
	if (action & VWRITE)
		needed |= S_IWOTH;
	if (action & VEXEC)
		needed |= S_IXOTH;
	world_ok = (needed & vap->va_mode) == needed;

	/* If granted/denied by all three, we're done */
	if (owner_ok && group_ok && world_ok) {
		_SETWHERE("all");
		goto out;
	}
	if (!owner_ok && !group_ok && !world_ok) {
		_SETWHERE("all");
		error = EACCES;
		goto out;
	}

	/* Check ownership (relatively cheap) */
	if ((on_dir && vauth_dir_owner(vcp)) ||
	    (!on_dir && vauth_file_owner(vcp))) {
		_SETWHERE("user");
		if (!owner_ok)
			error = EACCES;
		goto out;
	}

	/* Not owner; if group and world both grant it we're done */
	if (group_ok && world_ok) {
		_SETWHERE("group/world");
		goto out;
	}
	if (!group_ok && !world_ok) {
		_SETWHERE("group/world");
		error = EACCES;
		goto out;
	}

	/* Check group membership (most expensive) */
	ismember = 0;
	if (on_dir) {
		error = vauth_dir_ingroup(vcp, &ismember);
	} else {
		error = vauth_file_ingroup(vcp, &ismember);
	}
	if (error)
		goto out;
	if (ismember) {
		_SETWHERE("group");
		if (!group_ok)
			error = EACCES;
		goto out;
	}

	/* Not owner, not in group, use world result */
	_SETWHERE("world");
	if (!world_ok)
		error = EACCES;

	/* FALLTHROUGH */

out:
	KAUTH_DEBUG("%p %s - posix %s permissions : need %s%s%s %x have %s%s%s%s%s%s%s%s%s UID = %d file = %d,%d",
	    vcp->vp, (error == 0) ? "ALLOWED" : "DENIED", where,
	    (action & VREAD) ? "r" : "-",
	    (action & VWRITE) ? "w" : "-",
	    (action & VEXEC) ? "x" : "-",
	    needed,
	    (vap->va_mode & S_IRUSR) ? "r" : "-",
	    (vap->va_mode & S_IWUSR) ? "w" : "-",
	    (vap->va_mode & S_IXUSR) ? "x" : "-",
	    (vap->va_mode & S_IRGRP) ? "r" : "-",
	    (vap->va_mode & S_IWGRP) ? "w" : "-",
	    (vap->va_mode & S_IXGRP) ? "x" : "-",
	    (vap->va_mode & S_IROTH) ? "r" : "-",
	    (vap->va_mode & S_IWOTH) ? "w" : "-",
	    (vap->va_mode & S_IXOTH) ? "x" : "-",
	    kauth_cred_getuid(vcp->ctx->vc_ucred),
	    on_dir ? vcp->dvap->va_uid : vcp->vap->va_uid,
	    on_dir ? vcp->dvap->va_gid : vcp->vap->va_gid);
	return(error);
}
4460 | ||
4461 | /* | |
4462 | * Authorize the deletion of the node vp from the directory dvp. | |
4463 | * | |
4464 | * We assume that: | |
4465 | * - Neither the node nor the directory are immutable. | |
4466 | * - The user is not the superuser. | |
4467 | * | |
4468 | * Deletion is not permitted if the directory is sticky and the caller is not owner of the | |
4469 | * node or directory. | |
4470 | * | |
4471 | * If either the node grants DELETE, or the directory grants DELETE_CHILD, the node may be | |
4472 | * deleted. If neither denies the permission, and the caller has Posix write access to the | |
4473 | * directory, then the node may be deleted. | |
4474 | */ | |
4475 | static int | |
4476 | vnode_authorize_delete(vauth_ctx vcp) | |
4477 | { | |
4478 | struct vnode_attr *vap = vcp->vap; | |
4479 | struct vnode_attr *dvap = vcp->dvap; | |
4480 | kauth_cred_t cred = vcp->ctx->vc_ucred; | |
4481 | struct kauth_acl_eval eval; | |
4482 | int error, delete_denied, delete_child_denied, ismember; | |
4483 | ||
4484 | /* check the ACL on the directory */ | |
4485 | delete_child_denied = 0; | |
4486 | if (VATTR_IS_NOT(dvap, va_acl, NULL)) { | |
4487 | eval.ae_requested = KAUTH_VNODE_DELETE_CHILD; | |
4488 | eval.ae_acl = &dvap->va_acl->acl_ace[0]; | |
4489 | eval.ae_count = dvap->va_acl->acl_entrycount; | |
4490 | eval.ae_options = 0; | |
4491 | if (vauth_dir_owner(vcp)) | |
4492 | eval.ae_options |= KAUTH_AEVAL_IS_OWNER; | |
4493 | if ((error = vauth_dir_ingroup(vcp, &ismember)) != 0) | |
4494 | return(error); | |
4495 | if (ismember) | |
4496 | eval.ae_options |= KAUTH_AEVAL_IN_GROUP; | |
4497 | eval.ae_exp_gall = KAUTH_VNODE_GENERIC_ALL_BITS; | |
4498 | eval.ae_exp_gread = KAUTH_VNODE_GENERIC_READ_BITS; | |
4499 | eval.ae_exp_gwrite = KAUTH_VNODE_GENERIC_WRITE_BITS; | |
4500 | eval.ae_exp_gexec = KAUTH_VNODE_GENERIC_EXECUTE_BITS; | |
4501 | ||
4502 | error = kauth_acl_evaluate(cred, &eval); | |
4503 | ||
4504 | if (error != 0) { | |
4505 | KAUTH_DEBUG("%p ERROR during ACL processing - %d", vcp->vp, error); | |
4506 | return(error); | |
4507 | } | |
4508 | if (eval.ae_result == KAUTH_RESULT_DENY) | |
4509 | delete_child_denied = 1; | |
4510 | if (eval.ae_result == KAUTH_RESULT_ALLOW) { | |
4511 | KAUTH_DEBUG("%p ALLOWED - granted by directory ACL", vcp->vp); | |
4512 | return(0); | |
4513 | } | |
4514 | } | |
4515 | ||
4516 | /* check the ACL on the node */ | |
4517 | delete_denied = 0; | |
4518 | if (VATTR_IS_NOT(vap, va_acl, NULL)) { | |
4519 | eval.ae_requested = KAUTH_VNODE_DELETE; | |
4520 | eval.ae_acl = &vap->va_acl->acl_ace[0]; | |
4521 | eval.ae_count = vap->va_acl->acl_entrycount; | |
4522 | eval.ae_options = 0; | |
4523 | if (vauth_file_owner(vcp)) | |
4524 | eval.ae_options |= KAUTH_AEVAL_IS_OWNER; | |
4525 | if ((error = vauth_file_ingroup(vcp, &ismember)) != 0) | |
4526 | return(error); | |
4527 | if (ismember) | |
4528 | eval.ae_options |= KAUTH_AEVAL_IN_GROUP; | |
4529 | eval.ae_exp_gall = KAUTH_VNODE_GENERIC_ALL_BITS; | |
4530 | eval.ae_exp_gread = KAUTH_VNODE_GENERIC_READ_BITS; | |
4531 | eval.ae_exp_gwrite = KAUTH_VNODE_GENERIC_WRITE_BITS; | |
4532 | eval.ae_exp_gexec = KAUTH_VNODE_GENERIC_EXECUTE_BITS; | |
4533 | ||
4534 | if ((error = kauth_acl_evaluate(cred, &eval)) != 0) { | |
4535 | KAUTH_DEBUG("%p ERROR during ACL processing - %d", vcp->vp, error); | |
4536 | return(error); | |
4537 | } | |
4538 | if (eval.ae_result == KAUTH_RESULT_DENY) | |
4539 | delete_denied = 1; | |
4540 | if (eval.ae_result == KAUTH_RESULT_ALLOW) { | |
4541 | KAUTH_DEBUG("%p ALLOWED - granted by file ACL", vcp->vp); | |
4542 | return(0); | |
4543 | } | |
4544 | } | |
4545 | ||
4546 | /* if denied by ACL on directory or node, return denial */ | |
4547 | if (delete_denied || delete_child_denied) { | |
4548 | KAUTH_DEBUG("%p ALLOWED - denied by ACL", vcp->vp); | |
4549 | return(EACCES); | |
4550 | } | |
4551 | ||
4552 | /* enforce sticky bit behaviour */ | |
4553 | if ((dvap->va_mode & S_ISTXT) && !vauth_file_owner(vcp) && !vauth_dir_owner(vcp)) { | |
4554 | KAUTH_DEBUG("%p DENIED - sticky bit rules (user %d file %d dir %d)", | |
4555 | vcp->vp, cred->cr_uid, vap->va_uid, dvap->va_uid); | |
4556 | return(EACCES); | |
4557 | } | |
4558 | ||
4559 | /* check the directory */ | |
4560 | if ((error = vnode_authorize_posix(vcp, VWRITE, 1 /* on_dir */)) != 0) { | |
4561 | KAUTH_DEBUG("%p ALLOWED - granted by posix permisssions", vcp->vp); | |
4562 | return(error); | |
4563 | } | |
4564 | ||
4565 | /* not denied, must be OK */ | |
4566 | return(0); | |
4567 | } | |
4568 | ||
4569 | ||
4570 | /* | |
4571 | * Authorize an operation based on the node's attributes. | |
4572 | */ | |
4573 | static int | |
4574 | vnode_authorize_simple(vauth_ctx vcp, kauth_ace_rights_t acl_rights, kauth_ace_rights_t preauth_rights) | |
4575 | { | |
4576 | struct vnode_attr *vap = vcp->vap; | |
4577 | kauth_cred_t cred = vcp->ctx->vc_ucred; | |
4578 | struct kauth_acl_eval eval; | |
4579 | int error, ismember; | |
4580 | mode_t posix_action; | |
4581 | ||
4582 | /* | |
4583 | * If we are the file owner, we automatically have some rights. | |
4584 | * | |
4585 | * Do we need to expand this to support group ownership? | |
4586 | */ | |
4587 | if (vauth_file_owner(vcp)) | |
4588 | acl_rights &= ~(KAUTH_VNODE_WRITE_SECURITY); | |
4589 | ||
4590 | /* | |
4591 | * If we are checking both TAKE_OWNERSHIP and WRITE_SECURITY, we can | |
4592 | * mask the latter. If TAKE_OWNERSHIP is requested the caller is about to | |
4593 | * change ownership to themselves, and WRITE_SECURITY is implicitly | |
4594 | * granted to the owner. We need to do this because at this point | |
4595 | * WRITE_SECURITY may not be granted as the caller is not currently | |
4596 | * the owner. | |
4597 | */ | |
4598 | if ((acl_rights & KAUTH_VNODE_TAKE_OWNERSHIP) && | |
4599 | (acl_rights & KAUTH_VNODE_WRITE_SECURITY)) | |
4600 | acl_rights &= ~KAUTH_VNODE_WRITE_SECURITY; | |
4601 | ||
4602 | if (acl_rights == 0) { | |
4603 | KAUTH_DEBUG("%p ALLOWED - implicit or no rights required", vcp->vp); | |
4604 | return(0); | |
4605 | } | |
4606 | ||
4607 | /* if we have an ACL, evaluate it */ | |
4608 | if (VATTR_IS_NOT(vap, va_acl, NULL)) { | |
4609 | eval.ae_requested = acl_rights; | |
4610 | eval.ae_acl = &vap->va_acl->acl_ace[0]; | |
4611 | eval.ae_count = vap->va_acl->acl_entrycount; | |
4612 | eval.ae_options = 0; | |
4613 | if (vauth_file_owner(vcp)) | |
4614 | eval.ae_options |= KAUTH_AEVAL_IS_OWNER; | |
4615 | if ((error = vauth_file_ingroup(vcp, &ismember)) != 0) | |
4616 | return(error); | |
4617 | if (ismember) | |
4618 | eval.ae_options |= KAUTH_AEVAL_IN_GROUP; | |
4619 | eval.ae_exp_gall = KAUTH_VNODE_GENERIC_ALL_BITS; | |
4620 | eval.ae_exp_gread = KAUTH_VNODE_GENERIC_READ_BITS; | |
4621 | eval.ae_exp_gwrite = KAUTH_VNODE_GENERIC_WRITE_BITS; | |
4622 | eval.ae_exp_gexec = KAUTH_VNODE_GENERIC_EXECUTE_BITS; | |
4623 | ||
4624 | if ((error = kauth_acl_evaluate(cred, &eval)) != 0) { | |
4625 | KAUTH_DEBUG("%p ERROR during ACL processing - %d", vcp->vp, error); | |
4626 | return(error); | |
4627 | } | |
4628 | ||
4629 | if (eval.ae_result == KAUTH_RESULT_DENY) { | |
4630 | KAUTH_DEBUG("%p DENIED - by ACL", vcp->vp); | |
4631 | return(EACCES); /* deny, deny, counter-allege */ | |
4632 | } | |
4633 | if (eval.ae_result == KAUTH_RESULT_ALLOW) { | |
4634 | KAUTH_DEBUG("%p ALLOWED - all rights granted by ACL", vcp->vp); | |
4635 | return(0); | |
4636 | } | |
4637 | /* fall through and evaluate residual rights */ | |
4638 | } else { | |
4639 | /* no ACL, everything is residual */ | |
4640 | eval.ae_residual = acl_rights; | |
4641 | } | |
4642 | ||
4643 | /* | |
4644 | * Grant residual rights that have been pre-authorized. | |
4645 | */ | |
4646 | eval.ae_residual &= ~preauth_rights; | |
4647 | ||
4648 | /* | |
4649 | * We grant WRITE_ATTRIBUTES to the owner if it hasn't been denied. | |
4650 | */ | |
4651 | if (vauth_file_owner(vcp)) | |
4652 | eval.ae_residual &= ~KAUTH_VNODE_WRITE_ATTRIBUTES; | |
4653 | ||
4654 | if (eval.ae_residual == 0) { | |
4655 | KAUTH_DEBUG("%p ALLOWED - rights already authorized", vcp->vp); | |
4656 | return(0); | |
4657 | } | |
4658 | ||
4659 | /* | |
4660 | * Bail if we have residual rights that can't be granted by posix permissions, | |
4661 | * or aren't presumed granted at this point. | |
4662 | * | |
4663 | * XXX these can be collapsed for performance | |
4664 | */ | |
4665 | if (eval.ae_residual & KAUTH_VNODE_CHANGE_OWNER) { | |
4666 | KAUTH_DEBUG("%p DENIED - CHANGE_OWNER not permitted", vcp->vp); | |
4667 | return(EACCES); | |
4668 | } | |
4669 | if (eval.ae_residual & KAUTH_VNODE_WRITE_SECURITY) { | |
4670 | KAUTH_DEBUG("%p DENIED - WRITE_SECURITY not permitted", vcp->vp); | |
4671 | return(EACCES); | |
4672 | } | |
4673 | ||
4674 | #if DIAGNOSTIC | |
4675 | if (eval.ae_residual & KAUTH_VNODE_DELETE) | |
4676 | panic("vnode_authorize: can't be checking delete permission here"); | |
4677 | #endif | |
4678 | ||
4679 | /* | |
4680 | * Compute the fallback posix permissions that will satisfy the remaining | |
4681 | * rights. | |
4682 | */ | |
4683 | posix_action = 0; | |
4684 | if (eval.ae_residual & (KAUTH_VNODE_READ_DATA | | |
4685 | KAUTH_VNODE_LIST_DIRECTORY | | |
4686 | KAUTH_VNODE_READ_EXTATTRIBUTES)) | |
4687 | posix_action |= VREAD; | |
4688 | if (eval.ae_residual & (KAUTH_VNODE_WRITE_DATA | | |
4689 | KAUTH_VNODE_ADD_FILE | | |
4690 | KAUTH_VNODE_ADD_SUBDIRECTORY | | |
4691 | KAUTH_VNODE_DELETE_CHILD | | |
4692 | KAUTH_VNODE_WRITE_ATTRIBUTES | | |
4693 | KAUTH_VNODE_WRITE_EXTATTRIBUTES)) | |
4694 | posix_action |= VWRITE; | |
4695 | if (eval.ae_residual & (KAUTH_VNODE_EXECUTE | | |
4696 | KAUTH_VNODE_SEARCH)) | |
4697 | posix_action |= VEXEC; | |
4698 | ||
4699 | if (posix_action != 0) { | |
4700 | return(vnode_authorize_posix(vcp, posix_action, 0 /* !on_dir */)); | |
4701 | } else { | |
4702 | KAUTH_DEBUG("%p ALLOWED - residual rights %s%s%s%s%s%s%s%s%s%s%s%s%s%s granted due to no posix mapping", | |
4703 | vcp->vp, | |
4704 | (eval.ae_residual & KAUTH_VNODE_READ_DATA) | |
4705 | ? vnode_isdir(vcp->vp) ? " LIST_DIRECTORY" : " READ_DATA" : "", | |
4706 | (eval.ae_residual & KAUTH_VNODE_WRITE_DATA) | |
4707 | ? vnode_isdir(vcp->vp) ? " ADD_FILE" : " WRITE_DATA" : "", | |
4708 | (eval.ae_residual & KAUTH_VNODE_EXECUTE) | |
4709 | ? vnode_isdir(vcp->vp) ? " SEARCH" : " EXECUTE" : "", | |
4710 | (eval.ae_residual & KAUTH_VNODE_DELETE) | |
4711 | ? " DELETE" : "", | |
4712 | (eval.ae_residual & KAUTH_VNODE_APPEND_DATA) | |
4713 | ? vnode_isdir(vcp->vp) ? " ADD_SUBDIRECTORY" : " APPEND_DATA" : "", | |
4714 | (eval.ae_residual & KAUTH_VNODE_DELETE_CHILD) | |
4715 | ? " DELETE_CHILD" : "", | |
4716 | (eval.ae_residual & KAUTH_VNODE_READ_ATTRIBUTES) | |
4717 | ? " READ_ATTRIBUTES" : "", | |
4718 | (eval.ae_residual & KAUTH_VNODE_WRITE_ATTRIBUTES) | |
4719 | ? " WRITE_ATTRIBUTES" : "", | |
4720 | (eval.ae_residual & KAUTH_VNODE_READ_EXTATTRIBUTES) | |
4721 | ? " READ_EXTATTRIBUTES" : "", | |
4722 | (eval.ae_residual & KAUTH_VNODE_WRITE_EXTATTRIBUTES) | |
4723 | ? " WRITE_EXTATTRIBUTES" : "", | |
4724 | (eval.ae_residual & KAUTH_VNODE_READ_SECURITY) | |
4725 | ? " READ_SECURITY" : "", | |
4726 | (eval.ae_residual & KAUTH_VNODE_WRITE_SECURITY) | |
4727 | ? " WRITE_SECURITY" : "", | |
4728 | (eval.ae_residual & KAUTH_VNODE_CHECKIMMUTABLE) | |
4729 | ? " CHECKIMMUTABLE" : "", | |
4730 | (eval.ae_residual & KAUTH_VNODE_CHANGE_OWNER) | |
4731 | ? " CHANGE_OWNER" : ""); | |
4732 | } | |
4733 | ||
4734 | /* | |
4735 | * Lack of required Posix permissions implies no reason to deny access. | |
4736 | */ | |
4737 | return(0); | |
4738 | } | |
4739 | ||
4740 | /* | |
4741 | * Check for file immutability. | |
4742 | */ | |
4743 | static int | |
4744 | vnode_authorize_checkimmutable(vnode_t vp, struct vnode_attr *vap, int rights, int ignore) | |
4745 | { | |
4746 | mount_t mp; | |
4747 | int error; | |
4748 | int append; | |
4749 | ||
4750 | /* | |
4751 | * Perform immutability checks for operations that change data. | |
4752 | * | |
4753 | * Sockets, fifos and devices require special handling. | |
4754 | */ | |
4755 | switch(vp->v_type) { | |
4756 | case VSOCK: | |
4757 | case VFIFO: | |
4758 | case VBLK: | |
4759 | case VCHR: | |
4760 | /* | |
4761 | * Writing to these nodes does not change the filesystem data, | |
4762 | * so forget that it's being tried. | |
4763 | */ | |
4764 | rights &= ~KAUTH_VNODE_WRITE_DATA; | |
4765 | break; | |
4766 | default: | |
4767 | break; | |
4768 | } | |
4769 | ||
4770 | error = 0; | |
4771 | if (rights & KAUTH_VNODE_WRITE_RIGHTS) { | |
4772 | ||
4773 | /* check per-filesystem options if possible */ | |
4774 | mp = vnode_mount(vp); | |
4775 | if (mp != NULL) { | |
4776 | ||
4777 | /* check for no-EA filesystems */ | |
4778 | if ((rights & KAUTH_VNODE_WRITE_EXTATTRIBUTES) && | |
4779 | (vfs_flags(mp) & MNT_NOUSERXATTR)) { | |
4780 | KAUTH_DEBUG("%p DENIED - filesystem disallowed extended attributes", vp); | |
4781 | error = EACCES; /* User attributes disabled */ | |
4782 | goto out; | |
4783 | } | |
4784 | } | |
4785 | ||
4786 | /* check for file immutability */ | |
4787 | append = 0; | |
4788 | if (vp->v_type == VDIR) { | |
4789 | if ((rights & (KAUTH_VNODE_ADD_FILE | KAUTH_VNODE_ADD_SUBDIRECTORY)) == rights) | |
4790 | append = 1; | |
4791 | } else { | |
4792 | if ((rights & KAUTH_VNODE_APPEND_DATA) == rights) | |
4793 | append = 1; | |
4794 | } | |
4795 | if ((error = vnode_immutable(vap, append, ignore)) != 0) { | |
4796 | KAUTH_DEBUG("%p DENIED - file is immutable", vp); | |
4797 | goto out; | |
4798 | } | |
4799 | } | |
4800 | out: | |
4801 | return(error); | |
4802 | } | |
4803 | ||
4804 | /* | |
4805 | * Handle authorization actions for filesystems that advertise that the server will | |
4806 | * be enforcing. | |
4807 | */ | |
4808 | static int | |
4809 | vnode_authorize_opaque(vnode_t vp, int *resultp, kauth_action_t action, vfs_context_t ctx) | |
4810 | { | |
4811 | int error; | |
4812 | ||
4813 | /* | |
4814 | * If the vp is a device node, socket or FIFO it actually represents a local | |
4815 | * endpoint, so we need to handle it locally. | |
4816 | */ | |
4817 | switch(vp->v_type) { | |
4818 | case VBLK: | |
4819 | case VCHR: | |
4820 | case VSOCK: | |
4821 | case VFIFO: | |
4822 | return(0); | |
4823 | default: | |
4824 | break; | |
4825 | } | |
4826 | ||
4827 | /* | |
4828 | * In the advisory request case, if the filesystem doesn't think it's reliable | |
4829 | * we will attempt to formulate a result ourselves based on VNOP_GETATTR data. | |
4830 | */ | |
4831 | if ((action & KAUTH_VNODE_ACCESS) && !vfs_authopaqueaccess(vnode_mount(vp))) | |
4832 | return(0); | |
4833 | ||
4834 | /* | |
4835 | * Let the filesystem have a say in the matter. It's OK for it to not implemnent | |
4836 | * VNOP_ACCESS, as most will authorise inline with the actual request. | |
4837 | */ | |
4838 | if ((error = VNOP_ACCESS(vp, action, ctx)) != ENOTSUP) { | |
4839 | *resultp = error; | |
4840 | KAUTH_DEBUG("%p DENIED - opaque filesystem VNOP_ACCESS denied access", vp); | |
4841 | return(1); | |
4842 | } | |
4843 | ||
4844 | /* | |
4845 | * Typically opaque filesystems do authorisation in-line, but exec is a special case. In | |
4846 | * order to be reasonably sure that exec will be permitted, we try a bit harder here. | |
4847 | */ | |
4848 | if ((action & KAUTH_VNODE_EXECUTE) && vnode_isreg(vp)) { | |
4849 | /* try a VNOP_OPEN for readonly access */ | |
4850 | if ((error = VNOP_OPEN(vp, FREAD, ctx)) != 0) { | |
4851 | *resultp = error; | |
4852 | KAUTH_DEBUG("%p DENIED - EXECUTE denied because file could not be opened readonly", vp); | |
4853 | return(1); | |
4854 | } | |
4855 | VNOP_CLOSE(vp, FREAD, ctx); | |
4856 | } | |
4857 | ||
4858 | /* | |
4859 | * We don't have any reason to believe that the request has to be denied at this point, | |
4860 | * so go ahead and allow it. | |
4861 | */ | |
4862 | *resultp = 0; | |
4863 | KAUTH_DEBUG("%p ALLOWED - bypassing access check for non-local filesystem", vp); | |
4864 | return(1); | |
4865 | } | |
4866 | ||
/*
 * Default kauth listener for the vnode scope: authorizes the requested
 * rights (encoded in 'action') against a vnode, and against its parent
 * directory when KAUTH_VNODE_DELETE is requested.
 *
 * arg0: vfs_context_t for the request (authorization uses its credential,
 *       not the passed 'unused_cred')
 * arg1: the vnode being authorized
 * arg2: the parent directory vnode (required for DELETE, otherwise ignored)
 * arg3: int * through which an errno is returned on denial
 *
 * Returns KAUTH_RESULT_ALLOW or KAUTH_RESULT_DENY; on deny, *errorp is set.
 */
static int
vnode_authorize_callback(__unused kauth_cred_t unused_cred, __unused void *idata, kauth_action_t action,
    uintptr_t arg0, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3)
{
	struct _vnode_authorize_context auth_context;
	vauth_ctx		vcp;
	vfs_context_t		ctx;
	vnode_t			vp, dvp;
	kauth_cred_t		cred;
	kauth_ace_rights_t	rights;
	struct vnode_attr	va, dva;
	int			result;
	int			*errorp;
	int			noimmutable;

	/* unpack the listener arguments into the local authorization context */
	vcp = &auth_context;
	ctx = vcp->ctx = (vfs_context_t)arg0;
	vp = vcp->vp = (vnode_t)arg1;
	dvp = vcp->dvp = (vnode_t)arg2;
	errorp = (int *)arg3;
	/* note that we authorize against the context, not the passed cred (the same thing anyway) */
	cred = ctx->vc_ucred;

	VATTR_INIT(&va);
	vcp->vap = &va;
	VATTR_INIT(&dva);
	vcp->dvap = &dva;

	/* no cached ownership/membership results yet */
	vcp->flags = vcp->flags_valid = 0;

#if DIAGNOSTIC
	if ((ctx == NULL) || (vp == NULL) || (cred == NULL))
		panic("vnode_authorize: bad arguments (context %p  vp %p  cred %p)", ctx, vp, cred);
#endif

	KAUTH_DEBUG("%p AUTH - %s %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s on %s '%s' (0x%x:%p/%p)",
	    vp, vfs_context_proc(ctx)->p_comm,
	    (action & KAUTH_VNODE_ACCESS)		? "access" : "auth",
	    (action & KAUTH_VNODE_READ_DATA)		? vnode_isdir(vp) ? " LIST_DIRECTORY" : " READ_DATA" : "",
	    (action & KAUTH_VNODE_WRITE_DATA)		? vnode_isdir(vp) ? " ADD_FILE" : " WRITE_DATA" : "",
	    (action & KAUTH_VNODE_EXECUTE)		? vnode_isdir(vp) ? " SEARCH" : " EXECUTE" : "",
	    (action & KAUTH_VNODE_DELETE)		? " DELETE" : "",
	    (action & KAUTH_VNODE_APPEND_DATA)		? vnode_isdir(vp) ? " ADD_SUBDIRECTORY" : " APPEND_DATA" : "",
	    (action & KAUTH_VNODE_DELETE_CHILD)		? " DELETE_CHILD" : "",
	    (action & KAUTH_VNODE_READ_ATTRIBUTES)	? " READ_ATTRIBUTES" : "",
	    (action & KAUTH_VNODE_WRITE_ATTRIBUTES)	? " WRITE_ATTRIBUTES" : "",
	    (action & KAUTH_VNODE_READ_EXTATTRIBUTES)	? " READ_EXTATTRIBUTES" : "",
	    (action & KAUTH_VNODE_WRITE_EXTATTRIBUTES)	? " WRITE_EXTATTRIBUTES" : "",
	    (action & KAUTH_VNODE_READ_SECURITY)	? " READ_SECURITY" : "",
	    (action & KAUTH_VNODE_WRITE_SECURITY)	? " WRITE_SECURITY" : "",
	    (action & KAUTH_VNODE_CHANGE_OWNER)		? " CHANGE_OWNER" : "",
	    (action & KAUTH_VNODE_NOIMMUTABLE)		? " (noimmutable)" : "",
	    vnode_isdir(vp) ? "directory" : "file",
	    vp->v_name ? vp->v_name : "<NULL>", action, vp, dvp);

	/*
	 * Extract the control bits from the action, everything else is
	 * requested rights.
	 */
	noimmutable = (action & KAUTH_VNODE_NOIMMUTABLE) ? 1 : 0;
	rights = action & ~(KAUTH_VNODE_ACCESS | KAUTH_VNODE_NOIMMUTABLE);

	/* DELETE requires a parent directory; otherwise the parent is not used */
	if (rights & KAUTH_VNODE_DELETE) {
#if DIAGNOSTIC
		if (dvp == NULL)
			panic("vnode_authorize: KAUTH_VNODE_DELETE test requires a directory");
#endif
	} else {
		dvp = NULL;
	}

	/*
	 * Check for read-only filesystems.  Write rights are refused for
	 * regular files, directories, links and complex files, and for any
	 * delete request; other vnode types (devices, sockets, fifos) may
	 * still be written as they do not modify filesystem data.
	 */
	if ((rights & KAUTH_VNODE_WRITE_RIGHTS) &&
	    (vp->v_mount->mnt_flag & MNT_RDONLY) &&
	    ((vp->v_type == VREG) || (vp->v_type == VDIR) ||
	     (vp->v_type == VLNK) || (vp->v_type == VCPLX) ||
	     (rights & KAUTH_VNODE_DELETE) || (rights & KAUTH_VNODE_DELETE_CHILD))) {
		result = EROFS;
		goto out;
	}

	/*
	 * Check for noexec filesystems.
	 */
	if ((rights & KAUTH_VNODE_EXECUTE) && vnode_isreg(vp) && (vp->v_mount->mnt_flag & MNT_NOEXEC)) {
		result = EACCES;
		goto out;
	}

	/*
	 * Handle cases related to filesystems with non-local enforcement.
	 * This call can return 0, in which case we will fall through to perform a
	 * check based on VNOP_GETATTR data.  Otherwise it returns 1 and sets
	 * an appropriate result, at which point we can return immediately.
	 */
	if (vfs_authopaque(vp->v_mount) && vnode_authorize_opaque(vp, &result, action, ctx))
		goto out;

	/*
	 * Get vnode attributes and extended security information for the vnode
	 * and directory if required.
	 */
	VATTR_WANTED(&va, va_mode);
	VATTR_WANTED(&va, va_uid);
	VATTR_WANTED(&va, va_gid);
	VATTR_WANTED(&va, va_flags);
	VATTR_WANTED(&va, va_acl);
	if ((result = vnode_getattr(vp, &va, ctx)) != 0) {
		KAUTH_DEBUG("%p    ERROR - failed to get vnode attributes - %d", vp, result);
		goto out;
	}
	if (dvp) {
		VATTR_WANTED(&dva, va_mode);
		VATTR_WANTED(&dva, va_uid);
		VATTR_WANTED(&dva, va_gid);
		VATTR_WANTED(&dva, va_flags);
		VATTR_WANTED(&dva, va_acl);
		if ((result = vnode_getattr(dvp, &dva, ctx)) != 0) {
			KAUTH_DEBUG("%p    ERROR - failed to get directory vnode attributes - %d", vp, result);
			goto out;
		}
	}

	/*
	 * If the vnode is an extended attribute data vnode (eg. a resource fork), *_DATA becomes
	 * *_EXTATTRIBUTES.
	 */
	if (S_ISXATTR(va.va_mode)) {
		if (rights & KAUTH_VNODE_READ_DATA) {
			rights &= ~KAUTH_VNODE_READ_DATA;
			rights |= KAUTH_VNODE_READ_EXTATTRIBUTES;
		}
		if (rights & KAUTH_VNODE_WRITE_DATA) {
			rights &= ~KAUTH_VNODE_WRITE_DATA;
			rights |= KAUTH_VNODE_WRITE_EXTATTRIBUTES;
		}
	}

	/*
	 * Check for immutability.
	 *
	 * In the deletion case, parent directory immutability vetoes specific
	 * file rights.
	 */
	if ((result = vnode_authorize_checkimmutable(vp, &va, rights, noimmutable)) != 0)
		goto out;
	if ((rights & KAUTH_VNODE_DELETE) &&
	    ((result = vnode_authorize_checkimmutable(dvp, &dva, KAUTH_VNODE_DELETE_CHILD, 0)) != 0))
		goto out;

	/*
	 * Clear rights that have been authorized by reaching this point, bail if nothing left to
	 * check.
	 */
	rights &= ~(KAUTH_VNODE_LINKTARGET | KAUTH_VNODE_CHECKIMMUTABLE);
	if (rights == 0)
		goto out;

	/*
	 * If we're not the superuser, authorize based on file properties.
	 */
	if (!vfs_context_issuser(ctx)) {
		/* process delete rights */
		if ((rights & KAUTH_VNODE_DELETE) &&
		    ((result = vnode_authorize_delete(vcp)) != 0))
			goto out;

		/* process remaining rights */
		if ((rights & ~KAUTH_VNODE_DELETE) &&
		    ((result = vnode_authorize_simple(vcp, rights, rights & KAUTH_VNODE_DELETE)) != 0))
			goto out;
	} else {

		/*
		 * Execute is only granted to root if one of the x bits is set.  This check only
		 * makes sense if the posix mode bits are actually supported.
		 */
		if ((rights & KAUTH_VNODE_EXECUTE) &&
		    (vp->v_type == VREG) &&
		    VATTR_IS_SUPPORTED(&va, va_mode) &&
		    !(va.va_mode & (S_IXUSR | S_IXGRP | S_IXOTH))) {
			result = EPERM;
			KAUTH_DEBUG("%p    DENIED - root execute requires at least one x bit in 0x%x", vp, va.va_mode);
			goto out;
		}

		KAUTH_DEBUG("%p    ALLOWED - caller is superuser", vp);
	}

out:
	/* release any ACLs handed back by vnode_getattr() */
	if (VATTR_IS_SUPPORTED(&va, va_acl) && (va.va_acl != NULL))
		kauth_acl_free(va.va_acl);
	if (VATTR_IS_SUPPORTED(&dva, va_acl) && (dva.va_acl != NULL))
		kauth_acl_free(dva.va_acl);
	if (result) {
		*errorp = result;
		KAUTH_DEBUG("%p    DENIED - auth denied", vp);
		return(KAUTH_RESULT_DENY);
	}

	/*
	 * Note that this implies that we will allow requests for no rights, as well as
	 * for rights that we do not recognise.  There should be none of these.
	 */
	KAUTH_DEBUG("%p    ALLOWED - auth granted", vp);
	return(KAUTH_RESULT_ALLOW);
}
5076 | ||
5077 | /* | |
5078 | * Check that the attribute information in vattr can be legally applied to | |
5079 | * a new file by the context. | |
5080 | */ | |
/*
 * Check that the attribute information in vattr can be legally applied to
 * a new file by the context.
 *
 * Defaults the owner, group, flags, mode and create time into 'vap' where
 * the caller did not supply them, then (unless 'noauth' is set or the
 * filesystem is opaque) validates the resulting attributes against the
 * caller's credential.
 *
 * dvp:    parent directory of the object being created
 * vap:    attributes to validate; modified in place by defaulting
 * noauth: nonzero when the work is being done for the kernel, which
 *         bypasses the non-superuser legality checks
 *
 * Returns 0 on success or an errno (EINVAL, EPERM, or an error from the
 * membership/getattr calls).
 */
int
vnode_authattr_new(vnode_t dvp, struct vnode_attr *vap, int noauth, vfs_context_t ctx)
{
	int		error;
	int		is_suser, ismember, defaulted_owner, defaulted_group, defaulted_mode;
	kauth_cred_t	cred;
	guid_t		changer;
	mount_t		dmp;

	error = 0;
	defaulted_owner = defaulted_group = defaulted_mode = 0;

	/*
	 * Require that the filesystem support extended security to apply any.
	 */
	if (!vfs_extendedsecurity(dvp->v_mount) &&
	    (VATTR_IS_ACTIVE(vap, va_acl) || VATTR_IS_ACTIVE(vap, va_uuuid) || VATTR_IS_ACTIVE(vap, va_guuid))) {
		error = EINVAL;
		goto out;
	}

	/*
	 * Default some fields.
	 */
	dmp = dvp->v_mount;

	/*
	 * If the filesystem is mounted IGNORE_OWNERSHIP and an explicit owner is set, that
	 * owner takes ownership of all new files.
	 */
	if ((dmp->mnt_flag & MNT_IGNORE_OWNERSHIP) && (dmp->mnt_fsowner != KAUTH_UID_NONE)) {
		VATTR_SET(vap, va_uid, dmp->mnt_fsowner);
		defaulted_owner = 1;
	} else {
		if (!VATTR_IS_ACTIVE(vap, va_uid)) {
			/* default owner is current user */
			VATTR_SET(vap, va_uid, kauth_cred_getuid(vfs_context_ucred(ctx)));
			defaulted_owner = 1;
		}
	}

	/*
	 * If the filesystem is mounted IGNORE_OWNERSHIP and an explicit group is set, that
	 * group takes ownership of all new files.
	 */
	if ((dmp->mnt_flag & MNT_IGNORE_OWNERSHIP) && (dmp->mnt_fsgroup != KAUTH_GID_NONE)) {
		VATTR_SET(vap, va_gid, dmp->mnt_fsgroup);
		defaulted_group = 1;
	} else {
		if (!VATTR_IS_ACTIVE(vap, va_gid)) {
			/* default group comes from parent object, fallback to current user */
			struct vnode_attr dva;
			VATTR_INIT(&dva);
			VATTR_WANTED(&dva, va_gid);
			if ((error = vnode_getattr(dvp, &dva, ctx)) != 0)
				goto out;
			if (VATTR_IS_SUPPORTED(&dva, va_gid)) {
				VATTR_SET(vap, va_gid, dva.va_gid);
			} else {
				VATTR_SET(vap, va_gid, kauth_cred_getgid(vfs_context_ucred(ctx)));
			}
			defaulted_group = 1;
		}
	}

	/* default to no flags set */
	if (!VATTR_IS_ACTIVE(vap, va_flags))
		VATTR_SET(vap, va_flags, 0);

	/* default mode is everything, masked with current umask */
	if (!VATTR_IS_ACTIVE(vap, va_mode)) {
		VATTR_SET(vap, va_mode, ACCESSPERMS & ~vfs_context_proc(ctx)->p_fd->fd_cmask);
		KAUTH_DEBUG("ATTR - defaulting new file mode to %o from umask %o", vap->va_mode, vfs_context_proc(ctx)->p_fd->fd_cmask);
		defaulted_mode = 1;
	}
	/* set timestamps to now */
	if (!VATTR_IS_ACTIVE(vap, va_create_time)) {
		nanotime(&vap->va_create_time);
		VATTR_SET_ACTIVE(vap, va_create_time);
	}

	/*
	 * Check for attempts to set nonsensical fields.
	 */
	if (vap->va_active & ~VNODE_ATTR_NEWOBJ) {
		error = EINVAL;
		KAUTH_DEBUG("ATTR - ERROR - attempt to set unsupported new-file attributes %llx",
		    vap->va_active & ~VNODE_ATTR_NEWOBJ);
		goto out;
	}

	/*
	 * Quickly check for the applicability of any enforcement here.
	 * Tests below maintain the integrity of the local security model.
	 */
	if (vfs_authopaque(vnode_mount(dvp)))
		goto out;

	/*
	 * We need to know if the caller is the superuser, or if the work is
	 * otherwise already authorised.
	 */
	cred = vfs_context_ucred(ctx);
	if (noauth) {
		/* doing work for the kernel */
		is_suser = 1;
	} else {
		is_suser = vfs_context_issuser(ctx);
	}


	/* validate that only settable flags are being requested */
	if (VATTR_IS_ACTIVE(vap, va_flags)) {
		if (is_suser) {
			if ((vap->va_flags & (UF_SETTABLE | SF_SETTABLE)) != vap->va_flags) {
				error = EPERM;
				KAUTH_DEBUG("  DENIED - superuser attempt to set illegal flag(s)");
				goto out;
			}
		} else {
			/* non-superusers may not set the SF_* (super-user) flags */
			if ((vap->va_flags & UF_SETTABLE) != vap->va_flags) {
				error = EPERM;
				KAUTH_DEBUG("  DENIED - user attempt to set illegal flag(s)");
				goto out;
			}
		}
	}

	/* if not superuser, validate legality of new-item attributes */
	if (!is_suser) {
		if (!defaulted_mode && VATTR_IS_ACTIVE(vap, va_mode)) {
			/* setgid? caller must be a member of the target group */
			if (vap->va_mode & S_ISGID) {
				if ((error = kauth_cred_ismember_gid(cred, vap->va_gid, &ismember)) != 0) {
					KAUTH_DEBUG("ATTR - ERROR: got %d checking for membership in %d", error, vap->va_gid);
					goto out;
				}
				if (!ismember) {
					KAUTH_DEBUG("  DENIED - can't set SGID bit, not a member of %d", vap->va_gid);
					error = EPERM;
					goto out;
				}
			}

			/* setuid? only permitted when the new file will be owned by the caller */
			if ((vap->va_mode & S_ISUID) && (vap->va_uid != kauth_cred_getuid(cred))) {
				KAUTH_DEBUG("ATTR - ERROR: illegal attempt to set the setuid bit");
				error = EPERM;
				goto out;
			}
		}
		/* an explicitly-requested owner must be the caller */
		if (!defaulted_owner && (vap->va_uid != kauth_cred_getuid(cred))) {
			KAUTH_DEBUG("  DENIED - cannot create new item owned by %d", vap->va_uid);
			error = EPERM;
			goto out;
		}
		/* an explicitly-requested group must be one the caller belongs to */
		if (!defaulted_group) {
			if ((error = kauth_cred_ismember_gid(cred, vap->va_gid, &ismember)) != 0) {
				KAUTH_DEBUG("  ERROR - got %d checking for membership in %d", error, vap->va_gid);
				goto out;
			}
			if (!ismember) {
				KAUTH_DEBUG("  DENIED - cannot create new item with group %d - not a member", vap->va_gid);
				error = EPERM;
				goto out;
			}
		}

		/* initialising owner/group UUID */
		if (VATTR_IS_ACTIVE(vap, va_uuuid)) {
			/* the supplied owner UUID must match the caller's own GUID */
			if ((error = kauth_cred_getguid(cred, &changer)) != 0) {
				KAUTH_DEBUG("  ERROR - got %d trying to get caller UUID", error);
				/* XXX ENOENT here - no GUID - should perhaps become EPERM */
				goto out;
			}
			if (!kauth_guid_equal(&vap->va_uuuid, &changer)) {
				KAUTH_DEBUG("  ERROR - cannot create item with supplied owner UUID - not us");
				error = EPERM;
				goto out;
			}
		}
		if (VATTR_IS_ACTIVE(vap, va_guuid)) {
			/* the supplied group UUID must be one the caller is a member of */
			if ((error = kauth_cred_ismember_guid(cred, &vap->va_guuid, &ismember)) != 0) {
				KAUTH_DEBUG("  ERROR - got %d trying to check group membership", error);
				goto out;
			}
			if (!ismember) {
				KAUTH_DEBUG("  ERROR - cannot create item with supplied group UUID - not a member");
				error = EPERM;
				goto out;
			}
		}
	}
out:
	return(error);
}
5275 | ||
5276 | /* | |
5277 | * Check that the attribute information in vap can be legally written by the context. | |
5278 | * | |
5279 | * Call this when you're not sure about the vnode_attr; either its contents have come | |
5280 | * from an unknown source, or when they are variable. | |
5281 | * | |
5282 | * Returns errno, or zero and sets *actionp to the KAUTH_VNODE_* actions that | |
5283 | * must be authorized to be permitted to write the vattr. | |
5284 | */ | |
5285 | int | |
5286 | vnode_authattr(vnode_t vp, struct vnode_attr *vap, kauth_action_t *actionp, vfs_context_t ctx) | |
5287 | { | |
5288 | struct vnode_attr ova; | |
5289 | kauth_action_t required_action; | |
5290 | int error, is_suser, ismember, chowner, chgroup; | |
5291 | guid_t changer; | |
5292 | gid_t group; | |
5293 | uid_t owner; | |
5294 | mode_t newmode; | |
5295 | kauth_cred_t cred; | |
5296 | uint32_t fdelta; | |
5297 | ||
5298 | VATTR_INIT(&ova); | |
5299 | required_action = 0; | |
5300 | error = 0; | |
5301 | ||
5302 | /* | |
5303 | * Quickly check for enforcement applicability. | |
5304 | */ | |
5305 | if (vfs_authopaque(vnode_mount(vp))) | |
5306 | goto out; | |
5307 | ||
5308 | /* | |
5309 | * Check for attempts to set nonsensical fields. | |
5310 | */ | |
5311 | if (vap->va_active & VNODE_ATTR_RDONLY) { | |
5312 | KAUTH_DEBUG("ATTR - ERROR: attempt to set readonly attribute(s)"); | |
5313 | error = EINVAL; | |
5314 | goto out; | |
5315 | } | |
5316 | ||
5317 | /* | |
5318 | * We need to know if the caller is the superuser. | |
5319 | */ | |
5320 | cred = vfs_context_ucred(ctx); | |
5321 | is_suser = kauth_cred_issuser(cred); | |
5322 | ||
5323 | /* | |
5324 | * If any of the following are changing, we need information from the old file: | |
5325 | * va_uid | |
5326 | * va_gid | |
5327 | * va_mode | |
5328 | * va_uuuid | |
5329 | * va_guuid | |
5330 | */ | |
5331 | if (VATTR_IS_ACTIVE(vap, va_uid) || | |
5332 | VATTR_IS_ACTIVE(vap, va_gid) || | |
5333 | VATTR_IS_ACTIVE(vap, va_mode) || | |
5334 | VATTR_IS_ACTIVE(vap, va_uuuid) || | |
5335 | VATTR_IS_ACTIVE(vap, va_guuid)) { | |
5336 | VATTR_WANTED(&ova, va_mode); | |
5337 | VATTR_WANTED(&ova, va_uid); | |
5338 | VATTR_WANTED(&ova, va_gid); | |
5339 | VATTR_WANTED(&ova, va_uuuid); | |
5340 | VATTR_WANTED(&ova, va_guuid); | |
5341 | KAUTH_DEBUG("ATTR - security information changing, fetching existing attributes"); | |
5342 | } | |
5343 | ||
5344 | /* | |
5345 | * If timestamps are being changed, we need to know who the file is owned | |
5346 | * by. | |
5347 | */ | |
5348 | if (VATTR_IS_ACTIVE(vap, va_create_time) || | |
5349 | VATTR_IS_ACTIVE(vap, va_change_time) || | |
5350 | VATTR_IS_ACTIVE(vap, va_modify_time) || | |
5351 | VATTR_IS_ACTIVE(vap, va_access_time) || | |
5352 | VATTR_IS_ACTIVE(vap, va_backup_time)) { | |
5353 | ||
5354 | VATTR_WANTED(&ova, va_uid); | |
5355 | #if 0 /* enable this when we support UUIDs as official owners */ | |
5356 | VATTR_WANTED(&ova, va_uuuid); | |
5357 | #endif | |
5358 | KAUTH_DEBUG("ATTR - timestamps changing, fetching uid and GUID"); | |
5359 | } | |
5360 | ||
5361 | /* | |
5362 | * If flags are being changed, we need the old flags. | |
5363 | */ | |
5364 | if (VATTR_IS_ACTIVE(vap, va_flags)) { | |
5365 | KAUTH_DEBUG("ATTR - flags changing, fetching old flags"); | |
5366 | VATTR_WANTED(&ova, va_flags); | |
5367 | } | |
5368 | ||
5369 | /* | |
5370 | * If the size is being set, make sure it's not a directory. | |
5371 | */ | |
5372 | if (VATTR_IS_ACTIVE(vap, va_data_size)) { | |
5373 | /* size is meaningless on a directory, don't permit this */ | |
5374 | if (vnode_isdir(vp)) { | |
5375 | KAUTH_DEBUG("ATTR - ERROR: size change requested on a directory"); | |
5376 | error = EISDIR; | |
5377 | goto out; | |
5378 | } | |
5379 | } | |
5380 | ||
5381 | /* | |
5382 | * Get old data. | |
5383 | */ | |
5384 | KAUTH_DEBUG("ATTR - fetching old attributes %016llx", ova.va_active); | |
5385 | if ((error = vnode_getattr(vp, &ova, ctx)) != 0) { | |
5386 | KAUTH_DEBUG(" ERROR - got %d trying to get attributes", error); | |
5387 | goto out; | |
5388 | } | |
5389 | ||
5390 | /* | |
5391 | * Size changes require write access to the file data. | |
5392 | */ | |
5393 | if (VATTR_IS_ACTIVE(vap, va_data_size)) { | |
5394 | /* if we can't get the size, or it's different, we need write access */ | |
5395 | KAUTH_DEBUG("ATTR - size change, requiring WRITE_DATA"); | |
5396 | required_action |= KAUTH_VNODE_WRITE_DATA; | |
5397 | } | |
5398 | ||
5399 | /* | |
5400 | * Changing timestamps? | |
5401 | * | |
5402 | * Note that we are only called to authorize user-requested time changes; | |
5403 | * side-effect time changes are not authorized. Authorisation is only | |
5404 | * required for existing files. | |
5405 | * | |
5406 | * Non-owners are not permitted to change the time on an existing | |
5407 | * file to anything other than the current time. | |
5408 | */ | |
5409 | if (VATTR_IS_ACTIVE(vap, va_create_time) || | |
5410 | VATTR_IS_ACTIVE(vap, va_change_time) || | |
5411 | VATTR_IS_ACTIVE(vap, va_modify_time) || | |
5412 | VATTR_IS_ACTIVE(vap, va_access_time) || | |
5413 | VATTR_IS_ACTIVE(vap, va_backup_time)) { | |
5414 | /* | |
5415 | * The owner and root may set any timestamps they like, | |
5416 | * provided that the file is not immutable. The owner still needs | |
5417 | * WRITE_ATTRIBUTES (implied by ownership but still deniable). | |
5418 | */ | |
5419 | if (is_suser || vauth_node_owner(&ova, cred)) { | |
5420 | KAUTH_DEBUG("ATTR - root or owner changing timestamps"); | |
5421 | required_action |= KAUTH_VNODE_CHECKIMMUTABLE | KAUTH_VNODE_WRITE_ATTRIBUTES; | |
5422 | } else { | |
5423 | /* just setting the current time? */ | |
5424 | if (vap->va_vaflags & VA_UTIMES_NULL) { | |
5425 | KAUTH_DEBUG("ATTR - non-root/owner changing timestamps, requiring WRITE_ATTRIBUTES"); | |
5426 | required_action |= KAUTH_VNODE_WRITE_ATTRIBUTES; | |
5427 | } else { | |
5428 | KAUTH_DEBUG("ATTR - ERROR: illegal timestamp modification attempted"); | |
5429 | error = EACCES; | |
5430 | goto out; | |
5431 | } | |
5432 | } | |
5433 | } | |
5434 | ||
5435 | /* | |
5436 | * Changing file mode? | |
5437 | */ | |
5438 | if (VATTR_IS_ACTIVE(vap, va_mode) && VATTR_IS_SUPPORTED(&ova, va_mode) && (ova.va_mode != vap->va_mode)) { | |
5439 | KAUTH_DEBUG("ATTR - mode change from %06o to %06o", ova.va_mode, vap->va_mode); | |
5440 | ||
5441 | /* | |
5442 | * Mode changes always have the same basic auth requirements. | |
5443 | */ | |
5444 | if (is_suser) { | |
5445 | KAUTH_DEBUG("ATTR - superuser mode change, requiring immutability check"); | |
5446 | required_action |= KAUTH_VNODE_CHECKIMMUTABLE; | |
5447 | } else { | |
5448 | /* need WRITE_SECURITY */ | |
5449 | KAUTH_DEBUG("ATTR - non-superuser mode change, requiring WRITE_SECURITY"); | |
5450 | required_action |= KAUTH_VNODE_WRITE_SECURITY; | |
5451 | } | |
5452 | ||
5453 | /* | |
5454 | * Can't set the setgid bit if you're not in the group and not root. Have to have | |
5455 | * existing group information in the case we're not setting it right now. | |
5456 | */ | |
5457 | if (vap->va_mode & S_ISGID) { | |
5458 | required_action |= KAUTH_VNODE_CHECKIMMUTABLE; /* always required */ | |
5459 | if (!is_suser) { | |
5460 | if (VATTR_IS_ACTIVE(vap, va_gid)) { | |
5461 | group = vap->va_gid; | |
5462 | } else if (VATTR_IS_SUPPORTED(&ova, va_gid)) { | |
5463 | group = ova.va_gid; | |
5464 | } else { | |
5465 | KAUTH_DEBUG("ATTR - ERROR: setgid but no gid available"); | |
5466 | error = EINVAL; | |
5467 | goto out; | |
5468 | } | |
5469 | /* | |
5470 | * This might be too restrictive; WRITE_SECURITY might be implied by | |
5471 | * membership in this case, rather than being an additional requirement. | |
5472 | */ | |
5473 | if ((error = kauth_cred_ismember_gid(cred, group, &ismember)) != 0) { | |
5474 | KAUTH_DEBUG("ATTR - ERROR: got %d checking for membership in %d", error, vap->va_gid); | |
5475 | goto out; | |
5476 | } | |
5477 | if (!ismember) { | |
5478 | KAUTH_DEBUG(" DENIED - can't set SGID bit, not a member of %d", group); | |
5479 | error = EPERM; | |
5480 | goto out; | |
5481 | } | |
5482 | } | |
5483 | } | |
5484 | ||
5485 | /* | |
5486 | * Can't set the setuid bit unless you're root or the file's owner. | |
5487 | */ | |
5488 | if (vap->va_mode & S_ISUID) { | |
5489 | required_action |= KAUTH_VNODE_CHECKIMMUTABLE; /* always required */ | |
5490 | if (!is_suser) { | |
5491 | if (VATTR_IS_ACTIVE(vap, va_uid)) { | |
5492 | owner = vap->va_uid; | |
5493 | } else if (VATTR_IS_SUPPORTED(&ova, va_uid)) { | |
5494 | owner = ova.va_uid; | |
5495 | } else { | |
5496 | KAUTH_DEBUG("ATTR - ERROR: setuid but no uid available"); | |
5497 | error = EINVAL; | |
5498 | goto out; | |
5499 | } | |
5500 | if (owner != kauth_cred_getuid(cred)) { | |
5501 | /* | |
5502 | * We could allow this if WRITE_SECURITY is permitted, perhaps. | |
5503 | */ | |
5504 | KAUTH_DEBUG("ATTR - ERROR: illegal attempt to set the setuid bit"); | |
5505 | error = EPERM; | |
5506 | goto out; | |
5507 | } | |
5508 | } | |
5509 | } | |
5510 | } | |
5511 | ||
5512 | /* | |
5513 | * Validate/mask flags changes. This checks that only the flags in | |
5514 | * the UF_SETTABLE mask are being set, and preserves the flags in | |
5515 | * the SF_SETTABLE case. | |
5516 | * | |
5517 | * Since flags changes may be made in conjunction with other changes, | |
5518 | * we will ask the auth code to ignore immutability in the case that | |
5519 | * the SF_* flags are not set and we are only manipulating the file flags. | |
5520 | * | |
5521 | */ | |
5522 | if (VATTR_IS_ACTIVE(vap, va_flags)) { | |
5523 | /* compute changing flags bits */ | |
5524 | if (VATTR_IS_SUPPORTED(&ova, va_flags)) { | |
5525 | fdelta = vap->va_flags ^ ova.va_flags; | |
5526 | } else { | |
5527 | fdelta = vap->va_flags; | |
5528 | } | |
5529 | ||
5530 | if (fdelta != 0) { | |
5531 | KAUTH_DEBUG("ATTR - flags changing, requiring WRITE_SECURITY"); | |
5532 | required_action |= KAUTH_VNODE_WRITE_SECURITY; | |
5533 | ||
5534 | /* check that changing bits are legal */ | |
5535 | if (is_suser) { | |
5536 | /* | |
5537 | * The immutability check will prevent us from clearing the SF_* | |
5538 | * flags unless the system securelevel permits it, so just check | |
5539 | * for legal flags here. | |
5540 | */ | |
5541 | if (fdelta & ~(UF_SETTABLE | SF_SETTABLE)) { | |
5542 | error = EPERM; | |
5543 | KAUTH_DEBUG(" DENIED - superuser attempt to set illegal flag(s)"); | |
5544 | goto out; | |
5545 | } | |
5546 | } else { | |
5547 | if (fdelta & ~UF_SETTABLE) { | |
5548 | error = EPERM; | |
5549 | KAUTH_DEBUG(" DENIED - user attempt to set illegal flag(s)"); | |
5550 | goto out; | |
5551 | } | |
5552 | } | |
5553 | /* | |
5554 | * If the caller has the ability to manipulate file flags, | |
5555 | * security is not reduced by ignoring them for this operation. | |
5556 | * | |
5557 | * A more complete test here would consider the 'after' states of the flags | |
5558 | * to determine whether it would permit the operation, but this becomes | |
5559 | * very complex. | |
5560 | * | |
5561 | * Ignoring immutability is conditional on securelevel; this does not bypass | |
5562 | * the SF_* flags if securelevel > 0. | |
5563 | */ | |
5564 | required_action |= KAUTH_VNODE_NOIMMUTABLE; | |
5565 | } | |
5566 | } | |
5567 | ||
5568 | /* | |
5569 | * Validate ownership information. | |
5570 | */ | |
5571 | chowner = 0; | |
5572 | chgroup = 0; | |
5573 | ||
5574 | /* | |
5575 | * uid changing | |
5576 | * Note that if the filesystem didn't give us a UID, we expect that it doesn't | |
5577 | * support them in general, and will ignore it if/when we try to set it. | |
5578 | * We might want to clear the uid out of vap completely here. | |
5579 | */ | |
5580 | if (VATTR_IS_ACTIVE(vap, va_uid) && VATTR_IS_SUPPORTED(&ova, va_uid) && (vap->va_uid != ova.va_uid)) { | |
5581 | if (!is_suser && (kauth_cred_getuid(cred) != vap->va_uid)) { | |
5582 | KAUTH_DEBUG(" DENIED - non-superuser cannot change ownershipt to a third party"); | |
5583 | error = EPERM; | |
5584 | goto out; | |
5585 | } | |
5586 | chowner = 1; | |
5587 | } | |
5588 | ||
5589 | /* | |
5590 | * gid changing | |
5591 | * Note that if the filesystem didn't give us a GID, we expect that it doesn't | |
5592 | * support them in general, and will ignore it if/when we try to set it. | |
5593 | * We might want to clear the gid out of vap completely here. | |
5594 | */ | |
5595 | if (VATTR_IS_ACTIVE(vap, va_gid) && VATTR_IS_SUPPORTED(&ova, va_gid) && (vap->va_gid != ova.va_gid)) { | |
5596 | if (!is_suser) { | |
5597 | if ((error = kauth_cred_ismember_gid(cred, vap->va_gid, &ismember)) != 0) { | |
5598 | KAUTH_DEBUG(" ERROR - got %d checking for membership in %d", error, vap->va_gid); | |
5599 | goto out; | |
5600 | } | |
5601 | if (!ismember) { | |
5602 | KAUTH_DEBUG(" DENIED - group change from %d to %d but not a member of target group", | |
5603 | ova.va_gid, vap->va_gid); | |
5604 | error = EPERM; | |
5605 | goto out; | |
5606 | } | |
5607 | } | |
5608 | chgroup = 1; | |
5609 | } | |
5610 | ||
5611 | /* | |
5612 | * Owner UUID being set or changed. | |
5613 | */ | |
5614 | if (VATTR_IS_ACTIVE(vap, va_uuuid)) { | |
5615 | /* if the owner UUID is not actually changing ... */ | |
5616 | if (VATTR_IS_SUPPORTED(&ova, va_uuuid) && kauth_guid_equal(&vap->va_uuuid, &ova.va_uuuid)) | |
5617 | goto no_uuuid_change; | |
5618 | ||
5619 | /* | |
5620 | * The owner UUID cannot be set by a non-superuser to anything other than | |
5621 | * their own. | |
5622 | */ | |
5623 | if (!is_suser) { | |
5624 | if ((error = kauth_cred_getguid(cred, &changer)) != 0) { | |
5625 | KAUTH_DEBUG(" ERROR - got %d trying to get caller UUID", error); | |
5626 | /* XXX ENOENT here - no UUID - should perhaps become EPERM */ | |
5627 | goto out; | |
5628 | } | |
5629 | if (!kauth_guid_equal(&vap->va_uuuid, &changer)) { | |
5630 | KAUTH_DEBUG(" ERROR - cannot set supplied owner UUID - not us"); | |
5631 | error = EPERM; | |
5632 | goto out; | |
5633 | } | |
5634 | } | |
5635 | chowner = 1; | |
5636 | } | |
5637 | no_uuuid_change: | |
5638 | /* | |
5639 | * Group UUID being set or changed. | |
5640 | */ | |
5641 | if (VATTR_IS_ACTIVE(vap, va_guuid)) { | |
5642 | /* if the group UUID is not actually changing ... */ | |
5643 | if (VATTR_IS_SUPPORTED(&ova, va_guuid) && kauth_guid_equal(&vap->va_guuid, &ova.va_guuid)) | |
5644 | goto no_guuid_change; | |
5645 | ||
5646 | /* | |
5647 | * The group UUID cannot be set by a non-superuser to anything other than | |
5648 | * one of which they are a member. | |
5649 | */ | |
5650 | if (!is_suser) { | |
5651 | if ((error = kauth_cred_ismember_guid(cred, &vap->va_guuid, &ismember)) != 0) { | |
5652 | KAUTH_DEBUG(" ERROR - got %d trying to check group membership", error); | |
5653 | goto out; | |
5654 | } | |
5655 | if (!ismember) { | |
5656 | KAUTH_DEBUG(" ERROR - cannot create item with supplied group UUID - not a member"); | |
5657 | error = EPERM; | |
5658 | goto out; | |
5659 | } | |
5660 | } | |
5661 | chgroup = 1; | |
5662 | } | |
5663 | no_guuid_change: | |
5664 | ||
5665 | /* | |
5666 | * Compute authorisation for group/ownership changes. | |
5667 | */ | |
5668 | if (chowner || chgroup) { | |
5669 | if (is_suser) { | |
5670 | KAUTH_DEBUG("ATTR - superuser changing file owner/group, requiring immutability check"); | |
5671 | required_action |= KAUTH_VNODE_CHECKIMMUTABLE; | |
5672 | } else { | |
5673 | if (chowner) { | |
5674 | KAUTH_DEBUG("ATTR - ownership change, requiring TAKE_OWNERSHIP"); | |
5675 | required_action |= KAUTH_VNODE_TAKE_OWNERSHIP; | |
5676 | } | |
5677 | if (chgroup && !chowner) { | |
5678 | KAUTH_DEBUG("ATTR - group change, requiring WRITE_SECURITY"); | |
5679 | required_action |= KAUTH_VNODE_WRITE_SECURITY; | |
5680 | } | |
5681 | ||
5682 | /* clear set-uid and set-gid bits as required by Posix */ | |
5683 | if (VATTR_IS_ACTIVE(vap, va_mode)) { | |
5684 | newmode = vap->va_mode; | |
5685 | } else if (VATTR_IS_SUPPORTED(&ova, va_mode)) { | |
5686 | newmode = ova.va_mode; | |
5687 | } else { | |
5688 | KAUTH_DEBUG("CHOWN - trying to change owner but cannot get mode from filesystem to mask setugid bits"); | |
5689 | newmode = 0; | |
5690 | } | |
5691 | if (newmode & (S_ISUID | S_ISGID)) { | |
5692 | VATTR_SET(vap, va_mode, newmode & ~(S_ISUID | S_ISGID)); | |
5693 | KAUTH_DEBUG("CHOWN - masking setugid bits from mode %o to %o", newmode, vap->va_mode); | |
5694 | } | |
5695 | } | |
5696 | } | |
5697 | ||
5698 | /* | |
5699 | * Authorise changes in the ACL. | |
5700 | */ | |
5701 | if (VATTR_IS_ACTIVE(vap, va_acl)) { | |
5702 | ||
5703 | /* no existing ACL */ | |
5704 | if (!VATTR_IS_ACTIVE(&ova, va_acl) || (ova.va_acl == NULL)) { | |
5705 | ||
5706 | /* adding an ACL */ | |
5707 | if (vap->va_acl != NULL) { | |
5708 | required_action |= KAUTH_VNODE_WRITE_SECURITY; | |
5709 | KAUTH_DEBUG("CHMOD - adding ACL"); | |
5710 | } | |
5711 | ||
5712 | /* removing an existing ACL */ | |
5713 | } else if (vap->va_acl == NULL) { | |
5714 | required_action |= KAUTH_VNODE_WRITE_SECURITY; | |
5715 | KAUTH_DEBUG("CHMOD - removing ACL"); | |
5716 | ||
5717 | /* updating an existing ACL */ | |
5718 | } else { | |
5719 | if (vap->va_acl->acl_entrycount != ova.va_acl->acl_entrycount) { | |
5720 | /* entry count changed, must be different */ | |
5721 | required_action |= KAUTH_VNODE_WRITE_SECURITY; | |
5722 | KAUTH_DEBUG("CHMOD - adding/removing ACL entries"); | |
5723 | } else if (vap->va_acl->acl_entrycount > 0) { | |
5724 | /* both ACLs have the same ACE count, said count is 1 or more, bitwise compare ACLs */ | |
5725 | if (!memcmp(&vap->va_acl->acl_ace[0], &ova.va_acl->acl_ace[0], | |
5726 | sizeof(struct kauth_ace) * vap->va_acl->acl_entrycount)) { | |
5727 | required_action |= KAUTH_VNODE_WRITE_SECURITY; | |
5728 | KAUTH_DEBUG("CHMOD - changing ACL entries"); | |
5729 | } | |
5730 | } | |
5731 | } | |
5732 | } | |
5733 | ||
5734 | /* | |
5735 | * Other attributes that require authorisation. | |
5736 | */ | |
5737 | if (VATTR_IS_ACTIVE(vap, va_encoding)) | |
5738 | required_action |= KAUTH_VNODE_WRITE_ATTRIBUTES; | |
5739 | ||
5740 | out: | |
5741 | if (VATTR_IS_SUPPORTED(&ova, va_acl) && (ova.va_acl != NULL)) | |
5742 | kauth_acl_free(ova.va_acl); | |
5743 | if (error == 0) | |
5744 | *actionp = required_action; | |
5745 | return(error); | |
5746 | } | |
5747 | ||
5748 | ||
5749 | void | |
5750 | vfs_setlocklocal(mount_t mp) | |
5751 | { | |
5752 | vnode_t vp; | |
5753 | ||
5754 | mount_lock(mp); | |
5755 | mp->mnt_kern_flag |= MNTK_LOCK_LOCAL; | |
5756 | ||
5757 | /* | |
5758 | * We do not expect anyone to be using any vnodes at the | |
5759 | * time this routine is called. So no need for vnode locking | |
5760 | */ | |
5761 | TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) { | |
5762 | vp->v_flag |= VLOCKLOCAL; | |
5763 | } | |
5764 | TAILQ_FOREACH(vp, &mp->mnt_workerqueue, v_mntvnodes) { | |
5765 | vp->v_flag |= VLOCKLOCAL; | |
5766 | } | |
5767 | TAILQ_FOREACH(vp, &mp->mnt_newvnodes, v_mntvnodes) { | |
5768 | vp->v_flag |= VLOCKLOCAL; | |
5769 | } | |
5770 | mount_unlock(mp); | |
5771 | } | |
5772 | ||
5773 | ||
5774 | #ifdef JOE_DEBUG | |
5775 | ||
5776 | record_vp(vnode_t vp, int count) { | |
5777 | struct uthread *ut; | |
5778 | int i; | |
5779 | ||
5780 | if ((vp->v_flag & VSYSTEM)) | |
5781 | return; | |
5782 | ||
5783 | ut = get_bsdthread_info(current_thread()); | |
5784 | ut->uu_iocount += count; | |
5785 | ||
5786 | if (ut->uu_vpindex < 32) { | |
5787 | for (i = 0; i < ut->uu_vpindex; i++) { | |
5788 | if (ut->uu_vps[i] == vp) | |
5789 | return; | |
5790 | } | |
5791 | ut->uu_vps[ut->uu_vpindex] = vp; | |
5792 | ut->uu_vpindex++; | |
5793 | } | |
5794 | } | |
5795 | #endif |