]>
Commit | Line | Data |
---|---|---|
1c79356b | 1 | /* |
91447636 | 2 | * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. |
1c79356b A |
3 | * |
4 | * @APPLE_LICENSE_HEADER_START@ | |
5 | * | |
ff6e181a A |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. Please obtain a copy of the License at | |
10 | * http://www.opensource.apple.com/apsl/ and read it before using this | |
11 | * file. | |
1c79356b | 12 | * |
ff6e181a A |
13 | * The Original Code and all software distributed under the License are |
14 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
1c79356b A |
15 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
16 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
ff6e181a A |
17 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
18 | * Please see the License for the specific language governing rights and | |
19 | * limitations under the License. | |
1c79356b A |
20 | * |
21 | * @APPLE_LICENSE_HEADER_END@ | |
22 | */ | |
23 | /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ | |
24 | /* | |
25 | * Copyright (c) 1989, 1993 | |
26 | * The Regents of the University of California. All rights reserved. | |
27 | * (c) UNIX System Laboratories, Inc. | |
28 | * All or some portions of this file are derived from material licensed | |
29 | * to the University of California by American Telephone and Telegraph | |
30 | * Co. or Unix System Laboratories, Inc. and are reproduced herein with | |
31 | * the permission of UNIX System Laboratories, Inc. | |
32 | * | |
33 | * Redistribution and use in source and binary forms, with or without | |
34 | * modification, are permitted provided that the following conditions | |
35 | * are met: | |
36 | * 1. Redistributions of source code must retain the above copyright | |
37 | * notice, this list of conditions and the following disclaimer. | |
38 | * 2. Redistributions in binary form must reproduce the above copyright | |
39 | * notice, this list of conditions and the following disclaimer in the | |
40 | * documentation and/or other materials provided with the distribution. | |
41 | * 3. All advertising materials mentioning features or use of this software | |
42 | * must display the following acknowledgement: | |
43 | * This product includes software developed by the University of | |
44 | * California, Berkeley and its contributors. | |
45 | * 4. Neither the name of the University nor the names of its contributors | |
46 | * may be used to endorse or promote products derived from this software | |
47 | * without specific prior written permission. | |
48 | * | |
49 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
50 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
51 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
52 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
53 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
54 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
55 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
56 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
57 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
58 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
59 | * SUCH DAMAGE. | |
60 | * | |
61 | * @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95 | |
62 | */ | |
63 | ||
64 | /* | |
65 | * External virtual filesystem routines | |
66 | */ | |
67 | ||
9bccf70c | 68 | #undef DIAGNOSTIC |
1c79356b A |
69 | #define DIAGNOSTIC 1 |
70 | ||
71 | #include <sys/param.h> | |
72 | #include <sys/systm.h> | |
91447636 A |
73 | #include <sys/proc_internal.h> |
74 | #include <sys/kauth.h> | |
75 | #include <sys/mount_internal.h> | |
1c79356b | 76 | #include <sys/time.h> |
91447636 A |
77 | #include <sys/lock.h> |
78 | #include <sys/vnode_internal.h> | |
1c79356b A |
79 | #include <sys/stat.h> |
80 | #include <sys/namei.h> | |
81 | #include <sys/ucred.h> | |
91447636 | 82 | #include <sys/buf_internal.h> |
1c79356b A |
83 | #include <sys/errno.h> |
84 | #include <sys/malloc.h> | |
85 | #include <sys/domain.h> | |
86 | #include <sys/mbuf.h> | |
87 | #include <sys/syslog.h> | |
91447636 | 88 | #include <sys/ubc_internal.h> |
1c79356b A |
89 | #include <sys/vm.h> |
90 | #include <sys/sysctl.h> | |
55e303ae A |
91 | #include <sys/filedesc.h> |
92 | #include <sys/event.h> | |
91447636 A |
93 | #include <sys/kdebug.h> |
94 | #include <sys/kauth.h> | |
95 | #include <sys/user.h> | |
96 | #include <miscfs/fifofs/fifo.h> | |
55e303ae A |
97 | |
98 | #include <string.h> | |
99 | #include <machine/spl.h> | |
100 | ||
1c79356b A |
101 | |
102 | #include <kern/assert.h> | |
103 | ||
104 | #include <miscfs/specfs/specdev.h> | |
105 | ||
0b4e3aa0 A |
106 | #include <mach/mach_types.h> |
107 | #include <mach/memory_object_types.h> | |
108 | ||
91447636 A |
109 | extern lck_grp_t *vnode_lck_grp; |
110 | extern lck_attr_t *vnode_lck_attr; | |
111 | ||
112 | ||
113 | extern lck_mtx_t * mnt_list_mtx_lock; | |
0b4e3aa0 | 114 | |
1c79356b A |
115 | enum vtype iftovt_tab[16] = { |
116 | VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, | |
117 | VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD, | |
118 | }; | |
119 | int vttoif_tab[9] = { | |
120 | 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, | |
121 | S_IFSOCK, S_IFIFO, S_IFMT, | |
122 | }; | |
123 | ||
91447636 A |
124 | extern int ubc_isinuse_locked(vnode_t, int, int); |
125 | extern kern_return_t adjust_vm_object_cache(vm_size_t oval, vm_size_t nval); | |
126 | ||
127 | static void vnode_list_add(vnode_t); | |
128 | static void vnode_list_remove(vnode_t); | |
129 | ||
130 | static errno_t vnode_drain(vnode_t); | |
131 | static void vgone(vnode_t); | |
132 | static void vclean(vnode_t vp, int flag, proc_t p); | |
133 | static void vnode_reclaim_internal(vnode_t, int, int); | |
134 | ||
135 | static void vnode_dropiocount (vnode_t, int); | |
136 | static errno_t vnode_getiocount(vnode_t vp, int locked, int vid, int vflags); | |
137 | static int vget_internal(vnode_t, int, int); | |
138 | ||
139 | static vnode_t checkalias(vnode_t vp, dev_t nvp_rdev); | |
140 | static int vnode_reload(vnode_t); | |
141 | static int vnode_isinuse_locked(vnode_t, int, int); | |
142 | ||
143 | static void insmntque(vnode_t vp, mount_t mp); | |
144 | mount_t mount_list_lookupby_fsid(fsid_t *, int, int); | |
145 | static int mount_getvfscnt(void); | |
146 | static int mount_fillfsids(fsid_t *, int ); | |
147 | static void vnode_iterate_setup(mount_t); | |
148 | static int vnode_umount_preflight(mount_t, vnode_t, int); | |
149 | static int vnode_iterate_prepare(mount_t); | |
150 | static int vnode_iterate_reloadq(mount_t); | |
151 | static void vnode_iterate_clear(mount_t); | |
1c79356b | 152 | |
1c79356b A |
153 | TAILQ_HEAD(freelst, vnode) vnode_free_list; /* vnode free list */ |
154 | TAILQ_HEAD(inactivelst, vnode) vnode_inactive_list; /* vnode inactive list */ | |
155 | struct mntlist mountlist; /* mounted filesystem list */ | |
91447636 | 156 | static int nummounts = 0; |
1c79356b A |
157 | |
#if DIAGNOSTIC
/*
 * Panic when the vnode's v_freelist linkage still carries the 0xdeadb
 * marker that VLISTNONE() writes, i.e. the vnode is not on a list.
 * Wrapped in do/while(0) so the macro is a single statement and cannot
 * capture a following "else".
 */
#define VLISTCHECK(fun, vp, list)					\
	do {								\
		if ((vp)->v_freelist.tqe_prev == (struct vnode **)0xdeadb) \
			panic("%s: %s vnode not on %slist", (fun), (list), (list)); \
	} while(0)

/*
 * Panic when the VUINACTIVE state of the vnode disagrees with `expected'.
 * `expected' is XORed with the ISSET() result, so callers pass either 0
 * or VUINACTIVE (presumably ISSET yields the masked bits -- confirm
 * against its definition).
 */
#define VINACTIVECHECK(fun, vp, expected)				\
	do {								\
		int vic_inactive = ISSET((vp)->v_flag, VUINACTIVE);	\
		if (vic_inactive ^ (expected))				\
			panic("%s: %sinactive vnode, expected %s", (fun), \
			      vic_inactive ? "" : "not ",		\
			      (expected) ? "inactive": "not inactive");	\
	} while(0)
#else
#define VLISTCHECK(fun, vp, list)
#define VINACTIVECHECK(fun, vp, expected)
#endif /* DIAGNOSTIC */

/* mark a vnode as being on no list (poison the list linkage) */
#define VLISTNONE(vp)							\
	do {								\
		(vp)->v_freelist.tqe_next = (struct vnode *)0;		\
		(vp)->v_freelist.tqe_prev = (struct vnode **)0xdeadb;	\
	} while(0)

/* true when the vnode's list linkage is not the VLISTNONE() poison value */
#define VONLIST(vp)							\
	((vp)->v_freelist.tqe_prev != (struct vnode **)0xdeadb)

/* remove a vnode from free vnode list */
#define VREMFREE(fun, vp)						\
	do {								\
		VLISTCHECK((fun), (vp), "free");			\
		TAILQ_REMOVE(&vnode_free_list, (vp), v_freelist);	\
		VLISTNONE((vp));					\
		freevnodes--;						\
	} while(0)

/* remove a vnode from inactive vnode list */
#define VREMINACTIVE(fun, vp)						\
	do {								\
		VLISTCHECK((fun), (vp), "inactive");			\
		VINACTIVECHECK((fun), (vp), VUINACTIVE);		\
		TAILQ_REMOVE(&vnode_inactive_list, (vp), v_freelist);	\
		CLR((vp)->v_flag, VUINACTIVE);				\
		VLISTNONE((vp));					\
		inactivevnodes--;					\
	} while(0)
204 | ||
1c79356b A |
205 | /* |
206 | * Have to declare first two locks as actual data even if !MACH_SLOCKS, since | |
207 | * pointers to them get passed around. | |
208 | */ | |
91447636 A |
209 | void * mntvnode_slock; |
210 | void * mntid_slock; | |
211 | void * spechash_slock; | |
1c79356b A |
212 | |
213 | /* | |
214 | * vnodetarget is the amount of vnodes we expect to get back | |
215 | * from the inactive vnode list and VM object cache. | |
216 | * As vnreclaim() is a mainly cpu bound operation for faster | |
217 | * processors this number could be higher. | |
218 | * Having this number too high introduces longer delays in | |
91447636 | 219 | * the execution of new_vnode(). |
1c79356b A |
220 | */ |
221 | unsigned long vnodetarget; /* target for vnreclaim() */ | |
222 | #define VNODE_FREE_TARGET 20 /* Default value for vnodetarget */ | |
223 | ||
224 | /* | |
225 | * We need quite a few vnodes on the free list to sustain the | |
226 | * rapid stat() the compilation process does, and still benefit from the name | |
227 | * cache. Having too few vnodes on the free list causes serious disk | |
228 | * thrashing as we cycle through them. | |
229 | */ | |
0b4e3aa0 | 230 | #define VNODE_FREE_MIN 300 /* freelist should have at least these many */ |
1c79356b A |
231 | |
232 | /* | |
233 | * We need to get vnodes back from the VM object cache when a certain # | |
234 | * of vnodes are reused from the freelist. This is essential for the | |
235 | * caching to be effective in the namecache and the buffer cache [for the | |
236 | * metadata]. | |
237 | */ | |
238 | #define VNODE_TOOMANY_REUSED (VNODE_FREE_MIN/4) | |
239 | ||
240 | /* | |
241 | * If we have enough vnodes on the freelist we do not want to reclaim | |
242 | * the vnodes from the VM object cache. | |
243 | */ | |
244 | #define VNODE_FREE_ENOUGH (VNODE_FREE_MIN + (VNODE_FREE_MIN/2)) | |
245 | ||
246 | /* | |
247 | * Initialize the vnode management data structures. | |
248 | */ | |
0b4e3aa0 | 249 | __private_extern__ void |
91447636 | 250 | vntblinit(void) |
1c79356b | 251 | { |
1c79356b | 252 | TAILQ_INIT(&vnode_free_list); |
1c79356b | 253 | TAILQ_INIT(&vnode_inactive_list); |
91447636 | 254 | TAILQ_INIT(&mountlist); |
1c79356b A |
255 | |
256 | if (!vnodetarget) | |
257 | vnodetarget = VNODE_FREE_TARGET; | |
258 | ||
259 | /* | |
260 | * Scale the vm_object_cache to accomodate the vnodes | |
261 | * we want to cache | |
262 | */ | |
263 | (void) adjust_vm_object_cache(0, desiredvnodes - VNODE_FREE_MIN); | |
264 | } | |
265 | ||
266 | /* Reset the VM Object Cache with the values passed in */ | |
0b4e3aa0 | 267 | __private_extern__ kern_return_t |
1c79356b A |
268 | reset_vmobjectcache(unsigned int val1, unsigned int val2) |
269 | { | |
270 | vm_size_t oval = val1 - VNODE_FREE_MIN; | |
9bccf70c A |
271 | vm_size_t nval; |
272 | ||
273 | if(val2 < VNODE_FREE_MIN) | |
274 | nval = 0; | |
275 | else | |
276 | nval = val2 - VNODE_FREE_MIN; | |
1c79356b A |
277 | |
278 | return(adjust_vm_object_cache(oval, nval)); | |
279 | } | |
280 | ||
91447636 A |
281 | |
282 | /* the timeout is in 10 msecs */ | |
1c79356b | 283 | int |
91447636 A |
284 | vnode_waitforwrites(vnode_t vp, int output_target, int slpflag, int slptimeout, char *msg) { |
285 | int error = 0; | |
286 | struct timespec ts; | |
1c79356b | 287 | |
91447636 A |
288 | KERNEL_DEBUG(0x3010280 | DBG_FUNC_START, (int)vp, output_target, vp->v_numoutput, 0, 0); |
289 | ||
290 | if (vp->v_numoutput > output_target) { | |
291 | ||
292 | slpflag &= ~PDROP; | |
293 | ||
294 | vnode_lock(vp); | |
295 | ||
296 | while ((vp->v_numoutput > output_target) && error == 0) { | |
297 | if (output_target) | |
298 | vp->v_flag |= VTHROTTLED; | |
299 | else | |
300 | vp->v_flag |= VBWAIT; | |
301 | ts.tv_sec = (slptimeout/100); | |
302 | ts.tv_nsec = (slptimeout % 1000) * 10 * NSEC_PER_USEC * 1000 ; | |
303 | error = msleep((caddr_t)&vp->v_numoutput, &vp->v_lock, (slpflag | (PRIBIO + 1)), msg, &ts); | |
304 | } | |
305 | vnode_unlock(vp); | |
1c79356b | 306 | } |
91447636 A |
307 | KERNEL_DEBUG(0x3010280 | DBG_FUNC_END, (int)vp, output_target, vp->v_numoutput, error, 0); |
308 | ||
309 | return error; | |
1c79356b A |
310 | } |
311 | ||
91447636 | 312 | |
1c79356b | 313 | void |
91447636 A |
314 | vnode_startwrite(vnode_t vp) { |
315 | ||
316 | OSAddAtomic(1, &vp->v_numoutput); | |
317 | } | |
318 | ||
319 | ||
320 | void | |
321 | vnode_writedone(vnode_t vp) | |
1c79356b | 322 | { |
91447636 A |
323 | if (vp) { |
324 | int need_wakeup = 0; | |
325 | ||
326 | OSAddAtomic(-1, &vp->v_numoutput); | |
327 | ||
328 | vnode_lock(vp); | |
1c79356b | 329 | |
91447636 A |
330 | if (vp->v_numoutput < 0) |
331 | panic("vnode_writedone: numoutput < 0"); | |
332 | ||
333 | if ((vp->v_flag & VTHROTTLED) && (vp->v_numoutput < (VNODE_ASYNC_THROTTLE / 3))) { | |
334 | vp->v_flag &= ~VTHROTTLED; | |
335 | need_wakeup = 1; | |
336 | } | |
337 | if ((vp->v_flag & VBWAIT) && (vp->v_numoutput == 0)) { | |
338 | vp->v_flag &= ~VBWAIT; | |
339 | need_wakeup = 1; | |
340 | } | |
341 | vnode_unlock(vp); | |
342 | ||
343 | if (need_wakeup) | |
344 | wakeup((caddr_t)&vp->v_numoutput); | |
345 | } | |
1c79356b A |
346 | } |
347 | ||
91447636 A |
348 | |
349 | ||
1c79356b | 350 | int |
91447636 | 351 | vnode_hasdirtyblks(vnode_t vp) |
1c79356b | 352 | { |
91447636 | 353 | struct cl_writebehind *wbp; |
1c79356b | 354 | |
91447636 A |
355 | /* |
356 | * Not taking the buf_mtxp as there is little | |
357 | * point doing it. Even if the lock is taken the | |
358 | * state can change right after that. If their | |
359 | * needs to be a synchronization, it must be driven | |
360 | * by the caller | |
361 | */ | |
362 | if (vp->v_dirtyblkhd.lh_first) | |
363 | return (1); | |
364 | ||
365 | if (!UBCINFOEXISTS(vp)) | |
366 | return (0); | |
0b4e3aa0 | 367 | |
91447636 A |
368 | wbp = vp->v_ubcinfo->cl_wbehind; |
369 | ||
370 | if (wbp && (wbp->cl_number || wbp->cl_scmap)) | |
371 | return (1); | |
0b4e3aa0 | 372 | |
1c79356b A |
373 | return (0); |
374 | } | |
375 | ||
1c79356b | 376 | int |
91447636 | 377 | vnode_hascleanblks(vnode_t vp) |
1c79356b | 378 | { |
91447636 A |
379 | /* |
380 | * Not taking the buf_mtxp as there is little | |
381 | * point doing it. Even if the lock is taken the | |
382 | * state can change right after that. If their | |
383 | * needs to be a synchronization, it must be driven | |
384 | * by the caller | |
385 | */ | |
386 | if (vp->v_cleanblkhd.lh_first) | |
387 | return (1); | |
388 | return (0); | |
389 | } | |
1c79356b | 390 | |
91447636 A |
391 | void |
392 | vnode_iterate_setup(mount_t mp) | |
393 | { | |
394 | while (mp->mnt_lflag & MNT_LITER) { | |
395 | mp->mnt_lflag |= MNT_LITERWAIT; | |
396 | msleep((caddr_t)mp, &mp->mnt_mlock, PVFS, "vnode_iterate_setup", 0); | |
1c79356b | 397 | } |
91447636 A |
398 | |
399 | mp->mnt_lflag |= MNT_LITER; | |
400 | ||
1c79356b A |
401 | } |
402 | ||
91447636 A |
403 | static int |
404 | vnode_umount_preflight(mount_t mp, vnode_t skipvp, int flags) | |
1c79356b | 405 | { |
91447636 | 406 | vnode_t vp; |
1c79356b | 407 | |
91447636 A |
408 | TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) { |
409 | if (vp->v_type == VDIR) | |
410 | continue; | |
411 | if (vp == skipvp) | |
412 | continue; | |
413 | if ((flags & SKIPSYSTEM) && ((vp->v_flag & VSYSTEM) || | |
414 | (vp->v_flag & VNOFLUSH))) | |
415 | continue; | |
416 | if ((flags & SKIPSWAP) && (vp->v_flag & VSWAP)) | |
417 | continue; | |
418 | if ((flags & WRITECLOSE) && | |
419 | (vp->v_writecount == 0 || vp->v_type != VREG)) | |
420 | continue; | |
421 | /* Look for busy vnode */ | |
422 | if (((vp->v_usecount != 0) && | |
423 | ((vp->v_usecount - vp->v_kusecount) != 0))) | |
424 | return(1); | |
1c79356b | 425 | } |
91447636 A |
426 | |
427 | return(0); | |
1c79356b A |
428 | } |
429 | ||
91447636 A |
430 | /* |
431 | * This routine prepares iteration by moving all the vnodes to worker queue | |
432 | * called with mount lock held | |
1c79356b | 433 | */ |
91447636 A |
434 | int |
435 | vnode_iterate_prepare(mount_t mp) | |
1c79356b | 436 | { |
91447636 | 437 | vnode_t vp; |
1c79356b | 438 | |
91447636 A |
439 | if (TAILQ_EMPTY(&mp->mnt_vnodelist)) { |
440 | /* nothing to do */ | |
441 | return (0); | |
442 | } | |
1c79356b | 443 | |
91447636 A |
444 | vp = TAILQ_FIRST(&mp->mnt_vnodelist); |
445 | vp->v_mntvnodes.tqe_prev = &(mp->mnt_workerqueue.tqh_first); | |
446 | mp->mnt_workerqueue.tqh_first = mp->mnt_vnodelist.tqh_first; | |
447 | mp->mnt_workerqueue.tqh_last = mp->mnt_vnodelist.tqh_last; | |
448 | ||
449 | TAILQ_INIT(&mp->mnt_vnodelist); | |
450 | if (mp->mnt_newvnodes.tqh_first != NULL) | |
451 | panic("vnode_iterate_prepare: newvnode when entering vnode"); | |
452 | TAILQ_INIT(&mp->mnt_newvnodes); | |
453 | ||
454 | return (1); | |
1c79356b A |
455 | } |
456 | ||
91447636 A |
457 | |
458 | /* called with mount lock held */ | |
459 | int | |
460 | vnode_iterate_reloadq(mount_t mp) | |
1c79356b | 461 | { |
91447636 A |
462 | int moved = 0; |
463 | ||
464 | /* add the remaining entries in workerq to the end of mount vnode list */ | |
465 | if (!TAILQ_EMPTY(&mp->mnt_workerqueue)) { | |
466 | struct vnode * mvp; | |
467 | mvp = TAILQ_LAST(&mp->mnt_vnodelist, vnodelst); | |
468 | ||
469 | /* Joining the workerque entities to mount vnode list */ | |
470 | if (mvp) | |
471 | mvp->v_mntvnodes.tqe_next = mp->mnt_workerqueue.tqh_first; | |
472 | else | |
473 | mp->mnt_vnodelist.tqh_first = mp->mnt_workerqueue.tqh_first; | |
474 | mp->mnt_workerqueue.tqh_first->v_mntvnodes.tqe_prev = mp->mnt_vnodelist.tqh_last; | |
475 | mp->mnt_vnodelist.tqh_last = mp->mnt_workerqueue.tqh_last; | |
476 | TAILQ_INIT(&mp->mnt_workerqueue); | |
477 | } | |
478 | ||
479 | /* add the newvnodes to the head of mount vnode list */ | |
480 | if (!TAILQ_EMPTY(&mp->mnt_newvnodes)) { | |
481 | struct vnode * nlvp; | |
482 | nlvp = TAILQ_LAST(&mp->mnt_newvnodes, vnodelst); | |
483 | ||
484 | mp->mnt_newvnodes.tqh_first->v_mntvnodes.tqe_prev = &mp->mnt_vnodelist.tqh_first; | |
485 | nlvp->v_mntvnodes.tqe_next = mp->mnt_vnodelist.tqh_first; | |
486 | if(mp->mnt_vnodelist.tqh_first) | |
487 | mp->mnt_vnodelist.tqh_first->v_mntvnodes.tqe_prev = &nlvp->v_mntvnodes.tqe_next; | |
488 | else | |
489 | mp->mnt_vnodelist.tqh_last = mp->mnt_newvnodes.tqh_last; | |
490 | mp->mnt_vnodelist.tqh_first = mp->mnt_newvnodes.tqh_first; | |
491 | TAILQ_INIT(&mp->mnt_newvnodes); | |
492 | moved = 1; | |
493 | } | |
1c79356b | 494 | |
91447636 | 495 | return(moved); |
1c79356b A |
496 | } |
497 | ||
1c79356b | 498 | |
91447636 A |
499 | void |
500 | vnode_iterate_clear(mount_t mp) | |
501 | { | |
502 | mp->mnt_lflag &= ~MNT_LITER; | |
503 | if (mp->mnt_lflag & MNT_LITERWAIT) { | |
504 | mp->mnt_lflag &= ~MNT_LITERWAIT; | |
505 | wakeup(mp); | |
506 | } | |
507 | } | |
1c79356b | 508 | |
1c79356b | 509 | |
1c79356b | 510 | int |
91447636 A |
511 | vnode_iterate(mp, flags, callout, arg) |
512 | mount_t mp; | |
513 | int flags; | |
514 | int (*callout)(struct vnode *, void *); | |
515 | void * arg; | |
1c79356b | 516 | { |
1c79356b | 517 | struct vnode *vp; |
91447636 A |
518 | int vid, retval; |
519 | int ret = 0; | |
1c79356b | 520 | |
91447636 | 521 | mount_lock(mp); |
1c79356b | 522 | |
91447636 | 523 | vnode_iterate_setup(mp); |
1c79356b | 524 | |
91447636 A |
525 | /* it is returns 0 then there is nothing to do */ |
526 | retval = vnode_iterate_prepare(mp); | |
1c79356b | 527 | |
91447636 A |
528 | if (retval == 0) { |
529 | vnode_iterate_clear(mp); | |
530 | mount_unlock(mp); | |
531 | return(ret); | |
1c79356b | 532 | } |
91447636 A |
533 | |
534 | /* iterate over all the vnodes */ | |
535 | while (!TAILQ_EMPTY(&mp->mnt_workerqueue)) { | |
536 | vp = TAILQ_FIRST(&mp->mnt_workerqueue); | |
537 | TAILQ_REMOVE(&mp->mnt_workerqueue, vp, v_mntvnodes); | |
538 | TAILQ_INSERT_TAIL(&mp->mnt_vnodelist, vp, v_mntvnodes); | |
539 | vid = vp->v_id; | |
540 | if ((vp->v_data == NULL) || (vp->v_type == VNON) || (vp->v_mount != mp)) { | |
541 | continue; | |
542 | } | |
543 | mount_unlock(mp); | |
1c79356b | 544 | |
91447636 A |
545 | if ( vget_internal(vp, vid, (flags | VNODE_NODEAD| VNODE_WITHID | VNODE_NOSUSPEND))) { |
546 | mount_lock(mp); | |
547 | continue; | |
548 | } | |
549 | if (flags & VNODE_RELOAD) { | |
550 | /* | |
551 | * we're reloading the filesystem | |
552 | * cast out any inactive vnodes... | |
553 | */ | |
554 | if (vnode_reload(vp)) { | |
555 | /* vnode will be recycled on the refcount drop */ | |
556 | vnode_put(vp); | |
557 | mount_lock(mp); | |
558 | continue; | |
559 | } | |
560 | } | |
55e303ae | 561 | |
91447636 A |
562 | retval = callout(vp, arg); |
563 | ||
564 | switch (retval) { | |
565 | case VNODE_RETURNED: | |
566 | case VNODE_RETURNED_DONE: | |
567 | vnode_put(vp); | |
568 | if (retval == VNODE_RETURNED_DONE) { | |
569 | mount_lock(mp); | |
570 | ret = 0; | |
571 | goto out; | |
572 | } | |
573 | break; | |
574 | ||
575 | case VNODE_CLAIMED_DONE: | |
576 | mount_lock(mp); | |
577 | ret = 0; | |
578 | goto out; | |
579 | case VNODE_CLAIMED: | |
580 | default: | |
581 | break; | |
582 | } | |
583 | mount_lock(mp); | |
55e303ae | 584 | } |
1c79356b | 585 | |
91447636 A |
586 | out: |
587 | (void)vnode_iterate_reloadq(mp); | |
588 | vnode_iterate_clear(mp); | |
589 | mount_unlock(mp); | |
590 | return (ret); | |
591 | } | |
55e303ae | 592 | |
91447636 A |
593 | void |
594 | mount_lock_renames(mount_t mp) | |
595 | { | |
596 | lck_mtx_lock(&mp->mnt_renamelock); | |
1c79356b A |
597 | } |
598 | ||
1c79356b | 599 | void |
91447636 | 600 | mount_unlock_renames(mount_t mp) |
1c79356b | 601 | { |
91447636 A |
602 | lck_mtx_unlock(&mp->mnt_renamelock); |
603 | } | |
1c79356b | 604 | |
91447636 A |
605 | void |
606 | mount_lock(mount_t mp) | |
607 | { | |
608 | lck_mtx_lock(&mp->mnt_mlock); | |
1c79356b A |
609 | } |
610 | ||
91447636 A |
611 | void |
612 | mount_unlock(mount_t mp) | |
fa4905b1 | 613 | { |
91447636 | 614 | lck_mtx_unlock(&mp->mnt_mlock); |
fa4905b1 A |
615 | } |
616 | ||
91447636 | 617 | |
1c79356b | 618 | void |
91447636 | 619 | mount_ref(mount_t mp, int locked) |
1c79356b | 620 | { |
91447636 A |
621 | if ( !locked) |
622 | mount_lock(mp); | |
623 | ||
624 | mp->mnt_count++; | |
625 | ||
626 | if ( !locked) | |
627 | mount_unlock(mp); | |
1c79356b A |
628 | } |
629 | ||
91447636 A |
630 | |
631 | void | |
632 | mount_drop(mount_t mp, int locked) | |
633 | { | |
634 | if ( !locked) | |
635 | mount_lock(mp); | |
636 | ||
637 | mp->mnt_count--; | |
638 | ||
639 | if (mp->mnt_count == 0 && (mp->mnt_lflag & MNT_LDRAIN)) | |
640 | wakeup(&mp->mnt_lflag); | |
641 | ||
642 | if ( !locked) | |
643 | mount_unlock(mp); | |
644 | } | |
645 | ||
646 | ||
1c79356b | 647 | int |
91447636 | 648 | mount_iterref(mount_t mp, int locked) |
1c79356b | 649 | { |
91447636 | 650 | int retval = 0; |
1c79356b | 651 | |
91447636 A |
652 | if (!locked) |
653 | mount_list_lock(); | |
654 | if (mp->mnt_iterref < 0) { | |
655 | retval = 1; | |
656 | } else { | |
657 | mp->mnt_iterref++; | |
1c79356b | 658 | } |
91447636 A |
659 | if (!locked) |
660 | mount_list_unlock(); | |
661 | return(retval); | |
662 | } | |
1c79356b | 663 | |
91447636 A |
664 | int |
665 | mount_isdrained(mount_t mp, int locked) | |
666 | { | |
667 | int retval; | |
1c79356b | 668 | |
91447636 A |
669 | if (!locked) |
670 | mount_list_lock(); | |
671 | if (mp->mnt_iterref < 0) | |
672 | retval = 1; | |
673 | else | |
674 | retval = 0; | |
675 | if (!locked) | |
676 | mount_list_unlock(); | |
677 | return(retval); | |
678 | } | |
679 | ||
680 | void | |
681 | mount_iterdrop(mount_t mp) | |
682 | { | |
683 | mount_list_lock(); | |
684 | mp->mnt_iterref--; | |
685 | wakeup(&mp->mnt_iterref); | |
686 | mount_list_unlock(); | |
687 | } | |
688 | ||
689 | void | |
690 | mount_iterdrain(mount_t mp) | |
691 | { | |
692 | mount_list_lock(); | |
693 | while (mp->mnt_iterref) | |
694 | msleep((caddr_t)&mp->mnt_iterref, mnt_list_mtx_lock, PVFS, "mount_iterdrain", 0 ); | |
695 | /* mount iterations drained */ | |
696 | mp->mnt_iterref = -1; | |
697 | mount_list_unlock(); | |
698 | } | |
699 | void | |
700 | mount_iterreset(mount_t mp) | |
701 | { | |
702 | mount_list_lock(); | |
703 | if (mp->mnt_iterref == -1) | |
704 | mp->mnt_iterref = 0; | |
705 | mount_list_unlock(); | |
706 | } | |
707 | ||
708 | /* always called with mount lock held */ | |
709 | int | |
710 | mount_refdrain(mount_t mp) | |
711 | { | |
712 | if (mp->mnt_lflag & MNT_LDRAIN) | |
713 | panic("already in drain"); | |
714 | mp->mnt_lflag |= MNT_LDRAIN; | |
715 | ||
716 | while (mp->mnt_count) | |
717 | msleep((caddr_t)&mp->mnt_lflag, &mp->mnt_mlock, PVFS, "mount_drain", 0 ); | |
718 | ||
719 | if (mp->mnt_vnodelist.tqh_first != NULL) | |
720 | panic("mount_refdrain: dangling vnode"); | |
721 | ||
722 | mp->mnt_lflag &= ~MNT_LDRAIN; | |
723 | ||
724 | return(0); | |
725 | } | |
726 | ||
727 | ||
728 | /* | |
729 | * Mark a mount point as busy. Used to synchronize access and to delay | |
730 | * unmounting. | |
731 | */ | |
732 | int | |
733 | vfs_busy(mount_t mp, int flags) | |
734 | { | |
735 | ||
736 | restart: | |
737 | if (mp->mnt_lflag & MNT_LDEAD) | |
738 | return(ENOENT); | |
739 | ||
740 | if (mp->mnt_lflag & MNT_LUNMOUNT) { | |
741 | if (flags & LK_NOWAIT) | |
742 | return (ENOENT); | |
743 | ||
744 | mount_lock(mp); | |
745 | ||
746 | if (mp->mnt_lflag & MNT_LDEAD) { | |
747 | mount_unlock(mp); | |
748 | return(ENOENT); | |
749 | } | |
750 | if (mp->mnt_lflag & MNT_LUNMOUNT) { | |
751 | mp->mnt_lflag |= MNT_LWAIT; | |
1c79356b | 752 | /* |
91447636 A |
753 | * Since all busy locks are shared except the exclusive |
754 | * lock granted when unmounting, the only place that a | |
755 | * wakeup needs to be done is at the release of the | |
756 | * exclusive lock at the end of dounmount. | |
1c79356b | 757 | */ |
91447636 A |
758 | msleep((caddr_t)mp, &mp->mnt_mlock, (PVFS | PDROP), "vfsbusy", 0 ); |
759 | return (ENOENT); | |
1c79356b | 760 | } |
91447636 A |
761 | mount_unlock(mp); |
762 | } | |
763 | ||
764 | lck_rw_lock_shared(&mp->mnt_rwlock); | |
765 | ||
766 | /* | |
767 | * until we are granted the rwlock, it's possible for the mount point to | |
768 | * change state, so reevaluate before granting the vfs_busy | |
769 | */ | |
770 | if (mp->mnt_lflag & (MNT_LDEAD | MNT_LUNMOUNT)) { | |
771 | lck_rw_done(&mp->mnt_rwlock); | |
772 | goto restart; | |
1c79356b | 773 | } |
1c79356b A |
774 | return (0); |
775 | } | |
776 | ||
91447636 A |
777 | /* |
778 | * Free a busy filesystem. | |
779 | */ | |
780 | ||
781 | void | |
782 | vfs_unbusy(mount_t mp) | |
783 | { | |
784 | lck_rw_done(&mp->mnt_rwlock); | |
785 | } | |
786 | ||
787 | ||
788 | ||
789 | static void | |
790 | vfs_rootmountfailed(mount_t mp) { | |
791 | ||
792 | mount_list_lock(); | |
793 | mp->mnt_vtable->vfc_refcount--; | |
794 | mount_list_unlock(); | |
795 | ||
796 | vfs_unbusy(mp); | |
797 | ||
798 | mount_lock_destroy(mp); | |
799 | ||
800 | FREE_ZONE(mp, sizeof(struct mount), M_MOUNT); | |
801 | } | |
802 | ||
803 | /* | |
804 | * Lookup a filesystem type, and if found allocate and initialize | |
805 | * a mount structure for it. | |
806 | * | |
807 | * Devname is usually updated by mount(8) after booting. | |
808 | */ | |
809 | static mount_t | |
810 | vfs_rootmountalloc_internal(struct vfstable *vfsp, const char *devname) | |
811 | { | |
812 | mount_t mp; | |
813 | ||
814 | mp = _MALLOC_ZONE((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); | |
815 | bzero((char *)mp, (u_long)sizeof(struct mount)); | |
816 | ||
817 | /* Initialize the default IO constraints */ | |
818 | mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS; | |
819 | mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32; | |
820 | mp->mnt_maxsegreadsize = mp->mnt_maxreadcnt; | |
821 | mp->mnt_maxsegwritesize = mp->mnt_maxwritecnt; | |
822 | mp->mnt_devblocksize = DEV_BSIZE; | |
823 | ||
824 | mount_lock_init(mp); | |
825 | (void)vfs_busy(mp, LK_NOWAIT); | |
826 | ||
827 | TAILQ_INIT(&mp->mnt_vnodelist); | |
828 | TAILQ_INIT(&mp->mnt_workerqueue); | |
829 | TAILQ_INIT(&mp->mnt_newvnodes); | |
830 | ||
831 | mp->mnt_vtable = vfsp; | |
832 | mp->mnt_op = vfsp->vfc_vfsops; | |
833 | mp->mnt_flag = MNT_RDONLY | MNT_ROOTFS; | |
834 | mp->mnt_vnodecovered = NULLVP; | |
835 | //mp->mnt_stat.f_type = vfsp->vfc_typenum; | |
836 | mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; | |
837 | ||
838 | mount_list_lock(); | |
839 | vfsp->vfc_refcount++; | |
840 | mount_list_unlock(); | |
841 | ||
842 | strncpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSTYPENAMELEN); | |
843 | mp->mnt_vfsstat.f_mntonname[0] = '/'; | |
844 | (void) copystr((char *)devname, mp->mnt_vfsstat.f_mntfromname, MAXPATHLEN - 1, 0); | |
845 | ||
846 | return (mp); | |
847 | } | |
848 | ||
849 | errno_t | |
850 | vfs_rootmountalloc(const char *fstypename, const char *devname, mount_t *mpp) | |
851 | { | |
852 | struct vfstable *vfsp; | |
853 | ||
854 | for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) | |
855 | if (!strcmp(vfsp->vfc_name, fstypename)) | |
856 | break; | |
857 | if (vfsp == NULL) | |
858 | return (ENODEV); | |
859 | ||
860 | *mpp = vfs_rootmountalloc_internal(vfsp, devname); | |
861 | ||
862 | if (*mpp) | |
863 | return (0); | |
864 | ||
865 | return (ENOMEM); | |
866 | } | |
867 | ||
868 | ||
869 | /* | |
870 | * Find an appropriate filesystem to use for the root. If a filesystem | |
871 | * has not been preselected, walk through the list of known filesystems | |
872 | * trying those that have mountroot routines, and try them until one | |
873 | * works or we have tried them all. | |
874 | */ | |
875 | extern int (*mountroot)(void); | |
876 | ||
877 | int | |
878 | vfs_mountroot() | |
879 | { | |
880 | struct vfstable *vfsp; | |
881 | struct vfs_context context; | |
882 | int error; | |
883 | mount_t mp; | |
884 | ||
885 | if (mountroot != NULL) { | |
886 | /* | |
887 | * used for netboot which follows a different set of rules | |
888 | */ | |
889 | error = (*mountroot)(); | |
890 | return (error); | |
891 | } | |
892 | if ((error = bdevvp(rootdev, &rootvp))) { | |
893 | printf("vfs_mountroot: can't setup bdevvp\n"); | |
894 | return (error); | |
895 | } | |
896 | context.vc_proc = current_proc(); | |
897 | context.vc_ucred = kauth_cred_get(); | |
898 | ||
899 | for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { | |
900 | if (vfsp->vfc_mountroot == NULL) | |
901 | continue; | |
902 | ||
903 | mp = vfs_rootmountalloc_internal(vfsp, "root_device"); | |
904 | mp->mnt_devvp = rootvp; | |
905 | ||
906 | if ((error = (*vfsp->vfc_mountroot)(mp, rootvp, &context)) == 0) { | |
907 | mp->mnt_devvp->v_specflags |= SI_MOUNTEDON; | |
908 | ||
909 | vfs_unbusy(mp); | |
910 | ||
911 | mount_list_add(mp); | |
912 | ||
913 | /* | |
914 | * cache the IO attributes for the underlying physical media... | |
915 | * an error return indicates the underlying driver doesn't | |
916 | * support all the queries necessary... however, reasonable | |
917 | * defaults will have been set, so no reason to bail or care | |
918 | */ | |
919 | vfs_init_io_attributes(rootvp, mp); | |
920 | /* | |
921 | * get rid of iocount reference returned | |
922 | * by bdevvp... it will have also taken | |
923 | * a usecount reference which we want to keep | |
924 | */ | |
925 | vnode_put(rootvp); | |
926 | ||
927 | return (0); | |
928 | } | |
929 | vfs_rootmountfailed(mp); | |
930 | ||
931 | if (error != EINVAL) | |
932 | printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error); | |
933 | } | |
934 | return (ENODEV); | |
935 | } | |
936 | ||
937 | /* | |
938 | * Lookup a mount point by filesystem identifier. | |
939 | */ | |
940 | extern mount_t vfs_getvfs_locked(fsid_t *); | |
941 | ||
942 | struct mount * | |
943 | vfs_getvfs(fsid) | |
944 | fsid_t *fsid; | |
945 | { | |
946 | return (mount_list_lookupby_fsid(fsid, 0, 0)); | |
947 | } | |
948 | ||
949 | struct mount * | |
950 | vfs_getvfs_locked(fsid) | |
951 | fsid_t *fsid; | |
952 | { | |
953 | return(mount_list_lookupby_fsid(fsid, 1, 0)); | |
954 | } | |
955 | ||
956 | struct mount * | |
957 | vfs_getvfs_by_mntonname(u_char *path) | |
958 | { | |
959 | mount_t retmp = (mount_t)0; | |
960 | mount_t mp; | |
961 | ||
962 | mount_list_lock(); | |
963 | TAILQ_FOREACH(mp, &mountlist, mnt_list) { | |
964 | if (!strcmp(mp->mnt_vfsstat.f_mntonname, path)) { | |
965 | retmp = mp; | |
966 | goto out; | |
967 | } | |
968 | } | |
969 | out: | |
970 | mount_list_unlock(); | |
971 | return (retmp); | |
972 | } | |
973 | ||
974 | /* generation number for creation of new fsids */ | |
975 | u_short mntid_gen = 0; | |
976 | /* | |
977 | * Get a new unique fsid | |
978 | */ | |
979 | void | |
980 | vfs_getnewfsid(mp) | |
981 | struct mount *mp; | |
982 | { | |
983 | ||
984 | fsid_t tfsid; | |
985 | int mtype; | |
986 | mount_t nmp; | |
987 | ||
988 | mount_list_lock(); | |
989 | ||
990 | /* generate a new fsid */ | |
991 | mtype = mp->mnt_vtable->vfc_typenum; | |
992 | if (++mntid_gen == 0) | |
993 | mntid_gen++; | |
994 | tfsid.val[0] = makedev(nblkdev + mtype, mntid_gen); | |
995 | tfsid.val[1] = mtype; | |
996 | ||
997 | TAILQ_FOREACH(nmp, &mountlist, mnt_list) { | |
998 | while (vfs_getvfs_locked(&tfsid)) { | |
999 | if (++mntid_gen == 0) | |
1000 | mntid_gen++; | |
1001 | tfsid.val[0] = makedev(nblkdev + mtype, mntid_gen); | |
1002 | } | |
1003 | } | |
1004 | mp->mnt_vfsstat.f_fsid.val[0] = tfsid.val[0]; | |
1005 | mp->mnt_vfsstat.f_fsid.val[1] = tfsid.val[1]; | |
1006 | mount_list_unlock(); | |
1007 | } | |
1008 | ||
1009 | /* | |
1010 | * Routines having to do with the management of the vnode table. | |
1011 | */ | |
1012 | extern int (**dead_vnodeop_p)(void *); | |
1013 | long numvnodes, freevnodes; | |
1014 | long inactivevnodes; | |
1015 | ||
1016 | ||
1017 | /* | |
1018 | * Move a vnode from one mount queue to another. | |
1019 | */ | |
1020 | static void | |
1021 | insmntque(vnode_t vp, mount_t mp) | |
1022 | { | |
1023 | mount_t lmp; | |
1024 | /* | |
1025 | * Delete from old mount point vnode list, if on one. | |
1026 | */ | |
3a60a9f5 | 1027 | if ( (lmp = vp->v_mount) != NULL && lmp != dead_mountp) { |
91447636 A |
1028 | if ((vp->v_lflag & VNAMED_MOUNT) == 0) |
1029 | panic("insmntque: vp not in mount vnode list"); | |
1030 | vp->v_lflag &= ~VNAMED_MOUNT; | |
1031 | ||
1032 | mount_lock(lmp); | |
1033 | ||
1034 | mount_drop(lmp, 1); | |
1035 | ||
1036 | if (vp->v_mntvnodes.tqe_next == NULL) { | |
1037 | if (TAILQ_LAST(&lmp->mnt_vnodelist, vnodelst) == vp) | |
1038 | TAILQ_REMOVE(&lmp->mnt_vnodelist, vp, v_mntvnodes); | |
1039 | else if (TAILQ_LAST(&lmp->mnt_newvnodes, vnodelst) == vp) | |
1040 | TAILQ_REMOVE(&lmp->mnt_newvnodes, vp, v_mntvnodes); | |
1041 | else if (TAILQ_LAST(&lmp->mnt_workerqueue, vnodelst) == vp) | |
1042 | TAILQ_REMOVE(&lmp->mnt_workerqueue, vp, v_mntvnodes); | |
1043 | } else { | |
1044 | vp->v_mntvnodes.tqe_next->v_mntvnodes.tqe_prev = vp->v_mntvnodes.tqe_prev; | |
1045 | *vp->v_mntvnodes.tqe_prev = vp->v_mntvnodes.tqe_next; | |
1046 | } | |
1047 | vp->v_mntvnodes.tqe_next = 0; | |
1048 | vp->v_mntvnodes.tqe_prev = 0; | |
1049 | mount_unlock(lmp); | |
1050 | return; | |
1051 | } | |
1052 | ||
1053 | /* | |
1054 | * Insert into list of vnodes for the new mount point, if available. | |
1055 | */ | |
1056 | if ((vp->v_mount = mp) != NULL) { | |
1057 | mount_lock(mp); | |
1058 | if ((vp->v_mntvnodes.tqe_next != 0) && (vp->v_mntvnodes.tqe_prev != 0)) | |
1059 | panic("vp already in mount list"); | |
1060 | if (mp->mnt_lflag & MNT_LITER) | |
1061 | TAILQ_INSERT_HEAD(&mp->mnt_newvnodes, vp, v_mntvnodes); | |
1062 | else | |
1063 | TAILQ_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes); | |
1064 | if (vp->v_lflag & VNAMED_MOUNT) | |
1065 | panic("insmntque: vp already in mount vnode list"); | |
1066 | if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb)) | |
1067 | panic("insmntque: vp on the free list\n"); | |
1068 | vp->v_lflag |= VNAMED_MOUNT; | |
1069 | mount_ref(mp, 1); | |
1070 | mount_unlock(mp); | |
1071 | } | |
1072 | } | |
1073 | ||
1074 | ||
1c79356b A |
1075 | /* |
1076 | * Create a vnode for a block device. | |
1077 | * Used for root filesystem, argdev, and swap areas. | |
1078 | * Also used for memory file system special devices. | |
1079 | */ | |
1080 | int | |
91447636 | 1081 | bdevvp(dev_t dev, vnode_t *vpp) |
1c79356b | 1082 | { |
91447636 A |
1083 | vnode_t nvp; |
1084 | int error; | |
1085 | struct vnode_fsparam vfsp; | |
1086 | struct vfs_context context; | |
1c79356b A |
1087 | |
1088 | if (dev == NODEV) { | |
1089 | *vpp = NULLVP; | |
1090 | return (ENODEV); | |
1091 | } | |
91447636 A |
1092 | |
1093 | context.vc_proc = current_proc(); | |
1094 | context.vc_ucred = FSCRED; | |
1095 | ||
1096 | vfsp.vnfs_mp = (struct mount *)0; | |
1097 | vfsp.vnfs_vtype = VBLK; | |
1098 | vfsp.vnfs_str = "bdevvp"; | |
1099 | vfsp.vnfs_dvp = 0; | |
1100 | vfsp.vnfs_fsnode = 0; | |
1101 | vfsp.vnfs_cnp = 0; | |
1102 | vfsp.vnfs_vops = spec_vnodeop_p; | |
1103 | vfsp.vnfs_rdev = dev; | |
1104 | vfsp.vnfs_filesize = 0; | |
1105 | ||
1106 | vfsp.vnfs_flags = VNFS_NOCACHE | VNFS_CANTCACHE; | |
1107 | ||
1108 | vfsp.vnfs_marksystem = 0; | |
1109 | vfsp.vnfs_markroot = 0; | |
1110 | ||
1111 | if ( (error = vnode_create(VNCREATE_FLAVOR, VCREATESIZE, &vfsp, &nvp)) ) { | |
1c79356b A |
1112 | *vpp = NULLVP; |
1113 | return (error); | |
1114 | } | |
91447636 A |
1115 | if ( (error = vnode_ref(nvp)) ) { |
1116 | panic("bdevvp failed: vnode_ref"); | |
1117 | return (error); | |
1c79356b | 1118 | } |
91447636 A |
1119 | if ( (error = VNOP_FSYNC(nvp, MNT_WAIT, &context)) ) { |
1120 | panic("bdevvp failed: fsync"); | |
1121 | return (error); | |
1122 | } | |
1123 | if ( (error = buf_invalidateblks(nvp, BUF_WRITE_DATA, 0, 0)) ) { | |
1124 | panic("bdevvp failed: invalidateblks"); | |
1125 | return (error); | |
1126 | } | |
1127 | if ( (error = VNOP_OPEN(nvp, FREAD, &context)) ) { | |
1128 | panic("bdevvp failed: open"); | |
1129 | return (error); | |
1130 | } | |
1131 | *vpp = nvp; | |
1132 | ||
1c79356b A |
1133 | return (0); |
1134 | } | |
1135 | ||
1136 | /* | |
1137 | * Check to see if the new vnode represents a special device | |
1138 | * for which we already have a vnode (either because of | |
1139 | * bdevvp() or because of a different vnode representing | |
1140 | * the same block device). If such an alias exists, deallocate | |
1141 | * the existing contents and return the aliased vnode. The | |
1142 | * caller is responsible for filling it with its new contents. | |
1143 | */ | |
91447636 A |
1144 | static vnode_t |
1145 | checkalias(nvp, nvp_rdev) | |
1c79356b A |
1146 | register struct vnode *nvp; |
1147 | dev_t nvp_rdev; | |
1c79356b | 1148 | { |
1c79356b A |
1149 | struct vnode *vp; |
1150 | struct vnode **vpp; | |
91447636 | 1151 | int vid = 0; |
1c79356b | 1152 | |
1c79356b A |
1153 | vpp = &speclisth[SPECHASH(nvp_rdev)]; |
1154 | loop: | |
91447636 A |
1155 | SPECHASH_LOCK(); |
1156 | ||
1c79356b | 1157 | for (vp = *vpp; vp; vp = vp->v_specnext) { |
91447636 A |
1158 | if (nvp_rdev == vp->v_rdev && nvp->v_type == vp->v_type) { |
1159 | vid = vp->v_id; | |
1160 | break; | |
1161 | } | |
1162 | } | |
1163 | SPECHASH_UNLOCK(); | |
1164 | ||
1165 | if (vp) { | |
1166 | if (vnode_getwithvid(vp,vid)) { | |
1167 | goto loop; | |
1168 | } | |
1169 | /* | |
1170 | * Termination state is checked in vnode_getwithvid | |
1171 | */ | |
1172 | vnode_lock(vp); | |
1173 | ||
1c79356b A |
1174 | /* |
1175 | * Alias, but not in use, so flush it out. | |
1176 | */ | |
91447636 A |
1177 | if ((vp->v_iocount == 1) && (vp->v_usecount == 0)) { |
1178 | vnode_reclaim_internal(vp, 1, 0); | |
1179 | vnode_unlock(vp); | |
1180 | vnode_put(vp); | |
1c79356b A |
1181 | goto loop; |
1182 | } | |
1c79356b A |
1183 | } |
1184 | if (vp == NULL || vp->v_tag != VT_NON) { | |
91447636 A |
1185 | MALLOC_ZONE(nvp->v_specinfo, struct specinfo *, sizeof(struct specinfo), |
1186 | M_SPECINFO, M_WAITOK); | |
1c79356b A |
1187 | bzero(nvp->v_specinfo, sizeof(struct specinfo)); |
1188 | nvp->v_rdev = nvp_rdev; | |
91447636 A |
1189 | nvp->v_specflags = 0; |
1190 | nvp->v_speclastr = -1; | |
1191 | ||
1192 | SPECHASH_LOCK(); | |
1c79356b A |
1193 | nvp->v_hashchain = vpp; |
1194 | nvp->v_specnext = *vpp; | |
1c79356b | 1195 | *vpp = nvp; |
91447636 A |
1196 | SPECHASH_UNLOCK(); |
1197 | ||
1c79356b A |
1198 | if (vp != NULLVP) { |
1199 | nvp->v_flag |= VALIASED; | |
1200 | vp->v_flag |= VALIASED; | |
91447636 A |
1201 | vnode_unlock(vp); |
1202 | vnode_put(vp); | |
1c79356b | 1203 | } |
1c79356b A |
1204 | return (NULLVP); |
1205 | } | |
1c79356b A |
1206 | return (vp); |
1207 | } | |
1208 | ||
91447636 | 1209 | |
1c79356b | 1210 | /* |
0b4e3aa0 A |
1211 | * Get a reference on a particular vnode and lock it if requested. |
1212 | * If the vnode was on the inactive list, remove it from the list. | |
1213 | * If the vnode was on the free list, remove it from the list and | |
1214 | * move it to inactive list as needed. | |
1215 | * The vnode lock bit is set if the vnode is being eliminated in | |
1216 | * vgone. The process is awakened when the transition is completed, | |
1217 | * and an error returned to indicate that the vnode is no longer | |
1218 | * usable (possibly having been changed to a new file system type). | |
1c79356b | 1219 | */ |
91447636 A |
1220 | static int |
1221 | vget_internal(vnode_t vp, int vid, int vflags) | |
1c79356b A |
1222 | { |
1223 | int error = 0; | |
55e303ae A |
1224 | u_long vpid; |
1225 | ||
91447636 | 1226 | vnode_lock(vp); |
55e303ae | 1227 | |
91447636 A |
1228 | if (vflags & VNODE_WITHID) |
1229 | vpid = vid; | |
1230 | else | |
1231 | vpid = vp->v_id; // save off the original v_id | |
0b4e3aa0 | 1232 | |
91447636 A |
1233 | if ((vflags & VNODE_WRITEABLE) && (vp->v_writecount == 0)) |
1234 | /* | |
1235 | * vnode to be returned only if it has writers opened | |
1236 | */ | |
1237 | error = EINVAL; | |
1238 | else | |
1239 | error = vnode_getiocount(vp, 1, vpid, vflags); | |
55e303ae | 1240 | |
91447636 | 1241 | vnode_unlock(vp); |
55e303ae | 1242 | |
0b4e3aa0 A |
1243 | return (error); |
1244 | } | |
1245 | ||
1c79356b | 1246 | int |
91447636 | 1247 | vnode_ref(vnode_t vp) |
1c79356b | 1248 | { |
1c79356b | 1249 | |
91447636 | 1250 | return (vnode_ref_ext(vp, 0)); |
1c79356b A |
1251 | } |
1252 | ||
1c79356b | 1253 | int |
91447636 | 1254 | vnode_ref_ext(vnode_t vp, int fmode) |
1c79356b | 1255 | { |
91447636 | 1256 | int error = 0; |
1c79356b | 1257 | |
91447636 | 1258 | vnode_lock(vp); |
1c79356b | 1259 | |
91447636 A |
1260 | /* |
1261 | * once all the current call sites have been fixed to insure they have | |
1262 | * taken an iocount, we can toughen this assert up and insist that the | |
1263 | * iocount is non-zero... a non-zero usecount doesn't insure correctness | |
1264 | */ | |
1265 | if (vp->v_iocount <= 0 && vp->v_usecount <= 0) | |
1266 | panic("vnode_ref_ext: vp %x has no valid reference %d, %d", vp, vp->v_iocount, vp->v_usecount); | |
1c79356b | 1267 | |
91447636 A |
1268 | /* |
1269 | * if you are the owner of drain/termination, can acquire usecount | |
1270 | */ | |
1271 | if ((vp->v_lflag & (VL_DRAIN | VL_TERMINATE | VL_DEAD))) { | |
1272 | if (vp->v_owner != current_thread()) { | |
1273 | error = ENOENT; | |
1274 | goto out; | |
1275 | } | |
1276 | } | |
1277 | vp->v_usecount++; | |
1c79356b | 1278 | |
91447636 A |
1279 | if (fmode & FWRITE) { |
1280 | if (++vp->v_writecount <= 0) | |
1281 | panic("vnode_ref_ext: v_writecount"); | |
55e303ae | 1282 | } |
91447636 A |
1283 | if (fmode & O_EVTONLY) { |
1284 | if (++vp->v_kusecount <= 0) | |
1285 | panic("vnode_ref_ext: v_kusecount"); | |
55e303ae | 1286 | } |
91447636 A |
1287 | out: |
1288 | vnode_unlock(vp); | |
1289 | ||
1290 | return (error); | |
55e303ae A |
1291 | } |
1292 | ||
1293 | ||
1c79356b A |
1294 | /* |
1295 | * put the vnode on appropriate free list. | |
91447636 | 1296 | * called with vnode LOCKED |
1c79356b A |
1297 | */ |
1298 | static void | |
91447636 | 1299 | vnode_list_add(vnode_t vp) |
1c79356b | 1300 | { |
55e303ae | 1301 | |
1c79356b | 1302 | /* |
91447636 | 1303 | * if it is already on a list or non zero references return |
1c79356b | 1304 | */ |
91447636 | 1305 | if (VONLIST(vp) || (vp->v_usecount != 0) || (vp->v_iocount != 0)) |
1c79356b | 1306 | return; |
91447636 | 1307 | vnode_list_lock(); |
1c79356b | 1308 | |
91447636 A |
1309 | /* |
1310 | * insert at tail of LRU list or at head if VAGE or VL_DEAD is set | |
1311 | */ | |
1312 | if ((vp->v_flag & VAGE) || (vp->v_lflag & VL_DEAD)) { | |
1313 | TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); | |
1c79356b | 1314 | vp->v_flag &= ~VAGE; |
91447636 A |
1315 | } else { |
1316 | TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); | |
1317 | } | |
1c79356b | 1318 | freevnodes++; |
91447636 A |
1319 | |
1320 | vnode_list_unlock(); | |
1c79356b A |
1321 | } |
1322 | ||
1323 | /* | |
91447636 | 1324 | * remove the vnode from appropriate free list. |
1c79356b A |
1325 | */ |
1326 | static void | |
91447636 | 1327 | vnode_list_remove(vnode_t vp) |
1c79356b | 1328 | { |
91447636 A |
1329 | /* |
1330 | * we want to avoid taking the list lock | |
1331 | * in the case where we're not on the free | |
1332 | * list... this will be true for most | |
1333 | * directories and any currently in use files | |
1334 | * | |
1335 | * we're guaranteed that we can't go from | |
1336 | * the not-on-list state to the on-list | |
1337 | * state since we hold the vnode lock... | |
1338 | * all calls to vnode_list_add are done | |
1339 | * under the vnode lock... so we can | |
1340 | * check for that condition (the prevelant one) | |
1341 | * without taking the list lock | |
1342 | */ | |
1343 | if (VONLIST(vp)) { | |
1344 | vnode_list_lock(); | |
1345 | /* | |
1346 | * however, we're not guaranteed that | |
1347 | * we won't go from the on-list state | |
1348 | * to the non-on-list state until we | |
1349 | * hold the vnode_list_lock... this | |
1350 | * is due to new_vnode removing vnodes | |
1351 | * from the free list uder the list_lock | |
1352 | * w/o the vnode lock... so we need to | |
1353 | * check again whether we're currently | |
1354 | * on the free list | |
1355 | */ | |
1356 | if (VONLIST(vp)) { | |
1357 | VREMFREE("vnode_list_remove", vp); | |
1358 | VLISTNONE(vp); | |
1359 | } | |
1360 | vnode_list_unlock(); | |
1361 | } | |
1c79356b A |
1362 | } |
1363 | ||
1364 | ||
1c79356b | 1365 | void |
91447636 | 1366 | vnode_rele(vnode_t vp) |
1c79356b | 1367 | { |
91447636 A |
1368 | vnode_rele_internal(vp, 0, 0, 0); |
1369 | } | |
1c79356b | 1370 | |
1c79356b | 1371 | |
91447636 A |
1372 | void |
1373 | vnode_rele_ext(vnode_t vp, int fmode, int dont_reenter) | |
1374 | { | |
1375 | vnode_rele_internal(vp, fmode, dont_reenter, 0); | |
1c79356b A |
1376 | } |
1377 | ||
91447636 | 1378 | |
1c79356b | 1379 | void |
91447636 | 1380 | vnode_rele_internal(vnode_t vp, int fmode, int dont_reenter, int locked) |
1c79356b | 1381 | { |
91447636 | 1382 | struct vfs_context context; |
55e303ae | 1383 | |
91447636 A |
1384 | if ( !locked) |
1385 | vnode_lock(vp); | |
1c79356b | 1386 | |
91447636 A |
1387 | if (--vp->v_usecount < 0) |
1388 | panic("vnode_rele_ext: vp %x usecount -ve : %d", vp, vp->v_usecount); | |
1389 | ||
1390 | if (fmode & FWRITE) { | |
1391 | if (--vp->v_writecount < 0) | |
1392 | panic("vnode_rele_ext: vp %x writecount -ve : %d", vp, vp->v_writecount); | |
1c79356b | 1393 | } |
91447636 A |
1394 | if (fmode & O_EVTONLY) { |
1395 | if (--vp->v_kusecount < 0) | |
1396 | panic("vnode_rele_ext: vp %x kusecount -ve : %d", vp, vp->v_kusecount); | |
1c79356b | 1397 | } |
91447636 A |
1398 | if ((vp->v_iocount > 0) || (vp->v_usecount > 0)) { |
1399 | /* | |
1400 | * vnode is still busy... if we're the last | |
1401 | * usecount, mark for a future call to VNOP_INACTIVE | |
1402 | * when the iocount finally drops to 0 | |
1403 | */ | |
1404 | if (vp->v_usecount == 0) { | |
1405 | vp->v_lflag |= VL_NEEDINACTIVE; | |
1406 | vp->v_flag &= ~(VNOCACHE_DATA | VRAOFF); | |
1407 | } | |
1408 | if ( !locked) | |
1409 | vnode_unlock(vp); | |
1c79356b A |
1410 | return; |
1411 | } | |
91447636 A |
1412 | vp->v_flag &= ~(VNOCACHE_DATA | VRAOFF); |
1413 | ||
1414 | if ( (vp->v_lflag & (VL_TERMINATE | VL_DEAD)) || dont_reenter) { | |
1415 | /* | |
1416 | * vnode is being cleaned, or | |
1417 | * we've requested that we don't reenter | |
1418 | * the filesystem on this release... in | |
1419 | * this case, we'll mark the vnode aged | |
1420 | * if it's been marked for termination | |
1c79356b | 1421 | */ |
91447636 A |
1422 | if (dont_reenter) { |
1423 | if ( !(vp->v_lflag & (VL_TERMINATE | VL_DEAD | VL_MARKTERM)) ) | |
1424 | vp->v_lflag |= VL_NEEDINACTIVE; | |
1425 | vp->v_flag |= VAGE; | |
1c79356b | 1426 | } |
91447636 A |
1427 | vnode_list_add(vp); |
1428 | if ( !locked) | |
1429 | vnode_unlock(vp); | |
1430 | return; | |
1c79356b | 1431 | } |
91447636 A |
1432 | /* |
1433 | * at this point both the iocount and usecount | |
1434 | * are zero | |
1435 | * pick up an iocount so that we can call | |
1436 | * VNOP_INACTIVE with the vnode lock unheld | |
1437 | */ | |
1438 | vp->v_iocount++; | |
1439 | #ifdef JOE_DEBUG | |
1440 | record_vp(vp, 1); | |
1c79356b | 1441 | #endif |
91447636 A |
1442 | vp->v_lflag &= ~VL_NEEDINACTIVE; |
1443 | vnode_unlock(vp); | |
1c79356b | 1444 | |
91447636 A |
1445 | context.vc_proc = current_proc(); |
1446 | context.vc_ucred = kauth_cred_get(); | |
1447 | VNOP_INACTIVE(vp, &context); | |
1c79356b | 1448 | |
91447636 A |
1449 | vnode_lock(vp); |
1450 | /* | |
1451 | * because we dropped the vnode lock to call VNOP_INACTIVE | |
1452 | * the state of the vnode may have changed... we may have | |
1453 | * picked up an iocount, usecount or the MARKTERM may have | |
1454 | * been set... we need to reevaluate the reference counts | |
1455 | * to determine if we can call vnode_reclaim_internal at | |
1456 | * this point... if the reference counts are up, we'll pick | |
1457 | * up the MARKTERM state when they get subsequently dropped | |
1458 | */ | |
1459 | if ( (vp->v_iocount == 1) && (vp->v_usecount == 0) && | |
1460 | ((vp->v_lflag & (VL_MARKTERM | VL_TERMINATE | VL_DEAD)) == VL_MARKTERM)) { | |
1461 | struct uthread *ut; | |
1c79356b | 1462 | |
91447636 A |
1463 | ut = get_bsdthread_info(current_thread()); |
1464 | ||
1465 | if (ut->uu_defer_reclaims) { | |
1466 | vp->v_defer_reclaimlist = ut->uu_vreclaims; | |
1467 | ut->uu_vreclaims = vp; | |
1468 | goto defer_reclaim; | |
1469 | } | |
1470 | vnode_reclaim_internal(vp, 1, 0); | |
1471 | } | |
1472 | vnode_dropiocount(vp, 1); | |
1473 | vnode_list_add(vp); | |
1474 | defer_reclaim: | |
1475 | if ( !locked) | |
1476 | vnode_unlock(vp); | |
1477 | return; | |
1c79356b A |
1478 | } |
1479 | ||
1480 | /* | |
1481 | * Remove any vnodes in the vnode table belonging to mount point mp. | |
1482 | * | |
1483 | * If MNT_NOFORCE is specified, there should not be any active ones, | |
1484 | * return error if any are found (nb: this is a user error, not a | |
1485 | * system error). If MNT_FORCE is specified, detach any active vnodes | |
1486 | * that are found. | |
1487 | */ | |
1488 | #if DIAGNOSTIC | |
1489 | int busyprt = 0; /* print out busy vnodes */ | |
1490 | #if 0 | |
1491 | struct ctldebug debug1 = { "busyprt", &busyprt }; | |
1492 | #endif /* 0 */ | |
1493 | #endif | |
1494 | ||
1495 | int | |
1496 | vflush(mp, skipvp, flags) | |
1497 | struct mount *mp; | |
1498 | struct vnode *skipvp; | |
1499 | int flags; | |
1500 | { | |
0b4e3aa0 | 1501 | struct proc *p = current_proc(); |
91447636 | 1502 | struct vnode *vp; |
1c79356b | 1503 | int busy = 0; |
91447636 A |
1504 | int reclaimed = 0; |
1505 | int vid, retval; | |
1c79356b | 1506 | |
91447636 A |
1507 | mount_lock(mp); |
1508 | vnode_iterate_setup(mp); | |
1509 | /* | |
1510 | * On regular unmounts(not forced) do a | |
1511 | * quick check for vnodes to be in use. This | |
1512 | * preserves the caching of vnodes. automounter | |
1513 | * tries unmounting every so often to see whether | |
1514 | * it is still busy or not. | |
1515 | */ | |
1516 | if ((flags & FORCECLOSE)==0) { | |
1517 | if (vnode_umount_preflight(mp, skipvp, flags)) { | |
1518 | vnode_iterate_clear(mp); | |
1519 | mount_unlock(mp); | |
1520 | return(EBUSY); | |
1521 | } | |
1522 | } | |
1c79356b | 1523 | loop: |
91447636 A |
1524 | /* it is returns 0 then there is nothing to do */ |
1525 | retval = vnode_iterate_prepare(mp); | |
1526 | ||
1527 | if (retval == 0) { | |
1528 | vnode_iterate_clear(mp); | |
1529 | mount_unlock(mp); | |
1530 | return(retval); | |
1531 | } | |
1532 | ||
1533 | /* iterate over all the vnodes */ | |
1534 | while (!TAILQ_EMPTY(&mp->mnt_workerqueue)) { | |
1535 | vp = TAILQ_FIRST(&mp->mnt_workerqueue); | |
1536 | TAILQ_REMOVE(&mp->mnt_workerqueue, vp, v_mntvnodes); | |
1537 | TAILQ_INSERT_TAIL(&mp->mnt_vnodelist, vp, v_mntvnodes); | |
1538 | if ( (vp->v_mount != mp) || (vp == skipvp)) { | |
1539 | continue; | |
1540 | } | |
1541 | vid = vp->v_id; | |
1542 | mount_unlock(mp); | |
1543 | vnode_lock(vp); | |
1544 | ||
1545 | if ((vp->v_id != vid) || ((vp->v_lflag & (VL_DEAD | VL_TERMINATE)))) { | |
1546 | vnode_unlock(vp); | |
1547 | mount_lock(mp); | |
1548 | continue; | |
1549 | } | |
1550 | ||
1c79356b | 1551 | /* |
91447636 A |
1552 | * If requested, skip over vnodes marked VSYSTEM. |
1553 | * Skip over all vnodes marked VNOFLUSH. | |
1554 | */ | |
1555 | if ((flags & SKIPSYSTEM) && ((vp->v_flag & VSYSTEM) || | |
1556 | (vp->v_flag & VNOFLUSH))) { | |
1557 | vnode_unlock(vp); | |
1558 | mount_lock(mp); | |
1c79356b | 1559 | continue; |
91447636 | 1560 | } |
1c79356b | 1561 | /* |
91447636 | 1562 | * If requested, skip over vnodes marked VSWAP. |
1c79356b | 1563 | */ |
91447636 A |
1564 | if ((flags & SKIPSWAP) && (vp->v_flag & VSWAP)) { |
1565 | vnode_unlock(vp); | |
1566 | mount_lock(mp); | |
1c79356b A |
1567 | continue; |
1568 | } | |
1569 | /* | |
91447636 | 1570 | * If requested, skip over vnodes marked VSWAP. |
1c79356b | 1571 | */ |
91447636 A |
1572 | if ((flags & SKIPROOT) && (vp->v_flag & VROOT)) { |
1573 | vnode_unlock(vp); | |
1574 | mount_lock(mp); | |
1c79356b A |
1575 | continue; |
1576 | } | |
1577 | /* | |
1578 | * If WRITECLOSE is set, only flush out regular file | |
1579 | * vnodes open for writing. | |
1580 | */ | |
1581 | if ((flags & WRITECLOSE) && | |
1582 | (vp->v_writecount == 0 || vp->v_type != VREG)) { | |
91447636 A |
1583 | vnode_unlock(vp); |
1584 | mount_lock(mp); | |
1c79356b A |
1585 | continue; |
1586 | } | |
1587 | /* | |
91447636 | 1588 | * If the real usecount is 0, all we need to do is clear |
1c79356b A |
1589 | * out the vnode data structures and we are done. |
1590 | */ | |
91447636 A |
1591 | if (((vp->v_usecount == 0) || |
1592 | ((vp->v_usecount - vp->v_kusecount) == 0))) { | |
1593 | vp->v_iocount++; /* so that drain waits for * other iocounts */ | |
1594 | #ifdef JOE_DEBUG | |
1595 | record_vp(vp, 1); | |
1596 | #endif | |
1597 | vnode_reclaim_internal(vp, 1, 0); | |
1598 | vnode_dropiocount(vp, 1); | |
1599 | vnode_list_add(vp); | |
1600 | ||
1601 | vnode_unlock(vp); | |
1602 | reclaimed++; | |
1603 | mount_lock(mp); | |
1c79356b A |
1604 | continue; |
1605 | } | |
1606 | /* | |
1607 | * If FORCECLOSE is set, forcibly close the vnode. | |
1608 | * For block or character devices, revert to an | |
1609 | * anonymous device. For all other files, just kill them. | |
1610 | */ | |
1611 | if (flags & FORCECLOSE) { | |
1c79356b | 1612 | if (vp->v_type != VBLK && vp->v_type != VCHR) { |
91447636 A |
1613 | vp->v_iocount++; /* so that drain waits * for other iocounts */ |
1614 | #ifdef JOE_DEBUG | |
1615 | record_vp(vp, 1); | |
1616 | #endif | |
1617 | vnode_reclaim_internal(vp, 1, 0); | |
1618 | vnode_dropiocount(vp, 1); | |
1619 | vnode_list_add(vp); | |
1620 | vnode_unlock(vp); | |
1c79356b A |
1621 | } else { |
1622 | vclean(vp, 0, p); | |
91447636 | 1623 | vp->v_lflag &= ~VL_DEAD; |
1c79356b | 1624 | vp->v_op = spec_vnodeop_p; |
91447636 | 1625 | vnode_unlock(vp); |
1c79356b | 1626 | } |
91447636 | 1627 | mount_lock(mp); |
1c79356b A |
1628 | continue; |
1629 | } | |
1630 | #if DIAGNOSTIC | |
1631 | if (busyprt) | |
1632 | vprint("vflush: busy vnode", vp); | |
1633 | #endif | |
91447636 A |
1634 | vnode_unlock(vp); |
1635 | mount_lock(mp); | |
1c79356b A |
1636 | busy++; |
1637 | } | |
91447636 A |
1638 | |
1639 | /* At this point the worker queue is completed */ | |
1640 | if (busy && ((flags & FORCECLOSE)==0) && reclaimed) { | |
1641 | busy = 0; | |
1642 | reclaimed = 0; | |
1643 | (void)vnode_iterate_reloadq(mp); | |
1644 | /* returned with mount lock held */ | |
1645 | goto loop; | |
1646 | } | |
1647 | ||
1648 | /* if new vnodes were created in between retry the reclaim */ | |
1649 | if ( vnode_iterate_reloadq(mp) != 0) { | |
1650 | if (!(busy && ((flags & FORCECLOSE)==0))) | |
1651 | goto loop; | |
1652 | } | |
1653 | vnode_iterate_clear(mp); | |
1654 | mount_unlock(mp); | |
1655 | ||
9bccf70c | 1656 | if (busy && ((flags & FORCECLOSE)==0)) |
1c79356b A |
1657 | return (EBUSY); |
1658 | return (0); | |
1659 | } | |
1660 | ||
91447636 | 1661 | int num_recycledvnodes=0; |
1c79356b A |
1662 | /* |
1663 | * Disassociate the underlying file system from a vnode. | |
91447636 | 1664 | * The vnode lock is held on entry. |
1c79356b A |
1665 | */ |
1666 | static void | |
91447636 | 1667 | vclean(vnode_t vp, int flags, proc_t p) |
1c79356b | 1668 | { |
91447636 | 1669 | struct vfs_context context; |
1c79356b | 1670 | int active; |
91447636 A |
1671 | int need_inactive; |
1672 | int already_terminating; | |
1673 | kauth_cred_t ucred = NULL; | |
1c79356b | 1674 | |
91447636 A |
1675 | context.vc_proc = p; |
1676 | context.vc_ucred = kauth_cred_get(); | |
1c79356b A |
1677 | |
1678 | /* | |
1679 | * Check to see if the vnode is in use. | |
1680 | * If so we have to reference it before we clean it out | |
1681 | * so that its count cannot fall to zero and generate a | |
1682 | * race against ourselves to recycle it. | |
1683 | */ | |
91447636 | 1684 | active = vp->v_usecount; |
55e303ae | 1685 | |
91447636 A |
1686 | /* |
1687 | * just in case we missed sending a needed | |
1688 | * VNOP_INACTIVE, we'll do it now | |
1689 | */ | |
1690 | need_inactive = (vp->v_lflag & VL_NEEDINACTIVE); | |
1691 | ||
1692 | vp->v_lflag &= ~VL_NEEDINACTIVE; | |
55e303ae | 1693 | |
1c79356b A |
1694 | /* |
1695 | * Prevent the vnode from being recycled or | |
1696 | * brought into use while we clean it out. | |
1697 | */ | |
91447636 A |
1698 | already_terminating = (vp->v_lflag & VL_TERMINATE); |
1699 | ||
1700 | vp->v_lflag |= VL_TERMINATE; | |
1c79356b A |
1701 | |
1702 | /* | |
91447636 A |
1703 | * remove the vnode from any mount list |
1704 | * it might be on... | |
1c79356b | 1705 | */ |
91447636 | 1706 | insmntque(vp, (struct mount *)0); |
1c79356b | 1707 | |
91447636 A |
1708 | ucred = vp->v_cred; |
1709 | vp->v_cred = NULL; | |
1710 | ||
1711 | vnode_unlock(vp); | |
1712 | ||
1713 | if (ucred) | |
1714 | kauth_cred_rele(ucred); | |
1715 | ||
1716 | OSAddAtomic(1, &num_recycledvnodes); | |
1c79356b | 1717 | /* |
91447636 | 1718 | * purge from the name cache as early as possible... |
1c79356b | 1719 | */ |
91447636 | 1720 | cache_purge(vp); |
1c79356b | 1721 | |
0b4e3aa0 | 1722 | if (active && (flags & DOCLOSE)) |
91447636 | 1723 | VNOP_CLOSE(vp, IO_NDELAY, &context); |
1c79356b A |
1724 | |
1725 | /* | |
1726 | * Clean out any buffers associated with the vnode. | |
1727 | */ | |
1728 | if (flags & DOCLOSE) { | |
91447636 | 1729 | #if NFSCLIENT |
1c79356b | 1730 | if (vp->v_tag == VT_NFS) |
55e303ae A |
1731 | nfs_vinvalbuf(vp, V_SAVE, NOCRED, p, 0); |
1732 | else | |
91447636 A |
1733 | #endif |
1734 | { | |
1735 | VNOP_FSYNC(vp, MNT_WAIT, &context); | |
1736 | buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0); | |
1737 | } | |
1738 | if (UBCINFOEXISTS(vp)) | |
1739 | /* | |
1740 | * Clean the pages in VM. | |
1741 | */ | |
1742 | (void)ubc_sync_range(vp, (off_t)0, ubc_getsize(vp), UBC_PUSHALL); | |
55e303ae | 1743 | } |
91447636 A |
1744 | if (UBCINFOEXISTS(vp)) |
1745 | cluster_release(vp->v_ubcinfo); | |
0b4e3aa0 | 1746 | |
91447636 A |
1747 | if (active || need_inactive) |
1748 | VNOP_INACTIVE(vp, &context); | |
0b4e3aa0 A |
1749 | |
1750 | /* Destroy ubc named reference */ | |
91447636 | 1751 | ubc_destroy_named(vp); |
0b4e3aa0 | 1752 | |
1c79356b A |
1753 | /* |
1754 | * Reclaim the vnode. | |
1755 | */ | |
91447636 | 1756 | if (VNOP_RECLAIM(vp, &context)) |
1c79356b | 1757 | panic("vclean: cannot reclaim"); |
55e303ae A |
1758 | |
1759 | // make sure the name & parent ptrs get cleaned out! | |
91447636 | 1760 | vnode_update_identity(vp, NULLVP, NULL, 0, 0, VNODE_UPDATE_PARENT | VNODE_UPDATE_NAME); |
55e303ae | 1761 | |
91447636 | 1762 | vnode_lock(vp); |
1c79356b | 1763 | |
91447636 | 1764 | vp->v_mount = dead_mountp; |
1c79356b A |
1765 | vp->v_op = dead_vnodeop_p; |
1766 | vp->v_tag = VT_NON; | |
91447636 | 1767 | vp->v_data = NULL; |
1c79356b | 1768 | |
91447636 | 1769 | vp->v_lflag |= VL_DEAD; |
55e303ae | 1770 | |
91447636 A |
1771 | if (already_terminating == 0) { |
1772 | vp->v_lflag &= ~VL_TERMINATE; | |
1773 | /* | |
1774 | * Done with purge, notify sleepers of the grim news. | |
1775 | */ | |
1776 | if (vp->v_lflag & VL_TERMWANT) { | |
1777 | vp->v_lflag &= ~VL_TERMWANT; | |
1778 | wakeup(&vp->v_lflag); | |
1779 | } | |
1c79356b A |
1780 | } |
1781 | } | |
1782 | ||
1783 | /* | |
1784 | * Eliminate all activity associated with the requested vnode | |
1785 | * and with all vnodes aliased to the requested vnode. | |
1786 | */ | |
1787 | int | |
91447636 | 1788 | vn_revoke(vnode_t vp, int flags, __unused vfs_context_t a_context) |
1c79356b | 1789 | { |
91447636 A |
1790 | struct vnode *vq; |
1791 | int vid; | |
1c79356b A |
1792 | |
1793 | #if DIAGNOSTIC | |
91447636 A |
1794 | if ((flags & REVOKEALL) == 0) |
1795 | panic("vnop_revoke"); | |
1c79356b A |
1796 | #endif |
1797 | ||
1c79356b A |
1798 | if (vp->v_flag & VALIASED) { |
1799 | /* | |
1800 | * If a vgone (or vclean) is already in progress, | |
1801 | * wait until it is done and return. | |
1802 | */ | |
91447636 A |
1803 | vnode_lock(vp); |
1804 | if (vp->v_lflag & VL_TERMINATE) { | |
1805 | vnode_unlock(vp); | |
1806 | return(ENOENT); | |
1c79356b | 1807 | } |
91447636 | 1808 | vnode_unlock(vp); |
1c79356b A |
1809 | /* |
1810 | * Ensure that vp will not be vgone'd while we | |
1811 | * are eliminating its aliases. | |
1812 | */ | |
91447636 | 1813 | SPECHASH_LOCK(); |
1c79356b | 1814 | while (vp->v_flag & VALIASED) { |
1c79356b A |
1815 | for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { |
1816 | if (vq->v_rdev != vp->v_rdev || | |
1817 | vq->v_type != vp->v_type || vp == vq) | |
1818 | continue; | |
91447636 A |
1819 | vid = vq->v_id; |
1820 | SPECHASH_UNLOCK(); | |
1821 | if (vnode_getwithvid(vq,vid)){ | |
1822 | SPECHASH_LOCK(); | |
1823 | break; | |
1824 | } | |
1825 | vnode_reclaim_internal(vq, 0, 0); | |
1826 | vnode_put(vq); | |
1827 | SPECHASH_LOCK(); | |
1c79356b A |
1828 | break; |
1829 | } | |
1c79356b | 1830 | } |
91447636 | 1831 | SPECHASH_UNLOCK(); |
1c79356b | 1832 | } |
91447636 A |
1833 | vnode_reclaim_internal(vp, 0, 0); |
1834 | ||
1c79356b A |
1835 | return (0); |
1836 | } | |
1837 | ||
1838 | /* | |
1839 | * Recycle an unused vnode to the front of the free list. | |
1840 | * Release the passed interlock if the vnode will be recycled. | |
1841 | */ | |
1842 | int | |
91447636 | 1843 | vnode_recycle(vp) |
1c79356b | 1844 | struct vnode *vp; |
1c79356b | 1845 | { |
91447636 | 1846 | vnode_lock(vp); |
1c79356b | 1847 | |
91447636 A |
1848 | if (vp->v_iocount || vp->v_usecount) { |
1849 | vp->v_lflag |= VL_MARKTERM; | |
1850 | vnode_unlock(vp); | |
1851 | return(0); | |
1852 | } | |
1853 | vnode_reclaim_internal(vp, 1, 0); | |
1854 | vnode_unlock(vp); | |
1855 | ||
1856 | return (1); | |
1c79356b A |
1857 | } |
1858 | ||
91447636 A |
1859 | static int |
1860 | vnode_reload(vnode_t vp) | |
1c79356b | 1861 | { |
91447636 | 1862 | vnode_lock(vp); |
1c79356b | 1863 | |
91447636 A |
1864 | if ((vp->v_iocount > 1) || vp->v_usecount) { |
1865 | vnode_unlock(vp); | |
1866 | return(0); | |
1867 | } | |
1868 | if (vp->v_iocount <= 0) | |
1869 | panic("vnode_reload with no iocount %d", vp->v_iocount); | |
1870 | ||
1871 | /* mark for release when iocount is dopped */ | |
1872 | vp->v_lflag |= VL_MARKTERM; | |
1873 | vnode_unlock(vp); | |
1874 | ||
1875 | return (1); | |
1c79356b A |
1876 | } |
1877 | ||
91447636 A |
1878 | |
1879 | static void | |
1880 | vgone(vnode_t vp) | |
1c79356b A |
1881 | { |
1882 | struct vnode *vq; | |
1883 | struct vnode *vx; | |
1884 | ||
1c79356b A |
1885 | /* |
1886 | * Clean out the filesystem specific data. | |
91447636 A |
1887 | * vclean also takes care of removing the |
1888 | * vnode from any mount list it might be on | |
1c79356b | 1889 | */ |
91447636 A |
1890 | vclean(vp, DOCLOSE, current_proc()); |
1891 | ||
1c79356b A |
1892 | /* |
1893 | * If special device, remove it from special device alias list | |
1894 | * if it is on one. | |
1895 | */ | |
1896 | if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) { | |
91447636 A |
1897 | SPECHASH_LOCK(); |
1898 | if (*vp->v_hashchain == vp) { | |
1899 | *vp->v_hashchain = vp->v_specnext; | |
1900 | } else { | |
1901 | for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { | |
1902 | if (vq->v_specnext != vp) | |
1903 | continue; | |
1904 | vq->v_specnext = vp->v_specnext; | |
1905 | break; | |
1906 | } | |
1c79356b A |
1907 | if (vq == NULL) |
1908 | panic("missing bdev"); | |
1c79356b | 1909 | } |
91447636 A |
1910 | if (vp->v_flag & VALIASED) { |
1911 | vx = NULL; | |
1912 | for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { | |
1913 | if (vq->v_rdev != vp->v_rdev || | |
1914 | vq->v_type != vp->v_type) | |
1915 | continue; | |
1916 | if (vx) | |
1917 | break; | |
1918 | vx = vq; | |
1919 | } | |
1920 | if (vx == NULL) | |
1921 | panic("missing alias"); | |
1922 | if (vq == NULL) | |
1923 | vx->v_flag &= ~VALIASED; | |
1924 | vp->v_flag &= ~VALIASED; | |
1925 | } | |
1926 | SPECHASH_UNLOCK(); | |
1927 | { | |
1928 | struct specinfo *tmp = vp->v_specinfo; | |
1929 | vp->v_specinfo = NULL; | |
1930 | FREE_ZONE((void *)tmp, sizeof(struct specinfo), M_SPECINFO); | |
1931 | } | |
1c79356b | 1932 | } |
1c79356b A |
1933 | } |
1934 | ||
1935 | /* | |
1936 | * Lookup a vnode by device number. | |
1937 | */ | |
1938 | int | |
91447636 | 1939 | check_mountedon(dev_t dev, enum vtype type, int *errorp) |
1c79356b | 1940 | { |
91447636 | 1941 | vnode_t vp; |
1c79356b | 1942 | int rc = 0; |
91447636 | 1943 | int vid; |
1c79356b | 1944 | |
91447636 A |
1945 | loop: |
1946 | SPECHASH_LOCK(); | |
1c79356b A |
1947 | for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) { |
1948 | if (dev != vp->v_rdev || type != vp->v_type) | |
1949 | continue; | |
91447636 A |
1950 | vid = vp->v_id; |
1951 | SPECHASH_UNLOCK(); | |
1952 | if (vnode_getwithvid(vp,vid)) | |
1953 | goto loop; | |
1954 | vnode_lock(vp); | |
1955 | if ((vp->v_usecount > 0) || (vp->v_iocount > 1)) { | |
1956 | vnode_unlock(vp); | |
1957 | if ((*errorp = vfs_mountedon(vp)) != 0) | |
1958 | rc = 1; | |
1959 | } else | |
1960 | vnode_unlock(vp); | |
1961 | vnode_put(vp); | |
1962 | return(rc); | |
1c79356b | 1963 | } |
91447636 A |
1964 | SPECHASH_UNLOCK(); |
1965 | return (0); | |
1c79356b A |
1966 | } |
1967 | ||
1968 | /* | |
1969 | * Calculate the total number of references to a special device. | |
1970 | */ | |
1971 | int | |
91447636 | 1972 | vcount(vnode_t vp) |
1c79356b | 1973 | { |
91447636 | 1974 | vnode_t vq, vnext; |
1c79356b | 1975 | int count; |
91447636 | 1976 | int vid; |
1c79356b A |
1977 | |
1978 | loop: | |
1979 | if ((vp->v_flag & VALIASED) == 0) | |
91447636 A |
1980 | return (vp->v_usecount - vp->v_kusecount); |
1981 | ||
1982 | SPECHASH_LOCK(); | |
1c79356b A |
1983 | for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) { |
1984 | vnext = vq->v_specnext; | |
1985 | if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type) | |
1986 | continue; | |
91447636 A |
1987 | vid = vq->v_id; |
1988 | SPECHASH_UNLOCK(); | |
1989 | ||
1990 | if (vnode_getwithvid(vq, vid)) { | |
1991 | goto loop; | |
1992 | } | |
1c79356b A |
1993 | /* |
1994 | * Alias, but not in use, so flush it out. | |
1995 | */ | |
91447636 A |
1996 | vnode_lock(vq); |
1997 | if ((vq->v_usecount == 0) && (vq->v_iocount == 1) && vq != vp) { | |
1998 | vnode_reclaim_internal(vq, 1, 0); | |
1999 | vnode_unlock(vq); | |
2000 | vnode_put(vq); | |
1c79356b A |
2001 | goto loop; |
2002 | } | |
91447636 A |
2003 | count += (vq->v_usecount - vq->v_kusecount); |
2004 | vnode_unlock(vq); | |
2005 | vnode_put(vq); | |
2006 | ||
2007 | SPECHASH_LOCK(); | |
1c79356b | 2008 | } |
91447636 A |
2009 | SPECHASH_UNLOCK(); |
2010 | ||
1c79356b A |
2011 | return (count); |
2012 | } | |
2013 | ||
2014 | int prtactive = 0; /* 1 => print out reclaim of active vnodes */ | |
2015 | ||
2016 | /* | |
2017 | * Print out a description of a vnode. | |
2018 | */ | |
2019 | static char *typename[] = | |
2020 | { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" }; | |
2021 | ||
2022 | void | |
91447636 | 2023 | vprint(const char *label, struct vnode *vp) |
1c79356b | 2024 | { |
91447636 | 2025 | char sbuf[64]; |
1c79356b A |
2026 | |
2027 | if (label != NULL) | |
2028 | printf("%s: ", label); | |
91447636 A |
2029 | printf("type %s, usecount %d, writecount %d", |
2030 | typename[vp->v_type], vp->v_usecount, vp->v_writecount); | |
2031 | sbuf[0] = '\0'; | |
1c79356b | 2032 | if (vp->v_flag & VROOT) |
91447636 | 2033 | strcat(sbuf, "|VROOT"); |
1c79356b | 2034 | if (vp->v_flag & VTEXT) |
91447636 | 2035 | strcat(sbuf, "|VTEXT"); |
1c79356b | 2036 | if (vp->v_flag & VSYSTEM) |
91447636 | 2037 | strcat(sbuf, "|VSYSTEM"); |
9bccf70c | 2038 | if (vp->v_flag & VNOFLUSH) |
91447636 | 2039 | strcat(sbuf, "|VNOFLUSH"); |
1c79356b | 2040 | if (vp->v_flag & VBWAIT) |
91447636 | 2041 | strcat(sbuf, "|VBWAIT"); |
1c79356b | 2042 | if (vp->v_flag & VALIASED) |
91447636 A |
2043 | strcat(sbuf, "|VALIASED"); |
2044 | if (sbuf[0] != '\0') | |
2045 | printf(" flags (%s)", &sbuf[1]); | |
1c79356b A |
2046 | } |
2047 | ||
1c79356b | 2048 | |
91447636 A |
/*
 * Build the path of a vnode into pathbuf.
 *
 * On entry *len is the size of pathbuf, which is passed to build_path()
 * by value; len itself is also handed to build_path() as its last
 * argument (presumably to receive the resulting length -- confirm
 * against build_path()).  Returns whatever build_path() returns.
 */
int
vn_getpath(struct vnode *vp, char *pathbuf, int *len)
{
	int buflen = *len;

	return (build_path(vp, pathbuf, buflen, len));
}
91447636 A |
2054 | |
2055 | ||
/*
 * Package extension table: nexts fixed-width slots of max_ext_width
 * bytes each, every slot holding one NUL-terminated extension string.
 */
static char *extension_table=NULL;
static int   nexts;
static int   max_ext_width;

/*
 * qsort() comparator for extension table slots: orders entries by string
 * length, shortest first.
 *
 * Takes const pointers so it matches the comparator type qsort() expects,
 * and compares the lengths directly instead of narrowing an unsigned
 * size_t difference to int.
 *
 * Returns <0, 0 or >0 when a is shorter than, as long as, or longer
 * than b.
 */
static int
extension_cmp(const void *a, const void *b)
{
	size_t len_a = strlen((const char *)a);
	size_t len_b = strlen((const char *)b);

	return ((len_a > len_b) - (len_a < len_b));
}
55e303ae | 2065 | |
55e303ae | 2066 | |
91447636 A |
2067 | // |
2068 | // This is the api LaunchServices uses to inform the kernel | |
2069 | // the list of package extensions to ignore. | |
2070 | // | |
2071 | // Internally we keep the list sorted by the length of the | |
2072 | // the extension (from longest to shortest). We sort the | |
2073 | // list of extensions so that we can speed up our searches | |
2074 | // when comparing file names -- we only compare extensions | |
2075 | // that could possibly fit into the file name, not all of | |
2076 | // them (i.e. a short 8 character name can't have an 8 | |
2077 | // character extension). | |
2078 | // | |
2079 | __private_extern__ int | |
2080 | set_package_extensions_table(void *data, int nentries, int maxwidth) | |
2081 | { | |
2082 | char *new_exts, *ptr; | |
2083 | int error, i, len; | |
2084 | ||
2085 | if (nentries <= 0 || nentries > 1024 || maxwidth <= 0 || maxwidth > 255) { | |
2086 | return EINVAL; | |
2087 | } | |
2088 | ||
2089 | MALLOC(new_exts, char *, nentries * maxwidth, M_TEMP, M_WAITOK); | |
2090 | ||
2091 | error = copyin(CAST_USER_ADDR_T(data), new_exts, nentries * maxwidth); | |
2092 | if (error) { | |
2093 | FREE(new_exts, M_TEMP); | |
2094 | return error; | |
2095 | } | |
2096 | ||
2097 | if (extension_table) { | |
2098 | FREE(extension_table, M_TEMP); | |
2099 | } | |
2100 | extension_table = new_exts; | |
2101 | nexts = nentries; | |
2102 | max_ext_width = maxwidth; | |
2103 | ||
2104 | qsort(extension_table, nexts, maxwidth, extension_cmp); | |
2105 | ||
2106 | return 0; | |
2107 | } | |
2108 | ||
2109 | ||
2110 | __private_extern__ int | |
2111 | is_package_name(char *name, int len) | |
2112 | { | |
2113 | int i, extlen; | |
2114 | char *ptr, *name_ext; | |
2115 | ||
2116 | if (len <= 3) { | |
2117 | return 0; | |
2118 | } | |
2119 | ||
2120 | name_ext = NULL; | |
2121 | for(ptr=name; *ptr != '\0'; ptr++) { | |
2122 | if (*ptr == '.') { | |
2123 | name_ext = ptr; | |
55e303ae | 2124 | } |
91447636 | 2125 | } |
55e303ae | 2126 | |
91447636 A |
2127 | // if there is no "." extension, it can't match |
2128 | if (name_ext == NULL) { | |
2129 | return 0; | |
2130 | } | |
55e303ae | 2131 | |
91447636 A |
2132 | // advance over the "." |
2133 | name_ext++; | |
55e303ae | 2134 | |
91447636 A |
2135 | // now iterate over all the extensions to see if any match |
2136 | ptr = &extension_table[0]; | |
2137 | for(i=0; i < nexts; i++, ptr+=max_ext_width) { | |
2138 | extlen = strlen(ptr); | |
2139 | if (strncasecmp(name_ext, ptr, extlen) == 0 && name_ext[extlen] == '\0') { | |
2140 | // aha, a match! | |
2141 | return 1; | |
55e303ae A |
2142 | } |
2143 | } | |
2144 | ||
91447636 A |
2145 | // if we get here, no extension matched |
2146 | return 0; | |
55e303ae A |
2147 | } |
2148 | ||
91447636 A |
2149 | int |
2150 | vn_path_package_check(__unused vnode_t vp, char *path, int pathlen, int *component) | |
55e303ae | 2151 | { |
91447636 A |
2152 | char *ptr, *end; |
2153 | int comp=0; | |
2154 | ||
2155 | *component = -1; | |
2156 | if (*path != '/') { | |
2157 | return EINVAL; | |
2158 | } | |
2159 | ||
2160 | end = path + 1; | |
2161 | while(end < path + pathlen && *end != '\0') { | |
2162 | while(end < path + pathlen && *end == '/' && *end != '\0') { | |
2163 | end++; | |
2164 | } | |
2165 | ||
2166 | ptr = end; | |
2167 | ||
2168 | while(end < path + pathlen && *end != '/' && *end != '\0') { | |
2169 | end++; | |
2170 | } | |
2171 | ||
2172 | if (end > path + pathlen) { | |
2173 | // hmm, string wasn't null terminated | |
2174 | return EINVAL; | |
2175 | } | |
2176 | ||
2177 | *end = '\0'; | |
2178 | if (is_package_name(ptr, end - ptr)) { | |
2179 | *component = comp; | |
2180 | break; | |
2181 | } | |
55e303ae | 2182 | |
91447636 A |
2183 | end++; |
2184 | comp++; | |
2185 | } | |
2186 | ||
2187 | return 0; | |
2188 | } | |
55e303ae A |
2189 | |
2190 | ||
1c79356b A |
2191 | /* |
2192 | * Top level filesystem related information gathering. | |
2193 | */ | |
91447636 A |
2194 | extern unsigned int vfs_nummntops; |
2195 | ||
1c79356b | 2196 | int |
91447636 A |
2197 | vfs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp, |
2198 | user_addr_t newp, size_t newlen, struct proc *p) | |
1c79356b | 2199 | { |
91447636 | 2200 | struct vfstable *vfsp; |
55e303ae A |
2201 | int *username; |
2202 | u_int usernamelen; | |
2203 | int error; | |
91447636 | 2204 | struct vfsconf *vfsc; |
1c79356b | 2205 | |
9bccf70c A |
2206 | /* |
2207 | * The VFS_NUMMNTOPS shouldn't be at name[0] since | |
2208 | * is a VFS generic variable. So now we must check | |
2209 | * namelen so we don't end up covering any UFS | |
2210 | * variables (sinc UFS vfc_typenum is 1). | |
2211 | * | |
2212 | * It should have been: | |
2213 | * name[0]: VFS_GENERIC | |
2214 | * name[1]: VFS_NUMMNTOPS | |
2215 | */ | |
2216 | if (namelen == 1 && name[0] == VFS_NUMMNTOPS) { | |
1c79356b A |
2217 | return (sysctl_rdint(oldp, oldlenp, newp, vfs_nummntops)); |
2218 | } | |
2219 | ||
2220 | /* all sysctl names at this level are at least name and field */ | |
2221 | if (namelen < 2) | |
55e303ae | 2222 | return (EISDIR); /* overloaded */ |
1c79356b | 2223 | if (name[0] != VFS_GENERIC) { |
91447636 A |
2224 | struct vfs_context context; |
2225 | ||
1c79356b A |
2226 | for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) |
2227 | if (vfsp->vfc_typenum == name[0]) | |
2228 | break; | |
2229 | if (vfsp == NULL) | |
91447636 A |
2230 | return (ENOTSUP); |
2231 | context.vc_proc = p; | |
2232 | context.vc_ucred = kauth_cred_get(); | |
2233 | ||
1c79356b | 2234 | return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1, |
91447636 | 2235 | oldp, oldlenp, newp, newlen, &context)); |
1c79356b A |
2236 | } |
2237 | switch (name[1]) { | |
2238 | case VFS_MAXTYPENUM: | |
2239 | return (sysctl_rdint(oldp, oldlenp, newp, maxvfsconf)); | |
2240 | case VFS_CONF: | |
2241 | if (namelen < 3) | |
2242 | return (ENOTDIR); /* overloaded */ | |
2243 | for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) | |
2244 | if (vfsp->vfc_typenum == name[2]) | |
2245 | break; | |
2246 | if (vfsp == NULL) | |
91447636 A |
2247 | return (ENOTSUP); |
2248 | vfsc = (struct vfsconf *)vfsp; | |
2249 | if (proc_is64bit(p)) { | |
2250 | struct user_vfsconf usr_vfsc; | |
2251 | usr_vfsc.vfc_vfsops = CAST_USER_ADDR_T(vfsc->vfc_vfsops); | |
2252 | bcopy(vfsc->vfc_name, usr_vfsc.vfc_name, sizeof(usr_vfsc.vfc_name)); | |
2253 | usr_vfsc.vfc_typenum = vfsc->vfc_typenum; | |
2254 | usr_vfsc.vfc_refcount = vfsc->vfc_refcount; | |
2255 | usr_vfsc.vfc_flags = vfsc->vfc_flags; | |
2256 | usr_vfsc.vfc_mountroot = CAST_USER_ADDR_T(vfsc->vfc_mountroot); | |
2257 | usr_vfsc.vfc_next = CAST_USER_ADDR_T(vfsc->vfc_next); | |
2258 | return (sysctl_rdstruct(oldp, oldlenp, newp, &usr_vfsc, | |
2259 | sizeof(usr_vfsc))); | |
2260 | } | |
2261 | else { | |
2262 | return (sysctl_rdstruct(oldp, oldlenp, newp, vfsc, | |
2263 | sizeof(struct vfsconf))); | |
2264 | } | |
2265 | ||
2266 | case VFS_SET_PACKAGE_EXTS: | |
2267 | return set_package_extensions_table((void *)name[1], name[2], name[3]); | |
1c79356b | 2268 | } |
55e303ae A |
2269 | /* |
2270 | * We need to get back into the general MIB, so we need to re-prepend | |
2271 | * CTL_VFS to our name and try userland_sysctl(). | |
2272 | */ | |
2273 | usernamelen = namelen + 1; | |
2274 | MALLOC(username, int *, usernamelen * sizeof(*username), | |
2275 | M_TEMP, M_WAITOK); | |
2276 | bcopy(name, username + 1, namelen * sizeof(*name)); | |
2277 | username[0] = CTL_VFS; | |
91447636 A |
2278 | error = userland_sysctl(p, username, usernamelen, oldp, |
2279 | oldlenp, 1, newp, newlen, oldlenp); | |
55e303ae A |
2280 | FREE(username, M_TEMP); |
2281 | return (error); | |
1c79356b A |
2282 | } |
2283 | ||
2284 | int kinfo_vdebug = 1; | |
2285 | #define KINFO_VNODESLOP 10 | |
2286 | /* | |
2287 | * Dump vnode list (via sysctl). | |
2288 | * Copyout address of vnode followed by vnode. | |
2289 | */ | |
2290 | /* ARGSUSED */ | |
2291 | int | |
91447636 | 2292 | sysctl_vnode(__unused user_addr_t where, __unused size_t *sizep) |
1c79356b | 2293 | { |
91447636 | 2294 | #if 0 |
1c79356b A |
2295 | struct mount *mp, *nmp; |
2296 | struct vnode *nvp, *vp; | |
2297 | char *bp = where, *savebp; | |
2298 | char *ewhere; | |
2299 | int error; | |
2300 | ||
2301 | #define VPTRSZ sizeof (struct vnode *) | |
2302 | #define VNODESZ sizeof (struct vnode) | |
2303 | if (where == NULL) { | |
2304 | *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ); | |
2305 | return (0); | |
2306 | } | |
2307 | ewhere = where + *sizep; | |
2308 | ||
1c79356b | 2309 | for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { |
91447636 | 2310 | if (vfs_busy(mp, LK_NOWAIT)) { |
1c79356b A |
2311 | nmp = mp->mnt_list.cqe_next; |
2312 | continue; | |
2313 | } | |
2314 | savebp = bp; | |
2315 | again: | |
91447636 | 2316 | TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) { |
1c79356b A |
2317 | /* |
2318 | * Check that the vp is still associated with | |
2319 | * this filesystem. RACE: could have been | |
2320 | * recycled onto the same filesystem. | |
2321 | */ | |
2322 | if (vp->v_mount != mp) { | |
1c79356b A |
2323 | if (kinfo_vdebug) |
2324 | printf("kinfo: vp changed\n"); | |
2325 | bp = savebp; | |
2326 | goto again; | |
2327 | } | |
1c79356b | 2328 | if (bp + VPTRSZ + VNODESZ > ewhere) { |
91447636 | 2329 | vfs_unbusy(mp); |
1c79356b A |
2330 | *sizep = bp - where; |
2331 | return (ENOMEM); | |
2332 | } | |
1c79356b | 2333 | if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) || |
55e303ae | 2334 | (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ))) { |
91447636 | 2335 | vfs_unbusy(mp); |
1c79356b | 2336 | return (error); |
55e303ae | 2337 | } |
1c79356b | 2338 | bp += VPTRSZ + VNODESZ; |
1c79356b | 2339 | } |
1c79356b | 2340 | nmp = mp->mnt_list.cqe_next; |
91447636 | 2341 | vfs_unbusy(mp); |
1c79356b | 2342 | } |
1c79356b A |
2343 | |
2344 | *sizep = bp - where; | |
2345 | return (0); | |
91447636 A |
2346 | #else |
2347 | return(EINVAL); | |
2348 | #endif | |
1c79356b A |
2349 | } |
2350 | ||
2351 | /* | |
2352 | * Check to see if a filesystem is mounted on a block device. | |
2353 | */ | |
2354 | int | |
2355 | vfs_mountedon(vp) | |
2356 | struct vnode *vp; | |
2357 | { | |
2358 | struct vnode *vq; | |
2359 | int error = 0; | |
2360 | ||
91447636 A |
2361 | SPECHASH_LOCK(); |
2362 | if (vp->v_specflags & SI_MOUNTEDON) { | |
2363 | error = EBUSY; | |
2364 | goto out; | |
2365 | } | |
1c79356b | 2366 | if (vp->v_flag & VALIASED) { |
1c79356b A |
2367 | for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { |
2368 | if (vq->v_rdev != vp->v_rdev || | |
2369 | vq->v_type != vp->v_type) | |
2370 | continue; | |
2371 | if (vq->v_specflags & SI_MOUNTEDON) { | |
2372 | error = EBUSY; | |
2373 | break; | |
2374 | } | |
2375 | } | |
1c79356b | 2376 | } |
91447636 A |
2377 | out: |
2378 | SPECHASH_UNLOCK(); | |
1c79356b A |
2379 | return (error); |
2380 | } | |
2381 | ||
2382 | /* | |
2383 | * Unmount all filesystems. The list is traversed in reverse order | |
2384 | * of mounting to avoid dependencies. | |
2385 | */ | |
0b4e3aa0 | 2386 | __private_extern__ void |
1c79356b A |
2387 | vfs_unmountall() |
2388 | { | |
91447636 | 2389 | struct mount *mp; |
0b4e3aa0 | 2390 | struct proc *p = current_proc(); |
91447636 | 2391 | int error; |
1c79356b A |
2392 | |
2393 | /* | |
2394 | * Since this only runs when rebooting, it is not interlocked. | |
2395 | */ | |
91447636 A |
2396 | mount_list_lock(); |
2397 | while(!TAILQ_EMPTY(&mountlist)) { | |
2398 | mp = TAILQ_LAST(&mountlist, mntlist); | |
2399 | mount_list_unlock(); | |
2400 | error = dounmount(mp, MNT_FORCE, p); | |
2401 | if (error) { | |
2402 | mount_list_lock(); | |
2403 | TAILQ_REMOVE(&mountlist, mp, mnt_list); | |
2404 | printf("unmount of %s failed (", mp->mnt_vfsstat.f_mntonname); | |
2405 | if (error == EBUSY) | |
2406 | printf("BUSY)\n"); | |
2407 | else | |
2408 | printf("%d)\n", error); | |
2409 | continue; | |
1c79356b | 2410 | } |
91447636 | 2411 | mount_list_lock(); |
1c79356b | 2412 | } |
91447636 | 2413 | mount_list_unlock(); |
1c79356b A |
2414 | } |
2415 | ||
1c79356b | 2416 | |
91447636 A |
2417 | /* |
2418 | * This routine is called from vnode_pager_no_senders() | |
2419 | * which in turn can be called with vnode locked by vnode_uncache() | |
2420 | * But it could also get called as a result of vm_object_cache_trim(). | |
2421 | * In that case lock state is unknown. | |
2422 | * AGE the vnode so that it gets recycled quickly. | |
1c79356b | 2423 | */ |
91447636 A |
2424 | __private_extern__ void |
2425 | vnode_pager_vrele(struct vnode *vp) | |
1c79356b | 2426 | { |
91447636 | 2427 | vnode_lock(vp); |
1c79356b | 2428 | |
91447636 A |
2429 | if (!ISSET(vp->v_lflag, VL_TERMINATE)) |
2430 | panic("vnode_pager_vrele: vp not in termination"); | |
2431 | vp->v_lflag &= ~VNAMED_UBC; | |
1c79356b | 2432 | |
91447636 A |
2433 | if (UBCINFOEXISTS(vp)) { |
2434 | struct ubc_info *uip = vp->v_ubcinfo; | |
1c79356b | 2435 | |
91447636 A |
2436 | if (ISSET(uip->ui_flags, UI_WASMAPPED)) |
2437 | SET(vp->v_flag, VWASMAPPED); | |
2438 | vp->v_ubcinfo = UBC_INFO_NULL; | |
2439 | ||
2440 | ubc_info_deallocate(uip); | |
2441 | } else { | |
2442 | panic("NO ubcinfo in vnode_pager_vrele"); | |
1c79356b | 2443 | } |
91447636 A |
2444 | vnode_unlock(vp); |
2445 | ||
2446 | wakeup(&vp->v_lflag); | |
1c79356b A |
2447 | } |
2448 | ||
91447636 A |
2449 | |
2450 | #include <sys/disk.h> | |
2451 | ||
2452 | errno_t | |
2453 | vfs_init_io_attributes(vnode_t devvp, mount_t mp) | |
1c79356b | 2454 | { |
91447636 A |
2455 | int error; |
2456 | off_t readblockcnt; | |
2457 | off_t writeblockcnt; | |
2458 | off_t readmaxcnt; | |
2459 | off_t writemaxcnt; | |
2460 | off_t readsegcnt; | |
2461 | off_t writesegcnt; | |
2462 | off_t readsegsize; | |
2463 | off_t writesegsize; | |
2464 | u_long blksize; | |
2465 | u_int64_t temp; | |
2466 | struct vfs_context context; | |
1c79356b | 2467 | |
91447636 | 2468 | proc_t p = current_proc(); |
0b4e3aa0 | 2469 | |
91447636 A |
2470 | context.vc_proc = p; |
2471 | context.vc_ucred = kauth_cred_get(); | |
0b4e3aa0 | 2472 | |
55e303ae A |
2473 | int isvirtual = 0; |
2474 | /* | |
2475 | * determine if this mount point exists on the same device as the root | |
2476 | * partition... if so, then it comes under the hard throttle control | |
2477 | */ | |
2478 | int thisunit = -1; | |
2479 | static int rootunit = -1; | |
55e303ae A |
2480 | |
2481 | if (rootunit == -1) { | |
91447636 | 2482 | if (VNOP_IOCTL(rootvp, DKIOCGETBSDUNIT, (caddr_t)&rootunit, 0, &context)) |
55e303ae A |
2483 | rootunit = -1; |
2484 | else if (rootvp == devvp) | |
2485 | mp->mnt_kern_flag |= MNTK_ROOTDEV; | |
2486 | } | |
2487 | if (devvp != rootvp && rootunit != -1) { | |
91447636 | 2488 | if (VNOP_IOCTL(devvp, DKIOCGETBSDUNIT, (caddr_t)&thisunit, 0, &context) == 0) { |
55e303ae A |
2489 | if (thisunit == rootunit) |
2490 | mp->mnt_kern_flag |= MNTK_ROOTDEV; | |
2491 | } | |
2492 | } | |
91447636 A |
2493 | /* |
2494 | * force the spec device to re-cache | |
2495 | * the underlying block size in case | |
2496 | * the filesystem overrode the initial value | |
2497 | */ | |
2498 | set_fsblocksize(devvp); | |
2499 | ||
2500 | ||
2501 | if ((error = VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, | |
2502 | (caddr_t)&blksize, 0, &context))) | |
2503 | return (error); | |
2504 | ||
2505 | mp->mnt_devblocksize = blksize; | |
2506 | ||
2507 | if (VNOP_IOCTL(devvp, DKIOCISVIRTUAL, (caddr_t)&isvirtual, 0, &context) == 0) { | |
55e303ae A |
2508 | if (isvirtual) |
2509 | mp->mnt_kern_flag |= MNTK_VIRTUALDEV; | |
2510 | } | |
2511 | ||
91447636 A |
2512 | if ((error = VNOP_IOCTL(devvp, DKIOCGETMAXBLOCKCOUNTREAD, |
2513 | (caddr_t)&readblockcnt, 0, &context))) | |
0b4e3aa0 A |
2514 | return (error); |
2515 | ||
91447636 A |
2516 | if ((error = VNOP_IOCTL(devvp, DKIOCGETMAXBLOCKCOUNTWRITE, |
2517 | (caddr_t)&writeblockcnt, 0, &context))) | |
55e303ae A |
2518 | return (error); |
2519 | ||
91447636 A |
2520 | if ((error = VNOP_IOCTL(devvp, DKIOCGETMAXBYTECOUNTREAD, |
2521 | (caddr_t)&readmaxcnt, 0, &context))) | |
55e303ae A |
2522 | return (error); |
2523 | ||
91447636 A |
2524 | if ((error = VNOP_IOCTL(devvp, DKIOCGETMAXBYTECOUNTWRITE, |
2525 | (caddr_t)&writemaxcnt, 0, &context))) | |
0b4e3aa0 A |
2526 | return (error); |
2527 | ||
91447636 A |
2528 | if ((error = VNOP_IOCTL(devvp, DKIOCGETMAXSEGMENTCOUNTREAD, |
2529 | (caddr_t)&readsegcnt, 0, &context))) | |
0b4e3aa0 A |
2530 | return (error); |
2531 | ||
91447636 A |
2532 | if ((error = VNOP_IOCTL(devvp, DKIOCGETMAXSEGMENTCOUNTWRITE, |
2533 | (caddr_t)&writesegcnt, 0, &context))) | |
55e303ae A |
2534 | return (error); |
2535 | ||
91447636 A |
2536 | if ((error = VNOP_IOCTL(devvp, DKIOCGETMAXSEGMENTBYTECOUNTREAD, |
2537 | (caddr_t)&readsegsize, 0, &context))) | |
55e303ae A |
2538 | return (error); |
2539 | ||
91447636 A |
2540 | if ((error = VNOP_IOCTL(devvp, DKIOCGETMAXSEGMENTBYTECOUNTWRITE, |
2541 | (caddr_t)&writesegsize, 0, &context))) | |
0b4e3aa0 A |
2542 | return (error); |
2543 | ||
55e303ae A |
2544 | if (readmaxcnt) |
2545 | temp = (readmaxcnt > UINT32_MAX) ? UINT32_MAX : readmaxcnt; | |
2546 | else { | |
2547 | if (readblockcnt) { | |
2548 | temp = readblockcnt * blksize; | |
2549 | temp = (temp > UINT32_MAX) ? UINT32_MAX : temp; | |
2550 | } else | |
2551 | temp = MAXPHYS; | |
2552 | } | |
91447636 | 2553 | mp->mnt_maxreadcnt = (u_int32_t)temp; |
55e303ae A |
2554 | |
2555 | if (writemaxcnt) | |
2556 | temp = (writemaxcnt > UINT32_MAX) ? UINT32_MAX : writemaxcnt; | |
2557 | else { | |
2558 | if (writeblockcnt) { | |
2559 | temp = writeblockcnt * blksize; | |
2560 | temp = (temp > UINT32_MAX) ? UINT32_MAX : temp; | |
2561 | } else | |
2562 | temp = MAXPHYS; | |
2563 | } | |
0b4e3aa0 A |
2564 | mp->mnt_maxwritecnt = (u_int32_t)temp; |
2565 | ||
55e303ae A |
2566 | if (readsegcnt) { |
2567 | temp = (readsegcnt > UINT16_MAX) ? UINT16_MAX : readsegcnt; | |
2568 | mp->mnt_segreadcnt = (u_int16_t)temp; | |
2569 | } | |
2570 | if (writesegcnt) { | |
2571 | temp = (writesegcnt > UINT16_MAX) ? UINT16_MAX : writesegcnt; | |
2572 | mp->mnt_segwritecnt = (u_int16_t)temp; | |
2573 | } | |
2574 | if (readsegsize) | |
2575 | temp = (readsegsize > UINT32_MAX) ? UINT32_MAX : readsegsize; | |
2576 | else | |
2577 | temp = mp->mnt_maxreadcnt; | |
91447636 | 2578 | mp->mnt_maxsegreadsize = (u_int32_t)temp; |
0b4e3aa0 | 2579 | |
55e303ae A |
2580 | if (writesegsize) |
2581 | temp = (writesegsize > UINT32_MAX) ? UINT32_MAX : writesegsize; | |
2582 | else | |
2583 | temp = mp->mnt_maxwritecnt; | |
91447636 | 2584 | mp->mnt_maxsegwritesize = (u_int32_t)temp; |
0b4e3aa0 | 2585 | |
55e303ae A |
2586 | return (error); |
2587 | } | |
2588 | ||
2589 | static struct klist fs_klist; | |
2590 | ||
2591 | void | |
2592 | vfs_event_init(void) | |
2593 | { | |
2594 | ||
2595 | klist_init(&fs_klist); | |
2596 | } | |
2597 | ||
2598 | void | |
91447636 | 2599 | vfs_event_signal(__unused fsid_t *fsid, u_int32_t event, __unused intptr_t data) |
55e303ae A |
2600 | { |
2601 | ||
2602 | KNOTE(&fs_klist, event); | |
2603 | } | |
2604 | ||
/*
 * Return the number of mounted filesystems, as reported by
 * mount_getvfscnt().
 */
static int
sysctl_vfs_getvfscnt(void)
{
	int nmounts = mount_getvfscnt();

	return (nmounts);
}
2613 | ||
0b4e3aa0 | 2614 | |
91447636 A |
2615 | static int |
2616 | mount_getvfscnt(void) | |
2617 | { | |
2618 | int ret; | |
2619 | ||
2620 | mount_list_lock(); | |
2621 | ret = nummounts; | |
2622 | mount_list_unlock(); | |
55e303ae | 2623 | return (ret); |
91447636 A |
2624 | |
2625 | } | |
2626 | ||
2627 | ||
2628 | ||
2629 | static int | |
2630 | mount_fillfsids(fsid_t *fsidlst, int count) | |
2631 | { | |
2632 | struct mount *mp; | |
2633 | int actual=0; | |
2634 | ||
2635 | actual = 0; | |
2636 | mount_list_lock(); | |
2637 | TAILQ_FOREACH(mp, &mountlist, mnt_list) { | |
2638 | if (actual <= count) { | |
2639 | fsidlst[actual] = mp->mnt_vfsstat.f_fsid; | |
2640 | actual++; | |
2641 | } | |
2642 | } | |
2643 | mount_list_unlock(); | |
2644 | return (actual); | |
2645 | ||
55e303ae A |
2646 | } |
2647 | ||
2648 | /* | |
2649 | * fill in the array of fsid_t's up to a max of 'count', the actual | |
2650 | * number filled in will be set in '*actual'. If there are more fsid_t's | |
2651 | * than room in fsidlst then ENOMEM will be returned and '*actual' will | |
2652 | * have the actual count. | |
2653 | * having *actual filled out even in the error case is depended upon. | |
2654 | */ | |
2655 | static int | |
2656 | sysctl_vfs_getvfslist(fsid_t *fsidlst, int count, int *actual) | |
2657 | { | |
2658 | struct mount *mp; | |
2659 | ||
2660 | *actual = 0; | |
91447636 A |
2661 | mount_list_lock(); |
2662 | TAILQ_FOREACH(mp, &mountlist, mnt_list) { | |
55e303ae A |
2663 | (*actual)++; |
2664 | if (*actual <= count) | |
91447636 | 2665 | fsidlst[(*actual) - 1] = mp->mnt_vfsstat.f_fsid; |
55e303ae | 2666 | } |
91447636 | 2667 | mount_list_unlock(); |
55e303ae A |
2668 | return (*actual <= count ? 0 : ENOMEM); |
2669 | } | |
2670 | ||
2671 | static int | |
2672 | sysctl_vfs_vfslist SYSCTL_HANDLER_ARGS | |
2673 | { | |
2674 | int actual, error; | |
2675 | size_t space; | |
2676 | fsid_t *fsidlst; | |
2677 | ||
2678 | /* This is a readonly node. */ | |
91447636 | 2679 | if (req->newptr != USER_ADDR_NULL) |
55e303ae A |
2680 | return (EPERM); |
2681 | ||
2682 | /* they are querying us so just return the space required. */ | |
91447636 | 2683 | if (req->oldptr == USER_ADDR_NULL) { |
55e303ae A |
2684 | req->oldidx = sysctl_vfs_getvfscnt() * sizeof(fsid_t); |
2685 | return 0; | |
2686 | } | |
2687 | again: | |
2688 | /* | |
2689 | * Retrieve an accurate count of the amount of space required to copy | |
2690 | * out all the fsids in the system. | |
2691 | */ | |
2692 | space = req->oldlen; | |
2693 | req->oldlen = sysctl_vfs_getvfscnt() * sizeof(fsid_t); | |
2694 | ||
2695 | /* they didn't give us enough space. */ | |
2696 | if (space < req->oldlen) | |
2697 | return (ENOMEM); | |
2698 | ||
2699 | MALLOC(fsidlst, fsid_t *, req->oldlen, M_TEMP, M_WAITOK); | |
2700 | error = sysctl_vfs_getvfslist(fsidlst, req->oldlen / sizeof(fsid_t), | |
2701 | &actual); | |
2702 | /* | |
2703 | * If we get back ENOMEM, then another mount has been added while we | |
2704 | * slept in malloc above. If this is the case then try again. | |
2705 | */ | |
2706 | if (error == ENOMEM) { | |
2707 | FREE(fsidlst, M_TEMP); | |
2708 | req->oldlen = space; | |
2709 | goto again; | |
2710 | } | |
2711 | if (error == 0) { | |
2712 | error = SYSCTL_OUT(req, fsidlst, actual * sizeof(fsid_t)); | |
2713 | } | |
2714 | FREE(fsidlst, M_TEMP); | |
2715 | return (error); | |
2716 | } | |
2717 | ||
2718 | /* | |
2719 | * Do a sysctl by fsid. | |
2720 | */ | |
2721 | static int | |
2722 | sysctl_vfs_ctlbyfsid SYSCTL_HANDLER_ARGS | |
2723 | { | |
2724 | struct vfsidctl vc; | |
91447636 | 2725 | struct user_vfsidctl user_vc; |
55e303ae | 2726 | struct mount *mp; |
91447636 | 2727 | struct vfsstatfs *sp; |
55e303ae A |
2728 | struct proc *p; |
2729 | int *name; | |
2730 | int error, flags, namelen; | |
91447636 A |
2731 | struct vfs_context context; |
2732 | boolean_t is_64_bit; | |
55e303ae A |
2733 | |
2734 | name = arg1; | |
2735 | namelen = arg2; | |
2736 | p = req->p; | |
91447636 A |
2737 | context.vc_proc = p; |
2738 | context.vc_ucred = kauth_cred_get(); | |
2739 | is_64_bit = proc_is64bit(p); | |
55e303ae | 2740 | |
91447636 A |
2741 | if (is_64_bit) { |
2742 | error = SYSCTL_IN(req, &user_vc, sizeof(user_vc)); | |
2743 | if (error) | |
2744 | return (error); | |
2745 | if (user_vc.vc_vers != VFS_CTL_VERS1) | |
2746 | return (EINVAL); | |
2747 | mp = mount_list_lookupby_fsid(&user_vc.vc_fsid, 0, 0); | |
2748 | } | |
2749 | else { | |
2750 | error = SYSCTL_IN(req, &vc, sizeof(vc)); | |
2751 | if (error) | |
2752 | return (error); | |
2753 | if (vc.vc_vers != VFS_CTL_VERS1) | |
2754 | return (EINVAL); | |
2755 | mp = mount_list_lookupby_fsid(&vc.vc_fsid, 0, 0); | |
2756 | } | |
55e303ae A |
2757 | if (mp == NULL) |
2758 | return (ENOENT); | |
2759 | /* reset so that the fs specific code can fetch it. */ | |
2760 | req->newidx = 0; | |
2761 | /* | |
2762 | * Note if this is a VFS_CTL then we pass the actual sysctl req | |
2763 | * in for "oldp" so that the lower layer can DTRT and use the | |
2764 | * SYSCTL_IN/OUT routines. | |
2765 | */ | |
2766 | if (mp->mnt_op->vfs_sysctl != NULL) { | |
91447636 A |
2767 | if (is_64_bit) { |
2768 | if (vfs_64bitready(mp)) { | |
2769 | error = mp->mnt_op->vfs_sysctl(name, namelen, | |
2770 | CAST_USER_ADDR_T(req), | |
2771 | NULL, USER_ADDR_NULL, 0, | |
2772 | &context); | |
2773 | } | |
2774 | else { | |
2775 | error = ENOTSUP; | |
2776 | } | |
2777 | } | |
2778 | else { | |
2779 | error = mp->mnt_op->vfs_sysctl(name, namelen, | |
2780 | CAST_USER_ADDR_T(req), | |
2781 | NULL, USER_ADDR_NULL, 0, | |
2782 | &context); | |
2783 | } | |
2784 | if (error != ENOTSUP) | |
55e303ae A |
2785 | return (error); |
2786 | } | |
2787 | switch (name[0]) { | |
2788 | case VFS_CTL_UMOUNT: | |
91447636 A |
2789 | req->newidx = 0; |
2790 | if (is_64_bit) { | |
2791 | req->newptr = user_vc.vc_ptr; | |
2792 | req->newlen = (size_t)user_vc.vc_len; | |
2793 | } | |
2794 | else { | |
2795 | req->newptr = CAST_USER_ADDR_T(vc.vc_ptr); | |
2796 | req->newlen = vc.vc_len; | |
2797 | } | |
55e303ae A |
2798 | error = SYSCTL_IN(req, &flags, sizeof(flags)); |
2799 | if (error) | |
2800 | break; | |
2801 | error = safedounmount(mp, flags, p); | |
2802 | break; | |
2803 | case VFS_CTL_STATFS: | |
91447636 A |
2804 | req->newidx = 0; |
2805 | if (is_64_bit) { | |
2806 | req->newptr = user_vc.vc_ptr; | |
2807 | req->newlen = (size_t)user_vc.vc_len; | |
2808 | } | |
2809 | else { | |
2810 | req->newptr = CAST_USER_ADDR_T(vc.vc_ptr); | |
2811 | req->newlen = vc.vc_len; | |
2812 | } | |
55e303ae A |
2813 | error = SYSCTL_IN(req, &flags, sizeof(flags)); |
2814 | if (error) | |
2815 | break; | |
91447636 | 2816 | sp = &mp->mnt_vfsstat; |
55e303ae | 2817 | if (((flags & MNT_NOWAIT) == 0 || (flags & MNT_WAIT)) && |
91447636 | 2818 | (error = vfs_update_vfsstat(mp, &context))) |
55e303ae | 2819 | return (error); |
91447636 A |
2820 | if (is_64_bit) { |
2821 | struct user_statfs sfs; | |
2822 | bzero(&sfs, sizeof(sfs)); | |
2823 | sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK; | |
2824 | sfs.f_type = mp->mnt_vtable->vfc_typenum; | |
2825 | sfs.f_bsize = (user_long_t)sp->f_bsize; | |
2826 | sfs.f_iosize = (user_long_t)sp->f_iosize; | |
2827 | sfs.f_blocks = (user_long_t)sp->f_blocks; | |
2828 | sfs.f_bfree = (user_long_t)sp->f_bfree; | |
2829 | sfs.f_bavail = (user_long_t)sp->f_bavail; | |
2830 | sfs.f_files = (user_long_t)sp->f_files; | |
2831 | sfs.f_ffree = (user_long_t)sp->f_ffree; | |
2832 | sfs.f_fsid = sp->f_fsid; | |
2833 | sfs.f_owner = sp->f_owner; | |
2834 | ||
2835 | strncpy(&sfs.f_fstypename, &sp->f_fstypename, MFSNAMELEN-1); | |
2836 | strncpy(&sfs.f_mntonname, &sp->f_mntonname, MNAMELEN-1); | |
2837 | strncpy(&sfs.f_mntfromname, &sp->f_mntfromname, MNAMELEN-1); | |
2838 | ||
2839 | error = SYSCTL_OUT(req, &sfs, sizeof(sfs)); | |
2840 | } | |
2841 | else { | |
2842 | struct statfs sfs; | |
2843 | bzero(&sfs, sizeof(struct statfs)); | |
2844 | sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK; | |
2845 | sfs.f_type = mp->mnt_vtable->vfc_typenum; | |
2846 | ||
2847 | /* | |
2848 | * It's possible for there to be more than 2^^31 blocks in the filesystem, so we | |
2849 | * have to fudge the numbers here in that case. We inflate the blocksize in order | |
2850 | * to reflect the filesystem size as best we can. | |
2851 | */ | |
2852 | if (sp->f_blocks > LONG_MAX) { | |
2853 | int shift; | |
2854 | ||
2855 | /* | |
2856 | * Work out how far we have to shift the block count down to make it fit. | |
2857 | * Note that it's possible to have to shift so far that the resulting | |
2858 | * blocksize would be unreportably large. At that point, we will clip | |
2859 | * any values that don't fit. | |
2860 | * | |
2861 | * For safety's sake, we also ensure that f_iosize is never reported as | |
2862 | * being smaller than f_bsize. | |
2863 | */ | |
2864 | for (shift = 0; shift < 32; shift++) { | |
2865 | if ((sp->f_blocks >> shift) <= LONG_MAX) | |
2866 | break; | |
2867 | if ((sp->f_bsize << (shift + 1)) > LONG_MAX) | |
2868 | break; | |
2869 | } | |
2870 | #define __SHIFT_OR_CLIP(x, s) ((((x) >> (s)) > LONG_MAX) ? LONG_MAX : ((x) >> (s))) | |
2871 | sfs.f_blocks = (long)__SHIFT_OR_CLIP(sp->f_blocks, shift); | |
2872 | sfs.f_bfree = (long)__SHIFT_OR_CLIP(sp->f_bfree, shift); | |
2873 | sfs.f_bavail = (long)__SHIFT_OR_CLIP(sp->f_bavail, shift); | |
2874 | #undef __SHIFT_OR_CLIP | |
2875 | sfs.f_bsize = (long)(sp->f_bsize << shift); | |
2876 | sfs.f_iosize = lmax(sp->f_iosize, sp->f_bsize); | |
2877 | } else { | |
2878 | sfs.f_bsize = (long)sp->f_bsize; | |
2879 | sfs.f_iosize = (long)sp->f_iosize; | |
2880 | sfs.f_blocks = (long)sp->f_blocks; | |
2881 | sfs.f_bfree = (long)sp->f_bfree; | |
2882 | sfs.f_bavail = (long)sp->f_bavail; | |
2883 | } | |
2884 | sfs.f_files = (long)sp->f_files; | |
2885 | sfs.f_ffree = (long)sp->f_ffree; | |
2886 | sfs.f_fsid = sp->f_fsid; | |
2887 | sfs.f_owner = sp->f_owner; | |
2888 | ||
2889 | strncpy(&sfs.f_fstypename, &sp->f_fstypename, MFSNAMELEN-1); | |
2890 | strncpy(&sfs.f_mntonname, &sp->f_mntonname, MNAMELEN-1); | |
2891 | strncpy(&sfs.f_mntfromname, &sp->f_mntfromname, MNAMELEN-1); | |
2892 | ||
2893 | error = SYSCTL_OUT(req, &sfs, sizeof(sfs)); | |
2894 | } | |
55e303ae A |
2895 | break; |
2896 | default: | |
91447636 | 2897 | return (ENOTSUP); |
55e303ae | 2898 | } |
0b4e3aa0 A |
2899 | return (error); |
2900 | } | |
2901 | ||
55e303ae A |
/*
 * Forward declarations of the three knote filter handlers defined below,
 * followed by the filter-operations table that publishes them.
 */
2902 | static int filt_fsattach(struct knote *kn); | |
2903 | static void filt_fsdetach(struct knote *kn); | |
2904 | static int filt_fsevent(struct knote *kn, long hint); | |
2905 | ||
/* Initializer order: a leading 0, then the attach, detach and event handlers. */
2906 | struct filterops fs_filtops = | |
2907 | { 0, filt_fsattach, filt_fsdetach, filt_fsevent }; | |
2908 | ||
2909 | static int | |
2910 | filt_fsattach(struct knote *kn) | |
2911 | { | |
2912 | ||
2913 | kn->kn_flags |= EV_CLEAR; | |
2914 | KNOTE_ATTACH(&fs_klist, kn); | |
2915 | return (0); | |
2916 | } | |
2917 | ||
2918 | static void | |
2919 | filt_fsdetach(struct knote *kn) | |
2920 | { | |
2921 | ||
2922 | KNOTE_DETACH(&fs_klist, kn); | |
2923 | } | |
2924 | ||
2925 | static int | |
2926 | filt_fsevent(struct knote *kn, long hint) | |
2927 | { | |
2928 | ||
2929 | kn->kn_fflags |= hint; | |
2930 | return (kn->kn_fflags != 0); | |
2931 | } | |
2932 | ||
2933 | static int | |
2934 | sysctl_vfs_noremotehang SYSCTL_HANDLER_ARGS | |
2935 | { | |
2936 | int out, error; | |
2937 | pid_t pid; | |
2938 | size_t space; | |
2939 | struct proc *p; | |
2940 | ||
2941 | /* We need a pid. */ | |
91447636 | 2942 | if (req->newptr == USER_ADDR_NULL) |
55e303ae A |
2943 | return (EINVAL); |
2944 | ||
2945 | error = SYSCTL_IN(req, &pid, sizeof(pid)); | |
2946 | if (error) | |
2947 | return (error); | |
2948 | ||
2949 | p = pfind(pid < 0 ? -pid : pid); | |
2950 | if (p == NULL) | |
2951 | return (ESRCH); | |
2952 | ||
2953 | /* | |
2954 | * Fetching the value is ok, but we only fetch if the old | |
2955 | * pointer is given. | |
2956 | */ | |
91447636 | 2957 | if (req->oldptr != USER_ADDR_NULL) { |
55e303ae A |
2958 | out = !((p->p_flag & P_NOREMOTEHANG) == 0); |
2959 | error = SYSCTL_OUT(req, &out, sizeof(out)); | |
2960 | return (error); | |
2961 | } | |
2962 | ||
91447636 | 2963 | /* XXX req->p->p_ucred -> kauth_cred_get() ??? */ |
55e303ae A |
2964 | /* cansignal offers us enough security. */ |
2965 | if (p != req->p && suser(req->p->p_ucred, &req->p->p_acflag) != 0) | |
2966 | return (EPERM); | |
2967 | ||
2968 | if (pid < 0) | |
2969 | p->p_flag &= ~P_NOREMOTEHANG; | |
2970 | else | |
2971 | p->p_flag |= P_NOREMOTEHANG; | |
2972 | ||
2973 | return (0); | |
2974 | } | |
2975 | /* the vfs.generic. branch. */ | |
2976 | SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RW, 0, "vfs generic hinge"); | |
2977 | /* retrieve a list of mounted filesystem fsid_t */ | |
2978 | SYSCTL_PROC(_vfs_generic, OID_AUTO, vfsidlist, CTLFLAG_RD, | |
2979 | 0, 0, sysctl_vfs_vfslist, "S,fsid", "List of mounted filesystem ids"); | |
2980 | /* perform operations on filesystem via fsid_t */ | |
2981 | SYSCTL_NODE(_vfs_generic, OID_AUTO, ctlbyfsid, CTLFLAG_RW, | |
2982 | sysctl_vfs_ctlbyfsid, "ctlbyfsid"); | |
/* query or change a process's P_NOREMOTEHANG flag, selected by pid */
2983 | SYSCTL_PROC(_vfs_generic, OID_AUTO, noremotehang, CTLFLAG_RW, | |
2984 | 0, 0, sysctl_vfs_noremotehang, "I", "noremotehang"); | |
91447636 A |
2985 | |
2986 | ||
/* Number of vnodes new_vnode() has taken off the free list for reuse; incremented with OSAddAtomic. */
2987 | int num_reusedvnodes=0; | |
55e303ae | 2988 | |
91447636 A |
/*
 * Produce a vnode ready to be given a new identity and store it in *vpp.
 *
 * Order of preference, evaluated under the vnode list lock:
 *   1. a VL_DEAD vnode at the head of vnode_free_list is taken immediately;
 *   2. while numvnodes is below desiredvnodes, a fresh vnode is allocated,
 *      zeroed and initialized;
 *   3. otherwise the vnode at the head of the free list is reclaimed and reset.
 * If the free list is empty and the limit has been reached, the function
 * sleeps and retries; once more than 100 such attempts have failed it logs
 * the table-full condition, stores 0 in *vpp and returns ENFILE.
 *
 * Returns 0 with *vpp set on success.
 */
2989 | static int |
2990 | new_vnode(vnode_t *vpp) | |
2991 | { | |
2992 | vnode_t vp; | |
2993 | int retries = 0; /* retry in case of tablefull */ | |
2994 | int vpid; | |
2995 | struct timespec ts; | |
2996 | ||
2997 | retry: | |
2998 | vnode_list_lock(); | |
2999 | ||
3000 | if ( !TAILQ_EMPTY(&vnode_free_list)) { | |
3001 | /* | |
3002 | * Pick the first vp for possible reuse | |
3003 | */ | |
3004 | vp = TAILQ_FIRST(&vnode_free_list); | |
3005 | ||
3006 | if (vp->v_lflag & VL_DEAD) | |
3007 | goto steal_this_vp; | |
3008 | } else | |
3009 | vp = NULL; | |
3010 | ||
3011 | /* | |
3012 | * we're either empty, or the next guy on the | |
3013 | * list is a valid vnode... if we're under the | |
3014 | * limit, we'll create a new vnode | |
3015 | */ | |
3016 | if (numvnodes < desiredvnodes) { | |
3017 | numvnodes++; | |
3018 | vnode_list_unlock(); | |
3019 | MALLOC_ZONE(vp, struct vnode *, sizeof *vp, M_VNODE, M_WAITOK); | |
3020 | bzero((char *)vp, sizeof *vp); | |
3021 | VLISTNONE(vp); /* avoid double queue removal */ | |
3022 | lck_mtx_init(&vp->v_lock, vnode_lck_grp, vnode_lck_attr); | |
3023 | ||
/* seed the vnode id from the nanosecond field of the current uptime */
3024 | nanouptime(&ts); | |
3025 | vp->v_id = ts.tv_nsec; | |
3026 | vp->v_flag = VSTANDARD; | |
3027 | ||
3028 | goto done; | |
3029 | } | |
3030 | if (vp == NULL) { | |
3031 | /* | |
3032 | * we've reached the system imposed maximum number of vnodes | |
3033 | * but there isn't a single one available | |
3034 | * wait a bit and then retry... if we can't get a vnode | |
3035 | * after 100 retries, than log a complaint | |
3036 | */ | |
3037 | if (++retries <= 100) { | |
3038 | vnode_list_unlock(); | |
3039 | IOSleep(1); | |
3040 | goto retry; | |
3041 | } | |
3042 | ||
3043 | vnode_list_unlock(); | |
3044 | tablefull("vnode"); | |
3045 | log(LOG_EMERG, "%d desired, %d numvnodes, " | |
3046 | "%d free, %d inactive\n", | |
3047 | desiredvnodes, numvnodes, freevnodes, inactivevnodes); | |
3048 | *vpp = 0; | |
3049 | return (ENFILE); | |
3050 | } | |
3051 | steal_this_vp: | |
/* remember the id seen under the list lock so a change can be detected once the vnode lock is held */
3052 | vpid = vp->v_id; | |
3053 | ||
3054 | VREMFREE("new_vnode", vp); | |
3055 | VLISTNONE(vp); | |
3056 | ||
3057 | vnode_list_unlock(); | |
3058 | vnode_lock(vp); | |
3059 | ||
3060 | /* | |
3061 | * We could wait for the vnode_lock after removing the vp from the freelist | |
3062 | * and the vid is bumped only at the very end of reclaim. So it is possible | |
3063 | * that we are looking at a vnode that is being terminated. If so skip it. | |
3064 | */ | |
3065 | if ((vpid != vp->v_id) || (vp->v_usecount != 0) || (vp->v_iocount != 0) || | |
3066 | VONLIST(vp) || (vp->v_lflag & VL_TERMINATE)) { | |
3067 | /* | |
3068 | * we lost the race between dropping the list lock | |
3069 | * and picking up the vnode_lock... someone else | |
3070 | * used this vnode and it is now in a new state | |
3071 | * so we need to go back and try again | |
3072 | */ | |
3073 | vnode_unlock(vp); | |
3074 | goto retry; | |
3075 | } | |
3076 | if ( (vp->v_lflag & (VL_NEEDINACTIVE | VL_MARKTERM)) == VL_NEEDINACTIVE ) { | |
3077 | /* | |
3078 | * we did a vnode_rele_ext that asked for | |
3079 | * us not to reenter the filesystem during | |
3080 | * the release even though VL_NEEDINACTIVE was | |
3081 | * set... we'll do it here by doing a | |
3082 | * vnode_get/vnode_put | |
3083 | * | |
3084 | * pick up an iocount so that we can call | |
3085 | * vnode_put and drive the VNOP_INACTIVE... | |
3086 | * vnode_put will either leave us off | |
3087 | * the freelist if a new ref comes in, | |
3088 | * or put us back on the end of the freelist | |
3089 | * or recycle us if we were marked for termination... | |
3090 | * so we'll just go grab a new candidate | |
3091 | */ | |
3092 | vp->v_iocount++; | |
3093 | #ifdef JOE_DEBUG | |
3094 | record_vp(vp, 1); | |
3095 | #endif | |
3096 | vnode_put_locked(vp); | |
3097 | vnode_unlock(vp); | |
3098 | goto retry; | |
3099 | } | |
/* count this vnode as reused */
3100 | OSAddAtomic(1, &num_reusedvnodes); | |
3101 | ||
3102 | /* Checks for anyone racing us for recycle */ | |
3103 | if (vp->v_type != VBAD) { | |
3104 | if (vp->v_lflag & VL_DEAD) | |
3105 | panic("new_vnode: the vnode is VL_DEAD but not VBAD"); | |
3106 | ||
3107 | (void)vnode_reclaim_internal(vp, 1, 1); | |
3108 | ||
3109 | if ((VONLIST(vp))) | |
3110 | panic("new_vnode: vp on list "); | |
3111 | if (vp->v_usecount || vp->v_iocount || vp->v_kusecount || | |
3112 | (vp->v_lflag & (VNAMED_UBC | VNAMED_MOUNT | VNAMED_FSHASH))) | |
3113 | panic("new_vnode: free vnode still referenced\n"); | |
3114 | if ((vp->v_mntvnodes.tqe_prev != 0) && (vp->v_mntvnodes.tqe_next != 0)) | |
3115 | panic("new_vnode: vnode seems to be on mount list "); | |
3116 | if ( !LIST_EMPTY(&vp->v_nclinks) || !LIST_EMPTY(&vp->v_ncchildren)) | |
3117 | panic("new_vnode: vnode still hooked into the name cache"); | |
3118 | } | |
/* destroy and free any v_unsafefs structure still attached to the vnode */
3119 | if (vp->v_unsafefs) { | |
3120 | lck_mtx_destroy(&vp->v_unsafefs->fsnodelock, vnode_lck_grp); | |
3121 | FREE_ZONE((void *)vp->v_unsafefs, sizeof(struct unsafe_fsnode), M_UNSAFEFS); | |
3122 | vp->v_unsafefs = (struct unsafe_fsnode *)NULL; | |
3123 | } | |
/* reset per-identity state before handing the vnode out */
3124 | vp->v_lflag = 0; | |
3125 | vp->v_writecount = 0; | |
3126 | vp->v_references = 0; | |
3127 | vp->v_iterblkflags = 0; | |
3128 | vp->v_flag = VSTANDARD; | |
3129 | /* vbad vnodes can point to dead_mountp */ | |
3130 | vp->v_mount = 0; | |
3131 | vp->v_defer_reclaimlist = (vnode_t)0; | |
3132 | ||
3133 | vnode_unlock(vp); | |
3134 | done: | |
3135 | *vpp = vp; | |
3136 | ||
3137 | return (0); | |
3138 | } | |
3139 | ||
3140 | void | |
3141 | vnode_lock(vnode_t vp) | |
3142 | { | |
3143 | lck_mtx_lock(&vp->v_lock); | |
3144 | } | |
3145 | ||
3146 | void | |
3147 | vnode_unlock(vnode_t vp) | |
3148 | { | |
3149 | lck_mtx_unlock(&vp->v_lock); | |
3150 | } | |
3151 | ||
3152 | ||
3153 | ||
3154 | int | |
3155 | vnode_get(struct vnode *vp) | |
3156 | { | |
3157 | vnode_lock(vp); | |
3158 | ||
3159 | if ( (vp->v_iocount == 0) && (vp->v_lflag & (VL_TERMINATE | VL_DEAD)) ) { | |
3160 | vnode_unlock(vp); | |
3161 | return(ENOENT); | |
3162 | } | |
3163 | vp->v_iocount++; | |
3164 | #ifdef JOE_DEBUG | |
3165 | record_vp(vp, 1); | |
3166 | #endif | |
3167 | vnode_unlock(vp); | |
3168 | ||
3169 | return(0); | |
3170 | } | |
3171 | ||
3172 | int | |
3173 | vnode_getwithvid(vnode_t vp, int vid) | |
3174 | { | |
3175 | return(vget_internal(vp, vid, ( VNODE_NODEAD| VNODE_WITHID))); | |
3176 | } | |
3177 | ||
3178 | int | |
3179 | vnode_getwithref(vnode_t vp) | |
3180 | { | |
3181 | return(vget_internal(vp, 0, 0)); | |
3182 | } | |
3183 | ||
3184 | ||
3185 | int | |
3186 | vnode_put(vnode_t vp) | |
3187 | { | |
3188 | int retval; | |
3189 | ||
3190 | vnode_lock(vp); | |
3191 | retval = vnode_put_locked(vp); | |
3192 | vnode_unlock(vp); | |
3193 | ||
3194 | return(retval); | |
3195 | } | |
3196 | ||
3197 | int | |
3198 | vnode_put_locked(vnode_t vp) | |
3199 | { | |
3200 | struct vfs_context context; | |
3201 | ||
3202 | retry: | |
3203 | if (vp->v_iocount < 1) | |
3204 | panic("vnode_put(%x): iocount < 1", vp); | |
3205 | ||
3206 | if ((vp->v_usecount > 0) || (vp->v_iocount > 1)) { | |
3207 | vnode_dropiocount(vp, 1); | |
3208 | return(0); | |
3209 | } | |
3210 | if ((vp->v_lflag & (VL_MARKTERM | VL_TERMINATE | VL_DEAD | VL_NEEDINACTIVE)) == VL_NEEDINACTIVE) { | |
3211 | ||
3212 | vp->v_lflag &= ~VL_NEEDINACTIVE; | |
3213 | vnode_unlock(vp); | |
3214 | ||
3215 | context.vc_proc = current_proc(); | |
3216 | context.vc_ucred = kauth_cred_get(); | |
3217 | VNOP_INACTIVE(vp, &context); | |
3218 | ||
3219 | vnode_lock(vp); | |
3220 | /* | |
3221 | * because we had to drop the vnode lock before calling | |
3222 | * VNOP_INACTIVE, the state of this vnode may have changed... | |
3223 | * we may pick up both VL_MARTERM and either | |
3224 | * an iocount or a usecount while in the VNOP_INACTIVE call | |
3225 | * we don't want to call vnode_reclaim_internal on a vnode | |
3226 | * that has active references on it... so loop back around | |
3227 | * and reevaluate the state | |
3228 | */ | |
3229 | goto retry; | |
3230 | } | |
3231 | vp->v_lflag &= ~VL_NEEDINACTIVE; | |
3232 | ||
3233 | if ((vp->v_lflag & (VL_MARKTERM | VL_TERMINATE | VL_DEAD)) == VL_MARKTERM) | |
3234 | vnode_reclaim_internal(vp, 1, 0); | |
3235 | ||
3236 | vnode_dropiocount(vp, 1); | |
3237 | vnode_list_add(vp); | |
3238 | ||
3239 | return(0); | |
3240 | } | |
3241 | ||
3242 | /* is vnode_t in use by others? */ | |
3243 | int | |
3244 | vnode_isinuse(vnode_t vp, int refcnt) | |
3245 | { | |
3246 | return(vnode_isinuse_locked(vp, refcnt, 0)); | |
3247 | } | |
3248 | ||
3249 | ||
3250 | static int | |
3251 | vnode_isinuse_locked(vnode_t vp, int refcnt, int locked) | |
3252 | { | |
3253 | int retval = 0; | |
3254 | ||
3255 | if (!locked) | |
3256 | vnode_lock(vp); | |
3257 | if ((vp->v_type != VREG) && (vp->v_usecount > refcnt)) { | |
3258 | retval = 1; | |
3259 | goto out; | |
3260 | } | |
3261 | if (vp->v_type == VREG) { | |
3262 | retval = ubc_isinuse_locked(vp, refcnt, 1); | |
3263 | } | |
3264 | ||
3265 | out: | |
3266 | if (!locked) | |
3267 | vnode_unlock(vp); | |
3268 | return(retval); | |
3269 | } | |
3270 | ||
3271 | ||
3272 | /* resume vnode_t */ | |
3273 | errno_t | |
3274 | vnode_resume(vnode_t vp) | |
3275 | { | |
3276 | ||
3277 | vnode_lock(vp); | |
3278 | ||
3279 | if (vp->v_owner == current_thread()) { | |
3280 | vp->v_lflag &= ~VL_SUSPENDED; | |
3281 | vp->v_owner = 0; | |
3282 | vnode_unlock(vp); | |
3283 | wakeup(&vp->v_iocount); | |
3284 | } else | |
3285 | vnode_unlock(vp); | |
3286 | ||
3287 | return(0); | |
3288 | } | |
3289 | ||
3290 | static errno_t | |
3291 | vnode_drain(vnode_t vp) | |
3292 | { | |
3293 | ||
3294 | if (vp->v_lflag & VL_DRAIN) { | |
3295 | panic("vnode_drain: recursuve drain"); | |
3296 | return(ENOENT); | |
3297 | } | |
3298 | vp->v_lflag |= VL_DRAIN; | |
3299 | vp->v_owner = current_thread(); | |
3300 | ||
3301 | while (vp->v_iocount > 1) | |
3302 | msleep(&vp->v_iocount, &vp->v_lock, PVFS, "vnode_drain", 0); | |
3303 | return(0); | |
3304 | } | |
3305 | ||
3306 | ||
3307 | /* | |
3308 | * if the number of recent references via vnode_getwithvid or vnode_getwithref | |
3309 | * exceeds this threshhold, than 'UN-AGE' the vnode by removing it from | |
3310 | * the LRU list if it's currently on it... once the iocount and usecount both drop | |
3311 | * to 0, it will get put back on the end of the list, effectively making it younger | |
3312 | * this allows us to keep actively referenced vnodes in the list without having | |
3313 | * to constantly remove and add to the list each time a vnode w/o a usecount is | |
3314 | * referenced which costs us taking and dropping a global lock twice. | |
3315 | */ | |
3316 | #define UNAGE_THRESHHOLD 10 | |
3317 | ||
3318 | errno_t | |
3319 | vnode_getiocount(vnode_t vp, int locked, int vid, int vflags) | |
3320 | { | |
3321 | int nodead = vflags & VNODE_NODEAD; | |
3322 | int nosusp = vflags & VNODE_NOSUSPEND; | |
3323 | ||
3324 | if (!locked) | |
3325 | vnode_lock(vp); | |
3326 | ||
3327 | for (;;) { | |
3328 | /* | |
3329 | * if it is a dead vnode with deadfs | |
3330 | */ | |
3331 | if (nodead && (vp->v_lflag & VL_DEAD) && ((vp->v_type == VBAD) || (vp->v_data == 0))) { | |
3332 | if (!locked) | |
3333 | vnode_unlock(vp); | |
3334 | return(ENOENT); | |
3335 | } | |
3336 | /* | |
3337 | * will return VL_DEAD ones | |
3338 | */ | |
3339 | if ((vp->v_lflag & (VL_SUSPENDED | VL_DRAIN | VL_TERMINATE)) == 0 ) { | |
3340 | break; | |
3341 | } | |
3342 | /* | |
3343 | * if suspended vnodes are to be failed | |
3344 | */ | |
3345 | if (nosusp && (vp->v_lflag & VL_SUSPENDED)) { | |
3346 | if (!locked) | |
3347 | vnode_unlock(vp); | |
3348 | return(ENOENT); | |
3349 | } | |
3350 | /* | |
3351 | * if you are the owner of drain/suspend/termination , can acquire iocount | |
3352 | * check for VL_TERMINATE; it does not set owner | |
3353 | */ | |
3354 | if ((vp->v_lflag & (VL_DRAIN | VL_SUSPENDED | VL_TERMINATE)) && | |
3355 | (vp->v_owner == current_thread())) { | |
3356 | break; | |
3357 | } | |
3358 | if (vp->v_lflag & VL_TERMINATE) { | |
3359 | vp->v_lflag |= VL_TERMWANT; | |
3360 | ||
3361 | msleep(&vp->v_lflag, &vp->v_lock, PVFS, "vnode getiocount", 0); | |
3362 | } else | |
3363 | msleep(&vp->v_iocount, &vp->v_lock, PVFS, "vnode_getiocount", 0); | |
3364 | } | |
3365 | if (vid != vp->v_id) { | |
3366 | if (!locked) | |
3367 | vnode_unlock(vp); | |
3368 | return(ENOENT); | |
3369 | } | |
3370 | if (++vp->v_references >= UNAGE_THRESHHOLD) { | |
3371 | vp->v_references = 0; | |
3372 | vnode_list_remove(vp); | |
3373 | } | |
3374 | vp->v_iocount++; | |
3375 | #ifdef JOE_DEBUG | |
3376 | record_vp(vp, 1); | |
3377 | #endif | |
3378 | if (!locked) | |
3379 | vnode_unlock(vp); | |
3380 | return(0); | |
3381 | } | |
3382 | ||
3383 | static void | |
3384 | vnode_dropiocount (vnode_t vp, int locked) | |
3385 | { | |
3386 | if (!locked) | |
3387 | vnode_lock(vp); | |
3388 | if (vp->v_iocount < 1) | |
3389 | panic("vnode_dropiocount(%x): v_iocount < 1", vp); | |
3390 | ||
3391 | vp->v_iocount--; | |
3392 | #ifdef JOE_DEBUG | |
3393 | record_vp(vp, -1); | |
3394 | #endif | |
3395 | if ((vp->v_lflag & (VL_DRAIN | VL_SUSPENDED)) && (vp->v_iocount <= 1)) | |
3396 | wakeup(&vp->v_iocount); | |
3397 | ||
3398 | if (!locked) | |
3399 | vnode_unlock(vp); | |
3400 | } | |
3401 | ||
3402 | ||
/*
 * Reclaim a vnode: calls vnode_reclaim_internal() with locked == 0 and
 * reuse == 0.
 */
void
vnode_reclaim(struct vnode * vp)
{
	const int locked = 0;
	const int reuse = 0;

	vnode_reclaim_internal(vp, locked, reuse);
}
3408 | ||
3409 | __private_extern__ | |
3410 | void | |
3411 | vnode_reclaim_internal(struct vnode * vp, int locked, int reuse) | |
3412 | { | |
3413 | int isfifo = 0; | |
3414 | ||
3415 | if (!locked) | |
3416 | vnode_lock(vp); | |
3417 | ||
3418 | if (vp->v_lflag & VL_TERMINATE) { | |
3419 | panic("vnode reclaim in progress"); | |
3420 | } | |
3421 | vp->v_lflag |= VL_TERMINATE; | |
3422 | ||
3423 | if (vnode_drain(vp)) { | |
3424 | panic("vnode drain failed"); | |
3425 | vnode_unlock(vp); | |
3426 | return; | |
3427 | } | |
3428 | isfifo = (vp->v_type == VFIFO); | |
3429 | ||
3430 | if (vp->v_type != VBAD) | |
3431 | vgone(vp); /* clean and reclaim the vnode */ | |
3432 | ||
3433 | /* | |
3434 | * give the vnode a new identity so | |
3435 | * that vnode_getwithvid will fail | |
3436 | * on any stale cache accesses | |
3437 | */ | |
3438 | vp->v_id++; | |
3439 | if (isfifo) { | |
3440 | struct fifoinfo * fip; | |
3441 | ||
3442 | fip = vp->v_fifoinfo; | |
3443 | vp->v_fifoinfo = NULL; | |
3444 | FREE(fip, M_TEMP); | |
3445 | } | |
3446 | ||
3447 | vp->v_type = VBAD; | |
3448 | ||
3449 | if (vp->v_data) | |
3450 | panic("vnode_reclaim_internal: cleaned vnode isn't"); | |
3451 | if (vp->v_numoutput) | |
3452 | panic("vnode_reclaim_internal: Clean vnode has pending I/O's"); | |
3453 | if (UBCINFOEXISTS(vp)) | |
3454 | panic("vnode_reclaim_internal: ubcinfo not cleaned"); | |
3455 | if (vp->v_parent) | |
3456 | panic("vnode_reclaim_internal: vparent not removed"); | |
3457 | if (vp->v_name) | |
3458 | panic("vnode_reclaim_internal: vname not removed"); | |
3459 | ||
3460 | vp->v_socket = 0; | |
3461 | ||
3462 | vp->v_lflag &= ~VL_TERMINATE; | |
3463 | vp->v_lflag &= ~VL_DRAIN; | |
3464 | vp->v_owner = 0; | |
3465 | ||
3466 | if (vp->v_lflag & VL_TERMWANT) { | |
3467 | vp->v_lflag &= ~VL_TERMWANT; | |
3468 | wakeup(&vp->v_lflag); | |
3469 | } | |
3470 | if (!reuse && vp->v_usecount == 0) | |
3471 | vnode_list_add(vp); | |
3472 | if (!locked) | |
3473 | vnode_unlock(vp); | |
3474 | } | |
3475 | ||
3476 | /* USAGE: | |
3477 | * The following API creates a vnode and associates all the parameters specified in the vnode_fsparam | |
3478 | * structure and returns a vnode handle with a reference. device aliasing is handled here so checkalias | |
3479 | * is obsoleted by this. | |
3480 | * vnode_create(int flavor, size_t size, void * param, vnode_t *vp) | |
 *
 * Only flavor VNCREATE_FLAVOR with size VCREATESIZE and a non-NULL data
 * pointer is accepted; data is interpreted as a struct vnode_fsparam.
 *
 * Returns 0 with *vpp set on success, EINVAL for any other
 * flavor/size/NULL-data combination, or the error reported by new_vnode()
 * or ubc_info_init_withsize().
3481 | */ | |
3482 | int | |
3483 | vnode_create(int flavor, size_t size, void *data, vnode_t *vpp) | |
3484 | { | |
3485 | int error; | |
3486 | int insert = 1; | |
3487 | vnode_t vp; | |
3488 | vnode_t nvp; | |
3489 | vnode_t dvp; | |
3490 | struct componentname *cnp; | |
3491 | struct vnode_fsparam *param = (struct vnode_fsparam *)data; | |
3492 | ||
/* any other flavor/size pair, or a NULL param, falls through to the EINVAL at the bottom */
3493 | if (flavor == VNCREATE_FLAVOR && (size == VCREATESIZE) && param) { | |
3494 | if ( (error = new_vnode(&vp)) ) { | |
3495 | return(error); | |
3496 | } else { | |
3497 | dvp = param->vnfs_dvp; | |
3498 | cnp = param->vnfs_cnp; | |
3499 | ||
3500 | vp->v_op = param->vnfs_vops; | |
3501 | vp->v_type = param->vnfs_vtype; | |
3502 | vp->v_data = param->vnfs_fsnode; | |
3503 | vp->v_iocount = 1; | |
3504 | ||
3505 | if (param->vnfs_markroot) | |
3506 | vp->v_flag |= VROOT; | |
3507 | if (param->vnfs_marksystem) | |
3508 | vp->v_flag |= VSYSTEM; | |
3509 | else if (vp->v_type == VREG) { | |
3510 | /* | |
3511 | * only non SYSTEM vp | |
3512 | */ | |
3513 | error = ubc_info_init_withsize(vp, param->vnfs_filesize); | |
3514 | if (error) { | |
/* ubc setup failed: mark the vnode dead and drop the iocount taken above */
3515 | #ifdef JOE_DEBUG | |
3516 | record_vp(vp, 1); | |
3517 | #endif | |
3518 | vp->v_mount = 0; | |
3519 | vp->v_op = dead_vnodeop_p; | |
3520 | vp->v_tag = VT_NON; | |
3521 | vp->v_data = NULL; | |
3522 | vp->v_type = VBAD; | |
3523 | vp->v_lflag |= VL_DEAD; | |
3524 | ||
3525 | vnode_put(vp); | |
3526 | return(error); | |
3527 | } | |
3528 | } | |
3529 | #ifdef JOE_DEBUG | |
3530 | record_vp(vp, 1); | |
3531 | #endif | |
3532 | if (vp->v_type == VCHR || vp->v_type == VBLK) { | |
3533 | ||
3534 | if ( (nvp = checkalias(vp, param->vnfs_rdev)) ) { | |
3535 | /* | |
3536 | * if checkalias returns a vnode, it will be locked | |
3537 | * | |
3538 | * first get rid of the unneeded vnode we acquired | |
3539 | */ | |
3540 | vp->v_data = NULL; | |
3541 | vp->v_op = spec_vnodeop_p; | |
3542 | vp->v_type = VBAD; | |
3543 | vp->v_lflag = VL_DEAD; | |
3544 | vp->v_data = NULL; | |
3545 | vp->v_tag = VT_NON; | |
3546 | vnode_put(vp); | |
3547 | ||
3548 | /* | |
3549 | * switch to aliased vnode and finish | |
3550 | * preparing it | |
3551 | */ | |
3552 | vp = nvp; | |
3553 | ||
3554 | vclean(vp, 0, current_proc()); | |
3555 | vp->v_op = param->vnfs_vops; | |
3556 | vp->v_type = param->vnfs_vtype; | |
3557 | vp->v_data = param->vnfs_fsnode; | |
3558 | vp->v_lflag = 0; | |
3559 | vp->v_mount = NULL; | |
3560 | insmntque(vp, param->vnfs_mp); | |
/* already on the mount's vnode list, so skip the insmntque below */
3561 | insert = 0; | |
3562 | vnode_unlock(vp); | |
3563 | } | |
3564 | } | |
3565 | ||
3566 | if (vp->v_type == VFIFO) { | |
3567 | struct fifoinfo *fip; | |
3568 | ||
3569 | MALLOC(fip, struct fifoinfo *, | |
3570 | sizeof(*fip), M_TEMP, M_WAITOK); | |
3571 | bzero(fip, sizeof(struct fifoinfo )); | |
3572 | vp->v_fifoinfo = fip; | |
3573 | } | |
3574 | /* The file systems usually pass the address of the location where | |
3575 | * they store the vnode pointer. When we add the vnode to the mount | |
3576 | * point and name cache it becomes discoverable. So the file system node | |
3577 | * will have the connection to the vnode set up by then | |
3578 | */ | |
3579 | *vpp = vp; | |
3580 | ||
3581 | if (param->vnfs_mp) { | |
3582 | if (param->vnfs_mp->mnt_kern_flag & MNTK_LOCK_LOCAL) | |
3583 | vp->v_flag |= VLOCKLOCAL; | |
3584 | if (insert) { | |
3585 | /* | |
3586 | * enter in mount vnode list | |
3587 | */ | |
3588 | insmntque(vp, param->vnfs_mp); | |
3589 | } | |
3590 | #ifdef INTERIM_FSNODE_LOCK | |
3591 | if (param->vnfs_mp->mnt_vtable->vfc_threadsafe == 0) { | |
3592 | MALLOC_ZONE(vp->v_unsafefs, struct unsafe_fsnode *, | |
3593 | sizeof(struct unsafe_fsnode), M_UNSAFEFS, M_WAITOK); | |
3594 | vp->v_unsafefs->fsnode_count = 0; | |
3595 | vp->v_unsafefs->fsnodeowner = (void *)NULL; | |
3596 | lck_mtx_init(&vp->v_unsafefs->fsnodelock, vnode_lck_grp, vnode_lck_attr); | |
3597 | } | |
3598 | #endif /* INTERIM_FSNODE_LOCK */ | |
3599 | } | |
/* record the parent only if vnode_ref() on it succeeded */
3600 | if (dvp && vnode_ref(dvp) == 0) { | |
3601 | vp->v_parent = dvp; | |
3602 | } | |
3603 | if (cnp) { | |
3604 | if (dvp && ((param->vnfs_flags & (VNFS_NOCACHE | VNFS_CANTCACHE)) == 0)) { | |
3605 | /* | |
3606 | * enter into name cache | |
3607 | * we've got the info to enter it into the name cache now | |
3608 | */ | |
3609 | cache_enter(dvp, vp, cnp); | |
3610 | } | |
3611 | vp->v_name = vfs_addname(cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, 0); | |
3612 | } | |
3613 | if ((param->vnfs_flags & VNFS_CANTCACHE) == 0) { | |
3614 | /* | |
3615 | * this vnode is being created as cacheable in the name cache | |
3616 | * this allows us to re-enter it in the cache | |
3617 | */ | |
3618 | vp->v_flag |= VNCACHEABLE; | |
3619 | } | |
3620 | if ((vp->v_flag & VSYSTEM) && (vp->v_type != VREG)) | |
3621 | panic("incorrect vnode setup"); | |
3622 | ||
3623 | return(0); | |
3624 | } | |
3625 | } | |
3626 | return (EINVAL); | |
3627 | } | |
3628 | ||
3629 | int | |
3630 | vnode_addfsref(vnode_t vp) | |
3631 | { | |
3632 | vnode_lock(vp); | |
3633 | if (vp->v_lflag & VNAMED_FSHASH) | |
3634 | panic("add_fsref: vp already has named reference"); | |
3635 | if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb)) | |
3636 | panic("addfsref: vp on the free list\n"); | |
3637 | vp->v_lflag |= VNAMED_FSHASH; | |
3638 | vnode_unlock(vp); | |
3639 | return(0); | |
3640 | ||
3641 | } | |
3642 | int | |
3643 | vnode_removefsref(vnode_t vp) | |
3644 | { | |
3645 | vnode_lock(vp); | |
3646 | if ((vp->v_lflag & VNAMED_FSHASH) == 0) | |
3647 | panic("remove_fsref: no named reference"); | |
3648 | vp->v_lflag &= ~VNAMED_FSHASH; | |
3649 | vnode_unlock(vp); | |
3650 | return(0); | |
3651 | ||
3652 | } | |
3653 | ||
3654 | ||
3655 | int | |
3656 | vfs_iterate(__unused int flags, int (*callout)(mount_t, void *), void *arg) | |
3657 | { | |
3658 | mount_t mp; | |
3659 | int ret = 0; | |
3660 | fsid_t * fsid_list; | |
3661 | int count, actualcount, i; | |
3662 | void * allocmem; | |
3663 | ||
3664 | count = mount_getvfscnt(); | |
3665 | count += 10; | |
3666 | ||
3667 | fsid_list = (fsid_t *)kalloc(count * sizeof(fsid_t)); | |
3668 | allocmem = (void *)fsid_list; | |
3669 | ||
3670 | actualcount = mount_fillfsids(fsid_list, count); | |
3671 | ||
3672 | for (i=0; i< actualcount; i++) { | |
3673 | ||
3674 | /* obtain the mount point with iteration reference */ | |
3675 | mp = mount_list_lookupby_fsid(&fsid_list[i], 0, 1); | |
3676 | ||
3677 | if(mp == (struct mount *)0) | |
3678 | continue; | |
3679 | mount_lock(mp); | |
3680 | if (mp->mnt_lflag & (MNT_LDEAD | MNT_LUNMOUNT)) { | |
3681 | mount_unlock(mp); | |
3682 | mount_iterdrop(mp); | |
3683 | continue; | |
3684 | ||
3685 | } | |
3686 | mount_unlock(mp); | |
3687 | ||
3688 | /* iterate over all the vnodes */ | |
3689 | ret = callout(mp, arg); | |
3690 | ||
3691 | mount_iterdrop(mp); | |
3692 | ||
3693 | switch (ret) { | |
3694 | case VFS_RETURNED: | |
3695 | case VFS_RETURNED_DONE: | |
3696 | if (ret == VFS_RETURNED_DONE) { | |
3697 | ret = 0; | |
3698 | goto out; | |
3699 | } | |
3700 | break; | |
3701 | ||
3702 | case VFS_CLAIMED_DONE: | |
3703 | ret = 0; | |
3704 | goto out; | |
3705 | case VFS_CLAIMED: | |
3706 | default: | |
3707 | break; | |
3708 | } | |
3709 | ret = 0; | |
3710 | } | |
3711 | ||
3712 | out: | |
3713 | kfree(allocmem, (count * sizeof(fsid_t))); | |
3714 | return (ret); | |
3715 | } | |
3716 | ||
3717 | /* | |
3718 | * Update the vfsstatfs structure in the mountpoint. | |
3719 | */ | |
3720 | int | |
3721 | vfs_update_vfsstat(mount_t mp, vfs_context_t ctx) | |
3722 | { | |
3723 | struct vfs_attr va; | |
3724 | int error; | |
3725 | ||
3726 | /* | |
3727 | * Request the attributes we want to propagate into | |
3728 | * the per-mount vfsstat structure. | |
3729 | */ | |
3730 | VFSATTR_INIT(&va); | |
3731 | VFSATTR_WANTED(&va, f_iosize); | |
3732 | VFSATTR_WANTED(&va, f_blocks); | |
3733 | VFSATTR_WANTED(&va, f_bfree); | |
3734 | VFSATTR_WANTED(&va, f_bavail); | |
3735 | VFSATTR_WANTED(&va, f_bused); | |
3736 | VFSATTR_WANTED(&va, f_files); | |
3737 | VFSATTR_WANTED(&va, f_ffree); | |
3738 | VFSATTR_WANTED(&va, f_bsize); | |
3739 | VFSATTR_WANTED(&va, f_fssubtype); | |
3740 | if ((error = vfs_getattr(mp, &va, ctx)) != 0) { | |
3741 | KAUTH_DEBUG("STAT - filesystem returned error %d", error); | |
3742 | return(error); | |
3743 | } | |
3744 | ||
3745 | /* | |
3746 | * Unpack into the per-mount structure. | |
3747 | * | |
3748 | * We only overwrite these fields, which are likely to change: | |
3749 | * f_blocks | |
3750 | * f_bfree | |
3751 | * f_bavail | |
3752 | * f_bused | |
3753 | * f_files | |
3754 | * f_ffree | |
3755 | * | |
3756 | * And these which are not, but which the FS has no other way | |
3757 | * of providing to us: | |
3758 | * f_bsize | |
3759 | * f_iosize | |
3760 | * f_fssubtype | |
3761 | * | |
3762 | */ | |
3763 | if (VFSATTR_IS_SUPPORTED(&va, f_bsize)) { | |
3764 | mp->mnt_vfsstat.f_bsize = va.f_bsize; | |
3765 | } else { | |
3766 | mp->mnt_vfsstat.f_bsize = mp->mnt_devblocksize; /* default from the device block size */ | |
3767 | } | |
3768 | if (VFSATTR_IS_SUPPORTED(&va, f_iosize)) { | |
3769 | mp->mnt_vfsstat.f_iosize = va.f_iosize; | |
3770 | } else { | |
3771 | mp->mnt_vfsstat.f_iosize = 1024 * 1024; /* 1MB sensible I/O size */ | |
3772 | } | |
3773 | if (VFSATTR_IS_SUPPORTED(&va, f_blocks)) | |
3774 | mp->mnt_vfsstat.f_blocks = va.f_blocks; | |
3775 | if (VFSATTR_IS_SUPPORTED(&va, f_bfree)) | |
3776 | mp->mnt_vfsstat.f_bfree = va.f_bfree; | |
3777 | if (VFSATTR_IS_SUPPORTED(&va, f_bavail)) | |
3778 | mp->mnt_vfsstat.f_bavail = va.f_bavail; | |
3779 | if (VFSATTR_IS_SUPPORTED(&va, f_bused)) | |
3780 | mp->mnt_vfsstat.f_bused = va.f_bused; | |
3781 | if (VFSATTR_IS_SUPPORTED(&va, f_files)) | |
3782 | mp->mnt_vfsstat.f_files = va.f_files; | |
3783 | if (VFSATTR_IS_SUPPORTED(&va, f_ffree)) | |
3784 | mp->mnt_vfsstat.f_ffree = va.f_ffree; | |
3785 | ||
3786 | /* this is unlikely to change, but has to be queried for */ | |
3787 | if (VFSATTR_IS_SUPPORTED(&va, f_fssubtype)) | |
3788 | mp->mnt_vfsstat.f_fssubtype = va.f_fssubtype; | |
3789 | ||
3790 | return(0); | |
3791 | } | |
3792 | ||
3793 | void | |
3794 | mount_list_add(mount_t mp) | |
3795 | { | |
3796 | mount_list_lock(); | |
3797 | TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list); | |
3798 | nummounts++; | |
3799 | mount_list_unlock(); | |
3800 | } | |
3801 | ||
3802 | void | |
3803 | mount_list_remove(mount_t mp) | |
3804 | { | |
3805 | mount_list_lock(); | |
3806 | TAILQ_REMOVE(&mountlist, mp, mnt_list); | |
3807 | nummounts--; | |
3808 | mp->mnt_list.tqe_next = 0; | |
3809 | mp->mnt_list.tqe_prev = 0; | |
3810 | mount_list_unlock(); | |
3811 | } | |
3812 | ||
3813 | mount_t | |
3814 | mount_lookupby_volfsid(int volfs_id, int withref) | |
3815 | { | |
3816 | mount_t cur_mount = (mount_t)0; | |
3817 | mount_t mp ; | |
3818 | ||
3819 | mount_list_lock(); | |
3820 | TAILQ_FOREACH(mp, &mountlist, mnt_list) { | |
3821 | if (validfsnode(mp) && mp->mnt_vfsstat.f_fsid.val[0] == volfs_id) { | |
3822 | cur_mount = mp; | |
3823 | if (withref) { | |
3824 | if (mount_iterref(cur_mount, 1)) { | |
3825 | cur_mount = (mount_t)0; | |
3826 | mount_list_unlock(); | |
3827 | goto out; | |
3828 | } | |
3829 | } | |
3830 | break; | |
3831 | } | |
3832 | } | |
3833 | mount_list_unlock(); | |
3834 | if (withref && (cur_mount != (mount_t)0)) { | |
3835 | mp = cur_mount; | |
3836 | if (vfs_busy(mp, LK_NOWAIT) != 0) { | |
3837 | cur_mount = (mount_t)0; | |
3838 | } | |
3839 | mount_iterdrop(mp); | |
3840 | } | |
3841 | out: | |
3842 | return(cur_mount); | |
3843 | } | |
3844 | ||
3845 | ||
3846 | mount_t | |
3847 | mount_list_lookupby_fsid(fsid, locked, withref) | |
3848 | fsid_t *fsid; | |
3849 | int locked; | |
3850 | int withref; | |
3851 | { | |
3852 | mount_t retmp = (mount_t)0; | |
3853 | mount_t mp; | |
3854 | ||
3855 | if (!locked) | |
3856 | mount_list_lock(); | |
3857 | TAILQ_FOREACH(mp, &mountlist, mnt_list) | |
3858 | if (mp->mnt_vfsstat.f_fsid.val[0] == fsid->val[0] && | |
3859 | mp->mnt_vfsstat.f_fsid.val[1] == fsid->val[1]) { | |
3860 | retmp = mp; | |
3861 | if (withref) { | |
3862 | if (mount_iterref(retmp, 1)) | |
3863 | retmp = (mount_t)0; | |
3864 | } | |
3865 | goto out; | |
3866 | } | |
3867 | out: | |
3868 | if (!locked) | |
3869 | mount_list_unlock(); | |
3870 | return (retmp); | |
3871 | } | |
3872 | ||
3873 | errno_t | |
3874 | vnode_lookup(const char *path, int flags, vnode_t *vpp, vfs_context_t context) | |
3875 | { | |
3876 | struct nameidata nd; | |
3877 | int error; | |
3878 | struct vfs_context context2; | |
3879 | vfs_context_t ctx = context; | |
3880 | u_long ndflags = 0; | |
3881 | ||
3882 | if (context == NULL) { /* XXX technically an error */ | |
3883 | context2.vc_proc = current_proc(); | |
3884 | context2.vc_ucred = kauth_cred_get(); | |
3885 | ctx = &context2; | |
3886 | } | |
3887 | ||
3888 | if (flags & VNODE_LOOKUP_NOFOLLOW) | |
3889 | ndflags = NOFOLLOW; | |
3890 | else | |
3891 | ndflags = FOLLOW; | |
3892 | ||
3893 | if (flags & VNODE_LOOKUP_NOCROSSMOUNT) | |
3894 | ndflags |= NOCROSSMOUNT; | |
3895 | if (flags & VNODE_LOOKUP_DOWHITEOUT) | |
3896 | ndflags |= DOWHITEOUT; | |
3897 | ||
3898 | /* XXX AUDITVNPATH1 needed ? */ | |
3899 | NDINIT(&nd, LOOKUP, ndflags, UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx); | |
3900 | ||
3901 | if ((error = namei(&nd))) | |
3902 | return (error); | |
3903 | *vpp = nd.ni_vp; | |
3904 | nameidone(&nd); | |
3905 | ||
3906 | return (0); | |
3907 | } | |
3908 | ||
3909 | errno_t | |
3910 | vnode_open(const char *path, int fmode, int cmode, int flags, vnode_t *vpp, vfs_context_t context) | |
3911 | { | |
3912 | struct nameidata nd; | |
3913 | int error; | |
3914 | struct vfs_context context2; | |
3915 | vfs_context_t ctx = context; | |
3916 | u_long ndflags = 0; | |
3a60a9f5 | 3917 | int lflags = flags; |
91447636 A |
3918 | |
3919 | if (context == NULL) { /* XXX technically an error */ | |
3920 | context2.vc_proc = current_proc(); | |
3921 | context2.vc_ucred = kauth_cred_get(); | |
3922 | ctx = &context2; | |
3923 | } | |
3924 | ||
3a60a9f5 A |
3925 | if (fmode & O_NOFOLLOW) |
3926 | lflags |= VNODE_LOOKUP_NOFOLLOW; | |
3927 | ||
3928 | if (lflags & VNODE_LOOKUP_NOFOLLOW) | |
91447636 A |
3929 | ndflags = NOFOLLOW; |
3930 | else | |
3931 | ndflags = FOLLOW; | |
3932 | ||
3a60a9f5 | 3933 | if (lflags & VNODE_LOOKUP_NOCROSSMOUNT) |
91447636 | 3934 | ndflags |= NOCROSSMOUNT; |
3a60a9f5 | 3935 | if (lflags & VNODE_LOOKUP_DOWHITEOUT) |
91447636 A |
3936 | ndflags |= DOWHITEOUT; |
3937 | ||
3938 | /* XXX AUDITVNPATH1 needed ? */ | |
3939 | NDINIT(&nd, LOOKUP, ndflags, UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx); | |
3940 | ||
3941 | if ((error = vn_open(&nd, fmode, cmode))) | |
3942 | *vpp = NULL; | |
3943 | else | |
3944 | *vpp = nd.ni_vp; | |
3945 | ||
3946 | return (error); | |
3947 | } | |
3948 | ||
3949 | errno_t | |
3950 | vnode_close(vnode_t vp, int flags, vfs_context_t context) | |
3951 | { | |
3952 | kauth_cred_t cred; | |
3953 | struct proc *p; | |
3954 | int error; | |
3955 | ||
3956 | if (context) { | |
3957 | p = context->vc_proc; | |
3958 | cred = context->vc_ucred; | |
3959 | } else { | |
3960 | p = current_proc(); | |
3961 | cred = kauth_cred_get(); | |
3962 | } | |
3963 | ||
3964 | error = vn_close(vp, flags, cred, p); | |
3965 | vnode_put(vp); | |
3966 | return (error); | |
3967 | } | |
3968 | ||
3969 | errno_t | |
3970 | vnode_size(vnode_t vp, off_t *sizep, vfs_context_t ctx) | |
3971 | { | |
3972 | struct vnode_attr va; | |
3973 | int error; | |
3974 | ||
3975 | VATTR_INIT(&va); | |
3976 | VATTR_WANTED(&va, va_data_size); | |
3977 | error = vnode_getattr(vp, &va, ctx); | |
3978 | if (!error) | |
3979 | *sizep = va.va_data_size; | |
3980 | return(error); | |
3981 | } | |
3982 | ||
3983 | errno_t | |
3984 | vnode_setsize(vnode_t vp, off_t size, int ioflag, vfs_context_t ctx) | |
3985 | { | |
3986 | struct vnode_attr va; | |
3987 | ||
3988 | VATTR_INIT(&va); | |
3989 | VATTR_SET(&va, va_data_size, size); | |
3990 | va.va_vaflags = ioflag & 0xffff; | |
3991 | return(vnode_setattr(vp, &va, ctx)); | |
3992 | } | |
3993 | ||
3994 | errno_t | |
3995 | vn_create(vnode_t dvp, vnode_t *vpp, struct componentname *cnp, struct vnode_attr *vap, int flags, vfs_context_t ctx) | |
3996 | { | |
3997 | kauth_acl_t oacl, nacl; | |
3998 | int initial_acl; | |
3999 | errno_t error; | |
4000 | vnode_t vp = (vnode_t)0; | |
4001 | ||
4002 | error = 0; | |
4003 | oacl = nacl = NULL; | |
4004 | initial_acl = 0; | |
4005 | ||
4006 | KAUTH_DEBUG("%p CREATE - '%s'", dvp, cnp->cn_nameptr); | |
4007 | ||
4008 | /* | |
4009 | * Handle ACL inheritance. | |
4010 | */ | |
4011 | if (!(flags & VN_CREATE_NOINHERIT) && vfs_extendedsecurity(dvp->v_mount)) { | |
4012 | /* save the original filesec */ | |
4013 | if (VATTR_IS_ACTIVE(vap, va_acl)) { | |
4014 | initial_acl = 1; | |
4015 | oacl = vap->va_acl; | |
4016 | } | |
4017 | ||
4018 | vap->va_acl = NULL; | |
4019 | if ((error = kauth_acl_inherit(dvp, | |
4020 | oacl, | |
4021 | &nacl, | |
4022 | vap->va_type == VDIR, | |
4023 | ctx)) != 0) { | |
4024 | KAUTH_DEBUG("%p CREATE - error %d processing inheritance", dvp, error); | |
4025 | return(error); | |
4026 | } | |
4027 | ||
4028 | /* | |
4029 | * If the generated ACL is NULL, then we can save ourselves some effort | |
4030 | * by clearing the active bit. | |
4031 | */ | |
4032 | if (nacl == NULL) { | |
4033 | VATTR_CLEAR_ACTIVE(vap, va_acl); | |
4034 | } else { | |
4035 | VATTR_SET(vap, va_acl, nacl); | |
4036 | } | |
4037 | } | |
4038 | ||
4039 | /* | |
4040 | * Check and default new attributes. | |
4041 | * This will set va_uid, va_gid, va_mode and va_create_time at least, if the caller | |
4042 | * hasn't supplied them. | |
4043 | */ | |
4044 | if ((error = vnode_authattr_new(dvp, vap, flags & VN_CREATE_NOAUTH, ctx)) != 0) { | |
4045 | KAUTH_DEBUG("%p CREATE - error %d handing/defaulting attributes", dvp, error); | |
4046 | goto out; | |
4047 | } | |
4048 | ||
4049 | ||
4050 | /* | |
4051 | * Create the requested node. | |
4052 | */ | |
4053 | switch(vap->va_type) { | |
4054 | case VREG: | |
4055 | error = VNOP_CREATE(dvp, vpp, cnp, vap, ctx); | |
4056 | break; | |
4057 | case VDIR: | |
4058 | error = VNOP_MKDIR(dvp, vpp, cnp, vap, ctx); | |
4059 | break; | |
4060 | case VSOCK: | |
4061 | case VFIFO: | |
4062 | case VBLK: | |
4063 | case VCHR: | |
4064 | error = VNOP_MKNOD(dvp, vpp, cnp, vap, ctx); | |
4065 | break; | |
4066 | default: | |
4067 | panic("vnode_create: unknown vtype %d", vap->va_type); | |
4068 | } | |
4069 | if (error != 0) { | |
4070 | KAUTH_DEBUG("%p CREATE - error %d returned by filesystem", dvp, error); | |
4071 | goto out; | |
4072 | } | |
4073 | ||
4074 | vp = *vpp; | |
4075 | /* | |
4076 | * If some of the requested attributes weren't handled by the VNOP, | |
4077 | * use our fallback code. | |
4078 | */ | |
4079 | if (!VATTR_ALL_SUPPORTED(vap) && *vpp) { | |
4080 | KAUTH_DEBUG(" CREATE - doing fallback with ACL %p", vap->va_acl); | |
4081 | error = vnode_setattr_fallback(*vpp, vap, ctx); | |
4082 | } | |
4083 | if ((error != 0 ) && (vp != (vnode_t)0)) { | |
4084 | *vpp = (vnode_t) 0; | |
4085 | vnode_put(vp); | |
4086 | } | |
4087 | ||
4088 | out: | |
4089 | /* | |
4090 | * If the caller supplied a filesec in vap, it has been replaced | |
4091 | * now by the post-inheritance copy. We need to put the original back | |
4092 | * and free the inherited product. | |
4093 | */ | |
4094 | if (initial_acl) { | |
4095 | VATTR_SET(vap, va_acl, oacl); | |
4096 | } else { | |
4097 | VATTR_CLEAR_ACTIVE(vap, va_acl); | |
4098 | } | |
4099 | if (nacl != NULL) | |
4100 | kauth_acl_free(nacl); | |
4101 | ||
4102 | return(error); | |
4103 | } | |
4104 | ||
4105 | static kauth_scope_t vnode_scope; | |
4106 | static int vnode_authorize_callback(kauth_cred_t credential, __unused void *idata, kauth_action_t action, | |
4107 | uintptr_t arg0, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3); | |
4108 | ||
4109 | typedef struct _vnode_authorize_context { | |
4110 | vnode_t vp; | |
4111 | struct vnode_attr *vap; | |
4112 | vnode_t dvp; | |
4113 | struct vnode_attr *dvap; | |
4114 | vfs_context_t ctx; | |
4115 | int flags; | |
4116 | int flags_valid; | |
4117 | #define _VAC_IS_OWNER (1<<0) | |
4118 | #define _VAC_IN_GROUP (1<<1) | |
4119 | #define _VAC_IS_DIR_OWNER (1<<2) | |
4120 | #define _VAC_IN_DIR_GROUP (1<<3) | |
4121 | } *vauth_ctx; | |
4122 | ||
4123 | void | |
4124 | vnode_authorize_init(void) | |
4125 | { | |
4126 | vnode_scope = kauth_register_scope(KAUTH_SCOPE_VNODE, vnode_authorize_callback, NULL); | |
4127 | } | |
4128 | ||
4129 | /* | |
4130 | * Authorize an operation on a vnode. | |
4131 | * | |
4132 | * This is KPI, but here because it needs vnode_scope. | |
4133 | */ | |
4134 | int | |
4135 | vnode_authorize(vnode_t vp, vnode_t dvp, kauth_action_t action, vfs_context_t context) | |
4136 | { | |
4137 | int error, result; | |
4138 | ||
4139 | /* | |
4140 | * We can't authorize against a dead vnode; allow all operations through so that | |
4141 | * the correct error can be returned. | |
4142 | */ | |
4143 | if (vp->v_type == VBAD) | |
4144 | return(0); | |
4145 | ||
4146 | error = 0; | |
4147 | result = kauth_authorize_action(vnode_scope, vfs_context_ucred(context), action, | |
4148 | (uintptr_t)context, (uintptr_t)vp, (uintptr_t)dvp, (uintptr_t)&error); | |
4149 | if (result == EPERM) /* traditional behaviour */ | |
4150 | result = EACCES; | |
4151 | /* did the lower layers give a better error return? */ | |
4152 | if ((result != 0) && (error != 0)) | |
4153 | return(error); | |
4154 | return(result); | |
4155 | } | |
4156 | ||
4157 | /* | |
4158 | * Test for vnode immutability. | |
4159 | * | |
4160 | * The 'append' flag is set when the authorization request is constrained | |
4161 | * to operations which only request the right to append to a file. | |
4162 | * | |
4163 | * The 'ignore' flag is set when an operation modifying the immutability flags | |
4164 | * is being authorized. We check the system securelevel to determine which | |
4165 | * immutability flags we can ignore. | |
4166 | */ | |
4167 | static int | |
4168 | vnode_immutable(struct vnode_attr *vap, int append, int ignore) | |
4169 | { | |
4170 | int mask; | |
4171 | ||
4172 | /* start with all bits precluding the operation */ | |
4173 | mask = IMMUTABLE | APPEND; | |
4174 | ||
4175 | /* if appending only, remove the append-only bits */ | |
4176 | if (append) | |
4177 | mask &= ~APPEND; | |
4178 | ||
4179 | /* ignore only set when authorizing flags changes */ | |
4180 | if (ignore) { | |
4181 | if (securelevel <= 0) { | |
4182 | /* in insecure state, flags do not inhibit changes */ | |
4183 | mask = 0; | |
4184 | } else { | |
4185 | /* in secure state, user flags don't inhibit */ | |
4186 | mask &= ~(UF_IMMUTABLE | UF_APPEND); | |
4187 | } | |
4188 | } | |
4189 | KAUTH_DEBUG("IMMUTABLE - file flags 0x%x mask 0x%x append = %d ignore = %d", vap->va_flags, mask, append, ignore); | |
4190 | if ((vap->va_flags & mask) != 0) | |
4191 | return(EPERM); | |
4192 | return(0); | |
4193 | } | |
4194 | ||
4195 | static int | |
4196 | vauth_node_owner(struct vnode_attr *vap, kauth_cred_t cred) | |
4197 | { | |
4198 | int result; | |
4199 | ||
4200 | /* default assumption is not-owner */ | |
4201 | result = 0; | |
4202 | ||
4203 | /* | |
4204 | * If the filesystem has given us a UID, we treat this as authoritative. | |
4205 | */ | |
4206 | if (vap && VATTR_IS_SUPPORTED(vap, va_uid)) { | |
4207 | result = (vap->va_uid == kauth_cred_getuid(cred)) ? 1 : 0; | |
4208 | } | |
4209 | /* we could test the owner UUID here if we had a policy for it */ | |
4210 | ||
4211 | return(result); | |
4212 | } | |
4213 | ||
4214 | static int | |
4215 | vauth_node_group(struct vnode_attr *vap, kauth_cred_t cred, int *ismember) | |
4216 | { | |
4217 | int error; | |
4218 | int result; | |
4219 | ||
4220 | error = 0; | |
4221 | result = 0; | |
4222 | ||
4223 | /* the caller is expected to have asked the filesystem for a group at some point */ | |
4224 | if (vap && VATTR_IS_SUPPORTED(vap, va_gid)) { | |
4225 | error = kauth_cred_ismember_gid(cred, vap->va_gid, &result); | |
4226 | } | |
4227 | /* we could test the group UUID here if we had a policy for it */ | |
4228 | ||
4229 | if (!error) | |
4230 | *ismember = result; | |
4231 | return(error); | |
4232 | } | |
4233 | ||
4234 | static int | |
4235 | vauth_file_owner(vauth_ctx vcp) | |
4236 | { | |
4237 | int result; | |
4238 | ||
4239 | if (vcp->flags_valid & _VAC_IS_OWNER) { | |
4240 | result = (vcp->flags & _VAC_IS_OWNER) ? 1 : 0; | |
4241 | } else { | |
4242 | result = vauth_node_owner(vcp->vap, vcp->ctx->vc_ucred); | |
4243 | ||
4244 | /* cache our result */ | |
4245 | vcp->flags_valid |= _VAC_IS_OWNER; | |
4246 | if (result) { | |
4247 | vcp->flags |= _VAC_IS_OWNER; | |
4248 | } else { | |
4249 | vcp->flags &= ~_VAC_IS_OWNER; | |
4250 | } | |
4251 | } | |
4252 | return(result); | |
4253 | } | |
4254 | ||
4255 | static int | |
4256 | vauth_file_ingroup(vauth_ctx vcp, int *ismember) | |
4257 | { | |
4258 | int error; | |
4259 | ||
4260 | if (vcp->flags_valid & _VAC_IN_GROUP) { | |
4261 | *ismember = (vcp->flags & _VAC_IN_GROUP) ? 1 : 0; | |
4262 | error = 0; | |
4263 | } else { | |
4264 | error = vauth_node_group(vcp->vap, vcp->ctx->vc_ucred, ismember); | |
4265 | ||
4266 | if (!error) { | |
4267 | /* cache our result */ | |
4268 | vcp->flags_valid |= _VAC_IN_GROUP; | |
4269 | if (*ismember) { | |
4270 | vcp->flags |= _VAC_IN_GROUP; | |
4271 | } else { | |
4272 | vcp->flags &= ~_VAC_IN_GROUP; | |
4273 | } | |
4274 | } | |
4275 | ||
4276 | } | |
4277 | return(error); | |
4278 | } | |
4279 | ||
4280 | static int | |
4281 | vauth_dir_owner(vauth_ctx vcp) | |
4282 | { | |
4283 | int result; | |
4284 | ||
4285 | if (vcp->flags_valid & _VAC_IS_DIR_OWNER) { | |
4286 | result = (vcp->flags & _VAC_IS_DIR_OWNER) ? 1 : 0; | |
4287 | } else { | |
4288 | result = vauth_node_owner(vcp->dvap, vcp->ctx->vc_ucred); | |
4289 | ||
4290 | /* cache our result */ | |
4291 | vcp->flags_valid |= _VAC_IS_DIR_OWNER; | |
4292 | if (result) { | |
4293 | vcp->flags |= _VAC_IS_DIR_OWNER; | |
4294 | } else { | |
4295 | vcp->flags &= ~_VAC_IS_DIR_OWNER; | |
4296 | } | |
4297 | } | |
4298 | return(result); | |
4299 | } | |
4300 | ||
4301 | static int | |
4302 | vauth_dir_ingroup(vauth_ctx vcp, int *ismember) | |
4303 | { | |
4304 | int error; | |
4305 | ||
4306 | if (vcp->flags_valid & _VAC_IN_DIR_GROUP) { | |
4307 | *ismember = (vcp->flags & _VAC_IN_DIR_GROUP) ? 1 : 0; | |
4308 | error = 0; | |
4309 | } else { | |
4310 | error = vauth_node_group(vcp->dvap, vcp->ctx->vc_ucred, ismember); | |
4311 | ||
4312 | if (!error) { | |
4313 | /* cache our result */ | |
4314 | vcp->flags_valid |= _VAC_IN_DIR_GROUP; | |
4315 | if (*ismember) { | |
4316 | vcp->flags |= _VAC_IN_DIR_GROUP; | |
4317 | } else { | |
4318 | vcp->flags &= ~_VAC_IN_DIR_GROUP; | |
4319 | } | |
4320 | } | |
4321 | } | |
4322 | return(error); | |
4323 | } | |
4324 | ||
4325 | /* | |
4326 | * Test the posix permissions in (vap) to determine whether (credential) | |
4327 | * may perform (action) | |
4328 | */ | |
4329 | static int | |
4330 | vnode_authorize_posix(vauth_ctx vcp, int action, int on_dir) | |
4331 | { | |
4332 | struct vnode_attr *vap; | |
4333 | int needed, error, owner_ok, group_ok, world_ok, ismember; | |
4334 | #ifdef KAUTH_DEBUG_ENABLE | |
4335 | const char *where; | |
4336 | # define _SETWHERE(c) where = c; | |
4337 | #else | |
4338 | # define _SETWHERE(c) | |
4339 | #endif | |
4340 | ||
4341 | /* checking file or directory? */ | |
4342 | if (on_dir) { | |
4343 | vap = vcp->dvap; | |
4344 | } else { | |
4345 | vap = vcp->vap; | |
4346 | } | |
4347 | ||
4348 | error = 0; | |
4349 | ||
4350 | /* | |
4351 | * We want to do as little work here as possible. So first we check | |
4352 | * which sets of permissions grant us the access we need, and avoid checking | |
4353 | * whether specific permissions grant access when more generic ones would. | |
4354 | */ | |
4355 | ||
4356 | /* owner permissions */ | |
4357 | needed = 0; | |
4358 | if (action & VREAD) | |
4359 | needed |= S_IRUSR; | |
4360 | if (action & VWRITE) | |
4361 | needed |= S_IWUSR; | |
4362 | if (action & VEXEC) | |
4363 | needed |= S_IXUSR; | |
4364 | owner_ok = (needed & vap->va_mode) == needed; | |
4365 | ||
4366 | /* group permissions */ | |
4367 | needed = 0; | |
4368 | if (action & VREAD) | |
4369 | needed |= S_IRGRP; | |
4370 | if (action & VWRITE) | |
4371 | needed |= S_IWGRP; | |
4372 | if (action & VEXEC) | |
4373 | needed |= S_IXGRP; | |
4374 | group_ok = (needed & vap->va_mode) == needed; | |
4375 | ||
4376 | /* world permissions */ | |
4377 | needed = 0; | |
4378 | if (action & VREAD) | |
4379 | needed |= S_IROTH; | |
4380 | if (action & VWRITE) | |
4381 | needed |= S_IWOTH; | |
4382 | if (action & VEXEC) | |
4383 | needed |= S_IXOTH; | |
4384 | world_ok = (needed & vap->va_mode) == needed; | |
4385 | ||
4386 | /* If granted/denied by all three, we're done */ | |
4387 | if (owner_ok && group_ok && world_ok) { | |
4388 | _SETWHERE("all"); | |
4389 | goto out; | |
4390 | } | |
4391 | if (!owner_ok && !group_ok && !world_ok) { | |
4392 | _SETWHERE("all"); | |
4393 | error = EACCES; | |
4394 | goto out; | |
4395 | } | |
4396 | ||
4397 | /* Check ownership (relatively cheap) */ | |
4398 | if ((on_dir && vauth_dir_owner(vcp)) || | |
4399 | (!on_dir && vauth_file_owner(vcp))) { | |
4400 | _SETWHERE("user"); | |
4401 | if (!owner_ok) | |
4402 | error = EACCES; | |
4403 | goto out; | |
4404 | } | |
4405 | ||
4406 | /* Not owner; if group and world both grant it we're done */ | |
4407 | if (group_ok && world_ok) { | |
4408 | _SETWHERE("group/world"); | |
4409 | goto out; | |
4410 | } | |
4411 | if (!group_ok && !world_ok) { | |
4412 | _SETWHERE("group/world"); | |
4413 | error = EACCES; | |
4414 | goto out; | |
4415 | } | |
4416 | ||
4417 | /* Check group membership (most expensive) */ | |
4418 | ismember = 0; | |
4419 | if (on_dir) { | |
4420 | error = vauth_dir_ingroup(vcp, &ismember); | |
4421 | } else { | |
4422 | error = vauth_file_ingroup(vcp, &ismember); | |
4423 | } | |
4424 | if (error) | |
4425 | goto out; | |
4426 | if (ismember) { | |
4427 | _SETWHERE("group"); | |
4428 | if (!group_ok) | |
4429 | error = EACCES; | |
4430 | goto out; | |
4431 | } | |
4432 | ||
4433 | /* Not owner, not in group, use world result */ | |
4434 | _SETWHERE("world"); | |
4435 | if (!world_ok) | |
4436 | error = EACCES; | |
4437 | ||
4438 | /* FALLTHROUGH */ | |
4439 | ||
4440 | out: | |
4441 | KAUTH_DEBUG("%p %s - posix %s permissions : need %s%s%s %x have %s%s%s%s%s%s%s%s%s UID = %d file = %d,%d", | |
4442 | vcp->vp, (error == 0) ? "ALLOWED" : "DENIED", where, | |
4443 | (action & VREAD) ? "r" : "-", | |
4444 | (action & VWRITE) ? "w" : "-", | |
4445 | (action & VEXEC) ? "x" : "-", | |
4446 | needed, | |
4447 | (vap->va_mode & S_IRUSR) ? "r" : "-", | |
4448 | (vap->va_mode & S_IWUSR) ? "w" : "-", | |
4449 | (vap->va_mode & S_IXUSR) ? "x" : "-", | |
4450 | (vap->va_mode & S_IRGRP) ? "r" : "-", | |
4451 | (vap->va_mode & S_IWGRP) ? "w" : "-", | |
4452 | (vap->va_mode & S_IXGRP) ? "x" : "-", | |
4453 | (vap->va_mode & S_IROTH) ? "r" : "-", | |
4454 | (vap->va_mode & S_IWOTH) ? "w" : "-", | |
4455 | (vap->va_mode & S_IXOTH) ? "x" : "-", | |
4456 | kauth_cred_getuid(vcp->ctx->vc_ucred), | |
4457 | on_dir ? vcp->dvap->va_uid : vcp->vap->va_uid, | |
4458 | on_dir ? vcp->dvap->va_gid : vcp->vap->va_gid); | |
4459 | return(error); | |
4460 | } | |
4461 | ||
4462 | /* | |
4463 | * Authorize the deletion of the node vp from the directory dvp. | |
4464 | * | |
4465 | * We assume that: | |
4466 | * - Neither the node nor the directory are immutable. | |
4467 | * - The user is not the superuser. | |
4468 | * | |
4469 | * Deletion is not permitted if the directory is sticky and the caller is not owner of the | |
4470 | * node or directory. | |
4471 | * | |
4472 | * If either the node grants DELETE, or the directory grants DELETE_CHILD, the node may be | |
4473 | * deleted. If neither denies the permission, and the caller has Posix write access to the | |
4474 | * directory, then the node may be deleted. | |
4475 | */ | |
4476 | static int | |
4477 | vnode_authorize_delete(vauth_ctx vcp) | |
4478 | { | |
4479 | struct vnode_attr *vap = vcp->vap; | |
4480 | struct vnode_attr *dvap = vcp->dvap; | |
4481 | kauth_cred_t cred = vcp->ctx->vc_ucred; | |
4482 | struct kauth_acl_eval eval; | |
4483 | int error, delete_denied, delete_child_denied, ismember; | |
4484 | ||
4485 | /* check the ACL on the directory */ | |
4486 | delete_child_denied = 0; | |
4487 | if (VATTR_IS_NOT(dvap, va_acl, NULL)) { | |
4488 | eval.ae_requested = KAUTH_VNODE_DELETE_CHILD; | |
4489 | eval.ae_acl = &dvap->va_acl->acl_ace[0]; | |
4490 | eval.ae_count = dvap->va_acl->acl_entrycount; | |
4491 | eval.ae_options = 0; | |
4492 | if (vauth_dir_owner(vcp)) | |
4493 | eval.ae_options |= KAUTH_AEVAL_IS_OWNER; | |
4494 | if ((error = vauth_dir_ingroup(vcp, &ismember)) != 0) | |
4495 | return(error); | |
4496 | if (ismember) | |
4497 | eval.ae_options |= KAUTH_AEVAL_IN_GROUP; | |
4498 | eval.ae_exp_gall = KAUTH_VNODE_GENERIC_ALL_BITS; | |
4499 | eval.ae_exp_gread = KAUTH_VNODE_GENERIC_READ_BITS; | |
4500 | eval.ae_exp_gwrite = KAUTH_VNODE_GENERIC_WRITE_BITS; | |
4501 | eval.ae_exp_gexec = KAUTH_VNODE_GENERIC_EXECUTE_BITS; | |
4502 | ||
4503 | error = kauth_acl_evaluate(cred, &eval); | |
4504 | ||
4505 | if (error != 0) { | |
4506 | KAUTH_DEBUG("%p ERROR during ACL processing - %d", vcp->vp, error); | |
4507 | return(error); | |
4508 | } | |
4509 | if (eval.ae_result == KAUTH_RESULT_DENY) | |
4510 | delete_child_denied = 1; | |
4511 | if (eval.ae_result == KAUTH_RESULT_ALLOW) { | |
4512 | KAUTH_DEBUG("%p ALLOWED - granted by directory ACL", vcp->vp); | |
4513 | return(0); | |
4514 | } | |
4515 | } | |
4516 | ||
4517 | /* check the ACL on the node */ | |
4518 | delete_denied = 0; | |
4519 | if (VATTR_IS_NOT(vap, va_acl, NULL)) { | |
4520 | eval.ae_requested = KAUTH_VNODE_DELETE; | |
4521 | eval.ae_acl = &vap->va_acl->acl_ace[0]; | |
4522 | eval.ae_count = vap->va_acl->acl_entrycount; | |
4523 | eval.ae_options = 0; | |
4524 | if (vauth_file_owner(vcp)) | |
4525 | eval.ae_options |= KAUTH_AEVAL_IS_OWNER; | |
4526 | if ((error = vauth_file_ingroup(vcp, &ismember)) != 0) | |
4527 | return(error); | |
4528 | if (ismember) | |
4529 | eval.ae_options |= KAUTH_AEVAL_IN_GROUP; | |
4530 | eval.ae_exp_gall = KAUTH_VNODE_GENERIC_ALL_BITS; | |
4531 | eval.ae_exp_gread = KAUTH_VNODE_GENERIC_READ_BITS; | |
4532 | eval.ae_exp_gwrite = KAUTH_VNODE_GENERIC_WRITE_BITS; | |
4533 | eval.ae_exp_gexec = KAUTH_VNODE_GENERIC_EXECUTE_BITS; | |
4534 | ||
4535 | if ((error = kauth_acl_evaluate(cred, &eval)) != 0) { | |
4536 | KAUTH_DEBUG("%p ERROR during ACL processing - %d", vcp->vp, error); | |
4537 | return(error); | |
4538 | } | |
4539 | if (eval.ae_result == KAUTH_RESULT_DENY) | |
4540 | delete_denied = 1; | |
4541 | if (eval.ae_result == KAUTH_RESULT_ALLOW) { | |
4542 | KAUTH_DEBUG("%p ALLOWED - granted by file ACL", vcp->vp); | |
4543 | return(0); | |
4544 | } | |
4545 | } | |
4546 | ||
4547 | /* if denied by ACL on directory or node, return denial */ | |
4548 | if (delete_denied || delete_child_denied) { | |
4549 | KAUTH_DEBUG("%p ALLOWED - denied by ACL", vcp->vp); | |
4550 | return(EACCES); | |
4551 | } | |
4552 | ||
4553 | /* enforce sticky bit behaviour */ | |
4554 | if ((dvap->va_mode & S_ISTXT) && !vauth_file_owner(vcp) && !vauth_dir_owner(vcp)) { | |
4555 | KAUTH_DEBUG("%p DENIED - sticky bit rules (user %d file %d dir %d)", | |
4556 | vcp->vp, cred->cr_uid, vap->va_uid, dvap->va_uid); | |
4557 | return(EACCES); | |
4558 | } | |
4559 | ||
4560 | /* check the directory */ | |
4561 | if ((error = vnode_authorize_posix(vcp, VWRITE, 1 /* on_dir */)) != 0) { | |
4562 | KAUTH_DEBUG("%p ALLOWED - granted by posix permisssions", vcp->vp); | |
4563 | return(error); | |
4564 | } | |
4565 | ||
4566 | /* not denied, must be OK */ | |
4567 | return(0); | |
4568 | } | |
4569 | ||
4570 | ||
4571 | /* | |
4572 | * Authorize an operation based on the node's attributes. | |
4573 | */ | |
4574 | static int | |
4575 | vnode_authorize_simple(vauth_ctx vcp, kauth_ace_rights_t acl_rights, kauth_ace_rights_t preauth_rights) | |
4576 | { | |
4577 | struct vnode_attr *vap = vcp->vap; | |
4578 | kauth_cred_t cred = vcp->ctx->vc_ucred; | |
4579 | struct kauth_acl_eval eval; | |
4580 | int error, ismember; | |
4581 | mode_t posix_action; | |
4582 | ||
4583 | /* | |
4584 | * If we are the file owner, we automatically have some rights. | |
4585 | * | |
4586 | * Do we need to expand this to support group ownership? | |
4587 | */ | |
4588 | if (vauth_file_owner(vcp)) | |
4589 | acl_rights &= ~(KAUTH_VNODE_WRITE_SECURITY); | |
4590 | ||
4591 | /* | |
4592 | * If we are checking both TAKE_OWNERSHIP and WRITE_SECURITY, we can | |
4593 | * mask the latter. If TAKE_OWNERSHIP is requested the caller is about to | |
4594 | * change ownership to themselves, and WRITE_SECURITY is implicitly | |
4595 | * granted to the owner. We need to do this because at this point | |
4596 | * WRITE_SECURITY may not be granted as the caller is not currently | |
4597 | * the owner. | |
4598 | */ | |
4599 | if ((acl_rights & KAUTH_VNODE_TAKE_OWNERSHIP) && | |
4600 | (acl_rights & KAUTH_VNODE_WRITE_SECURITY)) | |
4601 | acl_rights &= ~KAUTH_VNODE_WRITE_SECURITY; | |
4602 | ||
4603 | if (acl_rights == 0) { | |
4604 | KAUTH_DEBUG("%p ALLOWED - implicit or no rights required", vcp->vp); | |
4605 | return(0); | |
4606 | } | |
4607 | ||
4608 | /* if we have an ACL, evaluate it */ | |
4609 | if (VATTR_IS_NOT(vap, va_acl, NULL)) { | |
4610 | eval.ae_requested = acl_rights; | |
4611 | eval.ae_acl = &vap->va_acl->acl_ace[0]; | |
4612 | eval.ae_count = vap->va_acl->acl_entrycount; | |
4613 | eval.ae_options = 0; | |
4614 | if (vauth_file_owner(vcp)) | |
4615 | eval.ae_options |= KAUTH_AEVAL_IS_OWNER; | |
4616 | if ((error = vauth_file_ingroup(vcp, &ismember)) != 0) | |
4617 | return(error); | |
4618 | if (ismember) | |
4619 | eval.ae_options |= KAUTH_AEVAL_IN_GROUP; | |
4620 | eval.ae_exp_gall = KAUTH_VNODE_GENERIC_ALL_BITS; | |
4621 | eval.ae_exp_gread = KAUTH_VNODE_GENERIC_READ_BITS; | |
4622 | eval.ae_exp_gwrite = KAUTH_VNODE_GENERIC_WRITE_BITS; | |
4623 | eval.ae_exp_gexec = KAUTH_VNODE_GENERIC_EXECUTE_BITS; | |
4624 | ||
4625 | if ((error = kauth_acl_evaluate(cred, &eval)) != 0) { | |
4626 | KAUTH_DEBUG("%p ERROR during ACL processing - %d", vcp->vp, error); | |
4627 | return(error); | |
4628 | } | |
4629 | ||
4630 | if (eval.ae_result == KAUTH_RESULT_DENY) { | |
4631 | KAUTH_DEBUG("%p DENIED - by ACL", vcp->vp); | |
4632 | return(EACCES); /* deny, deny, counter-allege */ | |
4633 | } | |
4634 | if (eval.ae_result == KAUTH_RESULT_ALLOW) { | |
4635 | KAUTH_DEBUG("%p ALLOWED - all rights granted by ACL", vcp->vp); | |
4636 | return(0); | |
4637 | } | |
4638 | /* fall through and evaluate residual rights */ | |
4639 | } else { | |
4640 | /* no ACL, everything is residual */ | |
4641 | eval.ae_residual = acl_rights; | |
4642 | } | |
4643 | ||
4644 | /* | |
4645 | * Grant residual rights that have been pre-authorized. | |
4646 | */ | |
4647 | eval.ae_residual &= ~preauth_rights; | |
4648 | ||
4649 | /* | |
4650 | * We grant WRITE_ATTRIBUTES to the owner if it hasn't been denied. | |
4651 | */ | |
4652 | if (vauth_file_owner(vcp)) | |
4653 | eval.ae_residual &= ~KAUTH_VNODE_WRITE_ATTRIBUTES; | |
4654 | ||
4655 | if (eval.ae_residual == 0) { | |
4656 | KAUTH_DEBUG("%p ALLOWED - rights already authorized", vcp->vp); | |
4657 | return(0); | |
4658 | } | |
4659 | ||
4660 | /* | |
4661 | * Bail if we have residual rights that can't be granted by posix permissions, | |
4662 | * or aren't presumed granted at this point. | |
4663 | * | |
4664 | * XXX these can be collapsed for performance | |
4665 | */ | |
4666 | if (eval.ae_residual & KAUTH_VNODE_CHANGE_OWNER) { | |
4667 | KAUTH_DEBUG("%p DENIED - CHANGE_OWNER not permitted", vcp->vp); | |
4668 | return(EACCES); | |
4669 | } | |
4670 | if (eval.ae_residual & KAUTH_VNODE_WRITE_SECURITY) { | |
4671 | KAUTH_DEBUG("%p DENIED - WRITE_SECURITY not permitted", vcp->vp); | |
4672 | return(EACCES); | |
4673 | } | |
4674 | ||
4675 | #if DIAGNOSTIC | |
4676 | if (eval.ae_residual & KAUTH_VNODE_DELETE) | |
4677 | panic("vnode_authorize: can't be checking delete permission here"); | |
4678 | #endif | |
4679 | ||
4680 | /* | |
4681 | * Compute the fallback posix permissions that will satisfy the remaining | |
4682 | * rights. | |
4683 | */ | |
4684 | posix_action = 0; | |
4685 | if (eval.ae_residual & (KAUTH_VNODE_READ_DATA | | |
4686 | KAUTH_VNODE_LIST_DIRECTORY | | |
4687 | KAUTH_VNODE_READ_EXTATTRIBUTES)) | |
4688 | posix_action |= VREAD; | |
4689 | if (eval.ae_residual & (KAUTH_VNODE_WRITE_DATA | | |
4690 | KAUTH_VNODE_ADD_FILE | | |
4691 | KAUTH_VNODE_ADD_SUBDIRECTORY | | |
4692 | KAUTH_VNODE_DELETE_CHILD | | |
4693 | KAUTH_VNODE_WRITE_ATTRIBUTES | | |
4694 | KAUTH_VNODE_WRITE_EXTATTRIBUTES)) | |
4695 | posix_action |= VWRITE; | |
4696 | if (eval.ae_residual & (KAUTH_VNODE_EXECUTE | | |
4697 | KAUTH_VNODE_SEARCH)) | |
4698 | posix_action |= VEXEC; | |
4699 | ||
4700 | if (posix_action != 0) { | |
4701 | return(vnode_authorize_posix(vcp, posix_action, 0 /* !on_dir */)); | |
4702 | } else { | |
4703 | KAUTH_DEBUG("%p ALLOWED - residual rights %s%s%s%s%s%s%s%s%s%s%s%s%s%s granted due to no posix mapping", | |
4704 | vcp->vp, | |
4705 | (eval.ae_residual & KAUTH_VNODE_READ_DATA) | |
4706 | ? vnode_isdir(vcp->vp) ? " LIST_DIRECTORY" : " READ_DATA" : "", | |
4707 | (eval.ae_residual & KAUTH_VNODE_WRITE_DATA) | |
4708 | ? vnode_isdir(vcp->vp) ? " ADD_FILE" : " WRITE_DATA" : "", | |
4709 | (eval.ae_residual & KAUTH_VNODE_EXECUTE) | |
4710 | ? vnode_isdir(vcp->vp) ? " SEARCH" : " EXECUTE" : "", | |
4711 | (eval.ae_residual & KAUTH_VNODE_DELETE) | |
4712 | ? " DELETE" : "", | |
4713 | (eval.ae_residual & KAUTH_VNODE_APPEND_DATA) | |
4714 | ? vnode_isdir(vcp->vp) ? " ADD_SUBDIRECTORY" : " APPEND_DATA" : "", | |
4715 | (eval.ae_residual & KAUTH_VNODE_DELETE_CHILD) | |
4716 | ? " DELETE_CHILD" : "", | |
4717 | (eval.ae_residual & KAUTH_VNODE_READ_ATTRIBUTES) | |
4718 | ? " READ_ATTRIBUTES" : "", | |
4719 | (eval.ae_residual & KAUTH_VNODE_WRITE_ATTRIBUTES) | |
4720 | ? " WRITE_ATTRIBUTES" : "", | |
4721 | (eval.ae_residual & KAUTH_VNODE_READ_EXTATTRIBUTES) | |
4722 | ? " READ_EXTATTRIBUTES" : "", | |
4723 | (eval.ae_residual & KAUTH_VNODE_WRITE_EXTATTRIBUTES) | |
4724 | ? " WRITE_EXTATTRIBUTES" : "", | |
4725 | (eval.ae_residual & KAUTH_VNODE_READ_SECURITY) | |
4726 | ? " READ_SECURITY" : "", | |
4727 | (eval.ae_residual & KAUTH_VNODE_WRITE_SECURITY) | |
4728 | ? " WRITE_SECURITY" : "", | |
4729 | (eval.ae_residual & KAUTH_VNODE_CHECKIMMUTABLE) | |
4730 | ? " CHECKIMMUTABLE" : "", | |
4731 | (eval.ae_residual & KAUTH_VNODE_CHANGE_OWNER) | |
4732 | ? " CHANGE_OWNER" : ""); | |
4733 | } | |
4734 | ||
4735 | /* | |
4736 | * Lack of required Posix permissions implies no reason to deny access. | |
4737 | */ | |
4738 | return(0); | |
4739 | } | |
4740 | ||
4741 | /* | |
4742 | * Check for file immutability. | |
4743 | */ | |
4744 | static int | |
4745 | vnode_authorize_checkimmutable(vnode_t vp, struct vnode_attr *vap, int rights, int ignore) | |
4746 | { | |
4747 | mount_t mp; | |
4748 | int error; | |
4749 | int append; | |
4750 | ||
4751 | /* | |
4752 | * Perform immutability checks for operations that change data. | |
4753 | * | |
4754 | * Sockets, fifos and devices require special handling. | |
4755 | */ | |
4756 | switch(vp->v_type) { | |
4757 | case VSOCK: | |
4758 | case VFIFO: | |
4759 | case VBLK: | |
4760 | case VCHR: | |
4761 | /* | |
4762 | * Writing to these nodes does not change the filesystem data, | |
4763 | * so forget that it's being tried. | |
4764 | */ | |
4765 | rights &= ~KAUTH_VNODE_WRITE_DATA; | |
4766 | break; | |
4767 | default: | |
4768 | break; | |
4769 | } | |
4770 | ||
4771 | error = 0; | |
4772 | if (rights & KAUTH_VNODE_WRITE_RIGHTS) { | |
4773 | ||
4774 | /* check per-filesystem options if possible */ | |
4775 | mp = vnode_mount(vp); | |
4776 | if (mp != NULL) { | |
4777 | ||
4778 | /* check for no-EA filesystems */ | |
4779 | if ((rights & KAUTH_VNODE_WRITE_EXTATTRIBUTES) && | |
4780 | (vfs_flags(mp) & MNT_NOUSERXATTR)) { | |
4781 | KAUTH_DEBUG("%p DENIED - filesystem disallowed extended attributes", vp); | |
4782 | error = EACCES; /* User attributes disabled */ | |
4783 | goto out; | |
4784 | } | |
4785 | } | |
4786 | ||
4787 | /* check for file immutability */ | |
4788 | append = 0; | |
4789 | if (vp->v_type == VDIR) { | |
4790 | if ((rights & (KAUTH_VNODE_ADD_FILE | KAUTH_VNODE_ADD_SUBDIRECTORY)) == rights) | |
4791 | append = 1; | |
4792 | } else { | |
4793 | if ((rights & KAUTH_VNODE_APPEND_DATA) == rights) | |
4794 | append = 1; | |
4795 | } | |
4796 | if ((error = vnode_immutable(vap, append, ignore)) != 0) { | |
4797 | KAUTH_DEBUG("%p DENIED - file is immutable", vp); | |
4798 | goto out; | |
4799 | } | |
4800 | } | |
4801 | out: | |
4802 | return(error); | |
4803 | } | |
4804 | ||
4805 | /* | |
4806 | * Handle authorization actions for filesystems that advertise that the server will | |
4807 | * be enforcing. | |
4808 | */ | |
4809 | static int | |
4810 | vnode_authorize_opaque(vnode_t vp, int *resultp, kauth_action_t action, vfs_context_t ctx) | |
4811 | { | |
4812 | int error; | |
4813 | ||
4814 | /* | |
4815 | * If the vp is a device node, socket or FIFO it actually represents a local | |
4816 | * endpoint, so we need to handle it locally. | |
4817 | */ | |
4818 | switch(vp->v_type) { | |
4819 | case VBLK: | |
4820 | case VCHR: | |
4821 | case VSOCK: | |
4822 | case VFIFO: | |
4823 | return(0); | |
4824 | default: | |
4825 | break; | |
4826 | } | |
4827 | ||
4828 | /* | |
4829 | * In the advisory request case, if the filesystem doesn't think it's reliable | |
4830 | * we will attempt to formulate a result ourselves based on VNOP_GETATTR data. | |
4831 | */ | |
4832 | if ((action & KAUTH_VNODE_ACCESS) && !vfs_authopaqueaccess(vnode_mount(vp))) | |
4833 | return(0); | |
4834 | ||
4835 | /* | |
4836 | * Let the filesystem have a say in the matter. It's OK for it to not implemnent | |
4837 | * VNOP_ACCESS, as most will authorise inline with the actual request. | |
4838 | */ | |
4839 | if ((error = VNOP_ACCESS(vp, action, ctx)) != ENOTSUP) { | |
4840 | *resultp = error; | |
4841 | KAUTH_DEBUG("%p DENIED - opaque filesystem VNOP_ACCESS denied access", vp); | |
4842 | return(1); | |
4843 | } | |
4844 | ||
4845 | /* | |
4846 | * Typically opaque filesystems do authorisation in-line, but exec is a special case. In | |
4847 | * order to be reasonably sure that exec will be permitted, we try a bit harder here. | |
4848 | */ | |
4849 | if ((action & KAUTH_VNODE_EXECUTE) && vnode_isreg(vp)) { | |
4850 | /* try a VNOP_OPEN for readonly access */ | |
4851 | if ((error = VNOP_OPEN(vp, FREAD, ctx)) != 0) { | |
4852 | *resultp = error; | |
4853 | KAUTH_DEBUG("%p DENIED - EXECUTE denied because file could not be opened readonly", vp); | |
4854 | return(1); | |
4855 | } | |
4856 | VNOP_CLOSE(vp, FREAD, ctx); | |
4857 | } | |
4858 | ||
4859 | /* | |
4860 | * We don't have any reason to believe that the request has to be denied at this point, | |
4861 | * so go ahead and allow it. | |
4862 | */ | |
4863 | *resultp = 0; | |
4864 | KAUTH_DEBUG("%p ALLOWED - bypassing access check for non-local filesystem", vp); | |
4865 | return(1); | |
4866 | } | |
4867 | ||
4868 | static int | |
4869 | vnode_authorize_callback(__unused kauth_cred_t unused_cred, __unused void *idata, kauth_action_t action, | |
4870 | uintptr_t arg0, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3) | |
4871 | { | |
4872 | struct _vnode_authorize_context auth_context; | |
4873 | vauth_ctx vcp; | |
4874 | vfs_context_t ctx; | |
4875 | vnode_t vp, dvp; | |
4876 | kauth_cred_t cred; | |
4877 | kauth_ace_rights_t rights; | |
4878 | struct vnode_attr va, dva; | |
4879 | int result; | |
4880 | int *errorp; | |
4881 | int noimmutable; | |
4882 | ||
4883 | vcp = &auth_context; | |
4884 | ctx = vcp->ctx = (vfs_context_t)arg0; | |
4885 | vp = vcp->vp = (vnode_t)arg1; | |
4886 | dvp = vcp->dvp = (vnode_t)arg2; | |
4887 | errorp = (int *)arg3; | |
4888 | /* note that we authorize against the context, not the passed cred (the same thing anyway) */ | |
4889 | cred = ctx->vc_ucred; | |
4890 | ||
4891 | VATTR_INIT(&va); | |
4892 | vcp->vap = &va; | |
4893 | VATTR_INIT(&dva); | |
4894 | vcp->dvap = &dva; | |
4895 | ||
4896 | vcp->flags = vcp->flags_valid = 0; | |
4897 | ||
4898 | #if DIAGNOSTIC | |
4899 | if ((ctx == NULL) || (vp == NULL) || (cred == NULL)) | |
4900 | panic("vnode_authorize: bad arguments (context %p vp %p cred %p)", ctx, vp, cred); | |
4901 | #endif | |
4902 | ||
4903 | KAUTH_DEBUG("%p AUTH - %s %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s on %s '%s' (0x%x:%p/%p)", | |
4904 | vp, vfs_context_proc(ctx)->p_comm, | |
4905 | (action & KAUTH_VNODE_ACCESS) ? "access" : "auth", | |
4906 | (action & KAUTH_VNODE_READ_DATA) ? vnode_isdir(vp) ? " LIST_DIRECTORY" : " READ_DATA" : "", | |
4907 | (action & KAUTH_VNODE_WRITE_DATA) ? vnode_isdir(vp) ? " ADD_FILE" : " WRITE_DATA" : "", | |
4908 | (action & KAUTH_VNODE_EXECUTE) ? vnode_isdir(vp) ? " SEARCH" : " EXECUTE" : "", | |
4909 | (action & KAUTH_VNODE_DELETE) ? " DELETE" : "", | |
4910 | (action & KAUTH_VNODE_APPEND_DATA) ? vnode_isdir(vp) ? " ADD_SUBDIRECTORY" : " APPEND_DATA" : "", | |
4911 | (action & KAUTH_VNODE_DELETE_CHILD) ? " DELETE_CHILD" : "", | |
4912 | (action & KAUTH_VNODE_READ_ATTRIBUTES) ? " READ_ATTRIBUTES" : "", | |
4913 | (action & KAUTH_VNODE_WRITE_ATTRIBUTES) ? " WRITE_ATTRIBUTES" : "", | |
4914 | (action & KAUTH_VNODE_READ_EXTATTRIBUTES) ? " READ_EXTATTRIBUTES" : "", | |
4915 | (action & KAUTH_VNODE_WRITE_EXTATTRIBUTES) ? " WRITE_EXTATTRIBUTES" : "", | |
4916 | (action & KAUTH_VNODE_READ_SECURITY) ? " READ_SECURITY" : "", | |
4917 | (action & KAUTH_VNODE_WRITE_SECURITY) ? " WRITE_SECURITY" : "", | |
4918 | (action & KAUTH_VNODE_CHANGE_OWNER) ? " CHANGE_OWNER" : "", | |
4919 | (action & KAUTH_VNODE_NOIMMUTABLE) ? " (noimmutable)" : "", | |
4920 | vnode_isdir(vp) ? "directory" : "file", | |
4921 | vp->v_name ? vp->v_name : "<NULL>", action, vp, dvp); | |
4922 | ||
4923 | /* | |
4924 | * Extract the control bits from the action, everything else is | |
4925 | * requested rights. | |
4926 | */ | |
4927 | noimmutable = (action & KAUTH_VNODE_NOIMMUTABLE) ? 1 : 0; | |
4928 | rights = action & ~(KAUTH_VNODE_ACCESS | KAUTH_VNODE_NOIMMUTABLE); | |
4929 | ||
4930 | if (rights & KAUTH_VNODE_DELETE) { | |
4931 | #if DIAGNOSTIC | |
4932 | if (dvp == NULL) | |
4933 | panic("vnode_authorize: KAUTH_VNODE_DELETE test requires a directory"); | |
4934 | #endif | |
4935 | } else { | |
4936 | dvp = NULL; | |
4937 | } | |
4938 | ||
4939 | /* | |
4940 | * Check for read-only filesystems. | |
4941 | */ | |
4942 | if ((rights & KAUTH_VNODE_WRITE_RIGHTS) && | |
4943 | (vp->v_mount->mnt_flag & MNT_RDONLY) && | |
4944 | ((vp->v_type == VREG) || (vp->v_type == VDIR) || | |
4945 | (vp->v_type == VLNK) || (vp->v_type == VCPLX) || | |
4946 | (rights & KAUTH_VNODE_DELETE) || (rights & KAUTH_VNODE_DELETE_CHILD))) { | |
4947 | result = EROFS; | |
4948 | goto out; | |
4949 | } | |
4950 | ||
4951 | /* | |
4952 | * Check for noexec filesystems. | |
4953 | */ | |
4954 | if ((rights & KAUTH_VNODE_EXECUTE) && vnode_isreg(vp) && (vp->v_mount->mnt_flag & MNT_NOEXEC)) { | |
4955 | result = EACCES; | |
4956 | goto out; | |
4957 | } | |
4958 | ||
4959 | /* | |
4960 | * Handle cases related to filesystems with non-local enforcement. | |
4961 | * This call can return 0, in which case we will fall through to perform a | |
4962 | * check based on VNOP_GETATTR data. Otherwise it returns 1 and sets | |
4963 | * an appropriate result, at which point we can return immediately. | |
4964 | */ | |
4965 | if (vfs_authopaque(vp->v_mount) && vnode_authorize_opaque(vp, &result, action, ctx)) | |
4966 | goto out; | |
4967 | ||
4968 | /* | |
4969 | * Get vnode attributes and extended security information for the vnode | |
4970 | * and directory if required. | |
4971 | */ | |
4972 | VATTR_WANTED(&va, va_mode); | |
4973 | VATTR_WANTED(&va, va_uid); | |
4974 | VATTR_WANTED(&va, va_gid); | |
4975 | VATTR_WANTED(&va, va_flags); | |
4976 | VATTR_WANTED(&va, va_acl); | |
4977 | if ((result = vnode_getattr(vp, &va, ctx)) != 0) { | |
4978 | KAUTH_DEBUG("%p ERROR - failed to get vnode attributes - %d", vp, result); | |
4979 | goto out; | |
4980 | } | |
4981 | if (dvp) { | |
4982 | VATTR_WANTED(&dva, va_mode); | |
4983 | VATTR_WANTED(&dva, va_uid); | |
4984 | VATTR_WANTED(&dva, va_gid); | |
4985 | VATTR_WANTED(&dva, va_flags); | |
4986 | VATTR_WANTED(&dva, va_acl); | |
4987 | if ((result = vnode_getattr(dvp, &dva, ctx)) != 0) { | |
4988 | KAUTH_DEBUG("%p ERROR - failed to get directory vnode attributes - %d", vp, result); | |
4989 | goto out; | |
4990 | } | |
4991 | } | |
4992 | ||
4993 | /* | |
4994 | * If the vnode is an extended attribute data vnode (eg. a resource fork), *_DATA becomes | |
4995 | * *_EXTATTRIBUTES. | |
4996 | */ | |
4997 | if (S_ISXATTR(va.va_mode)) { | |
4998 | if (rights & KAUTH_VNODE_READ_DATA) { | |
4999 | rights &= ~KAUTH_VNODE_READ_DATA; | |
5000 | rights |= KAUTH_VNODE_READ_EXTATTRIBUTES; | |
5001 | } | |
5002 | if (rights & KAUTH_VNODE_WRITE_DATA) { | |
5003 | rights &= ~KAUTH_VNODE_WRITE_DATA; | |
5004 | rights |= KAUTH_VNODE_WRITE_EXTATTRIBUTES; | |
5005 | } | |
5006 | } | |
5007 | ||
5008 | /* | |
5009 | * Check for immutability. | |
5010 | * | |
5011 | * In the deletion case, parent directory immutability vetoes specific | |
5012 | * file rights. | |
5013 | */ | |
5014 | if ((result = vnode_authorize_checkimmutable(vp, &va, rights, noimmutable)) != 0) | |
5015 | goto out; | |
5016 | if ((rights & KAUTH_VNODE_DELETE) && | |
5017 | ((result = vnode_authorize_checkimmutable(dvp, &dva, KAUTH_VNODE_DELETE_CHILD, 0)) != 0)) | |
5018 | goto out; | |
5019 | ||
5020 | /* | |
5021 | * Clear rights that have been authorized by reaching this point, bail if nothing left to | |
5022 | * check. | |
5023 | */ | |
5024 | rights &= ~(KAUTH_VNODE_LINKTARGET | KAUTH_VNODE_CHECKIMMUTABLE); | |
5025 | if (rights == 0) | |
5026 | goto out; | |
5027 | ||
5028 | /* | |
5029 | * If we're not the superuser, authorize based on file properties. | |
5030 | */ | |
5031 | if (!vfs_context_issuser(ctx)) { | |
5032 | /* process delete rights */ | |
5033 | if ((rights & KAUTH_VNODE_DELETE) && | |
5034 | ((result = vnode_authorize_delete(vcp)) != 0)) | |
5035 | goto out; | |
5036 | ||
5037 | /* process remaining rights */ | |
5038 | if ((rights & ~KAUTH_VNODE_DELETE) && | |
5039 | ((result = vnode_authorize_simple(vcp, rights, rights & KAUTH_VNODE_DELETE)) != 0)) | |
5040 | goto out; | |
5041 | } else { | |
5042 | ||
5043 | /* | |
5044 | * Execute is only granted to root if one of the x bits is set. This check only | |
5045 | * makes sense if the posix mode bits are actually supported. | |
5046 | */ | |
5047 | if ((rights & KAUTH_VNODE_EXECUTE) && | |
5048 | (vp->v_type == VREG) && | |
5049 | VATTR_IS_SUPPORTED(&va, va_mode) && | |
5050 | !(va.va_mode & (S_IXUSR | S_IXGRP | S_IXOTH))) { | |
5051 | result = EPERM; | |
5052 | KAUTH_DEBUG("%p DENIED - root execute requires at least one x bit in 0x%x", vp, va.va_mode); | |
5053 | goto out; | |
5054 | } | |
5055 | ||
5056 | KAUTH_DEBUG("%p ALLOWED - caller is superuser", vp); | |
5057 | } | |
5058 | ||
5059 | out: | |
5060 | if (VATTR_IS_SUPPORTED(&va, va_acl) && (va.va_acl != NULL)) | |
5061 | kauth_acl_free(va.va_acl); | |
5062 | if (VATTR_IS_SUPPORTED(&dva, va_acl) && (dva.va_acl != NULL)) | |
5063 | kauth_acl_free(dva.va_acl); | |
5064 | if (result) { | |
5065 | *errorp = result; | |
5066 | KAUTH_DEBUG("%p DENIED - auth denied", vp); | |
5067 | return(KAUTH_RESULT_DENY); | |
5068 | } | |
5069 | ||
5070 | /* | |
5071 | * Note that this implies that we will allow requests for no rights, as well as | |
5072 | * for rights that we do not recognise. There should be none of these. | |
5073 | */ | |
5074 | KAUTH_DEBUG("%p ALLOWED - auth granted", vp); | |
5075 | return(KAUTH_RESULT_ALLOW); | |
5076 | } | |
5077 | ||
5078 | /* | |
5079 | * Check that the attribute information in vattr can be legally applied to | |
5080 | * a new file by the context. | |
5081 | */ | |
5082 | int | |
5083 | vnode_authattr_new(vnode_t dvp, struct vnode_attr *vap, int noauth, vfs_context_t ctx) | |
5084 | { | |
5085 | int error; | |
5086 | int is_suser, ismember, defaulted_owner, defaulted_group, defaulted_mode; | |
5087 | kauth_cred_t cred; | |
5088 | guid_t changer; | |
5089 | mount_t dmp; | |
5090 | ||
5091 | error = 0; | |
5092 | defaulted_owner = defaulted_group = defaulted_mode = 0; | |
5093 | ||
5094 | /* | |
5095 | * Require that the filesystem support extended security to apply any. | |
5096 | */ | |
5097 | if (!vfs_extendedsecurity(dvp->v_mount) && | |
5098 | (VATTR_IS_ACTIVE(vap, va_acl) || VATTR_IS_ACTIVE(vap, va_uuuid) || VATTR_IS_ACTIVE(vap, va_guuid))) { | |
5099 | error = EINVAL; | |
5100 | goto out; | |
5101 | } | |
5102 | ||
5103 | /* | |
5104 | * Default some fields. | |
5105 | */ | |
5106 | dmp = dvp->v_mount; | |
5107 | ||
5108 | /* | |
5109 | * If the filesystem is mounted IGNORE_OWNERSHIP and an explicit owner is set, that | |
5110 | * owner takes ownership of all new files. | |
5111 | */ | |
5112 | if ((dmp->mnt_flag & MNT_IGNORE_OWNERSHIP) && (dmp->mnt_fsowner != KAUTH_UID_NONE)) { | |
5113 | VATTR_SET(vap, va_uid, dmp->mnt_fsowner); | |
5114 | defaulted_owner = 1; | |
5115 | } else { | |
5116 | if (!VATTR_IS_ACTIVE(vap, va_uid)) { | |
5117 | /* default owner is current user */ | |
5118 | VATTR_SET(vap, va_uid, kauth_cred_getuid(vfs_context_ucred(ctx))); | |
5119 | defaulted_owner = 1; | |
5120 | } | |
5121 | } | |
5122 | ||
5123 | /* | |
5124 | * If the filesystem is mounted IGNORE_OWNERSHIP and an explicit grouo is set, that | |
5125 | * group takes ownership of all new files. | |
5126 | */ | |
5127 | if ((dmp->mnt_flag & MNT_IGNORE_OWNERSHIP) && (dmp->mnt_fsgroup != KAUTH_GID_NONE)) { | |
5128 | VATTR_SET(vap, va_gid, dmp->mnt_fsgroup); | |
5129 | defaulted_group = 1; | |
5130 | } else { | |
5131 | if (!VATTR_IS_ACTIVE(vap, va_gid)) { | |
5132 | /* default group comes from parent object, fallback to current user */ | |
5133 | struct vnode_attr dva; | |
5134 | VATTR_INIT(&dva); | |
5135 | VATTR_WANTED(&dva, va_gid); | |
5136 | if ((error = vnode_getattr(dvp, &dva, ctx)) != 0) | |
5137 | goto out; | |
5138 | if (VATTR_IS_SUPPORTED(&dva, va_gid)) { | |
5139 | VATTR_SET(vap, va_gid, dva.va_gid); | |
5140 | } else { | |
5141 | VATTR_SET(vap, va_gid, kauth_cred_getgid(vfs_context_ucred(ctx))); | |
5142 | } | |
5143 | defaulted_group = 1; | |
5144 | } | |
5145 | } | |
5146 | ||
5147 | if (!VATTR_IS_ACTIVE(vap, va_flags)) | |
5148 | VATTR_SET(vap, va_flags, 0); | |
5149 | ||
5150 | /* default mode is everything, masked with current umask */ | |
5151 | if (!VATTR_IS_ACTIVE(vap, va_mode)) { | |
5152 | VATTR_SET(vap, va_mode, ACCESSPERMS & ~vfs_context_proc(ctx)->p_fd->fd_cmask); | |
5153 | KAUTH_DEBUG("ATTR - defaulting new file mode to %o from umask %o", vap->va_mode, vfs_context_proc(ctx)->p_fd->fd_cmask); | |
5154 | defaulted_mode = 1; | |
5155 | } | |
5156 | /* set timestamps to now */ | |
5157 | if (!VATTR_IS_ACTIVE(vap, va_create_time)) { | |
5158 | nanotime(&vap->va_create_time); | |
5159 | VATTR_SET_ACTIVE(vap, va_create_time); | |
5160 | } | |
5161 | ||
5162 | /* | |
5163 | * Check for attempts to set nonsensical fields. | |
5164 | */ | |
5165 | if (vap->va_active & ~VNODE_ATTR_NEWOBJ) { | |
5166 | error = EINVAL; | |
5167 | KAUTH_DEBUG("ATTR - ERROR - attempt to set unsupported new-file attributes %llx", | |
5168 | vap->va_active & ~VNODE_ATTR_NEWOBJ); | |
5169 | goto out; | |
5170 | } | |
5171 | ||
5172 | /* | |
5173 | * Quickly check for the applicability of any enforcement here. | |
5174 | * Tests below maintain the integrity of the local security model. | |
5175 | */ | |
5176 | if (vfs_authopaque(vnode_mount(dvp))) | |
5177 | goto out; | |
5178 | ||
5179 | /* | |
5180 | * We need to know if the caller is the superuser, or if the work is | |
5181 | * otherwise already authorised. | |
5182 | */ | |
5183 | cred = vfs_context_ucred(ctx); | |
5184 | if (noauth) { | |
5185 | /* doing work for the kernel */ | |
5186 | is_suser = 1; | |
5187 | } else { | |
5188 | is_suser = vfs_context_issuser(ctx); | |
5189 | } | |
5190 | ||
5191 | ||
5192 | if (VATTR_IS_ACTIVE(vap, va_flags)) { | |
5193 | if (is_suser) { | |
5194 | if ((vap->va_flags & (UF_SETTABLE | SF_SETTABLE)) != vap->va_flags) { | |
5195 | error = EPERM; | |
5196 | KAUTH_DEBUG(" DENIED - superuser attempt to set illegal flag(s)"); | |
5197 | goto out; | |
5198 | } | |
5199 | } else { | |
5200 | if ((vap->va_flags & UF_SETTABLE) != vap->va_flags) { | |
5201 | error = EPERM; | |
5202 | KAUTH_DEBUG(" DENIED - user attempt to set illegal flag(s)"); | |
5203 | goto out; | |
5204 | } | |
5205 | } | |
5206 | } | |
5207 | ||
5208 | /* if not superuser, validate legality of new-item attributes */ | |
5209 | if (!is_suser) { | |
5210 | if (!defaulted_mode && VATTR_IS_ACTIVE(vap, va_mode)) { | |
5211 | /* setgid? */ | |
5212 | if (vap->va_mode & S_ISGID) { | |
5213 | if ((error = kauth_cred_ismember_gid(cred, vap->va_gid, &ismember)) != 0) { | |
5214 | KAUTH_DEBUG("ATTR - ERROR: got %d checking for membership in %d", error, vap->va_gid); | |
5215 | goto out; | |
5216 | } | |
5217 | if (!ismember) { | |
5218 | KAUTH_DEBUG(" DENIED - can't set SGID bit, not a member of %d", vap->va_gid); | |
5219 | error = EPERM; | |
5220 | goto out; | |
5221 | } | |
5222 | } | |
5223 | ||
5224 | /* setuid? */ | |
5225 | if ((vap->va_mode & S_ISUID) && (vap->va_uid != kauth_cred_getuid(cred))) { | |
5226 | KAUTH_DEBUG("ATTR - ERROR: illegal attempt to set the setuid bit"); | |
5227 | error = EPERM; | |
5228 | goto out; | |
5229 | } | |
5230 | } | |
5231 | if (!defaulted_owner && (vap->va_uid != kauth_cred_getuid(cred))) { | |
5232 | KAUTH_DEBUG(" DENIED - cannot create new item owned by %d", vap->va_uid); | |
5233 | error = EPERM; | |
5234 | goto out; | |
5235 | } | |
5236 | if (!defaulted_group) { | |
5237 | if ((error = kauth_cred_ismember_gid(cred, vap->va_gid, &ismember)) != 0) { | |
5238 | KAUTH_DEBUG(" ERROR - got %d checking for membership in %d", error, vap->va_gid); | |
5239 | goto out; | |
5240 | } | |
5241 | if (!ismember) { | |
5242 | KAUTH_DEBUG(" DENIED - cannot create new item with group %d - not a member", vap->va_gid); | |
5243 | error = EPERM; | |
5244 | goto out; | |
5245 | } | |
5246 | } | |
5247 | ||
5248 | /* initialising owner/group UUID */ | |
5249 | if (VATTR_IS_ACTIVE(vap, va_uuuid)) { | |
5250 | if ((error = kauth_cred_getguid(cred, &changer)) != 0) { | |
5251 | KAUTH_DEBUG(" ERROR - got %d trying to get caller UUID", error); | |
5252 | /* XXX ENOENT here - no GUID - should perhaps become EPERM */ | |
5253 | goto out; | |
5254 | } | |
5255 | if (!kauth_guid_equal(&vap->va_uuuid, &changer)) { | |
5256 | KAUTH_DEBUG(" ERROR - cannot create item with supplied owner UUID - not us"); | |
5257 | error = EPERM; | |
5258 | goto out; | |
5259 | } | |
5260 | } | |
5261 | if (VATTR_IS_ACTIVE(vap, va_guuid)) { | |
5262 | if ((error = kauth_cred_ismember_guid(cred, &vap->va_guuid, &ismember)) != 0) { | |
5263 | KAUTH_DEBUG(" ERROR - got %d trying to check group membership", error); | |
5264 | goto out; | |
5265 | } | |
5266 | if (!ismember) { | |
5267 | KAUTH_DEBUG(" ERROR - cannot create item with supplied group UUID - not a member"); | |
5268 | error = EPERM; | |
5269 | goto out; | |
5270 | } | |
5271 | } | |
5272 | } | |
5273 | out: | |
5274 | return(error); | |
5275 | } | |
5276 | ||
5277 | /* | |
5278 | * Check that the attribute information in vap can be legally written by the context. | |
5279 | * | |
5280 | * Call this when you're not sure about the vnode_attr; either its contents have come | |
5281 | * from an unknown source, or when they are variable. | |
5282 | * | |
5283 | * Returns errno, or zero and sets *actionp to the KAUTH_VNODE_* actions that | |
5284 | * must be authorized to be permitted to write the vattr. | |
5285 | */ | |
5286 | int | |
5287 | vnode_authattr(vnode_t vp, struct vnode_attr *vap, kauth_action_t *actionp, vfs_context_t ctx) | |
5288 | { | |
5289 | struct vnode_attr ova; | |
5290 | kauth_action_t required_action; | |
5291 | int error, is_suser, ismember, chowner, chgroup; | |
5292 | guid_t changer; | |
5293 | gid_t group; | |
5294 | uid_t owner; | |
5295 | mode_t newmode; | |
5296 | kauth_cred_t cred; | |
5297 | uint32_t fdelta; | |
5298 | ||
5299 | VATTR_INIT(&ova); | |
5300 | required_action = 0; | |
5301 | error = 0; | |
5302 | ||
5303 | /* | |
5304 | * Quickly check for enforcement applicability. | |
5305 | */ | |
5306 | if (vfs_authopaque(vnode_mount(vp))) | |
5307 | goto out; | |
5308 | ||
5309 | /* | |
5310 | * Check for attempts to set nonsensical fields. | |
5311 | */ | |
5312 | if (vap->va_active & VNODE_ATTR_RDONLY) { | |
5313 | KAUTH_DEBUG("ATTR - ERROR: attempt to set readonly attribute(s)"); | |
5314 | error = EINVAL; | |
5315 | goto out; | |
5316 | } | |
5317 | ||
5318 | /* | |
5319 | * We need to know if the caller is the superuser. | |
5320 | */ | |
5321 | cred = vfs_context_ucred(ctx); | |
5322 | is_suser = kauth_cred_issuser(cred); | |
5323 | ||
5324 | /* | |
5325 | * If any of the following are changing, we need information from the old file: | |
5326 | * va_uid | |
5327 | * va_gid | |
5328 | * va_mode | |
5329 | * va_uuuid | |
5330 | * va_guuid | |
5331 | */ | |
5332 | if (VATTR_IS_ACTIVE(vap, va_uid) || | |
5333 | VATTR_IS_ACTIVE(vap, va_gid) || | |
5334 | VATTR_IS_ACTIVE(vap, va_mode) || | |
5335 | VATTR_IS_ACTIVE(vap, va_uuuid) || | |
5336 | VATTR_IS_ACTIVE(vap, va_guuid)) { | |
5337 | VATTR_WANTED(&ova, va_mode); | |
5338 | VATTR_WANTED(&ova, va_uid); | |
5339 | VATTR_WANTED(&ova, va_gid); | |
5340 | VATTR_WANTED(&ova, va_uuuid); | |
5341 | VATTR_WANTED(&ova, va_guuid); | |
5342 | KAUTH_DEBUG("ATTR - security information changing, fetching existing attributes"); | |
5343 | } | |
5344 | ||
5345 | /* | |
5346 | * If timestamps are being changed, we need to know who the file is owned | |
5347 | * by. | |
5348 | */ | |
5349 | if (VATTR_IS_ACTIVE(vap, va_create_time) || | |
5350 | VATTR_IS_ACTIVE(vap, va_change_time) || | |
5351 | VATTR_IS_ACTIVE(vap, va_modify_time) || | |
5352 | VATTR_IS_ACTIVE(vap, va_access_time) || | |
5353 | VATTR_IS_ACTIVE(vap, va_backup_time)) { | |
5354 | ||
5355 | VATTR_WANTED(&ova, va_uid); | |
5356 | #if 0 /* enable this when we support UUIDs as official owners */ | |
5357 | VATTR_WANTED(&ova, va_uuuid); | |
5358 | #endif | |
5359 | KAUTH_DEBUG("ATTR - timestamps changing, fetching uid and GUID"); | |
5360 | } | |
5361 | ||
5362 | /* | |
5363 | * If flags are being changed, we need the old flags. | |
5364 | */ | |
5365 | if (VATTR_IS_ACTIVE(vap, va_flags)) { | |
5366 | KAUTH_DEBUG("ATTR - flags changing, fetching old flags"); | |
5367 | VATTR_WANTED(&ova, va_flags); | |
5368 | } | |
5369 | ||
5370 | /* | |
5371 | * If the size is being set, make sure it's not a directory. | |
5372 | */ | |
5373 | if (VATTR_IS_ACTIVE(vap, va_data_size)) { | |
5374 | /* size is meaningless on a directory, don't permit this */ | |
5375 | if (vnode_isdir(vp)) { | |
5376 | KAUTH_DEBUG("ATTR - ERROR: size change requested on a directory"); | |
5377 | error = EISDIR; | |
5378 | goto out; | |
5379 | } | |
5380 | } | |
5381 | ||
5382 | /* | |
5383 | * Get old data. | |
5384 | */ | |
5385 | KAUTH_DEBUG("ATTR - fetching old attributes %016llx", ova.va_active); | |
5386 | if ((error = vnode_getattr(vp, &ova, ctx)) != 0) { | |
5387 | KAUTH_DEBUG(" ERROR - got %d trying to get attributes", error); | |
5388 | goto out; | |
5389 | } | |
5390 | ||
5391 | /* | |
5392 | * Size changes require write access to the file data. | |
5393 | */ | |
5394 | if (VATTR_IS_ACTIVE(vap, va_data_size)) { | |
5395 | /* if we can't get the size, or it's different, we need write access */ | |
5396 | KAUTH_DEBUG("ATTR - size change, requiring WRITE_DATA"); | |
5397 | required_action |= KAUTH_VNODE_WRITE_DATA; | |
5398 | } | |
5399 | ||
5400 | /* | |
5401 | * Changing timestamps? | |
5402 | * | |
5403 | * Note that we are only called to authorize user-requested time changes; | |
5404 | * side-effect time changes are not authorized. Authorisation is only | |
5405 | * required for existing files. | |
5406 | * | |
5407 | * Non-owners are not permitted to change the time on an existing | |
5408 | * file to anything other than the current time. | |
5409 | */ | |
5410 | if (VATTR_IS_ACTIVE(vap, va_create_time) || | |
5411 | VATTR_IS_ACTIVE(vap, va_change_time) || | |
5412 | VATTR_IS_ACTIVE(vap, va_modify_time) || | |
5413 | VATTR_IS_ACTIVE(vap, va_access_time) || | |
5414 | VATTR_IS_ACTIVE(vap, va_backup_time)) { | |
5415 | /* | |
5416 | * The owner and root may set any timestamps they like, | |
5417 | * provided that the file is not immutable. The owner still needs | |
5418 | * WRITE_ATTRIBUTES (implied by ownership but still deniable). | |
5419 | */ | |
5420 | if (is_suser || vauth_node_owner(&ova, cred)) { | |
5421 | KAUTH_DEBUG("ATTR - root or owner changing timestamps"); | |
5422 | required_action |= KAUTH_VNODE_CHECKIMMUTABLE | KAUTH_VNODE_WRITE_ATTRIBUTES; | |
5423 | } else { | |
5424 | /* just setting the current time? */ | |
5425 | if (vap->va_vaflags & VA_UTIMES_NULL) { | |
5426 | KAUTH_DEBUG("ATTR - non-root/owner changing timestamps, requiring WRITE_ATTRIBUTES"); | |
5427 | required_action |= KAUTH_VNODE_WRITE_ATTRIBUTES; | |
5428 | } else { | |
5429 | KAUTH_DEBUG("ATTR - ERROR: illegal timestamp modification attempted"); | |
5430 | error = EACCES; | |
5431 | goto out; | |
5432 | } | |
5433 | } | |
5434 | } | |
5435 | ||
5436 | /* | |
5437 | * Changing file mode? | |
5438 | */ | |
5439 | if (VATTR_IS_ACTIVE(vap, va_mode) && VATTR_IS_SUPPORTED(&ova, va_mode) && (ova.va_mode != vap->va_mode)) { | |
5440 | KAUTH_DEBUG("ATTR - mode change from %06o to %06o", ova.va_mode, vap->va_mode); | |
5441 | ||
5442 | /* | |
5443 | * Mode changes always have the same basic auth requirements. | |
5444 | */ | |
5445 | if (is_suser) { | |
5446 | KAUTH_DEBUG("ATTR - superuser mode change, requiring immutability check"); | |
5447 | required_action |= KAUTH_VNODE_CHECKIMMUTABLE; | |
5448 | } else { | |
5449 | /* need WRITE_SECURITY */ | |
5450 | KAUTH_DEBUG("ATTR - non-superuser mode change, requiring WRITE_SECURITY"); | |
5451 | required_action |= KAUTH_VNODE_WRITE_SECURITY; | |
5452 | } | |
5453 | ||
5454 | /* | |
5455 | * Can't set the setgid bit if you're not in the group and not root. Have to have | |
5456 | * existing group information in the case we're not setting it right now. | |
5457 | */ | |
5458 | if (vap->va_mode & S_ISGID) { | |
5459 | required_action |= KAUTH_VNODE_CHECKIMMUTABLE; /* always required */ | |
5460 | if (!is_suser) { | |
5461 | if (VATTR_IS_ACTIVE(vap, va_gid)) { | |
5462 | group = vap->va_gid; | |
5463 | } else if (VATTR_IS_SUPPORTED(&ova, va_gid)) { | |
5464 | group = ova.va_gid; | |
5465 | } else { | |
5466 | KAUTH_DEBUG("ATTR - ERROR: setgid but no gid available"); | |
5467 | error = EINVAL; | |
5468 | goto out; | |
5469 | } | |
5470 | /* | |
5471 | * This might be too restrictive; WRITE_SECURITY might be implied by | |
5472 | * membership in this case, rather than being an additional requirement. | |
5473 | */ | |
5474 | if ((error = kauth_cred_ismember_gid(cred, group, &ismember)) != 0) { | |
5475 | KAUTH_DEBUG("ATTR - ERROR: got %d checking for membership in %d", error, vap->va_gid); | |
5476 | goto out; | |
5477 | } | |
5478 | if (!ismember) { | |
5479 | KAUTH_DEBUG(" DENIED - can't set SGID bit, not a member of %d", group); | |
5480 | error = EPERM; | |
5481 | goto out; | |
5482 | } | |
5483 | } | |
5484 | } | |
5485 | ||
5486 | /* | |
5487 | * Can't set the setuid bit unless you're root or the file's owner. | |
5488 | */ | |
5489 | if (vap->va_mode & S_ISUID) { | |
5490 | required_action |= KAUTH_VNODE_CHECKIMMUTABLE; /* always required */ | |
5491 | if (!is_suser) { | |
5492 | if (VATTR_IS_ACTIVE(vap, va_uid)) { | |
5493 | owner = vap->va_uid; | |
5494 | } else if (VATTR_IS_SUPPORTED(&ova, va_uid)) { | |
5495 | owner = ova.va_uid; | |
5496 | } else { | |
5497 | KAUTH_DEBUG("ATTR - ERROR: setuid but no uid available"); | |
5498 | error = EINVAL; | |
5499 | goto out; | |
5500 | } | |
5501 | if (owner != kauth_cred_getuid(cred)) { | |
5502 | /* | |
5503 | * We could allow this if WRITE_SECURITY is permitted, perhaps. | |
5504 | */ | |
5505 | KAUTH_DEBUG("ATTR - ERROR: illegal attempt to set the setuid bit"); | |
5506 | error = EPERM; | |
5507 | goto out; | |
5508 | } | |
5509 | } | |
5510 | } | |
5511 | } | |
5512 | ||
5513 | /* | |
5514 | * Validate/mask flags changes. This checks that only the flags in | |
5515 | * the UF_SETTABLE mask are being set, and preserves the flags in | |
5516 | * the SF_SETTABLE case. | |
5517 | * | |
5518 | * Since flags changes may be made in conjunction with other changes, | |
5519 | * we will ask the auth code to ignore immutability in the case that | |
5520 | * the SF_* flags are not set and we are only manipulating the file flags. | |
5521 | * | |
5522 | */ | |
5523 | if (VATTR_IS_ACTIVE(vap, va_flags)) { | |
5524 | /* compute changing flags bits */ | |
5525 | if (VATTR_IS_SUPPORTED(&ova, va_flags)) { | |
5526 | fdelta = vap->va_flags ^ ova.va_flags; | |
5527 | } else { | |
5528 | fdelta = vap->va_flags; | |
5529 | } | |
5530 | ||
5531 | if (fdelta != 0) { | |
5532 | KAUTH_DEBUG("ATTR - flags changing, requiring WRITE_SECURITY"); | |
5533 | required_action |= KAUTH_VNODE_WRITE_SECURITY; | |
5534 | ||
5535 | /* check that changing bits are legal */ | |
5536 | if (is_suser) { | |
5537 | /* | |
5538 | * The immutability check will prevent us from clearing the SF_* | |
5539 | * flags unless the system securelevel permits it, so just check | |
5540 | * for legal flags here. | |
5541 | */ | |
5542 | if (fdelta & ~(UF_SETTABLE | SF_SETTABLE)) { | |
5543 | error = EPERM; | |
5544 | KAUTH_DEBUG(" DENIED - superuser attempt to set illegal flag(s)"); | |
5545 | goto out; | |
5546 | } | |
5547 | } else { | |
5548 | if (fdelta & ~UF_SETTABLE) { | |
5549 | error = EPERM; | |
5550 | KAUTH_DEBUG(" DENIED - user attempt to set illegal flag(s)"); | |
5551 | goto out; | |
5552 | } | |
5553 | } | |
5554 | /* | |
5555 | * If the caller has the ability to manipulate file flags, | |
5556 | * security is not reduced by ignoring them for this operation. | |
5557 | * | |
5558 | * A more complete test here would consider the 'after' states of the flags | |
5559 | * to determine whether it would permit the operation, but this becomes | |
5560 | * very complex. | |
5561 | * | |
5562 | * Ignoring immutability is conditional on securelevel; this does not bypass | |
5563 | * the SF_* flags if securelevel > 0. | |
5564 | */ | |
5565 | required_action |= KAUTH_VNODE_NOIMMUTABLE; | |
5566 | } | |
5567 | } | |
5568 | ||
5569 | /* | |
5570 | * Validate ownership information. | |
5571 | */ | |
5572 | chowner = 0; | |
5573 | chgroup = 0; | |
5574 | ||
5575 | /* | |
5576 | * uid changing | |
5577 | * Note that if the filesystem didn't give us a UID, we expect that it doesn't | |
5578 | * support them in general, and will ignore it if/when we try to set it. | |
5579 | * We might want to clear the uid out of vap completely here. | |
5580 | */ | |
5581 | if (VATTR_IS_ACTIVE(vap, va_uid) && VATTR_IS_SUPPORTED(&ova, va_uid) && (vap->va_uid != ova.va_uid)) { | |
5582 | if (!is_suser && (kauth_cred_getuid(cred) != vap->va_uid)) { | |
5583 | KAUTH_DEBUG(" DENIED - non-superuser cannot change ownershipt to a third party"); | |
5584 | error = EPERM; | |
5585 | goto out; | |
5586 | } | |
5587 | chowner = 1; | |
5588 | } | |
5589 | ||
5590 | /* | |
5591 | * gid changing | |
5592 | * Note that if the filesystem didn't give us a GID, we expect that it doesn't | |
5593 | * support them in general, and will ignore it if/when we try to set it. | |
5594 | * We might want to clear the gid out of vap completely here. | |
5595 | */ | |
5596 | if (VATTR_IS_ACTIVE(vap, va_gid) && VATTR_IS_SUPPORTED(&ova, va_gid) && (vap->va_gid != ova.va_gid)) { | |
5597 | if (!is_suser) { | |
5598 | if ((error = kauth_cred_ismember_gid(cred, vap->va_gid, &ismember)) != 0) { | |
5599 | KAUTH_DEBUG(" ERROR - got %d checking for membership in %d", error, vap->va_gid); | |
5600 | goto out; | |
5601 | } | |
5602 | if (!ismember) { | |
5603 | KAUTH_DEBUG(" DENIED - group change from %d to %d but not a member of target group", | |
5604 | ova.va_gid, vap->va_gid); | |
5605 | error = EPERM; | |
5606 | goto out; | |
5607 | } | |
5608 | } | |
5609 | chgroup = 1; | |
5610 | } | |
5611 | ||
5612 | /* | |
5613 | * Owner UUID being set or changed. | |
5614 | */ | |
5615 | if (VATTR_IS_ACTIVE(vap, va_uuuid)) { | |
5616 | /* if the owner UUID is not actually changing ... */ | |
5617 | if (VATTR_IS_SUPPORTED(&ova, va_uuuid) && kauth_guid_equal(&vap->va_uuuid, &ova.va_uuuid)) | |
5618 | goto no_uuuid_change; | |
5619 | ||
5620 | /* | |
5621 | * The owner UUID cannot be set by a non-superuser to anything other than | |
5622 | * their own. | |
5623 | */ | |
5624 | if (!is_suser) { | |
5625 | if ((error = kauth_cred_getguid(cred, &changer)) != 0) { | |
5626 | KAUTH_DEBUG(" ERROR - got %d trying to get caller UUID", error); | |
5627 | /* XXX ENOENT here - no UUID - should perhaps become EPERM */ | |
5628 | goto out; | |
5629 | } | |
5630 | if (!kauth_guid_equal(&vap->va_uuuid, &changer)) { | |
5631 | KAUTH_DEBUG(" ERROR - cannot set supplied owner UUID - not us"); | |
5632 | error = EPERM; | |
5633 | goto out; | |
5634 | } | |
5635 | } | |
5636 | chowner = 1; | |
5637 | } | |
5638 | no_uuuid_change: | |
5639 | /* | |
5640 | * Group UUID being set or changed. | |
5641 | */ | |
5642 | if (VATTR_IS_ACTIVE(vap, va_guuid)) { | |
5643 | /* if the group UUID is not actually changing ... */ | |
5644 | if (VATTR_IS_SUPPORTED(&ova, va_guuid) && kauth_guid_equal(&vap->va_guuid, &ova.va_guuid)) | |
5645 | goto no_guuid_change; | |
5646 | ||
5647 | /* | |
5648 | * The group UUID cannot be set by a non-superuser to anything other than | |
5649 | * one of which they are a member. | |
5650 | */ | |
5651 | if (!is_suser) { | |
5652 | if ((error = kauth_cred_ismember_guid(cred, &vap->va_guuid, &ismember)) != 0) { | |
5653 | KAUTH_DEBUG(" ERROR - got %d trying to check group membership", error); | |
5654 | goto out; | |
5655 | } | |
5656 | if (!ismember) { | |
5657 | KAUTH_DEBUG(" ERROR - cannot create item with supplied group UUID - not a member"); | |
5658 | error = EPERM; | |
5659 | goto out; | |
5660 | } | |
5661 | } | |
5662 | chgroup = 1; | |
5663 | } | |
5664 | no_guuid_change: | |
5665 | ||
5666 | /* | |
5667 | * Compute authorisation for group/ownership changes. | |
5668 | */ | |
5669 | if (chowner || chgroup) { | |
5670 | if (is_suser) { | |
5671 | KAUTH_DEBUG("ATTR - superuser changing file owner/group, requiring immutability check"); | |
5672 | required_action |= KAUTH_VNODE_CHECKIMMUTABLE; | |
5673 | } else { | |
5674 | if (chowner) { | |
5675 | KAUTH_DEBUG("ATTR - ownership change, requiring TAKE_OWNERSHIP"); | |
5676 | required_action |= KAUTH_VNODE_TAKE_OWNERSHIP; | |
5677 | } | |
5678 | if (chgroup && !chowner) { | |
5679 | KAUTH_DEBUG("ATTR - group change, requiring WRITE_SECURITY"); | |
5680 | required_action |= KAUTH_VNODE_WRITE_SECURITY; | |
5681 | } | |
5682 | ||
5683 | /* clear set-uid and set-gid bits as required by Posix */ | |
5684 | if (VATTR_IS_ACTIVE(vap, va_mode)) { | |
5685 | newmode = vap->va_mode; | |
5686 | } else if (VATTR_IS_SUPPORTED(&ova, va_mode)) { | |
5687 | newmode = ova.va_mode; | |
5688 | } else { | |
5689 | KAUTH_DEBUG("CHOWN - trying to change owner but cannot get mode from filesystem to mask setugid bits"); | |
5690 | newmode = 0; | |
5691 | } | |
5692 | if (newmode & (S_ISUID | S_ISGID)) { | |
5693 | VATTR_SET(vap, va_mode, newmode & ~(S_ISUID | S_ISGID)); | |
5694 | KAUTH_DEBUG("CHOWN - masking setugid bits from mode %o to %o", newmode, vap->va_mode); | |
5695 | } | |
5696 | } | |
5697 | } | |
5698 | ||
5699 | /* | |
5700 | * Authorise changes in the ACL. | |
5701 | */ | |
5702 | if (VATTR_IS_ACTIVE(vap, va_acl)) { | |
5703 | ||
5704 | /* no existing ACL */ | |
5705 | if (!VATTR_IS_ACTIVE(&ova, va_acl) || (ova.va_acl == NULL)) { | |
5706 | ||
5707 | /* adding an ACL */ | |
5708 | if (vap->va_acl != NULL) { | |
5709 | required_action |= KAUTH_VNODE_WRITE_SECURITY; | |
5710 | KAUTH_DEBUG("CHMOD - adding ACL"); | |
5711 | } | |
5712 | ||
5713 | /* removing an existing ACL */ | |
5714 | } else if (vap->va_acl == NULL) { | |
5715 | required_action |= KAUTH_VNODE_WRITE_SECURITY; | |
5716 | KAUTH_DEBUG("CHMOD - removing ACL"); | |
5717 | ||
5718 | /* updating an existing ACL */ | |
5719 | } else { | |
5720 | if (vap->va_acl->acl_entrycount != ova.va_acl->acl_entrycount) { | |
5721 | /* entry count changed, must be different */ | |
5722 | required_action |= KAUTH_VNODE_WRITE_SECURITY; | |
5723 | KAUTH_DEBUG("CHMOD - adding/removing ACL entries"); | |
5724 | } else if (vap->va_acl->acl_entrycount > 0) { | |
5725 | /* both ACLs have the same ACE count, said count is 1 or more, bitwise compare ACLs */ | |
5726 | if (!memcmp(&vap->va_acl->acl_ace[0], &ova.va_acl->acl_ace[0], | |
5727 | sizeof(struct kauth_ace) * vap->va_acl->acl_entrycount)) { | |
5728 | required_action |= KAUTH_VNODE_WRITE_SECURITY; | |
5729 | KAUTH_DEBUG("CHMOD - changing ACL entries"); | |
5730 | } | |
5731 | } | |
5732 | } | |
5733 | } | |
5734 | ||
5735 | /* | |
5736 | * Other attributes that require authorisation. | |
5737 | */ | |
5738 | if (VATTR_IS_ACTIVE(vap, va_encoding)) | |
5739 | required_action |= KAUTH_VNODE_WRITE_ATTRIBUTES; | |
5740 | ||
5741 | out: | |
5742 | if (VATTR_IS_SUPPORTED(&ova, va_acl) && (ova.va_acl != NULL)) | |
5743 | kauth_acl_free(ova.va_acl); | |
5744 | if (error == 0) | |
5745 | *actionp = required_action; | |
5746 | return(error); | |
5747 | } | |
5748 | ||
5749 | ||
5750 | void | |
5751 | vfs_setlocklocal(mount_t mp) | |
5752 | { | |
5753 | vnode_t vp; | |
5754 | ||
5755 | mount_lock(mp); | |
5756 | mp->mnt_kern_flag |= MNTK_LOCK_LOCAL; | |
5757 | ||
5758 | /* | |
5759 | * We do not expect anyone to be using any vnodes at the | |
5760 | * time this routine is called. So no need for vnode locking | |
5761 | */ | |
5762 | TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) { | |
5763 | vp->v_flag |= VLOCKLOCAL; | |
5764 | } | |
5765 | TAILQ_FOREACH(vp, &mp->mnt_workerqueue, v_mntvnodes) { | |
5766 | vp->v_flag |= VLOCKLOCAL; | |
5767 | } | |
5768 | TAILQ_FOREACH(vp, &mp->mnt_newvnodes, v_mntvnodes) { | |
5769 | vp->v_flag |= VLOCKLOCAL; | |
5770 | } | |
5771 | mount_unlock(mp); | |
5772 | } | |
5773 | ||
5774 | ||
5775 | #ifdef JOE_DEBUG | |
5776 | ||
5777 | record_vp(vnode_t vp, int count) { | |
5778 | struct uthread *ut; | |
5779 | int i; | |
5780 | ||
5781 | if ((vp->v_flag & VSYSTEM)) | |
5782 | return; | |
5783 | ||
5784 | ut = get_bsdthread_info(current_thread()); | |
5785 | ut->uu_iocount += count; | |
5786 | ||
5787 | if (ut->uu_vpindex < 32) { | |
5788 | for (i = 0; i < ut->uu_vpindex; i++) { | |
5789 | if (ut->uu_vps[i] == vp) | |
5790 | return; | |
5791 | } | |
5792 | ut->uu_vps[ut->uu_vpindex] = vp; | |
5793 | ut->uu_vpindex++; | |
5794 | } | |
5795 | } | |
5796 | #endif |