]>
Commit | Line | Data |
---|---|---|
1c79356b | 1 | /* |
5d5c5d0d A |
2 | * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. |
3 | * | |
8f6c56a5 | 4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
1c79356b | 5 | * |
8f6c56a5 A |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
14 | * | |
15 | * Please obtain a copy of the License at | |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
23 | * Please see the License for the specific language governing rights and | |
8ad349bb | 24 | * limitations under the License. |
8f6c56a5 A |
25 | * |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ | |
1c79356b A |
27 | */ |
28 | /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ | |
29 | /* | |
30 | * Copyright (c) 1989, 1993 | |
31 | * The Regents of the University of California. All rights reserved. | |
32 | * (c) UNIX System Laboratories, Inc. | |
33 | * All or some portions of this file are derived from material licensed | |
34 | * to the University of California by American Telephone and Telegraph | |
35 | * Co. or Unix System Laboratories, Inc. and are reproduced herein with | |
36 | * the permission of UNIX System Laboratories, Inc. | |
37 | * | |
38 | * Redistribution and use in source and binary forms, with or without | |
39 | * modification, are permitted provided that the following conditions | |
40 | * are met: | |
41 | * 1. Redistributions of source code must retain the above copyright | |
42 | * notice, this list of conditions and the following disclaimer. | |
43 | * 2. Redistributions in binary form must reproduce the above copyright | |
44 | * notice, this list of conditions and the following disclaimer in the | |
45 | * documentation and/or other materials provided with the distribution. | |
46 | * 3. All advertising materials mentioning features or use of this software | |
47 | * must display the following acknowledgement: | |
48 | * This product includes software developed by the University of | |
49 | * California, Berkeley and its contributors. | |
50 | * 4. Neither the name of the University nor the names of its contributors | |
51 | * may be used to endorse or promote products derived from this software | |
52 | * without specific prior written permission. | |
53 | * | |
54 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
55 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
56 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
57 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
58 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
59 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
60 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
61 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
62 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
63 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
64 | * SUCH DAMAGE. | |
65 | * | |
66 | * @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95 | |
67 | */ | |
68 | ||
69 | /* | |
70 | * External virtual filesystem routines | |
71 | */ | |
72 | ||
9bccf70c | 73 | #undef DIAGNOSTIC |
1c79356b A |
74 | #define DIAGNOSTIC 1 |
75 | ||
76 | #include <sys/param.h> | |
77 | #include <sys/systm.h> | |
91447636 A |
78 | #include <sys/proc_internal.h> |
79 | #include <sys/kauth.h> | |
80 | #include <sys/mount_internal.h> | |
1c79356b | 81 | #include <sys/time.h> |
91447636 A |
82 | #include <sys/lock.h> |
83 | #include <sys/vnode_internal.h> | |
1c79356b A |
84 | #include <sys/stat.h> |
85 | #include <sys/namei.h> | |
86 | #include <sys/ucred.h> | |
91447636 | 87 | #include <sys/buf_internal.h> |
1c79356b A |
88 | #include <sys/errno.h> |
89 | #include <sys/malloc.h> | |
90 | #include <sys/domain.h> | |
91 | #include <sys/mbuf.h> | |
92 | #include <sys/syslog.h> | |
91447636 | 93 | #include <sys/ubc_internal.h> |
1c79356b A |
94 | #include <sys/vm.h> |
95 | #include <sys/sysctl.h> | |
55e303ae A |
96 | #include <sys/filedesc.h> |
97 | #include <sys/event.h> | |
91447636 A |
98 | #include <sys/kdebug.h> |
99 | #include <sys/kauth.h> | |
100 | #include <sys/user.h> | |
101 | #include <miscfs/fifofs/fifo.h> | |
55e303ae A |
102 | |
103 | #include <string.h> | |
104 | #include <machine/spl.h> | |
105 | ||
1c79356b A |
106 | |
107 | #include <kern/assert.h> | |
108 | ||
109 | #include <miscfs/specfs/specdev.h> | |
110 | ||
0b4e3aa0 A |
111 | #include <mach/mach_types.h> |
112 | #include <mach/memory_object_types.h> | |
113 | ||
91447636 A |
114 | extern lck_grp_t *vnode_lck_grp; |
115 | extern lck_attr_t *vnode_lck_attr; | |
116 | ||
117 | ||
118 | extern lck_mtx_t * mnt_list_mtx_lock; | |
0b4e3aa0 | 119 | |
1c79356b A |
120 | enum vtype iftovt_tab[16] = { |
121 | VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, | |
122 | VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD, | |
123 | }; | |
124 | int vttoif_tab[9] = { | |
125 | 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, | |
126 | S_IFSOCK, S_IFIFO, S_IFMT, | |
127 | }; | |
128 | ||
91447636 A |
129 | extern int ubc_isinuse_locked(vnode_t, int, int); |
130 | extern kern_return_t adjust_vm_object_cache(vm_size_t oval, vm_size_t nval); | |
131 | ||
132 | static void vnode_list_add(vnode_t); | |
133 | static void vnode_list_remove(vnode_t); | |
134 | ||
135 | static errno_t vnode_drain(vnode_t); | |
136 | static void vgone(vnode_t); | |
137 | static void vclean(vnode_t vp, int flag, proc_t p); | |
138 | static void vnode_reclaim_internal(vnode_t, int, int); | |
139 | ||
140 | static void vnode_dropiocount (vnode_t, int); | |
141 | static errno_t vnode_getiocount(vnode_t vp, int locked, int vid, int vflags); | |
142 | static int vget_internal(vnode_t, int, int); | |
143 | ||
144 | static vnode_t checkalias(vnode_t vp, dev_t nvp_rdev); | |
145 | static int vnode_reload(vnode_t); | |
146 | static int vnode_isinuse_locked(vnode_t, int, int); | |
147 | ||
148 | static void insmntque(vnode_t vp, mount_t mp); | |
149 | mount_t mount_list_lookupby_fsid(fsid_t *, int, int); | |
150 | static int mount_getvfscnt(void); | |
151 | static int mount_fillfsids(fsid_t *, int ); | |
152 | static void vnode_iterate_setup(mount_t); | |
153 | static int vnode_umount_preflight(mount_t, vnode_t, int); | |
154 | static int vnode_iterate_prepare(mount_t); | |
155 | static int vnode_iterate_reloadq(mount_t); | |
156 | static void vnode_iterate_clear(mount_t); | |
1c79356b | 157 | |
1c79356b A |
158 | TAILQ_HEAD(freelst, vnode) vnode_free_list; /* vnode free list */ |
159 | TAILQ_HEAD(inactivelst, vnode) vnode_inactive_list; /* vnode inactive list */ | |
160 | struct mntlist mountlist; /* mounted filesystem list */ | |
91447636 | 161 | static int nummounts = 0; |
1c79356b A |
162 | |
163 | #if DIAGNOSTIC | |
164 | #define VLISTCHECK(fun, vp, list) \ | |
165 | if ((vp)->v_freelist.tqe_prev == (struct vnode **)0xdeadb) \ | |
166 | panic("%s: %s vnode not on %slist", (fun), (list), (list)); | |
167 | ||
168 | #define VINACTIVECHECK(fun, vp, expected) \ | |
169 | do { \ | |
170 | int __is_inactive = ISSET((vp)->v_flag, VUINACTIVE); \ | |
171 | if (__is_inactive ^ expected) \ | |
172 | panic("%s: %sinactive vnode, expected %s", (fun), \ | |
173 | __is_inactive? "" : "not ", \ | |
174 | expected? "inactive": "not inactive"); \ | |
175 | } while(0) | |
176 | #else | |
177 | #define VLISTCHECK(fun, vp, list) | |
178 | #define VINACTIVECHECK(fun, vp, expected) | |
179 | #endif /* DIAGNOSTIC */ | |
180 | ||
181 | #define VLISTNONE(vp) \ | |
182 | do { \ | |
183 | (vp)->v_freelist.tqe_next = (struct vnode *)0; \ | |
184 | (vp)->v_freelist.tqe_prev = (struct vnode **)0xdeadb; \ | |
185 | } while(0) | |
186 | ||
187 | #define VONLIST(vp) \ | |
188 | ((vp)->v_freelist.tqe_prev != (struct vnode **)0xdeadb) | |
189 | ||
190 | /* remove a vnode from free vnode list */ | |
191 | #define VREMFREE(fun, vp) \ | |
192 | do { \ | |
193 | VLISTCHECK((fun), (vp), "free"); \ | |
194 | TAILQ_REMOVE(&vnode_free_list, (vp), v_freelist); \ | |
195 | VLISTNONE((vp)); \ | |
196 | freevnodes--; \ | |
197 | } while(0) | |
198 | ||
199 | /* remove a vnode from inactive vnode list */ | |
200 | #define VREMINACTIVE(fun, vp) \ | |
201 | do { \ | |
202 | VLISTCHECK((fun), (vp), "inactive"); \ | |
203 | VINACTIVECHECK((fun), (vp), VUINACTIVE); \ | |
204 | TAILQ_REMOVE(&vnode_inactive_list, (vp), v_freelist); \ | |
205 | CLR((vp)->v_flag, VUINACTIVE); \ | |
206 | VLISTNONE((vp)); \ | |
207 | inactivevnodes--; \ | |
208 | } while(0) | |
209 | ||
1c79356b A |
210 | /* |
211 | * Have to declare first two locks as actual data even if !MACH_SLOCKS, since | |
212 | * pointers to them get passed around. |
213 | */ | |
91447636 A |
214 | void * mntvnode_slock; |
215 | void * mntid_slock; | |
216 | void * spechash_slock; | |
1c79356b A |
217 | |
218 | /* | |
219 | * vnodetarget is the amount of vnodes we expect to get back | |
220 | * from the inactive vnode list and VM object cache. |
221 | * As vnreclaim() is a mainly cpu bound operation for faster |
222 | * processors this number could be higher. |
223 | * Having this number too high introduces longer delays in | |
91447636 | 224 | * the execution of new_vnode(). |
1c79356b A |
225 | */ |
226 | unsigned long vnodetarget; /* target for vnreclaim() */ | |
227 | #define VNODE_FREE_TARGET 20 /* Default value for vnodetarget */ | |
228 | ||
229 | /* | |
230 | * We need quite a few vnodes on the free list to sustain the | |
231 | * rapid stat() the compilation process does, and still benefit from the name | |
232 | * cache. Having too few vnodes on the free list causes serious disk | |
233 | * thrashing as we cycle through them. | |
234 | */ | |
0b4e3aa0 | 235 | #define VNODE_FREE_MIN 300 /* freelist should have at least these many */ |
1c79356b A |
236 | |
237 | /* | |
238 | * We need to get vnodes back from the VM object cache when a certain # | |
239 | * of vnodes are reused from the freelist. This is essential for the | |
240 | * caching to be effective in the namecache and the buffer cache [for the | |
241 | * metadata]. | |
242 | */ | |
243 | #define VNODE_TOOMANY_REUSED (VNODE_FREE_MIN/4) | |
244 | ||
245 | /* | |
246 | * If we have enough vnodes on the freelist we do not want to reclaim | |
247 | * the vnodes from the VM object cache. | |
248 | */ | |
249 | #define VNODE_FREE_ENOUGH (VNODE_FREE_MIN + (VNODE_FREE_MIN/2)) | |
250 | ||
251 | /* | |
252 | * Initialize the vnode management data structures. | |
253 | */ | |
0b4e3aa0 | 254 | __private_extern__ void |
91447636 | 255 | vntblinit(void) |
1c79356b | 256 | { |
1c79356b | 257 | TAILQ_INIT(&vnode_free_list); |
1c79356b | 258 | TAILQ_INIT(&vnode_inactive_list); |
91447636 | 259 | TAILQ_INIT(&mountlist); |
1c79356b A |
260 | |
261 | if (!vnodetarget) | |
262 | vnodetarget = VNODE_FREE_TARGET; | |
263 | ||
264 | /* | |
265 | * Scale the vm_object_cache to accomodate the vnodes | |
266 | * we want to cache | |
267 | */ | |
268 | (void) adjust_vm_object_cache(0, desiredvnodes - VNODE_FREE_MIN); | |
269 | } | |
270 | ||
271 | /* Reset the VM Object Cache with the values passed in */ | |
0b4e3aa0 | 272 | __private_extern__ kern_return_t |
1c79356b A |
273 | reset_vmobjectcache(unsigned int val1, unsigned int val2) |
274 | { | |
275 | vm_size_t oval = val1 - VNODE_FREE_MIN; | |
9bccf70c A |
276 | vm_size_t nval; |
277 | ||
278 | if(val2 < VNODE_FREE_MIN) | |
279 | nval = 0; | |
280 | else | |
281 | nval = val2 - VNODE_FREE_MIN; | |
1c79356b A |
282 | |
283 | return(adjust_vm_object_cache(oval, nval)); | |
284 | } | |
285 | ||
91447636 A |
286 | |
287 | /* the timeout is in 10 msecs */ | |
1c79356b | 288 | int |
91447636 A |
289 | vnode_waitforwrites(vnode_t vp, int output_target, int slpflag, int slptimeout, char *msg) { |
290 | int error = 0; | |
291 | struct timespec ts; | |
1c79356b | 292 | |
91447636 A |
293 | KERNEL_DEBUG(0x3010280 | DBG_FUNC_START, (int)vp, output_target, vp->v_numoutput, 0, 0); |
294 | ||
295 | if (vp->v_numoutput > output_target) { | |
296 | ||
297 | slpflag &= ~PDROP; | |
298 | ||
299 | vnode_lock(vp); | |
300 | ||
301 | while ((vp->v_numoutput > output_target) && error == 0) { | |
302 | if (output_target) | |
303 | vp->v_flag |= VTHROTTLED; | |
304 | else | |
305 | vp->v_flag |= VBWAIT; | |
306 | ts.tv_sec = (slptimeout/100); | |
307 | ts.tv_nsec = (slptimeout % 1000) * 10 * NSEC_PER_USEC * 1000 ; | |
308 | error = msleep((caddr_t)&vp->v_numoutput, &vp->v_lock, (slpflag | (PRIBIO + 1)), msg, &ts); | |
309 | } | |
310 | vnode_unlock(vp); | |
1c79356b | 311 | } |
91447636 A |
312 | KERNEL_DEBUG(0x3010280 | DBG_FUNC_END, (int)vp, output_target, vp->v_numoutput, error, 0); |
313 | ||
314 | return error; | |
1c79356b A |
315 | } |
316 | ||
91447636 | 317 | |
1c79356b | 318 | void |
91447636 A |
319 | vnode_startwrite(vnode_t vp) { |
320 | ||
321 | OSAddAtomic(1, &vp->v_numoutput); | |
322 | } | |
323 | ||
324 | ||
325 | void | |
326 | vnode_writedone(vnode_t vp) | |
1c79356b | 327 | { |
91447636 A |
328 | if (vp) { |
329 | int need_wakeup = 0; | |
330 | ||
331 | OSAddAtomic(-1, &vp->v_numoutput); | |
332 | ||
333 | vnode_lock(vp); | |
1c79356b | 334 | |
91447636 A |
335 | if (vp->v_numoutput < 0) |
336 | panic("vnode_writedone: numoutput < 0"); | |
337 | ||
338 | if ((vp->v_flag & VTHROTTLED) && (vp->v_numoutput < (VNODE_ASYNC_THROTTLE / 3))) { | |
339 | vp->v_flag &= ~VTHROTTLED; | |
340 | need_wakeup = 1; | |
341 | } | |
342 | if ((vp->v_flag & VBWAIT) && (vp->v_numoutput == 0)) { | |
343 | vp->v_flag &= ~VBWAIT; | |
344 | need_wakeup = 1; | |
345 | } | |
346 | vnode_unlock(vp); | |
347 | ||
348 | if (need_wakeup) | |
349 | wakeup((caddr_t)&vp->v_numoutput); | |
350 | } | |
1c79356b A |
351 | } |
352 | ||
91447636 A |
353 | |
354 | ||
1c79356b | 355 | int |
91447636 | 356 | vnode_hasdirtyblks(vnode_t vp) |
1c79356b | 357 | { |
91447636 | 358 | struct cl_writebehind *wbp; |
1c79356b | 359 | |
91447636 A |
360 | /* |
361 | * Not taking the buf_mtxp as there is little | |
362 | * point doing it. Even if the lock is taken the | |
363 | * state can change right after that. If their | |
364 | * needs to be a synchronization, it must be driven | |
365 | * by the caller | |
366 | */ | |
367 | if (vp->v_dirtyblkhd.lh_first) | |
368 | return (1); | |
369 | ||
370 | if (!UBCINFOEXISTS(vp)) | |
371 | return (0); | |
0b4e3aa0 | 372 | |
91447636 A |
373 | wbp = vp->v_ubcinfo->cl_wbehind; |
374 | ||
375 | if (wbp && (wbp->cl_number || wbp->cl_scmap)) | |
376 | return (1); | |
0b4e3aa0 | 377 | |
1c79356b A |
378 | return (0); |
379 | } | |
380 | ||
1c79356b | 381 | int |
91447636 | 382 | vnode_hascleanblks(vnode_t vp) |
1c79356b | 383 | { |
91447636 A |
384 | /* |
385 | * Not taking the buf_mtxp as there is little | |
386 | * point doing it. Even if the lock is taken the | |
387 | * state can change right after that. If their | |
388 | * needs to be a synchronization, it must be driven | |
389 | * by the caller | |
390 | */ | |
391 | if (vp->v_cleanblkhd.lh_first) | |
392 | return (1); | |
393 | return (0); | |
394 | } | |
1c79356b | 395 | |
91447636 A |
396 | void |
397 | vnode_iterate_setup(mount_t mp) | |
398 | { | |
399 | while (mp->mnt_lflag & MNT_LITER) { | |
400 | mp->mnt_lflag |= MNT_LITERWAIT; | |
401 | msleep((caddr_t)mp, &mp->mnt_mlock, PVFS, "vnode_iterate_setup", 0); | |
1c79356b | 402 | } |
91447636 A |
403 | |
404 | mp->mnt_lflag |= MNT_LITER; | |
405 | ||
1c79356b A |
406 | } |
407 | ||
91447636 A |
408 | static int |
409 | vnode_umount_preflight(mount_t mp, vnode_t skipvp, int flags) | |
1c79356b | 410 | { |
91447636 | 411 | vnode_t vp; |
1c79356b | 412 | |
91447636 A |
413 | TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) { |
414 | if (vp->v_type == VDIR) | |
415 | continue; | |
416 | if (vp == skipvp) | |
417 | continue; | |
418 | if ((flags & SKIPSYSTEM) && ((vp->v_flag & VSYSTEM) || | |
419 | (vp->v_flag & VNOFLUSH))) | |
420 | continue; | |
421 | if ((flags & SKIPSWAP) && (vp->v_flag & VSWAP)) | |
422 | continue; | |
423 | if ((flags & WRITECLOSE) && | |
424 | (vp->v_writecount == 0 || vp->v_type != VREG)) | |
425 | continue; | |
426 | /* Look for busy vnode */ | |
427 | if (((vp->v_usecount != 0) && | |
428 | ((vp->v_usecount - vp->v_kusecount) != 0))) | |
429 | return(1); | |
1c79356b | 430 | } |
91447636 A |
431 | |
432 | return(0); | |
1c79356b A |
433 | } |
434 | ||
91447636 A |
435 | /* |
436 | * This routine prepares iteration by moving all the vnodes to worker queue | |
437 | * called with mount lock held | |
1c79356b | 438 | */ |
91447636 A |
439 | int |
440 | vnode_iterate_prepare(mount_t mp) | |
1c79356b | 441 | { |
91447636 | 442 | vnode_t vp; |
1c79356b | 443 | |
91447636 A |
444 | if (TAILQ_EMPTY(&mp->mnt_vnodelist)) { |
445 | /* nothing to do */ | |
446 | return (0); | |
447 | } | |
1c79356b | 448 | |
91447636 A |
449 | vp = TAILQ_FIRST(&mp->mnt_vnodelist); |
450 | vp->v_mntvnodes.tqe_prev = &(mp->mnt_workerqueue.tqh_first); | |
451 | mp->mnt_workerqueue.tqh_first = mp->mnt_vnodelist.tqh_first; | |
452 | mp->mnt_workerqueue.tqh_last = mp->mnt_vnodelist.tqh_last; | |
453 | ||
454 | TAILQ_INIT(&mp->mnt_vnodelist); | |
455 | if (mp->mnt_newvnodes.tqh_first != NULL) | |
456 | panic("vnode_iterate_prepare: newvnode when entering vnode"); | |
457 | TAILQ_INIT(&mp->mnt_newvnodes); | |
458 | ||
459 | return (1); | |
1c79356b A |
460 | } |
461 | ||
91447636 A |
462 | |
463 | /* called with mount lock held */ | |
464 | int | |
465 | vnode_iterate_reloadq(mount_t mp) | |
1c79356b | 466 | { |
91447636 A |
467 | int moved = 0; |
468 | ||
469 | /* add the remaining entries in workerq to the end of mount vnode list */ | |
470 | if (!TAILQ_EMPTY(&mp->mnt_workerqueue)) { | |
471 | struct vnode * mvp; | |
472 | mvp = TAILQ_LAST(&mp->mnt_vnodelist, vnodelst); | |
473 | ||
474 | /* Joining the workerque entities to mount vnode list */ | |
475 | if (mvp) | |
476 | mvp->v_mntvnodes.tqe_next = mp->mnt_workerqueue.tqh_first; | |
477 | else | |
478 | mp->mnt_vnodelist.tqh_first = mp->mnt_workerqueue.tqh_first; | |
479 | mp->mnt_workerqueue.tqh_first->v_mntvnodes.tqe_prev = mp->mnt_vnodelist.tqh_last; | |
480 | mp->mnt_vnodelist.tqh_last = mp->mnt_workerqueue.tqh_last; | |
481 | TAILQ_INIT(&mp->mnt_workerqueue); | |
482 | } | |
483 | ||
484 | /* add the newvnodes to the head of mount vnode list */ | |
485 | if (!TAILQ_EMPTY(&mp->mnt_newvnodes)) { | |
486 | struct vnode * nlvp; | |
487 | nlvp = TAILQ_LAST(&mp->mnt_newvnodes, vnodelst); | |
488 | ||
489 | mp->mnt_newvnodes.tqh_first->v_mntvnodes.tqe_prev = &mp->mnt_vnodelist.tqh_first; | |
490 | nlvp->v_mntvnodes.tqe_next = mp->mnt_vnodelist.tqh_first; | |
491 | if(mp->mnt_vnodelist.tqh_first) | |
492 | mp->mnt_vnodelist.tqh_first->v_mntvnodes.tqe_prev = &nlvp->v_mntvnodes.tqe_next; | |
493 | else | |
494 | mp->mnt_vnodelist.tqh_last = mp->mnt_newvnodes.tqh_last; | |
495 | mp->mnt_vnodelist.tqh_first = mp->mnt_newvnodes.tqh_first; | |
496 | TAILQ_INIT(&mp->mnt_newvnodes); | |
497 | moved = 1; | |
498 | } | |
1c79356b | 499 | |
91447636 | 500 | return(moved); |
1c79356b A |
501 | } |
502 | ||
1c79356b | 503 | |
91447636 A |
504 | void |
505 | vnode_iterate_clear(mount_t mp) | |
506 | { | |
507 | mp->mnt_lflag &= ~MNT_LITER; | |
508 | if (mp->mnt_lflag & MNT_LITERWAIT) { | |
509 | mp->mnt_lflag &= ~MNT_LITERWAIT; | |
510 | wakeup(mp); | |
511 | } | |
512 | } | |
1c79356b | 513 | |
1c79356b | 514 | |
1c79356b | 515 | int |
91447636 A |
516 | vnode_iterate(mp, flags, callout, arg) |
517 | mount_t mp; | |
518 | int flags; | |
519 | int (*callout)(struct vnode *, void *); | |
520 | void * arg; | |
1c79356b | 521 | { |
1c79356b | 522 | struct vnode *vp; |
91447636 A |
523 | int vid, retval; |
524 | int ret = 0; | |
1c79356b | 525 | |
91447636 | 526 | mount_lock(mp); |
1c79356b | 527 | |
91447636 | 528 | vnode_iterate_setup(mp); |
1c79356b | 529 | |
91447636 A |
530 | /* it is returns 0 then there is nothing to do */ |
531 | retval = vnode_iterate_prepare(mp); | |
1c79356b | 532 | |
91447636 A |
533 | if (retval == 0) { |
534 | vnode_iterate_clear(mp); | |
535 | mount_unlock(mp); | |
536 | return(ret); | |
1c79356b | 537 | } |
91447636 A |
538 | |
539 | /* iterate over all the vnodes */ | |
540 | while (!TAILQ_EMPTY(&mp->mnt_workerqueue)) { | |
541 | vp = TAILQ_FIRST(&mp->mnt_workerqueue); | |
542 | TAILQ_REMOVE(&mp->mnt_workerqueue, vp, v_mntvnodes); | |
543 | TAILQ_INSERT_TAIL(&mp->mnt_vnodelist, vp, v_mntvnodes); | |
544 | vid = vp->v_id; | |
545 | if ((vp->v_data == NULL) || (vp->v_type == VNON) || (vp->v_mount != mp)) { | |
546 | continue; | |
547 | } | |
548 | mount_unlock(mp); | |
1c79356b | 549 | |
91447636 A |
550 | if ( vget_internal(vp, vid, (flags | VNODE_NODEAD| VNODE_WITHID | VNODE_NOSUSPEND))) { |
551 | mount_lock(mp); | |
552 | continue; | |
553 | } | |
554 | if (flags & VNODE_RELOAD) { | |
555 | /* | |
556 | * we're reloading the filesystem | |
557 | * cast out any inactive vnodes... | |
558 | */ | |
559 | if (vnode_reload(vp)) { | |
560 | /* vnode will be recycled on the refcount drop */ | |
561 | vnode_put(vp); | |
562 | mount_lock(mp); | |
563 | continue; | |
564 | } | |
565 | } | |
55e303ae | 566 | |
91447636 A |
567 | retval = callout(vp, arg); |
568 | ||
569 | switch (retval) { | |
570 | case VNODE_RETURNED: | |
571 | case VNODE_RETURNED_DONE: | |
572 | vnode_put(vp); | |
573 | if (retval == VNODE_RETURNED_DONE) { | |
574 | mount_lock(mp); | |
575 | ret = 0; | |
576 | goto out; | |
577 | } | |
578 | break; | |
579 | ||
580 | case VNODE_CLAIMED_DONE: | |
581 | mount_lock(mp); | |
582 | ret = 0; | |
583 | goto out; | |
584 | case VNODE_CLAIMED: | |
585 | default: | |
586 | break; | |
587 | } | |
588 | mount_lock(mp); | |
55e303ae | 589 | } |
1c79356b | 590 | |
91447636 A |
591 | out: |
592 | (void)vnode_iterate_reloadq(mp); | |
593 | vnode_iterate_clear(mp); | |
594 | mount_unlock(mp); | |
595 | return (ret); | |
596 | } | |
55e303ae | 597 | |
91447636 A |
/* Take the mount's rename lock, serializing renames within mp. */
void
mount_lock_renames(mount_t mp)
{
	lck_mtx_lock(&mp->mnt_renamelock);
}

/* Release the mount's rename lock. */
void
mount_unlock_renames(mount_t mp)
{
	lck_mtx_unlock(&mp->mnt_renamelock);
}

/* Take the mount's general-purpose mutex (mnt_mlock). */
void
mount_lock(mount_t mp)
{
	lck_mtx_lock(&mp->mnt_mlock);
}

/* Release the mount's general-purpose mutex. */
void
mount_unlock(mount_t mp)
{
	lck_mtx_unlock(&mp->mnt_mlock);
}
621 | ||
91447636 | 622 | |
1c79356b | 623 | void |
91447636 | 624 | mount_ref(mount_t mp, int locked) |
1c79356b | 625 | { |
91447636 A |
626 | if ( !locked) |
627 | mount_lock(mp); | |
628 | ||
629 | mp->mnt_count++; | |
630 | ||
631 | if ( !locked) | |
632 | mount_unlock(mp); | |
1c79356b A |
633 | } |
634 | ||
91447636 A |
635 | |
636 | void | |
637 | mount_drop(mount_t mp, int locked) | |
638 | { | |
639 | if ( !locked) | |
640 | mount_lock(mp); | |
641 | ||
642 | mp->mnt_count--; | |
643 | ||
644 | if (mp->mnt_count == 0 && (mp->mnt_lflag & MNT_LDRAIN)) | |
645 | wakeup(&mp->mnt_lflag); | |
646 | ||
647 | if ( !locked) | |
648 | mount_unlock(mp); | |
649 | } | |
650 | ||
651 | ||
1c79356b | 652 | int |
91447636 | 653 | mount_iterref(mount_t mp, int locked) |
1c79356b | 654 | { |
91447636 | 655 | int retval = 0; |
1c79356b | 656 | |
91447636 A |
657 | if (!locked) |
658 | mount_list_lock(); | |
659 | if (mp->mnt_iterref < 0) { | |
660 | retval = 1; | |
661 | } else { | |
662 | mp->mnt_iterref++; | |
1c79356b | 663 | } |
91447636 A |
664 | if (!locked) |
665 | mount_list_unlock(); | |
666 | return(retval); | |
667 | } | |
1c79356b | 668 | |
91447636 A |
669 | int |
670 | mount_isdrained(mount_t mp, int locked) | |
671 | { | |
672 | int retval; | |
1c79356b | 673 | |
91447636 A |
674 | if (!locked) |
675 | mount_list_lock(); | |
676 | if (mp->mnt_iterref < 0) | |
677 | retval = 1; | |
678 | else | |
679 | retval = 0; | |
680 | if (!locked) | |
681 | mount_list_unlock(); | |
682 | return(retval); | |
683 | } | |
684 | ||
/*
 * Drop an iteration reference and wake any thread waiting in
 * mount_iterdrain() for the count to reach zero.
 */
void
mount_iterdrop(mount_t mp)
{
	mount_list_lock();
	mp->mnt_iterref--;
	wakeup(&mp->mnt_iterref);
	mount_list_unlock();
}
693 | ||
/*
 * Wait for all outstanding iteration references to drain, then park
 * mnt_iterref at -1 so mount_iterref() fails until mount_iterreset()
 * is called.
 */
void
mount_iterdrain(mount_t mp)
{
	mount_list_lock();
	while (mp->mnt_iterref)
		msleep((caddr_t)&mp->mnt_iterref, mnt_list_mtx_lock, PVFS, "mount_iterdrain", 0 );
	/* mount iterations drained */
	mp->mnt_iterref = -1;
	mount_list_unlock();
}
/* Re-enable iteration references after a mount_iterdrain(). */
void
mount_iterreset(mount_t mp)
{
	mount_list_lock();
	if (mp->mnt_iterref == -1)
		mp->mnt_iterref = 0;
	mount_list_unlock();
}
712 | ||
/*
 * Drain all references taken with mount_ref(): set MNT_LDRAIN and
 * sleep until mnt_count reaches zero (mount_drop() issues the
 * wakeup).  Always called with the mount lock held.
 */
int
mount_refdrain(mount_t mp)
{
	if (mp->mnt_lflag & MNT_LDRAIN)
		panic("already in drain");
	mp->mnt_lflag |= MNT_LDRAIN;

	while (mp->mnt_count)
		msleep((caddr_t)&mp->mnt_lflag, &mp->mnt_mlock, PVFS, "mount_drain", 0 );

	/* by now every vnode should have been reclaimed off this mount */
	if (mp->mnt_vnodelist.tqh_first != NULL)
		panic("mount_refdrain: dangling vnode");

	mp->mnt_lflag &= ~MNT_LDRAIN;

	return(0);
}
731 | ||
732 | ||
/*
 * Mark a mount point as busy. Used to synchronize access and to delay
 * unmounting.
 *
 * Returns 0 holding mnt_rwlock shared, or ENOENT if the mount is dead
 * or being unmounted (immediately when LK_NOWAIT is set, otherwise
 * after sleeping for the unmount to finish).
 */
int
vfs_busy(mount_t mp, int flags)
{

restart:
	if (mp->mnt_lflag & MNT_LDEAD)
		return(ENOENT);

	if (mp->mnt_lflag & MNT_LUNMOUNT) {
		if (flags & LK_NOWAIT)
			return (ENOENT);

		mount_lock(mp);

		/* recheck both flags now that we hold the mount lock */
		if (mp->mnt_lflag & MNT_LDEAD) {
			mount_unlock(mp);
			return(ENOENT);
		}
		if (mp->mnt_lflag & MNT_LUNMOUNT) {
			mp->mnt_lflag |= MNT_LWAIT;
			/*
			 * Since all busy locks are shared except the exclusive
			 * lock granted when unmounting, the only place that a
			 * wakeup needs to be done is at the release of the
			 * exclusive lock at the end of dounmount.
			 */
			/* PDROP: msleep releases mnt_mlock on return */
			msleep((caddr_t)mp, &mp->mnt_mlock, (PVFS | PDROP), "vfsbusy", 0 );
			return (ENOENT);
		}
		mount_unlock(mp);
	}

	lck_rw_lock_shared(&mp->mnt_rwlock);

	/*
	 * until we are granted the rwlock, it's possible for the mount point to
	 * change state, so reevaluate before granting the vfs_busy
	 */
	if (mp->mnt_lflag & (MNT_LDEAD | MNT_LUNMOUNT)) {
		lck_rw_done(&mp->mnt_rwlock);
		goto restart;
	}
	return (0);
}
781 | ||
91447636 A |
/*
 * Free a busy filesystem.
 * Releases the shared hold on mnt_rwlock taken by vfs_busy().
 */

void
vfs_unbusy(mount_t mp)
{
	lck_rw_done(&mp->mnt_rwlock);
}
791 | ||
792 | ||
793 | ||
794 | static void | |
795 | vfs_rootmountfailed(mount_t mp) { | |
796 | ||
797 | mount_list_lock(); | |
798 | mp->mnt_vtable->vfc_refcount--; | |
799 | mount_list_unlock(); | |
800 | ||
801 | vfs_unbusy(mp); | |
802 | ||
803 | mount_lock_destroy(mp); | |
804 | ||
805 | FREE_ZONE(mp, sizeof(struct mount), M_MOUNT); | |
806 | } | |
807 | ||
/*
 * Lookup a filesystem type, and if found allocate and initialize
 * a mount structure for it.
 *
 * Devname is usually updated by mount(8) after booting.
 *
 * The returned mount is vfs_busy'd (LK_NOWAIT) and holds a reference
 * on vfsp->vfc_refcount; vfs_rootmountfailed() undoes both.
 */
static mount_t
vfs_rootmountalloc_internal(struct vfstable *vfsp, const char *devname)
{
	mount_t	mp;

	/* M_WAITOK: the zone allocation sleeps rather than failing */
	mp = _MALLOC_ZONE((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
	bzero((char *)mp, (u_long)sizeof(struct mount));

	/* Initialize the default IO constraints */
	mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS;
	mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32;
	mp->mnt_maxsegreadsize = mp->mnt_maxreadcnt;
	mp->mnt_maxsegwritesize = mp->mnt_maxwritecnt;
	mp->mnt_devblocksize = DEV_BSIZE;

	mount_lock_init(mp);
	(void)vfs_busy(mp, LK_NOWAIT);

	TAILQ_INIT(&mp->mnt_vnodelist);
	TAILQ_INIT(&mp->mnt_workerqueue);
	TAILQ_INIT(&mp->mnt_newvnodes);

	mp->mnt_vtable = vfsp;
	mp->mnt_op = vfsp->vfc_vfsops;
	/* the root filesystem always starts read-only */
	mp->mnt_flag = MNT_RDONLY | MNT_ROOTFS;
	mp->mnt_vnodecovered = NULLVP;
	//mp->mnt_stat.f_type = vfsp->vfc_typenum;
	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;

	mount_list_lock();
	vfsp->vfc_refcount++;
	mount_list_unlock();

	/* NOTE(review): strncpy does not NUL-terminate if vfc_name fills
	 * MFSTYPENAMELEN exactly -- confirm vfc_name is always shorter */
	strncpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSTYPENAMELEN);
	mp->mnt_vfsstat.f_mntonname[0] = '/';
	(void) copystr((char *)devname, mp->mnt_vfsstat.f_mntfromname, MAXPATHLEN - 1, 0);

	return (mp);
}
853 | ||
854 | errno_t | |
855 | vfs_rootmountalloc(const char *fstypename, const char *devname, mount_t *mpp) | |
856 | { | |
857 | struct vfstable *vfsp; | |
858 | ||
859 | for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) | |
860 | if (!strcmp(vfsp->vfc_name, fstypename)) | |
861 | break; | |
862 | if (vfsp == NULL) | |
863 | return (ENODEV); | |
864 | ||
865 | *mpp = vfs_rootmountalloc_internal(vfsp, devname); | |
866 | ||
867 | if (*mpp) | |
868 | return (0); | |
869 | ||
870 | return (ENOMEM); | |
871 | } | |
872 | ||
873 | ||
874 | /* | |
875 | * Find an appropriate filesystem to use for the root. If a filesystem | |
876 | * has not been preselected, walk through the list of known filesystems | |
877 | * trying those that have mountroot routines, and try them until one | |
878 | * works or we have tried them all. | |
879 | */ | |
880 | extern int (*mountroot)(void); | |
881 | ||
/*
 * Mount the root filesystem.  If the `mountroot' hook is set
 * (netboot), defer to it entirely.  Otherwise create a block-device
 * vnode for rootdev and try each registered filesystem's mountroot
 * entry point until one succeeds.  Returns 0 on success, ENODEV if
 * no filesystem could mount root, or the error from bdevvp/netboot.
 */
int
vfs_mountroot()
{
	struct vfstable *vfsp;
	struct vfs_context context;
	int error;
	mount_t mp;

	if (mountroot != NULL) {
		/*
		 * used for netboot which follows a different set of rules
		 */
		error = (*mountroot)();
		return (error);
	}
	if ((error = bdevvp(rootdev, &rootvp))) {
		printf("vfs_mountroot: can't setup bdevvp\n");
		return (error);
	}
	context.vc_proc = current_proc();
	context.vc_ucred = kauth_cred_get();

	/* walk the registered filesystems that offer a mountroot hook */
	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
		if (vfsp->vfc_mountroot == NULL)
			continue;

		mp = vfs_rootmountalloc_internal(vfsp, "root_device");
		mp->mnt_devvp = rootvp;

		if ((error = (*vfsp->vfc_mountroot)(mp, rootvp, &context)) == 0) {
			mp->mnt_devvp->v_specflags |= SI_MOUNTEDON;

			vfs_unbusy(mp);

			mount_list_add(mp);

			/*
			 * cache the IO attributes for the underlying physical media...
			 * an error return indicates the underlying driver doesn't
			 * support all the queries necessary... however, reasonable
			 * defaults will have been set, so no reason to bail or care
			 */
			vfs_init_io_attributes(rootvp, mp);
			/*
			 * get rid of iocount reference returned
			 * by bdevvp... it will have also taken
			 * a usecount reference which we want to keep
			 */
			vnode_put(rootvp);

			return (0);
		}
		vfs_rootmountfailed(mp);

		if (error != EINVAL)
			printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
	}
	return (ENODEV);
}
941 | ||
942 | /* | |
943 | * Lookup a mount point by filesystem identifier. | |
944 | */ | |
945 | extern mount_t vfs_getvfs_locked(fsid_t *); | |
946 | ||
947 | struct mount * | |
948 | vfs_getvfs(fsid) | |
949 | fsid_t *fsid; | |
950 | { | |
951 | return (mount_list_lookupby_fsid(fsid, 0, 0)); | |
952 | } | |
953 | ||
954 | struct mount * | |
955 | vfs_getvfs_locked(fsid) | |
956 | fsid_t *fsid; | |
957 | { | |
958 | return(mount_list_lookupby_fsid(fsid, 1, 0)); | |
959 | } | |
960 | ||
961 | struct mount * | |
962 | vfs_getvfs_by_mntonname(u_char *path) | |
963 | { | |
964 | mount_t retmp = (mount_t)0; | |
965 | mount_t mp; | |
966 | ||
967 | mount_list_lock(); | |
968 | TAILQ_FOREACH(mp, &mountlist, mnt_list) { | |
969 | if (!strcmp(mp->mnt_vfsstat.f_mntonname, path)) { | |
970 | retmp = mp; | |
971 | goto out; | |
972 | } | |
973 | } | |
974 | out: | |
975 | mount_list_unlock(); | |
976 | return (retmp); | |
977 | } | |
978 | ||
979 | /* generation number for creation of new fsids */ | |
980 | u_short mntid_gen = 0; | |
981 | /* | |
982 | * Get a new unique fsid | |
983 | */ | |
984 | void | |
985 | vfs_getnewfsid(mp) | |
986 | struct mount *mp; | |
987 | { | |
988 | ||
989 | fsid_t tfsid; | |
990 | int mtype; | |
991 | mount_t nmp; | |
992 | ||
993 | mount_list_lock(); | |
994 | ||
995 | /* generate a new fsid */ | |
996 | mtype = mp->mnt_vtable->vfc_typenum; | |
997 | if (++mntid_gen == 0) | |
998 | mntid_gen++; | |
999 | tfsid.val[0] = makedev(nblkdev + mtype, mntid_gen); | |
1000 | tfsid.val[1] = mtype; | |
1001 | ||
1002 | TAILQ_FOREACH(nmp, &mountlist, mnt_list) { | |
1003 | while (vfs_getvfs_locked(&tfsid)) { | |
1004 | if (++mntid_gen == 0) | |
1005 | mntid_gen++; | |
1006 | tfsid.val[0] = makedev(nblkdev + mtype, mntid_gen); | |
1007 | } | |
1008 | } | |
1009 | mp->mnt_vfsstat.f_fsid.val[0] = tfsid.val[0]; | |
1010 | mp->mnt_vfsstat.f_fsid.val[1] = tfsid.val[1]; | |
1011 | mount_list_unlock(); | |
1012 | } | |
1013 | ||
1014 | /* | |
1015 | * Routines having to do with the management of the vnode table. | |
1016 | */ | |
1017 | extern int (**dead_vnodeop_p)(void *); | |
1018 | long numvnodes, freevnodes; | |
1019 | long inactivevnodes; | |
1020 | ||
1021 | ||
1022 | /* | |
1023 | * Move a vnode from one mount queue to another. | |
1024 | */ | |
1025 | static void | |
1026 | insmntque(vnode_t vp, mount_t mp) | |
1027 | { | |
1028 | mount_t lmp; | |
1029 | /* | |
1030 | * Delete from old mount point vnode list, if on one. | |
1031 | */ | |
3a60a9f5 | 1032 | if ( (lmp = vp->v_mount) != NULL && lmp != dead_mountp) { |
91447636 A |
1033 | if ((vp->v_lflag & VNAMED_MOUNT) == 0) |
1034 | panic("insmntque: vp not in mount vnode list"); | |
1035 | vp->v_lflag &= ~VNAMED_MOUNT; | |
1036 | ||
1037 | mount_lock(lmp); | |
1038 | ||
1039 | mount_drop(lmp, 1); | |
1040 | ||
1041 | if (vp->v_mntvnodes.tqe_next == NULL) { | |
1042 | if (TAILQ_LAST(&lmp->mnt_vnodelist, vnodelst) == vp) | |
1043 | TAILQ_REMOVE(&lmp->mnt_vnodelist, vp, v_mntvnodes); | |
1044 | else if (TAILQ_LAST(&lmp->mnt_newvnodes, vnodelst) == vp) | |
1045 | TAILQ_REMOVE(&lmp->mnt_newvnodes, vp, v_mntvnodes); | |
1046 | else if (TAILQ_LAST(&lmp->mnt_workerqueue, vnodelst) == vp) | |
1047 | TAILQ_REMOVE(&lmp->mnt_workerqueue, vp, v_mntvnodes); | |
1048 | } else { | |
1049 | vp->v_mntvnodes.tqe_next->v_mntvnodes.tqe_prev = vp->v_mntvnodes.tqe_prev; | |
1050 | *vp->v_mntvnodes.tqe_prev = vp->v_mntvnodes.tqe_next; | |
1051 | } | |
1052 | vp->v_mntvnodes.tqe_next = 0; | |
1053 | vp->v_mntvnodes.tqe_prev = 0; | |
1054 | mount_unlock(lmp); | |
1055 | return; | |
1056 | } | |
1057 | ||
1058 | /* | |
1059 | * Insert into list of vnodes for the new mount point, if available. | |
1060 | */ | |
1061 | if ((vp->v_mount = mp) != NULL) { | |
1062 | mount_lock(mp); | |
1063 | if ((vp->v_mntvnodes.tqe_next != 0) && (vp->v_mntvnodes.tqe_prev != 0)) | |
1064 | panic("vp already in mount list"); | |
1065 | if (mp->mnt_lflag & MNT_LITER) | |
1066 | TAILQ_INSERT_HEAD(&mp->mnt_newvnodes, vp, v_mntvnodes); | |
1067 | else | |
1068 | TAILQ_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes); | |
1069 | if (vp->v_lflag & VNAMED_MOUNT) | |
1070 | panic("insmntque: vp already in mount vnode list"); | |
1071 | if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb)) | |
1072 | panic("insmntque: vp on the free list\n"); | |
1073 | vp->v_lflag |= VNAMED_MOUNT; | |
1074 | mount_ref(mp, 1); | |
1075 | mount_unlock(mp); | |
1076 | } | |
1077 | } | |
1078 | ||
1079 | ||
1c79356b A |
1080 | /* |
1081 | * Create a vnode for a block device. | |
1082 | * Used for root filesystem, argdev, and swap areas. | |
1083 | * Also used for memory file system special devices. | |
1084 | */ | |
1085 | int | |
91447636 | 1086 | bdevvp(dev_t dev, vnode_t *vpp) |
1c79356b | 1087 | { |
91447636 A |
1088 | vnode_t nvp; |
1089 | int error; | |
1090 | struct vnode_fsparam vfsp; | |
1091 | struct vfs_context context; | |
1c79356b A |
1092 | |
1093 | if (dev == NODEV) { | |
1094 | *vpp = NULLVP; | |
1095 | return (ENODEV); | |
1096 | } | |
91447636 A |
1097 | |
1098 | context.vc_proc = current_proc(); | |
1099 | context.vc_ucred = FSCRED; | |
1100 | ||
1101 | vfsp.vnfs_mp = (struct mount *)0; | |
1102 | vfsp.vnfs_vtype = VBLK; | |
1103 | vfsp.vnfs_str = "bdevvp"; | |
1104 | vfsp.vnfs_dvp = 0; | |
1105 | vfsp.vnfs_fsnode = 0; | |
1106 | vfsp.vnfs_cnp = 0; | |
1107 | vfsp.vnfs_vops = spec_vnodeop_p; | |
1108 | vfsp.vnfs_rdev = dev; | |
1109 | vfsp.vnfs_filesize = 0; | |
1110 | ||
1111 | vfsp.vnfs_flags = VNFS_NOCACHE | VNFS_CANTCACHE; | |
1112 | ||
1113 | vfsp.vnfs_marksystem = 0; | |
1114 | vfsp.vnfs_markroot = 0; | |
1115 | ||
1116 | if ( (error = vnode_create(VNCREATE_FLAVOR, VCREATESIZE, &vfsp, &nvp)) ) { | |
1c79356b A |
1117 | *vpp = NULLVP; |
1118 | return (error); | |
1119 | } | |
91447636 A |
1120 | if ( (error = vnode_ref(nvp)) ) { |
1121 | panic("bdevvp failed: vnode_ref"); | |
1122 | return (error); | |
1c79356b | 1123 | } |
91447636 A |
1124 | if ( (error = VNOP_FSYNC(nvp, MNT_WAIT, &context)) ) { |
1125 | panic("bdevvp failed: fsync"); | |
1126 | return (error); | |
1127 | } | |
1128 | if ( (error = buf_invalidateblks(nvp, BUF_WRITE_DATA, 0, 0)) ) { | |
1129 | panic("bdevvp failed: invalidateblks"); | |
1130 | return (error); | |
1131 | } | |
1132 | if ( (error = VNOP_OPEN(nvp, FREAD, &context)) ) { | |
1133 | panic("bdevvp failed: open"); | |
1134 | return (error); | |
1135 | } | |
1136 | *vpp = nvp; | |
1137 | ||
1c79356b A |
1138 | return (0); |
1139 | } | |
1140 | ||
1141 | /* | |
1142 | * Check to see if the new vnode represents a special device | |
1143 | * for which we already have a vnode (either because of | |
1144 | * bdevvp() or because of a different vnode representing | |
1145 | * the same block device). If such an alias exists, deallocate | |
1146 | * the existing contents and return the aliased vnode. The | |
1147 | * caller is responsible for filling it with its new contents. | |
1148 | */ | |
91447636 A |
1149 | static vnode_t |
1150 | checkalias(nvp, nvp_rdev) | |
1c79356b A |
1151 | register struct vnode *nvp; |
1152 | dev_t nvp_rdev; | |
1c79356b | 1153 | { |
1c79356b A |
1154 | struct vnode *vp; |
1155 | struct vnode **vpp; | |
91447636 | 1156 | int vid = 0; |
1c79356b | 1157 | |
1c79356b A |
1158 | vpp = &speclisth[SPECHASH(nvp_rdev)]; |
1159 | loop: | |
91447636 A |
1160 | SPECHASH_LOCK(); |
1161 | ||
1c79356b | 1162 | for (vp = *vpp; vp; vp = vp->v_specnext) { |
91447636 A |
1163 | if (nvp_rdev == vp->v_rdev && nvp->v_type == vp->v_type) { |
1164 | vid = vp->v_id; | |
1165 | break; | |
1166 | } | |
1167 | } | |
1168 | SPECHASH_UNLOCK(); | |
1169 | ||
1170 | if (vp) { | |
1171 | if (vnode_getwithvid(vp,vid)) { | |
1172 | goto loop; | |
1173 | } | |
1174 | /* | |
1175 | * Termination state is checked in vnode_getwithvid | |
1176 | */ | |
1177 | vnode_lock(vp); | |
1178 | ||
1c79356b A |
1179 | /* |
1180 | * Alias, but not in use, so flush it out. | |
1181 | */ | |
91447636 A |
1182 | if ((vp->v_iocount == 1) && (vp->v_usecount == 0)) { |
1183 | vnode_reclaim_internal(vp, 1, 0); | |
1184 | vnode_unlock(vp); | |
1185 | vnode_put(vp); | |
1c79356b A |
1186 | goto loop; |
1187 | } | |
1c79356b A |
1188 | } |
1189 | if (vp == NULL || vp->v_tag != VT_NON) { | |
91447636 A |
1190 | MALLOC_ZONE(nvp->v_specinfo, struct specinfo *, sizeof(struct specinfo), |
1191 | M_SPECINFO, M_WAITOK); | |
1c79356b A |
1192 | bzero(nvp->v_specinfo, sizeof(struct specinfo)); |
1193 | nvp->v_rdev = nvp_rdev; | |
91447636 A |
1194 | nvp->v_specflags = 0; |
1195 | nvp->v_speclastr = -1; | |
1196 | ||
1197 | SPECHASH_LOCK(); | |
1c79356b A |
1198 | nvp->v_hashchain = vpp; |
1199 | nvp->v_specnext = *vpp; | |
1c79356b | 1200 | *vpp = nvp; |
91447636 A |
1201 | SPECHASH_UNLOCK(); |
1202 | ||
1c79356b A |
1203 | if (vp != NULLVP) { |
1204 | nvp->v_flag |= VALIASED; | |
1205 | vp->v_flag |= VALIASED; | |
91447636 A |
1206 | vnode_unlock(vp); |
1207 | vnode_put(vp); | |
1c79356b | 1208 | } |
1c79356b A |
1209 | return (NULLVP); |
1210 | } | |
1c79356b A |
1211 | return (vp); |
1212 | } | |
1213 | ||
91447636 | 1214 | |
1c79356b | 1215 | /* |
0b4e3aa0 A |
1216 | * Get a reference on a particular vnode and lock it if requested. |
1217 | * If the vnode was on the inactive list, remove it from the list. | |
1218 | * If the vnode was on the free list, remove it from the list and | |
1219 | * move it to inactive list as needed. | |
1220 | * The vnode lock bit is set if the vnode is being eliminated in | |
1221 | * vgone. The process is awakened when the transition is completed, | |
1222 | * and an error returned to indicate that the vnode is no longer | |
1223 | * usable (possibly having been changed to a new file system type). | |
1c79356b | 1224 | */ |
91447636 A |
1225 | static int |
1226 | vget_internal(vnode_t vp, int vid, int vflags) | |
1c79356b A |
1227 | { |
1228 | int error = 0; | |
55e303ae A |
1229 | u_long vpid; |
1230 | ||
91447636 | 1231 | vnode_lock(vp); |
55e303ae | 1232 | |
91447636 A |
1233 | if (vflags & VNODE_WITHID) |
1234 | vpid = vid; | |
1235 | else | |
1236 | vpid = vp->v_id; // save off the original v_id | |
0b4e3aa0 | 1237 | |
91447636 A |
1238 | if ((vflags & VNODE_WRITEABLE) && (vp->v_writecount == 0)) |
1239 | /* | |
1240 | * vnode to be returned only if it has writers opened | |
1241 | */ | |
1242 | error = EINVAL; | |
1243 | else | |
1244 | error = vnode_getiocount(vp, 1, vpid, vflags); | |
55e303ae | 1245 | |
91447636 | 1246 | vnode_unlock(vp); |
55e303ae | 1247 | |
0b4e3aa0 A |
1248 | return (error); |
1249 | } | |
1250 | ||
1c79356b | 1251 | int |
91447636 | 1252 | vnode_ref(vnode_t vp) |
1c79356b | 1253 | { |
1c79356b | 1254 | |
91447636 | 1255 | return (vnode_ref_ext(vp, 0)); |
1c79356b A |
1256 | } |
1257 | ||
1c79356b | 1258 | int |
91447636 | 1259 | vnode_ref_ext(vnode_t vp, int fmode) |
1c79356b | 1260 | { |
91447636 | 1261 | int error = 0; |
1c79356b | 1262 | |
91447636 | 1263 | vnode_lock(vp); |
1c79356b | 1264 | |
91447636 A |
1265 | /* |
1266 | * once all the current call sites have been fixed to insure they have | |
1267 | * taken an iocount, we can toughen this assert up and insist that the | |
1268 | * iocount is non-zero... a non-zero usecount doesn't insure correctness | |
1269 | */ | |
1270 | if (vp->v_iocount <= 0 && vp->v_usecount <= 0) | |
1271 | panic("vnode_ref_ext: vp %x has no valid reference %d, %d", vp, vp->v_iocount, vp->v_usecount); | |
1c79356b | 1272 | |
91447636 A |
1273 | /* |
1274 | * if you are the owner of drain/termination, can acquire usecount | |
1275 | */ | |
1276 | if ((vp->v_lflag & (VL_DRAIN | VL_TERMINATE | VL_DEAD))) { | |
1277 | if (vp->v_owner != current_thread()) { | |
1278 | error = ENOENT; | |
1279 | goto out; | |
1280 | } | |
1281 | } | |
1282 | vp->v_usecount++; | |
1c79356b | 1283 | |
91447636 A |
1284 | if (fmode & FWRITE) { |
1285 | if (++vp->v_writecount <= 0) | |
1286 | panic("vnode_ref_ext: v_writecount"); | |
55e303ae | 1287 | } |
91447636 A |
1288 | if (fmode & O_EVTONLY) { |
1289 | if (++vp->v_kusecount <= 0) | |
1290 | panic("vnode_ref_ext: v_kusecount"); | |
55e303ae | 1291 | } |
91447636 A |
1292 | out: |
1293 | vnode_unlock(vp); | |
1294 | ||
1295 | return (error); | |
55e303ae A |
1296 | } |
1297 | ||
1298 | ||
1c79356b A |
1299 | /* |
1300 | * put the vnode on appropriate free list. | |
91447636 | 1301 | * called with vnode LOCKED |
1c79356b A |
1302 | */ |
1303 | static void | |
91447636 | 1304 | vnode_list_add(vnode_t vp) |
1c79356b | 1305 | { |
55e303ae | 1306 | |
1c79356b | 1307 | /* |
91447636 | 1308 | * if it is already on a list or non zero references return |
1c79356b | 1309 | */ |
91447636 | 1310 | if (VONLIST(vp) || (vp->v_usecount != 0) || (vp->v_iocount != 0)) |
1c79356b | 1311 | return; |
91447636 | 1312 | vnode_list_lock(); |
1c79356b | 1313 | |
91447636 A |
1314 | /* |
1315 | * insert at tail of LRU list or at head if VAGE or VL_DEAD is set | |
1316 | */ | |
1317 | if ((vp->v_flag & VAGE) || (vp->v_lflag & VL_DEAD)) { | |
1318 | TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); | |
1c79356b | 1319 | vp->v_flag &= ~VAGE; |
91447636 A |
1320 | } else { |
1321 | TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); | |
1322 | } | |
1c79356b | 1323 | freevnodes++; |
91447636 A |
1324 | |
1325 | vnode_list_unlock(); | |
1c79356b A |
1326 | } |
1327 | ||
1328 | /* | |
91447636 | 1329 | * remove the vnode from appropriate free list. |
1c79356b A |
1330 | */ |
1331 | static void | |
91447636 | 1332 | vnode_list_remove(vnode_t vp) |
1c79356b | 1333 | { |
91447636 A |
1334 | /* |
1335 | * we want to avoid taking the list lock | |
1336 | * in the case where we're not on the free | |
1337 | * list... this will be true for most | |
1338 | * directories and any currently in use files | |
1339 | * | |
1340 | * we're guaranteed that we can't go from | |
1341 | * the not-on-list state to the on-list | |
1342 | * state since we hold the vnode lock... | |
1343 | * all calls to vnode_list_add are done | |
1344 | * under the vnode lock... so we can | |
1345 | * check for that condition (the prevelant one) | |
1346 | * without taking the list lock | |
1347 | */ | |
1348 | if (VONLIST(vp)) { | |
1349 | vnode_list_lock(); | |
1350 | /* | |
1351 | * however, we're not guaranteed that | |
1352 | * we won't go from the on-list state | |
1353 | * to the non-on-list state until we | |
1354 | * hold the vnode_list_lock... this | |
1355 | * is due to new_vnode removing vnodes | |
1356 | * from the free list uder the list_lock | |
1357 | * w/o the vnode lock... so we need to | |
1358 | * check again whether we're currently | |
1359 | * on the free list | |
1360 | */ | |
1361 | if (VONLIST(vp)) { | |
1362 | VREMFREE("vnode_list_remove", vp); | |
1363 | VLISTNONE(vp); | |
1364 | } | |
1365 | vnode_list_unlock(); | |
1366 | } | |
1c79356b A |
1367 | } |
1368 | ||
1369 | ||
1c79356b | 1370 | void |
91447636 | 1371 | vnode_rele(vnode_t vp) |
1c79356b | 1372 | { |
91447636 A |
1373 | vnode_rele_internal(vp, 0, 0, 0); |
1374 | } | |
1c79356b | 1375 | |
1c79356b | 1376 | |
91447636 A |
1377 | void |
1378 | vnode_rele_ext(vnode_t vp, int fmode, int dont_reenter) | |
1379 | { | |
1380 | vnode_rele_internal(vp, fmode, dont_reenter, 0); | |
1c79356b A |
1381 | } |
1382 | ||
91447636 | 1383 | |
1c79356b | 1384 | void |
91447636 | 1385 | vnode_rele_internal(vnode_t vp, int fmode, int dont_reenter, int locked) |
1c79356b | 1386 | { |
91447636 | 1387 | struct vfs_context context; |
55e303ae | 1388 | |
91447636 A |
1389 | if ( !locked) |
1390 | vnode_lock(vp); | |
1c79356b | 1391 | |
91447636 A |
1392 | if (--vp->v_usecount < 0) |
1393 | panic("vnode_rele_ext: vp %x usecount -ve : %d", vp, vp->v_usecount); | |
1394 | ||
1395 | if (fmode & FWRITE) { | |
1396 | if (--vp->v_writecount < 0) | |
1397 | panic("vnode_rele_ext: vp %x writecount -ve : %d", vp, vp->v_writecount); | |
1c79356b | 1398 | } |
91447636 A |
1399 | if (fmode & O_EVTONLY) { |
1400 | if (--vp->v_kusecount < 0) | |
1401 | panic("vnode_rele_ext: vp %x kusecount -ve : %d", vp, vp->v_kusecount); | |
1c79356b | 1402 | } |
91447636 A |
1403 | if ((vp->v_iocount > 0) || (vp->v_usecount > 0)) { |
1404 | /* | |
1405 | * vnode is still busy... if we're the last | |
1406 | * usecount, mark for a future call to VNOP_INACTIVE | |
1407 | * when the iocount finally drops to 0 | |
1408 | */ | |
1409 | if (vp->v_usecount == 0) { | |
1410 | vp->v_lflag |= VL_NEEDINACTIVE; | |
1411 | vp->v_flag &= ~(VNOCACHE_DATA | VRAOFF); | |
1412 | } | |
1413 | if ( !locked) | |
1414 | vnode_unlock(vp); | |
1c79356b A |
1415 | return; |
1416 | } | |
91447636 A |
1417 | vp->v_flag &= ~(VNOCACHE_DATA | VRAOFF); |
1418 | ||
1419 | if ( (vp->v_lflag & (VL_TERMINATE | VL_DEAD)) || dont_reenter) { | |
1420 | /* | |
1421 | * vnode is being cleaned, or | |
1422 | * we've requested that we don't reenter | |
1423 | * the filesystem on this release... in | |
1424 | * this case, we'll mark the vnode aged | |
1425 | * if it's been marked for termination | |
1c79356b | 1426 | */ |
91447636 A |
1427 | if (dont_reenter) { |
1428 | if ( !(vp->v_lflag & (VL_TERMINATE | VL_DEAD | VL_MARKTERM)) ) | |
1429 | vp->v_lflag |= VL_NEEDINACTIVE; | |
1430 | vp->v_flag |= VAGE; | |
1c79356b | 1431 | } |
91447636 A |
1432 | vnode_list_add(vp); |
1433 | if ( !locked) | |
1434 | vnode_unlock(vp); | |
1435 | return; | |
1c79356b | 1436 | } |
91447636 A |
1437 | /* |
1438 | * at this point both the iocount and usecount | |
1439 | * are zero | |
1440 | * pick up an iocount so that we can call | |
1441 | * VNOP_INACTIVE with the vnode lock unheld | |
1442 | */ | |
1443 | vp->v_iocount++; | |
1444 | #ifdef JOE_DEBUG | |
1445 | record_vp(vp, 1); | |
1c79356b | 1446 | #endif |
91447636 A |
1447 | vp->v_lflag &= ~VL_NEEDINACTIVE; |
1448 | vnode_unlock(vp); | |
1c79356b | 1449 | |
91447636 A |
1450 | context.vc_proc = current_proc(); |
1451 | context.vc_ucred = kauth_cred_get(); | |
1452 | VNOP_INACTIVE(vp, &context); | |
1c79356b | 1453 | |
91447636 A |
1454 | vnode_lock(vp); |
1455 | /* | |
1456 | * because we dropped the vnode lock to call VNOP_INACTIVE | |
1457 | * the state of the vnode may have changed... we may have | |
1458 | * picked up an iocount, usecount or the MARKTERM may have | |
1459 | * been set... we need to reevaluate the reference counts | |
1460 | * to determine if we can call vnode_reclaim_internal at | |
1461 | * this point... if the reference counts are up, we'll pick | |
1462 | * up the MARKTERM state when they get subsequently dropped | |
1463 | */ | |
1464 | if ( (vp->v_iocount == 1) && (vp->v_usecount == 0) && | |
1465 | ((vp->v_lflag & (VL_MARKTERM | VL_TERMINATE | VL_DEAD)) == VL_MARKTERM)) { | |
1466 | struct uthread *ut; | |
1c79356b | 1467 | |
91447636 A |
1468 | ut = get_bsdthread_info(current_thread()); |
1469 | ||
1470 | if (ut->uu_defer_reclaims) { | |
1471 | vp->v_defer_reclaimlist = ut->uu_vreclaims; | |
1472 | ut->uu_vreclaims = vp; | |
1473 | goto defer_reclaim; | |
1474 | } | |
1475 | vnode_reclaim_internal(vp, 1, 0); | |
1476 | } | |
1477 | vnode_dropiocount(vp, 1); | |
1478 | vnode_list_add(vp); | |
1479 | defer_reclaim: | |
1480 | if ( !locked) | |
1481 | vnode_unlock(vp); | |
1482 | return; | |
1c79356b A |
1483 | } |
1484 | ||
1485 | /* | |
1486 | * Remove any vnodes in the vnode table belonging to mount point mp. | |
1487 | * | |
1488 | * If MNT_NOFORCE is specified, there should not be any active ones, | |
1489 | * return error if any are found (nb: this is a user error, not a | |
1490 | * system error). If MNT_FORCE is specified, detach any active vnodes | |
1491 | * that are found. | |
1492 | */ | |
1493 | #if DIAGNOSTIC | |
1494 | int busyprt = 0; /* print out busy vnodes */ | |
1495 | #if 0 | |
1496 | struct ctldebug debug1 = { "busyprt", &busyprt }; | |
1497 | #endif /* 0 */ | |
1498 | #endif | |
1499 | ||
1500 | int | |
1501 | vflush(mp, skipvp, flags) | |
1502 | struct mount *mp; | |
1503 | struct vnode *skipvp; | |
1504 | int flags; | |
1505 | { | |
0b4e3aa0 | 1506 | struct proc *p = current_proc(); |
91447636 | 1507 | struct vnode *vp; |
1c79356b | 1508 | int busy = 0; |
91447636 A |
1509 | int reclaimed = 0; |
1510 | int vid, retval; | |
1c79356b | 1511 | |
91447636 A |
1512 | mount_lock(mp); |
1513 | vnode_iterate_setup(mp); | |
1514 | /* | |
1515 | * On regular unmounts(not forced) do a | |
1516 | * quick check for vnodes to be in use. This | |
1517 | * preserves the caching of vnodes. automounter | |
1518 | * tries unmounting every so often to see whether | |
1519 | * it is still busy or not. | |
1520 | */ | |
1521 | if ((flags & FORCECLOSE)==0) { | |
1522 | if (vnode_umount_preflight(mp, skipvp, flags)) { | |
1523 | vnode_iterate_clear(mp); | |
1524 | mount_unlock(mp); | |
1525 | return(EBUSY); | |
1526 | } | |
1527 | } | |
1c79356b | 1528 | loop: |
91447636 A |
1529 | /* it is returns 0 then there is nothing to do */ |
1530 | retval = vnode_iterate_prepare(mp); | |
1531 | ||
1532 | if (retval == 0) { | |
1533 | vnode_iterate_clear(mp); | |
1534 | mount_unlock(mp); | |
1535 | return(retval); | |
1536 | } | |
1537 | ||
1538 | /* iterate over all the vnodes */ | |
1539 | while (!TAILQ_EMPTY(&mp->mnt_workerqueue)) { | |
1540 | vp = TAILQ_FIRST(&mp->mnt_workerqueue); | |
1541 | TAILQ_REMOVE(&mp->mnt_workerqueue, vp, v_mntvnodes); | |
1542 | TAILQ_INSERT_TAIL(&mp->mnt_vnodelist, vp, v_mntvnodes); | |
1543 | if ( (vp->v_mount != mp) || (vp == skipvp)) { | |
1544 | continue; | |
1545 | } | |
1546 | vid = vp->v_id; | |
1547 | mount_unlock(mp); | |
1548 | vnode_lock(vp); | |
1549 | ||
1550 | if ((vp->v_id != vid) || ((vp->v_lflag & (VL_DEAD | VL_TERMINATE)))) { | |
1551 | vnode_unlock(vp); | |
1552 | mount_lock(mp); | |
1553 | continue; | |
1554 | } | |
1555 | ||
1c79356b | 1556 | /* |
91447636 A |
1557 | * If requested, skip over vnodes marked VSYSTEM. |
1558 | * Skip over all vnodes marked VNOFLUSH. | |
1559 | */ | |
1560 | if ((flags & SKIPSYSTEM) && ((vp->v_flag & VSYSTEM) || | |
1561 | (vp->v_flag & VNOFLUSH))) { | |
1562 | vnode_unlock(vp); | |
1563 | mount_lock(mp); | |
1c79356b | 1564 | continue; |
91447636 | 1565 | } |
1c79356b | 1566 | /* |
91447636 | 1567 | * If requested, skip over vnodes marked VSWAP. |
1c79356b | 1568 | */ |
91447636 A |
1569 | if ((flags & SKIPSWAP) && (vp->v_flag & VSWAP)) { |
1570 | vnode_unlock(vp); | |
1571 | mount_lock(mp); | |
1c79356b A |
1572 | continue; |
1573 | } | |
1574 | /* | |
91447636 | 1575 | * If requested, skip over vnodes marked VSWAP. |
1c79356b | 1576 | */ |
91447636 A |
1577 | if ((flags & SKIPROOT) && (vp->v_flag & VROOT)) { |
1578 | vnode_unlock(vp); | |
1579 | mount_lock(mp); | |
1c79356b A |
1580 | continue; |
1581 | } | |
1582 | /* | |
1583 | * If WRITECLOSE is set, only flush out regular file | |
1584 | * vnodes open for writing. | |
1585 | */ | |
1586 | if ((flags & WRITECLOSE) && | |
1587 | (vp->v_writecount == 0 || vp->v_type != VREG)) { | |
91447636 A |
1588 | vnode_unlock(vp); |
1589 | mount_lock(mp); | |
1c79356b A |
1590 | continue; |
1591 | } | |
1592 | /* | |
91447636 | 1593 | * If the real usecount is 0, all we need to do is clear |
1c79356b A |
1594 | * out the vnode data structures and we are done. |
1595 | */ | |
91447636 A |
1596 | if (((vp->v_usecount == 0) || |
1597 | ((vp->v_usecount - vp->v_kusecount) == 0))) { | |
1598 | vp->v_iocount++; /* so that drain waits for * other iocounts */ | |
1599 | #ifdef JOE_DEBUG | |
1600 | record_vp(vp, 1); | |
1601 | #endif | |
1602 | vnode_reclaim_internal(vp, 1, 0); | |
1603 | vnode_dropiocount(vp, 1); | |
1604 | vnode_list_add(vp); | |
1605 | ||
1606 | vnode_unlock(vp); | |
1607 | reclaimed++; | |
1608 | mount_lock(mp); | |
1c79356b A |
1609 | continue; |
1610 | } | |
1611 | /* | |
1612 | * If FORCECLOSE is set, forcibly close the vnode. | |
1613 | * For block or character devices, revert to an | |
1614 | * anonymous device. For all other files, just kill them. | |
1615 | */ | |
1616 | if (flags & FORCECLOSE) { | |
1c79356b | 1617 | if (vp->v_type != VBLK && vp->v_type != VCHR) { |
91447636 A |
1618 | vp->v_iocount++; /* so that drain waits * for other iocounts */ |
1619 | #ifdef JOE_DEBUG | |
1620 | record_vp(vp, 1); | |
1621 | #endif | |
1622 | vnode_reclaim_internal(vp, 1, 0); | |
1623 | vnode_dropiocount(vp, 1); | |
1624 | vnode_list_add(vp); | |
1625 | vnode_unlock(vp); | |
1c79356b A |
1626 | } else { |
1627 | vclean(vp, 0, p); | |
91447636 | 1628 | vp->v_lflag &= ~VL_DEAD; |
1c79356b | 1629 | vp->v_op = spec_vnodeop_p; |
91447636 | 1630 | vnode_unlock(vp); |
1c79356b | 1631 | } |
91447636 | 1632 | mount_lock(mp); |
1c79356b A |
1633 | continue; |
1634 | } | |
1635 | #if DIAGNOSTIC | |
1636 | if (busyprt) | |
1637 | vprint("vflush: busy vnode", vp); | |
1638 | #endif | |
91447636 A |
1639 | vnode_unlock(vp); |
1640 | mount_lock(mp); | |
1c79356b A |
1641 | busy++; |
1642 | } | |
91447636 A |
1643 | |
1644 | /* At this point the worker queue is completed */ | |
1645 | if (busy && ((flags & FORCECLOSE)==0) && reclaimed) { | |
1646 | busy = 0; | |
1647 | reclaimed = 0; | |
1648 | (void)vnode_iterate_reloadq(mp); | |
1649 | /* returned with mount lock held */ | |
1650 | goto loop; | |
1651 | } | |
1652 | ||
1653 | /* if new vnodes were created in between retry the reclaim */ | |
1654 | if ( vnode_iterate_reloadq(mp) != 0) { | |
1655 | if (!(busy && ((flags & FORCECLOSE)==0))) | |
1656 | goto loop; | |
1657 | } | |
1658 | vnode_iterate_clear(mp); | |
1659 | mount_unlock(mp); | |
1660 | ||
9bccf70c | 1661 | if (busy && ((flags & FORCECLOSE)==0)) |
1c79356b A |
1662 | return (EBUSY); |
1663 | return (0); | |
1664 | } | |
1665 | ||
91447636 | 1666 | int num_recycledvnodes=0; |
1c79356b A |
1667 | /* |
1668 | * Disassociate the underlying file system from a vnode. | |
91447636 | 1669 | * The vnode lock is held on entry. |
1c79356b A |
1670 | */ |
1671 | static void | |
91447636 | 1672 | vclean(vnode_t vp, int flags, proc_t p) |
1c79356b | 1673 | { |
91447636 | 1674 | struct vfs_context context; |
1c79356b | 1675 | int active; |
91447636 A |
1676 | int need_inactive; |
1677 | int already_terminating; | |
1678 | kauth_cred_t ucred = NULL; | |
1c79356b | 1679 | |
91447636 A |
1680 | context.vc_proc = p; |
1681 | context.vc_ucred = kauth_cred_get(); | |
1c79356b A |
1682 | |
1683 | /* | |
1684 | * Check to see if the vnode is in use. | |
1685 | * If so we have to reference it before we clean it out | |
1686 | * so that its count cannot fall to zero and generate a | |
1687 | * race against ourselves to recycle it. | |
1688 | */ | |
91447636 | 1689 | active = vp->v_usecount; |
55e303ae | 1690 | |
91447636 A |
1691 | /* |
1692 | * just in case we missed sending a needed | |
1693 | * VNOP_INACTIVE, we'll do it now | |
1694 | */ | |
1695 | need_inactive = (vp->v_lflag & VL_NEEDINACTIVE); | |
1696 | ||
1697 | vp->v_lflag &= ~VL_NEEDINACTIVE; | |
55e303ae | 1698 | |
1c79356b A |
1699 | /* |
1700 | * Prevent the vnode from being recycled or | |
1701 | * brought into use while we clean it out. | |
1702 | */ | |
91447636 A |
1703 | already_terminating = (vp->v_lflag & VL_TERMINATE); |
1704 | ||
1705 | vp->v_lflag |= VL_TERMINATE; | |
1c79356b A |
1706 | |
1707 | /* | |
91447636 A |
1708 | * remove the vnode from any mount list |
1709 | * it might be on... | |
1c79356b | 1710 | */ |
91447636 | 1711 | insmntque(vp, (struct mount *)0); |
1c79356b | 1712 | |
91447636 | 1713 | ucred = vp->v_cred; |
89b3af67 | 1714 | vp->v_cred = NOCRED; |
91447636 A |
1715 | |
1716 | vnode_unlock(vp); | |
1717 | ||
89b3af67 A |
1718 | if (IS_VALID_CRED(ucred)) |
1719 | kauth_cred_unref(&ucred); | |
91447636 A |
1720 | |
1721 | OSAddAtomic(1, &num_recycledvnodes); | |
1c79356b | 1722 | /* |
91447636 | 1723 | * purge from the name cache as early as possible... |
1c79356b | 1724 | */ |
91447636 | 1725 | cache_purge(vp); |
1c79356b | 1726 | |
0b4e3aa0 | 1727 | if (active && (flags & DOCLOSE)) |
91447636 | 1728 | VNOP_CLOSE(vp, IO_NDELAY, &context); |
1c79356b A |
1729 | |
1730 | /* | |
1731 | * Clean out any buffers associated with the vnode. | |
1732 | */ | |
1733 | if (flags & DOCLOSE) { | |
91447636 | 1734 | #if NFSCLIENT |
1c79356b | 1735 | if (vp->v_tag == VT_NFS) |
55e303ae A |
1736 | nfs_vinvalbuf(vp, V_SAVE, NOCRED, p, 0); |
1737 | else | |
91447636 A |
1738 | #endif |
1739 | { | |
1740 | VNOP_FSYNC(vp, MNT_WAIT, &context); | |
1741 | buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0); | |
1742 | } | |
1743 | if (UBCINFOEXISTS(vp)) | |
1744 | /* | |
1745 | * Clean the pages in VM. | |
1746 | */ | |
1747 | (void)ubc_sync_range(vp, (off_t)0, ubc_getsize(vp), UBC_PUSHALL); | |
55e303ae | 1748 | } |
91447636 A |
1749 | if (UBCINFOEXISTS(vp)) |
1750 | cluster_release(vp->v_ubcinfo); | |
0b4e3aa0 | 1751 | |
91447636 A |
1752 | if (active || need_inactive) |
1753 | VNOP_INACTIVE(vp, &context); | |
0b4e3aa0 A |
1754 | |
1755 | /* Destroy ubc named reference */ | |
91447636 | 1756 | ubc_destroy_named(vp); |
0b4e3aa0 | 1757 | |
1c79356b A |
1758 | /* |
1759 | * Reclaim the vnode. | |
1760 | */ | |
91447636 | 1761 | if (VNOP_RECLAIM(vp, &context)) |
1c79356b | 1762 | panic("vclean: cannot reclaim"); |
55e303ae A |
1763 | |
1764 | // make sure the name & parent ptrs get cleaned out! | |
91447636 | 1765 | vnode_update_identity(vp, NULLVP, NULL, 0, 0, VNODE_UPDATE_PARENT | VNODE_UPDATE_NAME); |
55e303ae | 1766 | |
91447636 | 1767 | vnode_lock(vp); |
1c79356b | 1768 | |
91447636 | 1769 | vp->v_mount = dead_mountp; |
1c79356b A |
1770 | vp->v_op = dead_vnodeop_p; |
1771 | vp->v_tag = VT_NON; | |
91447636 | 1772 | vp->v_data = NULL; |
1c79356b | 1773 | |
91447636 | 1774 | vp->v_lflag |= VL_DEAD; |
55e303ae | 1775 | |
91447636 A |
1776 | if (already_terminating == 0) { |
1777 | vp->v_lflag &= ~VL_TERMINATE; | |
1778 | /* | |
1779 | * Done with purge, notify sleepers of the grim news. | |
1780 | */ | |
1781 | if (vp->v_lflag & VL_TERMWANT) { | |
1782 | vp->v_lflag &= ~VL_TERMWANT; | |
1783 | wakeup(&vp->v_lflag); | |
1784 | } | |
1c79356b A |
1785 | } |
1786 | } | |
1787 | ||
1788 | /* | |
1789 | * Eliminate all activity associated with the requested vnode | |
1790 | * and with all vnodes aliased to the requested vnode. | |
1791 | */ | |
1792 | int | |
91447636 | 1793 | vn_revoke(vnode_t vp, int flags, __unused vfs_context_t a_context) |
1c79356b | 1794 | { |
91447636 A |
1795 | struct vnode *vq; |
1796 | int vid; | |
1c79356b A |
1797 | |
1798 | #if DIAGNOSTIC | |
91447636 A |
1799 | if ((flags & REVOKEALL) == 0) |
1800 | panic("vnop_revoke"); | |
1c79356b A |
1801 | #endif |
1802 | ||
1c79356b A |
1803 | if (vp->v_flag & VALIASED) { |
1804 | /* | |
1805 | * If a vgone (or vclean) is already in progress, | |
1806 | * wait until it is done and return. | |
1807 | */ | |
91447636 A |
1808 | vnode_lock(vp); |
1809 | if (vp->v_lflag & VL_TERMINATE) { | |
1810 | vnode_unlock(vp); | |
1811 | return(ENOENT); | |
1c79356b | 1812 | } |
91447636 | 1813 | vnode_unlock(vp); |
1c79356b A |
1814 | /* |
1815 | * Ensure that vp will not be vgone'd while we | |
1816 | * are eliminating its aliases. | |
1817 | */ | |
91447636 | 1818 | SPECHASH_LOCK(); |
1c79356b | 1819 | while (vp->v_flag & VALIASED) { |
1c79356b A |
1820 | for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { |
1821 | if (vq->v_rdev != vp->v_rdev || | |
1822 | vq->v_type != vp->v_type || vp == vq) | |
1823 | continue; | |
91447636 A |
1824 | vid = vq->v_id; |
1825 | SPECHASH_UNLOCK(); | |
1826 | if (vnode_getwithvid(vq,vid)){ | |
1827 | SPECHASH_LOCK(); | |
1828 | break; | |
1829 | } | |
1830 | vnode_reclaim_internal(vq, 0, 0); | |
1831 | vnode_put(vq); | |
1832 | SPECHASH_LOCK(); | |
1c79356b A |
1833 | break; |
1834 | } | |
1c79356b | 1835 | } |
91447636 | 1836 | SPECHASH_UNLOCK(); |
1c79356b | 1837 | } |
91447636 A |
1838 | vnode_reclaim_internal(vp, 0, 0); |
1839 | ||
1c79356b A |
1840 | return (0); |
1841 | } | |
1842 | ||
1843 | /* | |
1844 | * Recycle an unused vnode to the front of the free list. | |
1845 | * Release the passed interlock if the vnode will be recycled. | |
1846 | */ | |
1847 | int | |
91447636 | 1848 | vnode_recycle(vp) |
1c79356b | 1849 | struct vnode *vp; |
1c79356b | 1850 | { |
91447636 | 1851 | vnode_lock(vp); |
1c79356b | 1852 | |
91447636 A |
1853 | if (vp->v_iocount || vp->v_usecount) { |
1854 | vp->v_lflag |= VL_MARKTERM; | |
1855 | vnode_unlock(vp); | |
1856 | return(0); | |
1857 | } | |
1858 | vnode_reclaim_internal(vp, 1, 0); | |
1859 | vnode_unlock(vp); | |
1860 | ||
1861 | return (1); | |
1c79356b A |
1862 | } |
1863 | ||
91447636 A |
1864 | static int |
1865 | vnode_reload(vnode_t vp) | |
1c79356b | 1866 | { |
91447636 | 1867 | vnode_lock(vp); |
1c79356b | 1868 | |
91447636 A |
1869 | if ((vp->v_iocount > 1) || vp->v_usecount) { |
1870 | vnode_unlock(vp); | |
1871 | return(0); | |
1872 | } | |
1873 | if (vp->v_iocount <= 0) | |
1874 | panic("vnode_reload with no iocount %d", vp->v_iocount); | |
1875 | ||
1876 | /* mark for release when iocount is dopped */ | |
1877 | vp->v_lflag |= VL_MARKTERM; | |
1878 | vnode_unlock(vp); | |
1879 | ||
1880 | return (1); | |
1c79356b A |
1881 | } |
1882 | ||
91447636 A |
1883 | |
1884 | static void | |
1885 | vgone(vnode_t vp) | |
1c79356b A |
1886 | { |
1887 | struct vnode *vq; | |
1888 | struct vnode *vx; | |
1889 | ||
1c79356b A |
1890 | /* |
1891 | * Clean out the filesystem specific data. | |
91447636 A |
1892 | * vclean also takes care of removing the |
1893 | * vnode from any mount list it might be on | |
1c79356b | 1894 | */ |
91447636 A |
1895 | vclean(vp, DOCLOSE, current_proc()); |
1896 | ||
1c79356b A |
1897 | /* |
1898 | * If special device, remove it from special device alias list | |
1899 | * if it is on one. | |
1900 | */ | |
1901 | if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) { | |
91447636 A |
1902 | SPECHASH_LOCK(); |
1903 | if (*vp->v_hashchain == vp) { | |
1904 | *vp->v_hashchain = vp->v_specnext; | |
1905 | } else { | |
1906 | for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { | |
1907 | if (vq->v_specnext != vp) | |
1908 | continue; | |
1909 | vq->v_specnext = vp->v_specnext; | |
1910 | break; | |
1911 | } | |
1c79356b A |
1912 | if (vq == NULL) |
1913 | panic("missing bdev"); | |
1c79356b | 1914 | } |
91447636 A |
1915 | if (vp->v_flag & VALIASED) { |
1916 | vx = NULL; | |
1917 | for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { | |
1918 | if (vq->v_rdev != vp->v_rdev || | |
1919 | vq->v_type != vp->v_type) | |
1920 | continue; | |
1921 | if (vx) | |
1922 | break; | |
1923 | vx = vq; | |
1924 | } | |
1925 | if (vx == NULL) | |
1926 | panic("missing alias"); | |
1927 | if (vq == NULL) | |
1928 | vx->v_flag &= ~VALIASED; | |
1929 | vp->v_flag &= ~VALIASED; | |
1930 | } | |
1931 | SPECHASH_UNLOCK(); | |
1932 | { | |
1933 | struct specinfo *tmp = vp->v_specinfo; | |
1934 | vp->v_specinfo = NULL; | |
1935 | FREE_ZONE((void *)tmp, sizeof(struct specinfo), M_SPECINFO); | |
1936 | } | |
1c79356b | 1937 | } |
1c79356b A |
1938 | } |
1939 | ||
1940 | /* | |
1941 | * Lookup a vnode by device number. | |
1942 | */ | |
1943 | int | |
91447636 | 1944 | check_mountedon(dev_t dev, enum vtype type, int *errorp) |
1c79356b | 1945 | { |
91447636 | 1946 | vnode_t vp; |
1c79356b | 1947 | int rc = 0; |
91447636 | 1948 | int vid; |
1c79356b | 1949 | |
91447636 A |
1950 | loop: |
1951 | SPECHASH_LOCK(); | |
1c79356b A |
1952 | for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) { |
1953 | if (dev != vp->v_rdev || type != vp->v_type) | |
1954 | continue; | |
91447636 A |
1955 | vid = vp->v_id; |
1956 | SPECHASH_UNLOCK(); | |
1957 | if (vnode_getwithvid(vp,vid)) | |
1958 | goto loop; | |
1959 | vnode_lock(vp); | |
1960 | if ((vp->v_usecount > 0) || (vp->v_iocount > 1)) { | |
1961 | vnode_unlock(vp); | |
1962 | if ((*errorp = vfs_mountedon(vp)) != 0) | |
1963 | rc = 1; | |
1964 | } else | |
1965 | vnode_unlock(vp); | |
1966 | vnode_put(vp); | |
1967 | return(rc); | |
1c79356b | 1968 | } |
91447636 A |
1969 | SPECHASH_UNLOCK(); |
1970 | return (0); | |
1c79356b A |
1971 | } |
1972 | ||
1973 | /* | |
1974 | * Calculate the total number of references to a special device. | |
1975 | */ | |
1976 | int | |
91447636 | 1977 | vcount(vnode_t vp) |
1c79356b | 1978 | { |
91447636 | 1979 | vnode_t vq, vnext; |
1c79356b | 1980 | int count; |
91447636 | 1981 | int vid; |
1c79356b A |
1982 | |
1983 | loop: | |
1984 | if ((vp->v_flag & VALIASED) == 0) | |
91447636 A |
1985 | return (vp->v_usecount - vp->v_kusecount); |
1986 | ||
1987 | SPECHASH_LOCK(); | |
1c79356b A |
1988 | for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) { |
1989 | vnext = vq->v_specnext; | |
1990 | if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type) | |
1991 | continue; | |
91447636 A |
1992 | vid = vq->v_id; |
1993 | SPECHASH_UNLOCK(); | |
1994 | ||
1995 | if (vnode_getwithvid(vq, vid)) { | |
1996 | goto loop; | |
1997 | } | |
1c79356b A |
1998 | /* |
1999 | * Alias, but not in use, so flush it out. | |
2000 | */ | |
91447636 A |
2001 | vnode_lock(vq); |
2002 | if ((vq->v_usecount == 0) && (vq->v_iocount == 1) && vq != vp) { | |
2003 | vnode_reclaim_internal(vq, 1, 0); | |
2004 | vnode_unlock(vq); | |
2005 | vnode_put(vq); | |
1c79356b A |
2006 | goto loop; |
2007 | } | |
91447636 A |
2008 | count += (vq->v_usecount - vq->v_kusecount); |
2009 | vnode_unlock(vq); | |
2010 | vnode_put(vq); | |
2011 | ||
2012 | SPECHASH_LOCK(); | |
1c79356b | 2013 | } |
91447636 A |
2014 | SPECHASH_UNLOCK(); |
2015 | ||
1c79356b A |
2016 | return (count); |
2017 | } | |
2018 | ||
2019 | int prtactive = 0; /* 1 => print out reclaim of active vnodes */ | |
2020 | ||
2021 | /* | |
2022 | * Print out a description of a vnode. | |
2023 | */ | |
2024 | static char *typename[] = | |
2025 | { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" }; | |
2026 | ||
2027 | void | |
91447636 | 2028 | vprint(const char *label, struct vnode *vp) |
1c79356b | 2029 | { |
91447636 | 2030 | char sbuf[64]; |
1c79356b A |
2031 | |
2032 | if (label != NULL) | |
2033 | printf("%s: ", label); | |
91447636 A |
2034 | printf("type %s, usecount %d, writecount %d", |
2035 | typename[vp->v_type], vp->v_usecount, vp->v_writecount); | |
2036 | sbuf[0] = '\0'; | |
1c79356b | 2037 | if (vp->v_flag & VROOT) |
91447636 | 2038 | strcat(sbuf, "|VROOT"); |
1c79356b | 2039 | if (vp->v_flag & VTEXT) |
91447636 | 2040 | strcat(sbuf, "|VTEXT"); |
1c79356b | 2041 | if (vp->v_flag & VSYSTEM) |
91447636 | 2042 | strcat(sbuf, "|VSYSTEM"); |
9bccf70c | 2043 | if (vp->v_flag & VNOFLUSH) |
91447636 | 2044 | strcat(sbuf, "|VNOFLUSH"); |
1c79356b | 2045 | if (vp->v_flag & VBWAIT) |
91447636 | 2046 | strcat(sbuf, "|VBWAIT"); |
1c79356b | 2047 | if (vp->v_flag & VALIASED) |
91447636 A |
2048 | strcat(sbuf, "|VALIASED"); |
2049 | if (sbuf[0] != '\0') | |
2050 | printf(" flags (%s)", &sbuf[1]); | |
1c79356b A |
2051 | } |
2052 | ||
1c79356b | 2053 | |
91447636 A |
/*
 * Build the path for a vnode into pathbuf.
 * *len is in/out: buffer capacity on entry, bytes used on return.
 */
int
vn_getpath(struct vnode *vp, char *pathbuf, int *len)
{
	return build_path(vp, pathbuf, *len, len);
}
91447636 A |
2059 | |
2060 | ||
/* package-extension table state (see set_package_extensions_table()) */
static char *extension_table = NULL;
static int nexts;
static int max_ext_width;

/*
 * qsort comparator: orders extensions by string length (ascending).
 * NOTE(review): set_package_extensions_table()'s comment claims the
 * table is kept longest-to-shortest; this comparator sorts the other
 * way — confirm which is intended.
 */
static int
extension_cmp(void *a, void *b)
{
	return (strlen((char *)a) - strlen((char *)b));
}
55e303ae | 2070 | |
55e303ae | 2071 | |
91447636 A |
2072 | // |
2073 | // This is the api LaunchServices uses to inform the kernel | |
2074 | // the list of package extensions to ignore. | |
2075 | // | |
2076 | // Internally we keep the list sorted by the length of the | |
2077 | // the extension (from longest to shortest). We sort the | |
2078 | // list of extensions so that we can speed up our searches | |
2079 | // when comparing file names -- we only compare extensions | |
2080 | // that could possibly fit into the file name, not all of | |
2081 | // them (i.e. a short 8 character name can't have an 8 | |
2082 | // character extension). | |
2083 | // | |
2084 | __private_extern__ int | |
2085 | set_package_extensions_table(void *data, int nentries, int maxwidth) | |
2086 | { | |
2087 | char *new_exts, *ptr; | |
2088 | int error, i, len; | |
2089 | ||
2090 | if (nentries <= 0 || nentries > 1024 || maxwidth <= 0 || maxwidth > 255) { | |
2091 | return EINVAL; | |
2092 | } | |
2093 | ||
2094 | MALLOC(new_exts, char *, nentries * maxwidth, M_TEMP, M_WAITOK); | |
2095 | ||
2096 | error = copyin(CAST_USER_ADDR_T(data), new_exts, nentries * maxwidth); | |
2097 | if (error) { | |
2098 | FREE(new_exts, M_TEMP); | |
2099 | return error; | |
2100 | } | |
2101 | ||
2102 | if (extension_table) { | |
2103 | FREE(extension_table, M_TEMP); | |
2104 | } | |
2105 | extension_table = new_exts; | |
2106 | nexts = nentries; | |
2107 | max_ext_width = maxwidth; | |
2108 | ||
2109 | qsort(extension_table, nexts, maxwidth, extension_cmp); | |
2110 | ||
2111 | return 0; | |
2112 | } | |
2113 | ||
2114 | ||
2115 | __private_extern__ int | |
2116 | is_package_name(char *name, int len) | |
2117 | { | |
2118 | int i, extlen; | |
2119 | char *ptr, *name_ext; | |
2120 | ||
2121 | if (len <= 3) { | |
2122 | return 0; | |
2123 | } | |
2124 | ||
2125 | name_ext = NULL; | |
2126 | for(ptr=name; *ptr != '\0'; ptr++) { | |
2127 | if (*ptr == '.') { | |
2128 | name_ext = ptr; | |
55e303ae | 2129 | } |
91447636 | 2130 | } |
55e303ae | 2131 | |
91447636 A |
2132 | // if there is no "." extension, it can't match |
2133 | if (name_ext == NULL) { | |
2134 | return 0; | |
2135 | } | |
55e303ae | 2136 | |
91447636 A |
2137 | // advance over the "." |
2138 | name_ext++; | |
55e303ae | 2139 | |
91447636 A |
2140 | // now iterate over all the extensions to see if any match |
2141 | ptr = &extension_table[0]; | |
2142 | for(i=0; i < nexts; i++, ptr+=max_ext_width) { | |
2143 | extlen = strlen(ptr); | |
2144 | if (strncasecmp(name_ext, ptr, extlen) == 0 && name_ext[extlen] == '\0') { | |
2145 | // aha, a match! | |
2146 | return 1; | |
55e303ae A |
2147 | } |
2148 | } | |
2149 | ||
91447636 A |
2150 | // if we get here, no extension matched |
2151 | return 0; | |
55e303ae A |
2152 | } |
2153 | ||
91447636 A |
2154 | int |
2155 | vn_path_package_check(__unused vnode_t vp, char *path, int pathlen, int *component) | |
55e303ae | 2156 | { |
91447636 A |
2157 | char *ptr, *end; |
2158 | int comp=0; | |
2159 | ||
2160 | *component = -1; | |
2161 | if (*path != '/') { | |
2162 | return EINVAL; | |
2163 | } | |
2164 | ||
2165 | end = path + 1; | |
2166 | while(end < path + pathlen && *end != '\0') { | |
2167 | while(end < path + pathlen && *end == '/' && *end != '\0') { | |
2168 | end++; | |
2169 | } | |
2170 | ||
2171 | ptr = end; | |
2172 | ||
2173 | while(end < path + pathlen && *end != '/' && *end != '\0') { | |
2174 | end++; | |
2175 | } | |
2176 | ||
2177 | if (end > path + pathlen) { | |
2178 | // hmm, string wasn't null terminated | |
2179 | return EINVAL; | |
2180 | } | |
2181 | ||
2182 | *end = '\0'; | |
2183 | if (is_package_name(ptr, end - ptr)) { | |
2184 | *component = comp; | |
2185 | break; | |
2186 | } | |
55e303ae | 2187 | |
91447636 A |
2188 | end++; |
2189 | comp++; | |
2190 | } | |
2191 | ||
2192 | return 0; | |
2193 | } | |
55e303ae A |
2194 | |
2195 | ||
1c79356b A |
2196 | /* |
2197 | * Top level filesystem related information gathering. | |
2198 | */ | |
91447636 A |
2199 | extern unsigned int vfs_nummntops; |
2200 | ||
1c79356b | 2201 | int |
91447636 A |
2202 | vfs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp, |
2203 | user_addr_t newp, size_t newlen, struct proc *p) | |
1c79356b | 2204 | { |
91447636 | 2205 | struct vfstable *vfsp; |
55e303ae A |
2206 | int *username; |
2207 | u_int usernamelen; | |
2208 | int error; | |
91447636 | 2209 | struct vfsconf *vfsc; |
1c79356b | 2210 | |
9bccf70c A |
2211 | /* |
2212 | * The VFS_NUMMNTOPS shouldn't be at name[0] since | |
2213 | * is a VFS generic variable. So now we must check | |
2214 | * namelen so we don't end up covering any UFS | |
2215 | * variables (sinc UFS vfc_typenum is 1). | |
2216 | * | |
2217 | * It should have been: | |
2218 | * name[0]: VFS_GENERIC | |
2219 | * name[1]: VFS_NUMMNTOPS | |
2220 | */ | |
2221 | if (namelen == 1 && name[0] == VFS_NUMMNTOPS) { | |
1c79356b A |
2222 | return (sysctl_rdint(oldp, oldlenp, newp, vfs_nummntops)); |
2223 | } | |
2224 | ||
2225 | /* all sysctl names at this level are at least name and field */ | |
2226 | if (namelen < 2) | |
55e303ae | 2227 | return (EISDIR); /* overloaded */ |
1c79356b | 2228 | if (name[0] != VFS_GENERIC) { |
91447636 A |
2229 | struct vfs_context context; |
2230 | ||
1c79356b A |
2231 | for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) |
2232 | if (vfsp->vfc_typenum == name[0]) | |
2233 | break; | |
2234 | if (vfsp == NULL) | |
91447636 A |
2235 | return (ENOTSUP); |
2236 | context.vc_proc = p; | |
2237 | context.vc_ucred = kauth_cred_get(); | |
2238 | ||
1c79356b | 2239 | return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1, |
91447636 | 2240 | oldp, oldlenp, newp, newlen, &context)); |
1c79356b A |
2241 | } |
2242 | switch (name[1]) { | |
2243 | case VFS_MAXTYPENUM: | |
2244 | return (sysctl_rdint(oldp, oldlenp, newp, maxvfsconf)); | |
2245 | case VFS_CONF: | |
2246 | if (namelen < 3) | |
2247 | return (ENOTDIR); /* overloaded */ | |
2248 | for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) | |
2249 | if (vfsp->vfc_typenum == name[2]) | |
2250 | break; | |
2251 | if (vfsp == NULL) | |
91447636 A |
2252 | return (ENOTSUP); |
2253 | vfsc = (struct vfsconf *)vfsp; | |
2254 | if (proc_is64bit(p)) { | |
2255 | struct user_vfsconf usr_vfsc; | |
2256 | usr_vfsc.vfc_vfsops = CAST_USER_ADDR_T(vfsc->vfc_vfsops); | |
2257 | bcopy(vfsc->vfc_name, usr_vfsc.vfc_name, sizeof(usr_vfsc.vfc_name)); | |
2258 | usr_vfsc.vfc_typenum = vfsc->vfc_typenum; | |
2259 | usr_vfsc.vfc_refcount = vfsc->vfc_refcount; | |
2260 | usr_vfsc.vfc_flags = vfsc->vfc_flags; | |
2261 | usr_vfsc.vfc_mountroot = CAST_USER_ADDR_T(vfsc->vfc_mountroot); | |
2262 | usr_vfsc.vfc_next = CAST_USER_ADDR_T(vfsc->vfc_next); | |
2263 | return (sysctl_rdstruct(oldp, oldlenp, newp, &usr_vfsc, | |
2264 | sizeof(usr_vfsc))); | |
2265 | } | |
2266 | else { | |
2267 | return (sysctl_rdstruct(oldp, oldlenp, newp, vfsc, | |
2268 | sizeof(struct vfsconf))); | |
2269 | } | |
2270 | ||
2271 | case VFS_SET_PACKAGE_EXTS: | |
2272 | return set_package_extensions_table((void *)name[1], name[2], name[3]); | |
1c79356b | 2273 | } |
55e303ae A |
2274 | /* |
2275 | * We need to get back into the general MIB, so we need to re-prepend | |
2276 | * CTL_VFS to our name and try userland_sysctl(). | |
2277 | */ | |
2278 | usernamelen = namelen + 1; | |
2279 | MALLOC(username, int *, usernamelen * sizeof(*username), | |
2280 | M_TEMP, M_WAITOK); | |
2281 | bcopy(name, username + 1, namelen * sizeof(*name)); | |
2282 | username[0] = CTL_VFS; | |
91447636 A |
2283 | error = userland_sysctl(p, username, usernamelen, oldp, |
2284 | oldlenp, 1, newp, newlen, oldlenp); | |
55e303ae A |
2285 | FREE(username, M_TEMP); |
2286 | return (error); | |
1c79356b A |
2287 | } |
2288 | ||
2289 | int kinfo_vdebug = 1; | |
2290 | #define KINFO_VNODESLOP 10 | |
2291 | /* | |
2292 | * Dump vnode list (via sysctl). | |
2293 | * Copyout address of vnode followed by vnode. | |
2294 | */ | |
2295 | /* ARGSUSED */ | |
2296 | int | |
91447636 | 2297 | sysctl_vnode(__unused user_addr_t where, __unused size_t *sizep) |
1c79356b | 2298 | { |
91447636 | 2299 | #if 0 |
1c79356b A |
2300 | struct mount *mp, *nmp; |
2301 | struct vnode *nvp, *vp; | |
2302 | char *bp = where, *savebp; | |
2303 | char *ewhere; | |
2304 | int error; | |
2305 | ||
2306 | #define VPTRSZ sizeof (struct vnode *) | |
2307 | #define VNODESZ sizeof (struct vnode) | |
2308 | if (where == NULL) { | |
2309 | *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ); | |
2310 | return (0); | |
2311 | } | |
2312 | ewhere = where + *sizep; | |
2313 | ||
1c79356b | 2314 | for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { |
91447636 | 2315 | if (vfs_busy(mp, LK_NOWAIT)) { |
1c79356b A |
2316 | nmp = mp->mnt_list.cqe_next; |
2317 | continue; | |
2318 | } | |
2319 | savebp = bp; | |
2320 | again: | |
91447636 | 2321 | TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) { |
1c79356b A |
2322 | /* |
2323 | * Check that the vp is still associated with | |
2324 | * this filesystem. RACE: could have been | |
2325 | * recycled onto the same filesystem. | |
2326 | */ | |
2327 | if (vp->v_mount != mp) { | |
1c79356b A |
2328 | if (kinfo_vdebug) |
2329 | printf("kinfo: vp changed\n"); | |
2330 | bp = savebp; | |
2331 | goto again; | |
2332 | } | |
1c79356b | 2333 | if (bp + VPTRSZ + VNODESZ > ewhere) { |
91447636 | 2334 | vfs_unbusy(mp); |
1c79356b A |
2335 | *sizep = bp - where; |
2336 | return (ENOMEM); | |
2337 | } | |
1c79356b | 2338 | if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) || |
55e303ae | 2339 | (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ))) { |
91447636 | 2340 | vfs_unbusy(mp); |
1c79356b | 2341 | return (error); |
55e303ae | 2342 | } |
1c79356b | 2343 | bp += VPTRSZ + VNODESZ; |
1c79356b | 2344 | } |
1c79356b | 2345 | nmp = mp->mnt_list.cqe_next; |
91447636 | 2346 | vfs_unbusy(mp); |
1c79356b | 2347 | } |
1c79356b A |
2348 | |
2349 | *sizep = bp - where; | |
2350 | return (0); | |
91447636 A |
2351 | #else |
2352 | return(EINVAL); | |
2353 | #endif | |
1c79356b A |
2354 | } |
2355 | ||
2356 | /* | |
2357 | * Check to see if a filesystem is mounted on a block device. | |
2358 | */ | |
2359 | int | |
2360 | vfs_mountedon(vp) | |
2361 | struct vnode *vp; | |
2362 | { | |
2363 | struct vnode *vq; | |
2364 | int error = 0; | |
2365 | ||
91447636 A |
2366 | SPECHASH_LOCK(); |
2367 | if (vp->v_specflags & SI_MOUNTEDON) { | |
2368 | error = EBUSY; | |
2369 | goto out; | |
2370 | } | |
1c79356b | 2371 | if (vp->v_flag & VALIASED) { |
1c79356b A |
2372 | for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { |
2373 | if (vq->v_rdev != vp->v_rdev || | |
2374 | vq->v_type != vp->v_type) | |
2375 | continue; | |
2376 | if (vq->v_specflags & SI_MOUNTEDON) { | |
2377 | error = EBUSY; | |
2378 | break; | |
2379 | } | |
2380 | } | |
1c79356b | 2381 | } |
91447636 A |
2382 | out: |
2383 | SPECHASH_UNLOCK(); | |
1c79356b A |
2384 | return (error); |
2385 | } | |
2386 | ||
2387 | /* | |
2388 | * Unmount all filesystems. The list is traversed in reverse order | |
2389 | * of mounting to avoid dependencies. | |
2390 | */ | |
0b4e3aa0 | 2391 | __private_extern__ void |
1c79356b A |
2392 | vfs_unmountall() |
2393 | { | |
91447636 | 2394 | struct mount *mp; |
0b4e3aa0 | 2395 | struct proc *p = current_proc(); |
91447636 | 2396 | int error; |
89b3af67 | 2397 | int skip_listremove; |
1c79356b A |
2398 | |
2399 | /* | |
2400 | * Since this only runs when rebooting, it is not interlocked. | |
2401 | */ | |
91447636 A |
2402 | mount_list_lock(); |
2403 | while(!TAILQ_EMPTY(&mountlist)) { | |
2404 | mp = TAILQ_LAST(&mountlist, mntlist); | |
2405 | mount_list_unlock(); | |
89b3af67 A |
2406 | skip_listremove = 0; |
2407 | error = dounmount(mp, MNT_FORCE, &skip_listremove, p); | |
91447636 A |
2408 | if (error) { |
2409 | mount_list_lock(); | |
89b3af67 A |
2410 | if (skip_listremove == 0) { |
2411 | TAILQ_REMOVE(&mountlist, mp, mnt_list); | |
2412 | printf("unmount of %s failed (", mp->mnt_vfsstat.f_mntonname); | |
2413 | } | |
2414 | ||
91447636 A |
2415 | if (error == EBUSY) |
2416 | printf("BUSY)\n"); | |
2417 | else | |
2418 | printf("%d)\n", error); | |
2419 | continue; | |
1c79356b | 2420 | } |
91447636 | 2421 | mount_list_lock(); |
1c79356b | 2422 | } |
91447636 | 2423 | mount_list_unlock(); |
1c79356b A |
2424 | } |
2425 | ||
1c79356b | 2426 | |
91447636 A |
2427 | /* |
2428 | * This routine is called from vnode_pager_no_senders() | |
2429 | * which in turn can be called with vnode locked by vnode_uncache() | |
2430 | * But it could also get called as a result of vm_object_cache_trim(). | |
2431 | * In that case lock state is unknown. | |
2432 | * AGE the vnode so that it gets recycled quickly. | |
1c79356b | 2433 | */ |
91447636 A |
2434 | __private_extern__ void |
2435 | vnode_pager_vrele(struct vnode *vp) | |
1c79356b | 2436 | { |
91447636 | 2437 | vnode_lock(vp); |
1c79356b | 2438 | |
91447636 A |
2439 | if (!ISSET(vp->v_lflag, VL_TERMINATE)) |
2440 | panic("vnode_pager_vrele: vp not in termination"); | |
2441 | vp->v_lflag &= ~VNAMED_UBC; | |
1c79356b | 2442 | |
91447636 A |
2443 | if (UBCINFOEXISTS(vp)) { |
2444 | struct ubc_info *uip = vp->v_ubcinfo; | |
1c79356b | 2445 | |
91447636 A |
2446 | if (ISSET(uip->ui_flags, UI_WASMAPPED)) |
2447 | SET(vp->v_flag, VWASMAPPED); | |
2448 | vp->v_ubcinfo = UBC_INFO_NULL; | |
2449 | ||
2450 | ubc_info_deallocate(uip); | |
2451 | } else { | |
2452 | panic("NO ubcinfo in vnode_pager_vrele"); | |
1c79356b | 2453 | } |
91447636 A |
2454 | vnode_unlock(vp); |
2455 | ||
2456 | wakeup(&vp->v_lflag); | |
1c79356b A |
2457 | } |
2458 | ||
91447636 A |
2459 | |
2460 | #include <sys/disk.h> | |
2461 | ||
2462 | errno_t | |
2463 | vfs_init_io_attributes(vnode_t devvp, mount_t mp) | |
1c79356b | 2464 | { |
91447636 A |
2465 | int error; |
2466 | off_t readblockcnt; | |
2467 | off_t writeblockcnt; | |
2468 | off_t readmaxcnt; | |
2469 | off_t writemaxcnt; | |
2470 | off_t readsegcnt; | |
2471 | off_t writesegcnt; | |
2472 | off_t readsegsize; | |
2473 | off_t writesegsize; | |
2474 | u_long blksize; | |
2475 | u_int64_t temp; | |
2476 | struct vfs_context context; | |
1c79356b | 2477 | |
91447636 | 2478 | proc_t p = current_proc(); |
0b4e3aa0 | 2479 | |
91447636 A |
2480 | context.vc_proc = p; |
2481 | context.vc_ucred = kauth_cred_get(); | |
0b4e3aa0 | 2482 | |
55e303ae A |
2483 | int isvirtual = 0; |
2484 | /* | |
2485 | * determine if this mount point exists on the same device as the root | |
2486 | * partition... if so, then it comes under the hard throttle control | |
2487 | */ | |
2488 | int thisunit = -1; | |
2489 | static int rootunit = -1; | |
55e303ae A |
2490 | |
2491 | if (rootunit == -1) { | |
91447636 | 2492 | if (VNOP_IOCTL(rootvp, DKIOCGETBSDUNIT, (caddr_t)&rootunit, 0, &context)) |
55e303ae A |
2493 | rootunit = -1; |
2494 | else if (rootvp == devvp) | |
2495 | mp->mnt_kern_flag |= MNTK_ROOTDEV; | |
2496 | } | |
2497 | if (devvp != rootvp && rootunit != -1) { | |
91447636 | 2498 | if (VNOP_IOCTL(devvp, DKIOCGETBSDUNIT, (caddr_t)&thisunit, 0, &context) == 0) { |
55e303ae A |
2499 | if (thisunit == rootunit) |
2500 | mp->mnt_kern_flag |= MNTK_ROOTDEV; | |
2501 | } | |
2502 | } | |
91447636 A |
2503 | /* |
2504 | * force the spec device to re-cache | |
2505 | * the underlying block size in case | |
2506 | * the filesystem overrode the initial value | |
2507 | */ | |
2508 | set_fsblocksize(devvp); | |
2509 | ||
2510 | ||
2511 | if ((error = VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, | |
2512 | (caddr_t)&blksize, 0, &context))) | |
2513 | return (error); | |
2514 | ||
2515 | mp->mnt_devblocksize = blksize; | |
2516 | ||
2517 | if (VNOP_IOCTL(devvp, DKIOCISVIRTUAL, (caddr_t)&isvirtual, 0, &context) == 0) { | |
55e303ae A |
2518 | if (isvirtual) |
2519 | mp->mnt_kern_flag |= MNTK_VIRTUALDEV; | |
2520 | } | |
2521 | ||
91447636 A |
2522 | if ((error = VNOP_IOCTL(devvp, DKIOCGETMAXBLOCKCOUNTREAD, |
2523 | (caddr_t)&readblockcnt, 0, &context))) | |
0b4e3aa0 A |
2524 | return (error); |
2525 | ||
91447636 A |
2526 | if ((error = VNOP_IOCTL(devvp, DKIOCGETMAXBLOCKCOUNTWRITE, |
2527 | (caddr_t)&writeblockcnt, 0, &context))) | |
55e303ae A |
2528 | return (error); |
2529 | ||
91447636 A |
2530 | if ((error = VNOP_IOCTL(devvp, DKIOCGETMAXBYTECOUNTREAD, |
2531 | (caddr_t)&readmaxcnt, 0, &context))) | |
55e303ae A |
2532 | return (error); |
2533 | ||
91447636 A |
2534 | if ((error = VNOP_IOCTL(devvp, DKIOCGETMAXBYTECOUNTWRITE, |
2535 | (caddr_t)&writemaxcnt, 0, &context))) | |
0b4e3aa0 A |
2536 | return (error); |
2537 | ||
91447636 A |
2538 | if ((error = VNOP_IOCTL(devvp, DKIOCGETMAXSEGMENTCOUNTREAD, |
2539 | (caddr_t)&readsegcnt, 0, &context))) | |
0b4e3aa0 A |
2540 | return (error); |
2541 | ||
91447636 A |
2542 | if ((error = VNOP_IOCTL(devvp, DKIOCGETMAXSEGMENTCOUNTWRITE, |
2543 | (caddr_t)&writesegcnt, 0, &context))) | |
55e303ae A |
2544 | return (error); |
2545 | ||
91447636 A |
2546 | if ((error = VNOP_IOCTL(devvp, DKIOCGETMAXSEGMENTBYTECOUNTREAD, |
2547 | (caddr_t)&readsegsize, 0, &context))) | |
55e303ae A |
2548 | return (error); |
2549 | ||
91447636 A |
2550 | if ((error = VNOP_IOCTL(devvp, DKIOCGETMAXSEGMENTBYTECOUNTWRITE, |
2551 | (caddr_t)&writesegsize, 0, &context))) | |
0b4e3aa0 A |
2552 | return (error); |
2553 | ||
55e303ae A |
2554 | if (readmaxcnt) |
2555 | temp = (readmaxcnt > UINT32_MAX) ? UINT32_MAX : readmaxcnt; | |
2556 | else { | |
2557 | if (readblockcnt) { | |
2558 | temp = readblockcnt * blksize; | |
2559 | temp = (temp > UINT32_MAX) ? UINT32_MAX : temp; | |
2560 | } else | |
2561 | temp = MAXPHYS; | |
2562 | } | |
91447636 | 2563 | mp->mnt_maxreadcnt = (u_int32_t)temp; |
55e303ae A |
2564 | |
2565 | if (writemaxcnt) | |
2566 | temp = (writemaxcnt > UINT32_MAX) ? UINT32_MAX : writemaxcnt; | |
2567 | else { | |
2568 | if (writeblockcnt) { | |
2569 | temp = writeblockcnt * blksize; | |
2570 | temp = (temp > UINT32_MAX) ? UINT32_MAX : temp; | |
2571 | } else | |
2572 | temp = MAXPHYS; | |
2573 | } | |
0b4e3aa0 A |
2574 | mp->mnt_maxwritecnt = (u_int32_t)temp; |
2575 | ||
55e303ae A |
2576 | if (readsegcnt) { |
2577 | temp = (readsegcnt > UINT16_MAX) ? UINT16_MAX : readsegcnt; | |
2578 | mp->mnt_segreadcnt = (u_int16_t)temp; | |
2579 | } | |
2580 | if (writesegcnt) { | |
2581 | temp = (writesegcnt > UINT16_MAX) ? UINT16_MAX : writesegcnt; | |
2582 | mp->mnt_segwritecnt = (u_int16_t)temp; | |
2583 | } | |
2584 | if (readsegsize) | |
2585 | temp = (readsegsize > UINT32_MAX) ? UINT32_MAX : readsegsize; | |
2586 | else | |
2587 | temp = mp->mnt_maxreadcnt; | |
91447636 | 2588 | mp->mnt_maxsegreadsize = (u_int32_t)temp; |
0b4e3aa0 | 2589 | |
55e303ae A |
2590 | if (writesegsize) |
2591 | temp = (writesegsize > UINT32_MAX) ? UINT32_MAX : writesegsize; | |
2592 | else | |
2593 | temp = mp->mnt_maxwritecnt; | |
91447636 | 2594 | mp->mnt_maxsegwritesize = (u_int32_t)temp; |
0b4e3aa0 | 2595 | |
55e303ae A |
2596 | return (error); |
2597 | } | |
2598 | ||
2599 | static struct klist fs_klist; | |
2600 | ||
2601 | void | |
2602 | vfs_event_init(void) | |
2603 | { | |
2604 | ||
2605 | klist_init(&fs_klist); | |
2606 | } | |
2607 | ||
2608 | void | |
91447636 | 2609 | vfs_event_signal(__unused fsid_t *fsid, u_int32_t event, __unused intptr_t data) |
55e303ae A |
2610 | { |
2611 | ||
2612 | KNOTE(&fs_klist, event); | |
2613 | } | |
2614 | ||
/*
 * return the number of mounted filesystems.
 */
static int
sysctl_vfs_getvfscnt(void)
{
	return (mount_getvfscnt());
}
2623 | ||
0b4e3aa0 | 2624 | |
91447636 A |
2625 | static int |
2626 | mount_getvfscnt(void) | |
2627 | { | |
2628 | int ret; | |
2629 | ||
2630 | mount_list_lock(); | |
2631 | ret = nummounts; | |
2632 | mount_list_unlock(); | |
55e303ae | 2633 | return (ret); |
91447636 A |
2634 | |
2635 | } | |
2636 | ||
2637 | ||
2638 | ||
2639 | static int | |
2640 | mount_fillfsids(fsid_t *fsidlst, int count) | |
2641 | { | |
2642 | struct mount *mp; | |
2643 | int actual=0; | |
2644 | ||
2645 | actual = 0; | |
2646 | mount_list_lock(); | |
2647 | TAILQ_FOREACH(mp, &mountlist, mnt_list) { | |
2648 | if (actual <= count) { | |
2649 | fsidlst[actual] = mp->mnt_vfsstat.f_fsid; | |
2650 | actual++; | |
2651 | } | |
2652 | } | |
2653 | mount_list_unlock(); | |
2654 | return (actual); | |
2655 | ||
55e303ae A |
2656 | } |
2657 | ||
2658 | /* | |
2659 | * fill in the array of fsid_t's up to a max of 'count', the actual | |
2660 | * number filled in will be set in '*actual'. If there are more fsid_t's | |
2661 | * than room in fsidlst then ENOMEM will be returned and '*actual' will | |
2662 | * have the actual count. | |
2663 | * having *actual filled out even in the error case is depended upon. | |
2664 | */ | |
2665 | static int | |
2666 | sysctl_vfs_getvfslist(fsid_t *fsidlst, int count, int *actual) | |
2667 | { | |
2668 | struct mount *mp; | |
2669 | ||
2670 | *actual = 0; | |
91447636 A |
2671 | mount_list_lock(); |
2672 | TAILQ_FOREACH(mp, &mountlist, mnt_list) { | |
55e303ae A |
2673 | (*actual)++; |
2674 | if (*actual <= count) | |
91447636 | 2675 | fsidlst[(*actual) - 1] = mp->mnt_vfsstat.f_fsid; |
55e303ae | 2676 | } |
91447636 | 2677 | mount_list_unlock(); |
55e303ae A |
2678 | return (*actual <= count ? 0 : ENOMEM); |
2679 | } | |
2680 | ||
2681 | static int | |
2682 | sysctl_vfs_vfslist SYSCTL_HANDLER_ARGS | |
2683 | { | |
2684 | int actual, error; | |
2685 | size_t space; | |
2686 | fsid_t *fsidlst; | |
2687 | ||
2688 | /* This is a readonly node. */ | |
91447636 | 2689 | if (req->newptr != USER_ADDR_NULL) |
55e303ae A |
2690 | return (EPERM); |
2691 | ||
2692 | /* they are querying us so just return the space required. */ | |
91447636 | 2693 | if (req->oldptr == USER_ADDR_NULL) { |
55e303ae A |
2694 | req->oldidx = sysctl_vfs_getvfscnt() * sizeof(fsid_t); |
2695 | return 0; | |
2696 | } | |
2697 | again: | |
2698 | /* | |
2699 | * Retrieve an accurate count of the amount of space required to copy | |
2700 | * out all the fsids in the system. | |
2701 | */ | |
2702 | space = req->oldlen; | |
2703 | req->oldlen = sysctl_vfs_getvfscnt() * sizeof(fsid_t); | |
2704 | ||
2705 | /* they didn't give us enough space. */ | |
2706 | if (space < req->oldlen) | |
2707 | return (ENOMEM); | |
2708 | ||
2709 | MALLOC(fsidlst, fsid_t *, req->oldlen, M_TEMP, M_WAITOK); | |
2710 | error = sysctl_vfs_getvfslist(fsidlst, req->oldlen / sizeof(fsid_t), | |
2711 | &actual); | |
2712 | /* | |
2713 | * If we get back ENOMEM, then another mount has been added while we | |
2714 | * slept in malloc above. If this is the case then try again. | |
2715 | */ | |
2716 | if (error == ENOMEM) { | |
2717 | FREE(fsidlst, M_TEMP); | |
2718 | req->oldlen = space; | |
2719 | goto again; | |
2720 | } | |
2721 | if (error == 0) { | |
2722 | error = SYSCTL_OUT(req, fsidlst, actual * sizeof(fsid_t)); | |
2723 | } | |
2724 | FREE(fsidlst, M_TEMP); | |
2725 | return (error); | |
2726 | } | |
2727 | ||
2728 | /* | |
2729 | * Do a sysctl by fsid. | |
2730 | */ | |
2731 | static int | |
2732 | sysctl_vfs_ctlbyfsid SYSCTL_HANDLER_ARGS | |
2733 | { | |
2734 | struct vfsidctl vc; | |
91447636 | 2735 | struct user_vfsidctl user_vc; |
55e303ae | 2736 | struct mount *mp; |
91447636 | 2737 | struct vfsstatfs *sp; |
55e303ae A |
2738 | struct proc *p; |
2739 | int *name; | |
2740 | int error, flags, namelen; | |
91447636 A |
2741 | struct vfs_context context; |
2742 | boolean_t is_64_bit; | |
55e303ae A |
2743 | |
2744 | name = arg1; | |
2745 | namelen = arg2; | |
2746 | p = req->p; | |
91447636 A |
2747 | context.vc_proc = p; |
2748 | context.vc_ucred = kauth_cred_get(); | |
2749 | is_64_bit = proc_is64bit(p); | |
55e303ae | 2750 | |
91447636 A |
2751 | if (is_64_bit) { |
2752 | error = SYSCTL_IN(req, &user_vc, sizeof(user_vc)); | |
2753 | if (error) | |
2754 | return (error); | |
2755 | if (user_vc.vc_vers != VFS_CTL_VERS1) | |
2756 | return (EINVAL); | |
2757 | mp = mount_list_lookupby_fsid(&user_vc.vc_fsid, 0, 0); | |
2758 | } | |
2759 | else { | |
2760 | error = SYSCTL_IN(req, &vc, sizeof(vc)); | |
2761 | if (error) | |
2762 | return (error); | |
2763 | if (vc.vc_vers != VFS_CTL_VERS1) | |
2764 | return (EINVAL); | |
2765 | mp = mount_list_lookupby_fsid(&vc.vc_fsid, 0, 0); | |
2766 | } | |
55e303ae A |
2767 | if (mp == NULL) |
2768 | return (ENOENT); | |
2769 | /* reset so that the fs specific code can fetch it. */ | |
2770 | req->newidx = 0; | |
2771 | /* | |
2772 | * Note if this is a VFS_CTL then we pass the actual sysctl req | |
2773 | * in for "oldp" so that the lower layer can DTRT and use the | |
2774 | * SYSCTL_IN/OUT routines. | |
2775 | */ | |
2776 | if (mp->mnt_op->vfs_sysctl != NULL) { | |
91447636 A |
2777 | if (is_64_bit) { |
2778 | if (vfs_64bitready(mp)) { | |
2779 | error = mp->mnt_op->vfs_sysctl(name, namelen, | |
2780 | CAST_USER_ADDR_T(req), | |
2781 | NULL, USER_ADDR_NULL, 0, | |
2782 | &context); | |
2783 | } | |
2784 | else { | |
2785 | error = ENOTSUP; | |
2786 | } | |
2787 | } | |
2788 | else { | |
2789 | error = mp->mnt_op->vfs_sysctl(name, namelen, | |
2790 | CAST_USER_ADDR_T(req), | |
2791 | NULL, USER_ADDR_NULL, 0, | |
2792 | &context); | |
2793 | } | |
2794 | if (error != ENOTSUP) | |
55e303ae A |
2795 | return (error); |
2796 | } | |
2797 | switch (name[0]) { | |
2798 | case VFS_CTL_UMOUNT: | |
91447636 A |
2799 | req->newidx = 0; |
2800 | if (is_64_bit) { | |
2801 | req->newptr = user_vc.vc_ptr; | |
2802 | req->newlen = (size_t)user_vc.vc_len; | |
2803 | } | |
2804 | else { | |
2805 | req->newptr = CAST_USER_ADDR_T(vc.vc_ptr); | |
2806 | req->newlen = vc.vc_len; | |
2807 | } | |
55e303ae A |
2808 | error = SYSCTL_IN(req, &flags, sizeof(flags)); |
2809 | if (error) | |
2810 | break; | |
2811 | error = safedounmount(mp, flags, p); | |
2812 | break; | |
2813 | case VFS_CTL_STATFS: | |
91447636 A |
2814 | req->newidx = 0; |
2815 | if (is_64_bit) { | |
2816 | req->newptr = user_vc.vc_ptr; | |
2817 | req->newlen = (size_t)user_vc.vc_len; | |
2818 | } | |
2819 | else { | |
2820 | req->newptr = CAST_USER_ADDR_T(vc.vc_ptr); | |
2821 | req->newlen = vc.vc_len; | |
2822 | } | |
55e303ae A |
2823 | error = SYSCTL_IN(req, &flags, sizeof(flags)); |
2824 | if (error) | |
2825 | break; | |
91447636 | 2826 | sp = &mp->mnt_vfsstat; |
55e303ae | 2827 | if (((flags & MNT_NOWAIT) == 0 || (flags & MNT_WAIT)) && |
91447636 | 2828 | (error = vfs_update_vfsstat(mp, &context))) |
55e303ae | 2829 | return (error); |
91447636 A |
2830 | if (is_64_bit) { |
2831 | struct user_statfs sfs; | |
2832 | bzero(&sfs, sizeof(sfs)); | |
2833 | sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK; | |
2834 | sfs.f_type = mp->mnt_vtable->vfc_typenum; | |
2835 | sfs.f_bsize = (user_long_t)sp->f_bsize; | |
2836 | sfs.f_iosize = (user_long_t)sp->f_iosize; | |
2837 | sfs.f_blocks = (user_long_t)sp->f_blocks; | |
2838 | sfs.f_bfree = (user_long_t)sp->f_bfree; | |
2839 | sfs.f_bavail = (user_long_t)sp->f_bavail; | |
2840 | sfs.f_files = (user_long_t)sp->f_files; | |
2841 | sfs.f_ffree = (user_long_t)sp->f_ffree; | |
2842 | sfs.f_fsid = sp->f_fsid; | |
2843 | sfs.f_owner = sp->f_owner; | |
2844 | ||
2845 | strncpy(&sfs.f_fstypename, &sp->f_fstypename, MFSNAMELEN-1); | |
2846 | strncpy(&sfs.f_mntonname, &sp->f_mntonname, MNAMELEN-1); | |
2847 | strncpy(&sfs.f_mntfromname, &sp->f_mntfromname, MNAMELEN-1); | |
2848 | ||
2849 | error = SYSCTL_OUT(req, &sfs, sizeof(sfs)); | |
2850 | } | |
2851 | else { | |
2852 | struct statfs sfs; | |
2853 | bzero(&sfs, sizeof(struct statfs)); | |
2854 | sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK; | |
2855 | sfs.f_type = mp->mnt_vtable->vfc_typenum; | |
2856 | ||
2857 | /* | |
2858 | * It's possible for there to be more than 2^^31 blocks in the filesystem, so we | |
2859 | * have to fudge the numbers here in that case. We inflate the blocksize in order | |
2860 | * to reflect the filesystem size as best we can. | |
2861 | */ | |
2862 | if (sp->f_blocks > LONG_MAX) { | |
2863 | int shift; | |
2864 | ||
2865 | /* | |
2866 | * Work out how far we have to shift the block count down to make it fit. | |
2867 | * Note that it's possible to have to shift so far that the resulting | |
2868 | * blocksize would be unreportably large. At that point, we will clip | |
2869 | * any values that don't fit. | |
2870 | * | |
2871 | * For safety's sake, we also ensure that f_iosize is never reported as | |
2872 | * being smaller than f_bsize. | |
2873 | */ | |
2874 | for (shift = 0; shift < 32; shift++) { | |
2875 | if ((sp->f_blocks >> shift) <= LONG_MAX) | |
2876 | break; | |
2877 | if ((sp->f_bsize << (shift + 1)) > LONG_MAX) | |
2878 | break; | |
2879 | } | |
2880 | #define __SHIFT_OR_CLIP(x, s) ((((x) >> (s)) > LONG_MAX) ? LONG_MAX : ((x) >> (s))) | |
2881 | sfs.f_blocks = (long)__SHIFT_OR_CLIP(sp->f_blocks, shift); | |
2882 | sfs.f_bfree = (long)__SHIFT_OR_CLIP(sp->f_bfree, shift); | |
2883 | sfs.f_bavail = (long)__SHIFT_OR_CLIP(sp->f_bavail, shift); | |
2884 | #undef __SHIFT_OR_CLIP | |
2885 | sfs.f_bsize = (long)(sp->f_bsize << shift); | |
2886 | sfs.f_iosize = lmax(sp->f_iosize, sp->f_bsize); | |
2887 | } else { | |
2888 | sfs.f_bsize = (long)sp->f_bsize; | |
2889 | sfs.f_iosize = (long)sp->f_iosize; | |
2890 | sfs.f_blocks = (long)sp->f_blocks; | |
2891 | sfs.f_bfree = (long)sp->f_bfree; | |
2892 | sfs.f_bavail = (long)sp->f_bavail; | |
2893 | } | |
2894 | sfs.f_files = (long)sp->f_files; | |
2895 | sfs.f_ffree = (long)sp->f_ffree; | |
2896 | sfs.f_fsid = sp->f_fsid; | |
2897 | sfs.f_owner = sp->f_owner; | |
2898 | ||
2899 | strncpy(&sfs.f_fstypename, &sp->f_fstypename, MFSNAMELEN-1); | |
2900 | strncpy(&sfs.f_mntonname, &sp->f_mntonname, MNAMELEN-1); | |
2901 | strncpy(&sfs.f_mntfromname, &sp->f_mntfromname, MNAMELEN-1); | |
2902 | ||
2903 | error = SYSCTL_OUT(req, &sfs, sizeof(sfs)); | |
2904 | } | |
55e303ae A |
2905 | break; |
2906 | default: | |
91447636 | 2907 | return (ENOTSUP); |
55e303ae | 2908 | } |
0b4e3aa0 A |
2909 | return (error); |
2910 | } | |
2911 | ||
55e303ae A |
2912 | static int filt_fsattach(struct knote *kn); |
2913 | static void filt_fsdetach(struct knote *kn); | |
2914 | static int filt_fsevent(struct knote *kn, long hint); | |
2915 | ||
2916 | struct filterops fs_filtops = | |
2917 | { 0, filt_fsattach, filt_fsdetach, filt_fsevent }; | |
2918 | ||
/*
 * Attach a knote to the global filesystem event list.
 * EV_CLEAR is forced so accumulated fflags reset once delivered.
 * Always succeeds.
 */
static int
filt_fsattach(struct knote *kn)
{

	kn->kn_flags |= EV_CLEAR;
	KNOTE_ATTACH(&fs_klist, kn);
	return (0);
}
2927 | ||
/* Detach a knote from the global filesystem event list. */
static void
filt_fsdetach(struct knote *kn)
{

	KNOTE_DETACH(&fs_klist, kn);
}
2934 | ||
2935 | static int | |
2936 | filt_fsevent(struct knote *kn, long hint) | |
2937 | { | |
2938 | ||
2939 | kn->kn_fflags |= hint; | |
2940 | return (kn->kn_fflags != 0); | |
2941 | } | |
2942 | ||
2943 | static int | |
2944 | sysctl_vfs_noremotehang SYSCTL_HANDLER_ARGS | |
2945 | { | |
2946 | int out, error; | |
2947 | pid_t pid; | |
2948 | size_t space; | |
2949 | struct proc *p; | |
2950 | ||
2951 | /* We need a pid. */ | |
91447636 | 2952 | if (req->newptr == USER_ADDR_NULL) |
55e303ae A |
2953 | return (EINVAL); |
2954 | ||
2955 | error = SYSCTL_IN(req, &pid, sizeof(pid)); | |
2956 | if (error) | |
2957 | return (error); | |
2958 | ||
2959 | p = pfind(pid < 0 ? -pid : pid); | |
2960 | if (p == NULL) | |
2961 | return (ESRCH); | |
2962 | ||
2963 | /* | |
2964 | * Fetching the value is ok, but we only fetch if the old | |
2965 | * pointer is given. | |
2966 | */ | |
91447636 | 2967 | if (req->oldptr != USER_ADDR_NULL) { |
55e303ae A |
2968 | out = !((p->p_flag & P_NOREMOTEHANG) == 0); |
2969 | error = SYSCTL_OUT(req, &out, sizeof(out)); | |
2970 | return (error); | |
2971 | } | |
2972 | ||
89b3af67 | 2973 | /* XXX req->p->p_ucred -> kauth_cred_get() - current unsafe ??? */ |
55e303ae A |
2974 | /* cansignal offers us enough security. */ |
2975 | if (p != req->p && suser(req->p->p_ucred, &req->p->p_acflag) != 0) | |
2976 | return (EPERM); | |
2977 | ||
2978 | if (pid < 0) | |
2979 | p->p_flag &= ~P_NOREMOTEHANG; | |
2980 | else | |
2981 | p->p_flag |= P_NOREMOTEHANG; | |
2982 | ||
2983 | return (0); | |
2984 | } | |
/* the vfs.generic. branch. */
SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RW, 0, "vfs generic hinge");
/* retrieve a list of mounted filesystem fsid_t */
SYSCTL_PROC(_vfs_generic, OID_AUTO, vfsidlist, CTLFLAG_RD,
    0, 0, sysctl_vfs_vfslist, "S,fsid", "List of mounted filesystem ids");
/* perform operations on filesystem via fsid_t */
SYSCTL_NODE(_vfs_generic, OID_AUTO, ctlbyfsid, CTLFLAG_RW,
    sysctl_vfs_ctlbyfsid, "ctlbyfsid");
/* set/clear the per-process P_NOREMOTEHANG flag */
SYSCTL_PROC(_vfs_generic, OID_AUTO, noremotehang, CTLFLAG_RW,
    0, 0, sysctl_vfs_noremotehang, "I", "noremotehang");
91447636 A |
2995 | |
2996 | ||
2997 | int num_reusedvnodes=0; | |
55e303ae | 2998 | |
91447636 A |
/*
 * new_vnode:
 *	obtain a vnode for the caller, either by allocating a fresh one
 *	(while numvnodes < desiredvnodes) or by stealing/reclaiming the
 *	oldest reusable vnode from the head of the free list.
 *
 *	On success returns 0 with *vpp pointing at an unlocked vnode that
 *	is off all lists; on table-full exhaustion returns ENFILE with
 *	*vpp set to 0.  May sleep (malloc, IOSleep, reclaim).
 */
static int
new_vnode(vnode_t *vpp)
{
	vnode_t	vp;
	int retries = 0;				/* retry incase of tablefull */
	int vpid;
	struct timespec ts;

retry:
	vnode_list_lock();

	if ( !TAILQ_EMPTY(&vnode_free_list)) {
		/*
		 * Pick the first vp for possible reuse
		 */
		vp = TAILQ_FIRST(&vnode_free_list);

		/* an already-dead vnode can be stolen without growing the table */
		if (vp->v_lflag & VL_DEAD)
			goto steal_this_vp;
	} else
		vp = NULL;

	/*
	 * we're either empty, or the next guy on the
	 * list is a valid vnode... if we're under the
	 * limit, we'll create a new vnode
	 */
	if (numvnodes < desiredvnodes) {
		numvnodes++;
		vnode_list_unlock();
		MALLOC_ZONE(vp, struct vnode *, sizeof *vp, M_VNODE, M_WAITOK);
		bzero((char *)vp, sizeof *vp);
		VLISTNONE(vp);		/* avoid double queue removal */
		lck_mtx_init(&vp->v_lock, vnode_lck_grp, vnode_lck_attr);

		/* seed v_id from the clock so stale vid lookups fail */
		nanouptime(&ts);
		vp->v_id = ts.tv_nsec;
		vp->v_flag = VSTANDARD;

		goto done;
	}
	if (vp == NULL) {
		/*
		 * we've reached the system imposed maximum number of vnodes
		 * but there isn't a single one available
		 * wait a bit and then retry... if we can't get a vnode
		 * after 100 retries, than log a complaint
		 */
		if (++retries <= 100) {
			vnode_list_unlock();
			IOSleep(1);
			goto retry;
		}

		vnode_list_unlock();
		tablefull("vnode");
		log(LOG_EMERG, "%d desired, %d numvnodes, "
			"%d free, %d inactive\n",
			desiredvnodes, numvnodes, freevnodes, inactivevnodes);
		*vpp = 0;
		return (ENFILE);
	}
steal_this_vp:
	/* remember the vid so we can detect a race after dropping the list lock */
	vpid = vp->v_id;

	VREMFREE("new_vnode", vp);
	VLISTNONE(vp);

	vnode_list_unlock();
	vnode_lock(vp);

	/*
	 * We could wait for the vnode_lock after removing the vp from the freelist
	 * and the vid is bumped only at the very end of reclaim. So it is  possible
	 * that we are looking at a vnode that is being terminated. If so skip it.
	 */
	if ((vpid != vp->v_id) || (vp->v_usecount != 0) || (vp->v_iocount != 0) ||
			VONLIST(vp) || (vp->v_lflag & VL_TERMINATE)) {
		/*
		 * we lost the race between dropping the list lock
		 * and picking up the vnode_lock... someone else
		 * used this vnode and it is now in a new state
		 * so we need to go back and try again
		 */
		vnode_unlock(vp);
		goto retry;
	}
	if ( (vp->v_lflag & (VL_NEEDINACTIVE | VL_MARKTERM)) == VL_NEEDINACTIVE ) {
		/*
		 * we did a vnode_rele_ext that asked for
		 * us not to reenter the filesystem during
		 * the release even though VL_NEEDINACTIVE was
		 * set... we'll do it here by doing a
		 * vnode_get/vnode_put
		 *
		 * pick up an iocount so that we can call
		 * vnode_put and drive the VNOP_INACTIVE...
		 * vnode_put will either leave us off
		 * the freelist if a new ref comes in,
		 * or put us back on the end of the freelist
		 * or recycle us if we were marked for termination...
		 * so we'll just go grab a new candidate
		 */
		vp->v_iocount++;
#ifdef JOE_DEBUG
		record_vp(vp, 1);
#endif
		vnode_put_locked(vp);
		vnode_unlock(vp);
		goto retry;
	}
	OSAddAtomic(1, &num_reusedvnodes);

	/* Checks for anyone racing us for recycle */
	if (vp->v_type != VBAD) {
		if (vp->v_lflag & VL_DEAD)
			panic("new_vnode: the vnode is VL_DEAD but not VBAD");

		/* fully reclaim the old identity before handing the vnode out */
		(void)vnode_reclaim_internal(vp, 1, 1);

		if ((VONLIST(vp)))
			panic("new_vnode: vp on list ");
		if (vp->v_usecount || vp->v_iocount || vp->v_kusecount ||
		    (vp->v_lflag & (VNAMED_UBC | VNAMED_MOUNT | VNAMED_FSHASH)))
			panic("new_vnode: free vnode still referenced\n");
		if ((vp->v_mntvnodes.tqe_prev != 0) && (vp->v_mntvnodes.tqe_next != 0))
			panic("new_vnode: vnode seems to be on mount list ");
		if ( !LIST_EMPTY(&vp->v_nclinks) || !LIST_EMPTY(&vp->v_ncchildren))
			panic("new_vnode: vnode still hooked into the name cache");
	}
	/* release any interim-fsnode lock state left over from the prior owner */
	if (vp->v_unsafefs) {
		lck_mtx_destroy(&vp->v_unsafefs->fsnodelock, vnode_lck_grp);
		FREE_ZONE((void *)vp->v_unsafefs, sizeof(struct unsafe_fsnode), M_UNSAFEFS);
		vp->v_unsafefs = (struct unsafe_fsnode *)NULL;
	}
	/* reset per-vnode state for the new identity */
	vp->v_lflag = 0;
	vp->v_writecount = 0;
	vp->v_references = 0;
	vp->v_iterblkflags = 0;
	vp->v_flag = VSTANDARD;
	/* vbad vnodes can point to dead_mountp */
	vp->v_mount = 0;
	vp->v_defer_reclaimlist = (vnode_t)0;

	vnode_unlock(vp);
done:
	*vpp = vp;

	return (0);
}
3149 | ||
/* acquire the per-vnode mutex */
void
vnode_lock(vnode_t vp)
{
	lck_mtx_lock(&vp->v_lock);
}
3155 | ||
/* release the per-vnode mutex */
void
vnode_unlock(vnode_t vp)
{
	lck_mtx_unlock(&vp->v_lock);
}
3161 | ||
3162 | ||
3163 | ||
/*
 * Take an iocount on a vnode.
 *
 * Fails with ENOENT only when the vnode is terminating or dead AND has
 * no existing iocount; an already-held vnode can still be re-gotten
 * even while dying (the holder is presumed to know what it is doing).
 */
int
vnode_get(struct vnode *vp)
{
        vnode_lock(vp);

	if ( (vp->v_iocount == 0) && (vp->v_lflag & (VL_TERMINATE | VL_DEAD)) ) {
	        vnode_unlock(vp);
		return(ENOENT);	
	}
	vp->v_iocount++;
#ifdef JOE_DEBUG
	record_vp(vp, 1);
#endif
	vnode_unlock(vp);

	return(0);	
}
3181 | ||
/*
 * Take an iocount, but only if the vnode's identity (vid) still matches --
 * fails on dead vnodes and on stale vid (the vnode was recycled).
 */
int
vnode_getwithvid(vnode_t vp, int vid)
{
        return(vget_internal(vp, vid, ( VNODE_NODEAD| VNODE_WITHID)));
}
3187 | ||
/* Take an iocount on a vnode the caller already holds a reference to. */
int
vnode_getwithref(vnode_t vp)
{
        return(vget_internal(vp, 0, 0));
}
3193 | ||
3194 | ||
3195 | int | |
3196 | vnode_put(vnode_t vp) | |
3197 | { | |
3198 | int retval; | |
3199 | ||
3200 | vnode_lock(vp); | |
3201 | retval = vnode_put_locked(vp); | |
3202 | vnode_unlock(vp); | |
3203 | ||
3204 | return(retval); | |
3205 | } | |
3206 | ||
3207 | int | |
3208 | vnode_put_locked(vnode_t vp) | |
3209 | { | |
3210 | struct vfs_context context; | |
3211 | ||
3212 | retry: | |
3213 | if (vp->v_iocount < 1) | |
3214 | panic("vnode_put(%x): iocount < 1", vp); | |
3215 | ||
3216 | if ((vp->v_usecount > 0) || (vp->v_iocount > 1)) { | |
3217 | vnode_dropiocount(vp, 1); | |
3218 | return(0); | |
3219 | } | |
3220 | if ((vp->v_lflag & (VL_MARKTERM | VL_TERMINATE | VL_DEAD | VL_NEEDINACTIVE)) == VL_NEEDINACTIVE) { | |
3221 | ||
3222 | vp->v_lflag &= ~VL_NEEDINACTIVE; | |
3223 | vnode_unlock(vp); | |
3224 | ||
3225 | context.vc_proc = current_proc(); | |
3226 | context.vc_ucred = kauth_cred_get(); | |
3227 | VNOP_INACTIVE(vp, &context); | |
3228 | ||
3229 | vnode_lock(vp); | |
3230 | /* | |
3231 | * because we had to drop the vnode lock before calling | |
3232 | * VNOP_INACTIVE, the state of this vnode may have changed... | |
3233 | * we may pick up both VL_MARTERM and either | |
3234 | * an iocount or a usecount while in the VNOP_INACTIVE call | |
3235 | * we don't want to call vnode_reclaim_internal on a vnode | |
3236 | * that has active references on it... so loop back around | |
3237 | * and reevaluate the state | |
3238 | */ | |
3239 | goto retry; | |
3240 | } | |
3241 | vp->v_lflag &= ~VL_NEEDINACTIVE; | |
3242 | ||
3243 | if ((vp->v_lflag & (VL_MARKTERM | VL_TERMINATE | VL_DEAD)) == VL_MARKTERM) | |
3244 | vnode_reclaim_internal(vp, 1, 0); | |
3245 | ||
3246 | vnode_dropiocount(vp, 1); | |
3247 | vnode_list_add(vp); | |
3248 | ||
3249 | return(0); | |
3250 | } | |
3251 | ||
/* is vnode_t in use by others?  (usecount above refcnt, or UBC use for VREG) */
int
vnode_isinuse(vnode_t vp, int refcnt)
{
	return(vnode_isinuse_locked(vp, refcnt, 0));
}
3258 | ||
3259 | ||
3260 | static int | |
3261 | vnode_isinuse_locked(vnode_t vp, int refcnt, int locked) | |
3262 | { | |
3263 | int retval = 0; | |
3264 | ||
3265 | if (!locked) | |
3266 | vnode_lock(vp); | |
3267 | if ((vp->v_type != VREG) && (vp->v_usecount > refcnt)) { | |
3268 | retval = 1; | |
3269 | goto out; | |
3270 | } | |
3271 | if (vp->v_type == VREG) { | |
3272 | retval = ubc_isinuse_locked(vp, refcnt, 1); | |
3273 | } | |
3274 | ||
3275 | out: | |
3276 | if (!locked) | |
3277 | vnode_unlock(vp); | |
3278 | return(retval); | |
3279 | } | |
3280 | ||
3281 | ||
3282 | /* resume vnode_t */ | |
3283 | errno_t | |
3284 | vnode_resume(vnode_t vp) | |
3285 | { | |
3286 | ||
3287 | vnode_lock(vp); | |
3288 | ||
3289 | if (vp->v_owner == current_thread()) { | |
3290 | vp->v_lflag &= ~VL_SUSPENDED; | |
3291 | vp->v_owner = 0; | |
3292 | vnode_unlock(vp); | |
3293 | wakeup(&vp->v_iocount); | |
3294 | } else | |
3295 | vnode_unlock(vp); | |
3296 | ||
3297 | return(0); | |
3298 | } | |
3299 | ||
3300 | static errno_t | |
3301 | vnode_drain(vnode_t vp) | |
3302 | { | |
3303 | ||
3304 | if (vp->v_lflag & VL_DRAIN) { | |
3305 | panic("vnode_drain: recursuve drain"); | |
3306 | return(ENOENT); | |
3307 | } | |
3308 | vp->v_lflag |= VL_DRAIN; | |
3309 | vp->v_owner = current_thread(); | |
3310 | ||
3311 | while (vp->v_iocount > 1) | |
3312 | msleep(&vp->v_iocount, &vp->v_lock, PVFS, "vnode_drain", 0); | |
3313 | return(0); | |
3314 | } | |
3315 | ||
3316 | ||
/*
 * if the number of recent references via vnode_getwithvid or vnode_getwithref
 * exceeds this threshold, then 'UN-AGE' the vnode by removing it from
 * the LRU list if it's currently on it... once the iocount and usecount both drop
 * to 0, it will get put back on the end of the list, effectively making it younger.
 * This allows us to keep actively referenced vnodes in the list without having
 * to constantly remove and add to the list each time a vnode w/o a usecount is
 * referenced, which costs us taking and dropping a global lock twice.
 */
3326 | #define UNAGE_THRESHHOLD 10 | |
3327 | ||
/*
 * Core iocount acquisition.
 *
 * Loops until the vnode is in a state where an iocount may be taken:
 *  - fails with ENOENT for deadfs vnodes when VNODE_NODEAD is set,
 *    for suspended vnodes when VNODE_NOSUSPEND is set, or when the
 *    caller-supplied vid no longer matches (vnode was recycled);
 *  - sleeps while the vnode is draining/suspended/terminating, unless
 *    the current thread is the owner of that state;
 *  - "un-ages" the vnode off the LRU once it accumulates
 *    UNAGE_THRESHHOLD recent references (see comment above).
 *
 * `locked` indicates the caller already holds the vnode lock.
 */
errno_t
vnode_getiocount(vnode_t vp, int locked, int vid, int vflags)
{
	int nodead = vflags & VNODE_NODEAD;
	int nosusp = vflags & VNODE_NOSUSPEND;

	if (!locked)
		vnode_lock(vp);

	for (;;) {
		/*
		 * if it is a dead vnode with deadfs
		 */
	        if (nodead && (vp->v_lflag & VL_DEAD) && ((vp->v_type == VBAD) || (vp->v_data == 0))) {
			if (!locked)
				vnode_unlock(vp);
			return(ENOENT);
		}
		/*
		 * will return VL_DEAD ones
		 */
		if ((vp->v_lflag & (VL_SUSPENDED | VL_DRAIN | VL_TERMINATE)) == 0 ) {
			break;
		}
		/*
		 * if suspended vnodes are to be failed
		 */
		if (nosusp && (vp->v_lflag & VL_SUSPENDED)) {
			if (!locked)
				vnode_unlock(vp);
			return(ENOENT);
		}
		/*
		 * if you are the owner of drain/suspend/termination , can acquire iocount
		 * check for VL_TERMINATE; it does not set owner
		 */
		if ((vp->v_lflag & (VL_DRAIN | VL_SUSPENDED | VL_TERMINATE)) &&
		    (vp->v_owner == current_thread())) {
		        break;
		}
		if (vp->v_lflag & VL_TERMINATE) {
			/* terminating vnodes signal &v_lflag; others signal &v_iocount */
			vp->v_lflag |= VL_TERMWANT;

			msleep(&vp->v_lflag,   &vp->v_lock, PVFS, "vnode getiocount", 0);
		} else
			msleep(&vp->v_iocount, &vp->v_lock, PVFS, "vnode_getiocount", 0);
	}
	/* vid mismatch after any sleeps means the vnode was recycled under us */
	if (vid != vp->v_id) {
		if (!locked)
			vnode_unlock(vp);
		return(ENOENT);
	}
	if (++vp->v_references >= UNAGE_THRESHHOLD) {
	        vp->v_references = 0;
		vnode_list_remove(vp);
	}
	vp->v_iocount++;
#ifdef JOE_DEBUG
	record_vp(vp, 1);
#endif
	if (!locked)
		vnode_unlock(vp);
	return(0);	
}
3392 | ||
3393 | static void | |
3394 | vnode_dropiocount (vnode_t vp, int locked) | |
3395 | { | |
3396 | if (!locked) | |
3397 | vnode_lock(vp); | |
3398 | if (vp->v_iocount < 1) | |
3399 | panic("vnode_dropiocount(%x): v_iocount < 1", vp); | |
3400 | ||
3401 | vp->v_iocount--; | |
3402 | #ifdef JOE_DEBUG | |
3403 | record_vp(vp, -1); | |
3404 | #endif | |
3405 | if ((vp->v_lflag & (VL_DRAIN | VL_SUSPENDED)) && (vp->v_iocount <= 1)) | |
3406 | wakeup(&vp->v_iocount); | |
3407 | ||
3408 | if (!locked) | |
3409 | vnode_unlock(vp); | |
3410 | } | |
3411 | ||
3412 | ||
/* Public entry point: reclaim a vnode (unlocked, no reuse by caller). */
void
vnode_reclaim(struct vnode * vp)
{
	vnode_reclaim_internal(vp, 0, 0);
}
3418 | ||
/*
 * Reclaim a vnode: mark it VL_TERMINATE, drain other iocounts, clean it
 * via vgone(), bump v_id (so stale vnode_getwithvid callers fail), and
 * reset it to VBAD.  Panics if any state (v_data, pending I/O, ubcinfo,
 * parent, name) survives the clean.
 *
 * `locked` - caller already holds the vnode lock.
 * `reuse`  - caller will reuse the vnode itself (e.g. new_vnode), so it
 *            is NOT put back on the free list here.
 */
__private_extern__
void
vnode_reclaim_internal(struct vnode * vp, int locked, int reuse)
{
	int isfifo = 0;

	if (!locked)
		vnode_lock(vp);

	if (vp->v_lflag & VL_TERMINATE) {
		panic("vnode reclaim in progress");
	}
	vp->v_lflag |= VL_TERMINATE;

	if (vnode_drain(vp)) {
		panic("vnode drain failed");
		vnode_unlock(vp);
		return;
	}
	isfifo = (vp->v_type == VFIFO);

	if (vp->v_type != VBAD)
		vgone(vp);		/* clean and reclaim the vnode */

	/*
	 * give the vnode a new identity so
	 * that vnode_getwithvid will fail
	 * on any stale cache accesses
	 */
	vp->v_id++;
	if (isfifo) {
		struct fifoinfo * fip;

		fip = vp->v_fifoinfo;
		vp->v_fifoinfo = NULL;
		FREE(fip, M_TEMP);
	}

	vp->v_type = VBAD;

	/* sanity: vgone() must have detached everything */
	if (vp->v_data)
		panic("vnode_reclaim_internal: cleaned vnode isn't");
	if (vp->v_numoutput)
		panic("vnode_reclaim_internal: Clean vnode has pending I/O's");
	if (UBCINFOEXISTS(vp))
		panic("vnode_reclaim_internal: ubcinfo not cleaned");
	if (vp->v_parent)
	        panic("vnode_reclaim_internal: vparent not removed");
	if (vp->v_name)
	        panic("vnode_reclaim_internal: vname not removed");

	vp->v_socket = 0;

	vp->v_lflag &= ~VL_TERMINATE;
	vp->v_lflag &= ~VL_DRAIN;
	vp->v_owner = 0;

	/* wake anyone who slept in vnode_getiocount waiting for termination */
	if (vp->v_lflag & VL_TERMWANT) {
		vp->v_lflag &= ~VL_TERMWANT;
		wakeup(&vp->v_lflag);
	}
	if (!reuse && vp->v_usecount == 0)
	        vnode_list_add(vp);
	if (!locked)
	        vnode_unlock(vp);
}
3485 | ||
/* USAGE:
 * The following api creates a vnode and associates all the parameter specified in vnode_fsparam
 * structure and returns a vnode handle with a reference.  device aliasing is handled here so checkalias
 * is obsoleted by this.
 * vnode_create(int flavor, size_t size, void * param, vnode_t *vp)
 *
 * Returns 0 with *vpp holding an iocounted vnode, or an errno.
 * Only VNCREATE_FLAVOR/VCREATESIZE with a non-NULL param is accepted
 * (anything else returns EINVAL).
 */
int 
vnode_create(int flavor, size_t size, void *data, vnode_t *vpp)
{
	int error;
	int insert = 1;
	vnode_t vp;
	vnode_t nvp;
	vnode_t dvp;
	struct  componentname *cnp;
	struct vnode_fsparam *param = (struct vnode_fsparam *)data;
	
	if (flavor == VNCREATE_FLAVOR && (size == VCREATESIZE) && param) {
		if ( (error = new_vnode(&vp)) ) {
			return(error);
		} else {
			dvp = param->vnfs_dvp;
			cnp = param->vnfs_cnp;

			/* stamp the FS-provided identity onto the fresh vnode */
			vp->v_op = param->vnfs_vops;
			vp->v_type = param->vnfs_vtype;
			vp->v_data = param->vnfs_fsnode;
			vp->v_iocount = 1;

			if (param->vnfs_markroot)
				vp->v_flag |= VROOT;
			if (param->vnfs_marksystem)
				vp->v_flag |= VSYSTEM;
			else if (vp->v_type == VREG) {
				/*
				 * only non SYSTEM vp
				 */
				error = ubc_info_init_withsize(vp, param->vnfs_filesize);
				if (error) {
#ifdef JOE_DEBUG
					record_vp(vp, 1);
#endif
					/* undo the setup and hand the vnode to deadfs */
					vp->v_mount = 0;
					vp->v_op = dead_vnodeop_p;
					vp->v_tag = VT_NON;
					vp->v_data = NULL;
					vp->v_type = VBAD;
					vp->v_lflag |= VL_DEAD;

					vnode_put(vp);
					return(error);
				}
			}
#ifdef JOE_DEBUG
			record_vp(vp, 1);
#endif
			if (vp->v_type == VCHR || vp->v_type == VBLK) {

				/* device nodes: fold duplicates onto a single alias vnode */
				if ( (nvp = checkalias(vp, param->vnfs_rdev)) ) {
					/*
					 * if checkalias returns a vnode, it will be locked
					 *
					 * first get rid of the unneeded vnode we acquired
					 */
					vp->v_data = NULL;
					vp->v_op = spec_vnodeop_p;
					vp->v_type = VBAD;
					vp->v_lflag = VL_DEAD;
					vp->v_data = NULL; 
					vp->v_tag = VT_NON;
					vnode_put(vp);

					/*
					 * switch to aliased vnode and finish
					 * preparing it
					 */
					vp = nvp;

					vclean(vp, 0, current_proc());
					vp->v_op = param->vnfs_vops;
					vp->v_type = param->vnfs_vtype;
					vp->v_data = param->vnfs_fsnode;
					vp->v_lflag = 0;
					vp->v_mount = NULL;
					insmntque(vp, param->vnfs_mp);
					insert = 0;
					vnode_unlock(vp);
				}
			}

			if (vp->v_type == VFIFO) {
				struct fifoinfo *fip;

				MALLOC(fip, struct fifoinfo *,
					sizeof(*fip), M_TEMP, M_WAITOK);
				bzero(fip, sizeof(struct fifoinfo ));
				vp->v_fifoinfo = fip;
			}
			/* The file systems usually pass the address of the location
			 * where they store the vnode pointer. When we add the vnode to the mount
			 * point and name cache they are discoverable. So the file system node
			 * will have the connection to vnode setup by then
			 */
			*vpp = vp;

			if (param->vnfs_mp) {
					if (param->vnfs_mp->mnt_kern_flag & MNTK_LOCK_LOCAL)
						vp->v_flag |= VLOCKLOCAL;
			        if (insert) {
					/*
					 * enter in mount vnode list
					 */
					insmntque(vp, param->vnfs_mp);
				}
#ifdef INTERIM_FSNODE_LOCK
				if (param->vnfs_mp->mnt_vtable->vfc_threadsafe == 0) {
					/* non-threadsafe FS: give the vnode its funnel-era lock state */
				        MALLOC_ZONE(vp->v_unsafefs, struct unsafe_fsnode *,
						    sizeof(struct unsafe_fsnode), M_UNSAFEFS, M_WAITOK);
					vp->v_unsafefs->fsnode_count = 0;
					vp->v_unsafefs->fsnodeowner  = (void *)NULL;
					lck_mtx_init(&vp->v_unsafefs->fsnodelock, vnode_lck_grp, vnode_lck_attr);
				}
#endif /* INTERIM_FSNODE_LOCK */
			}
			if (dvp && vnode_ref(dvp) == 0) {
				vp->v_parent = dvp;
			}
			if (cnp) {
				if (dvp && ((param->vnfs_flags & (VNFS_NOCACHE | VNFS_CANTCACHE)) == 0)) {
					/*
					 * enter into name cache
					 * we've got the info to enter it into the name cache now
					 */
					cache_enter(dvp, vp, cnp);
				}
				vp->v_name = vfs_addname(cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, 0);
			}
			if ((param->vnfs_flags & VNFS_CANTCACHE) == 0) {
				/*
				 * this vnode is being created as cacheable in the name cache
				 * this allows us to re-enter it in the cache
				 */
				vp->v_flag |= VNCACHEABLE;
			}
			if ((vp->v_flag & VSYSTEM) && (vp->v_type != VREG))
				panic("incorrect vnode setup");

			return(0);
		}
	}
	return (EINVAL);
}
3638 | ||
/*
 * Mark the vnode as held by the filesystem's hash (VNAMED_FSHASH).
 * Panics if the reference already exists or the vnode is on the free
 * list.  Always returns 0.
 */
int
vnode_addfsref(vnode_t vp)
{
	vnode_lock(vp);
	if (vp->v_lflag & VNAMED_FSHASH)
		panic("add_fsref: vp already has named reference");
	if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb))
	        panic("addfsref: vp on the free list\n");
	vp->v_lflag |= VNAMED_FSHASH;
	vnode_unlock(vp);
	return(0);

}
/*
 * Clear the filesystem-hash named reference (VNAMED_FSHASH).
 * Panics if no such reference exists.  Always returns 0.
 */
int
vnode_removefsref(vnode_t vp)
{
	vnode_lock(vp);
	if ((vp->v_lflag & VNAMED_FSHASH) == 0)
		panic("remove_fsref: no named reference");
	vp->v_lflag &= ~VNAMED_FSHASH;
	vnode_unlock(vp);
	return(0);

}
3663 | ||
3664 | ||
3665 | int | |
3666 | vfs_iterate(__unused int flags, int (*callout)(mount_t, void *), void *arg) | |
3667 | { | |
3668 | mount_t mp; | |
3669 | int ret = 0; | |
3670 | fsid_t * fsid_list; | |
3671 | int count, actualcount, i; | |
3672 | void * allocmem; | |
3673 | ||
3674 | count = mount_getvfscnt(); | |
3675 | count += 10; | |
3676 | ||
3677 | fsid_list = (fsid_t *)kalloc(count * sizeof(fsid_t)); | |
3678 | allocmem = (void *)fsid_list; | |
3679 | ||
3680 | actualcount = mount_fillfsids(fsid_list, count); | |
3681 | ||
3682 | for (i=0; i< actualcount; i++) { | |
3683 | ||
3684 | /* obtain the mount point with iteration reference */ | |
3685 | mp = mount_list_lookupby_fsid(&fsid_list[i], 0, 1); | |
3686 | ||
3687 | if(mp == (struct mount *)0) | |
3688 | continue; | |
3689 | mount_lock(mp); | |
3690 | if (mp->mnt_lflag & (MNT_LDEAD | MNT_LUNMOUNT)) { | |
3691 | mount_unlock(mp); | |
3692 | mount_iterdrop(mp); | |
3693 | continue; | |
3694 | ||
3695 | } | |
3696 | mount_unlock(mp); | |
3697 | ||
3698 | /* iterate over all the vnodes */ | |
3699 | ret = callout(mp, arg); | |
3700 | ||
3701 | mount_iterdrop(mp); | |
3702 | ||
3703 | switch (ret) { | |
3704 | case VFS_RETURNED: | |
3705 | case VFS_RETURNED_DONE: | |
3706 | if (ret == VFS_RETURNED_DONE) { | |
3707 | ret = 0; | |
3708 | goto out; | |
3709 | } | |
3710 | break; | |
3711 | ||
3712 | case VFS_CLAIMED_DONE: | |
3713 | ret = 0; | |
3714 | goto out; | |
3715 | case VFS_CLAIMED: | |
3716 | default: | |
3717 | break; | |
3718 | } | |
3719 | ret = 0; | |
3720 | } | |
3721 | ||
3722 | out: | |
3723 | kfree(allocmem, (count * sizeof(fsid_t))); | |
3724 | return (ret); | |
3725 | } | |
3726 | ||
3727 | /* | |
3728 | * Update the vfsstatfs structure in the mountpoint. | |
3729 | */ | |
3730 | int | |
3731 | vfs_update_vfsstat(mount_t mp, vfs_context_t ctx) | |
3732 | { | |
3733 | struct vfs_attr va; | |
3734 | int error; | |
3735 | ||
3736 | /* | |
3737 | * Request the attributes we want to propagate into | |
3738 | * the per-mount vfsstat structure. | |
3739 | */ | |
3740 | VFSATTR_INIT(&va); | |
3741 | VFSATTR_WANTED(&va, f_iosize); | |
3742 | VFSATTR_WANTED(&va, f_blocks); | |
3743 | VFSATTR_WANTED(&va, f_bfree); | |
3744 | VFSATTR_WANTED(&va, f_bavail); | |
3745 | VFSATTR_WANTED(&va, f_bused); | |
3746 | VFSATTR_WANTED(&va, f_files); | |
3747 | VFSATTR_WANTED(&va, f_ffree); | |
3748 | VFSATTR_WANTED(&va, f_bsize); | |
3749 | VFSATTR_WANTED(&va, f_fssubtype); | |
3750 | if ((error = vfs_getattr(mp, &va, ctx)) != 0) { | |
3751 | KAUTH_DEBUG("STAT - filesystem returned error %d", error); | |
3752 | return(error); | |
3753 | } | |
3754 | ||
3755 | /* | |
3756 | * Unpack into the per-mount structure. | |
3757 | * | |
3758 | * We only overwrite these fields, which are likely to change: | |
3759 | * f_blocks | |
3760 | * f_bfree | |
3761 | * f_bavail | |
3762 | * f_bused | |
3763 | * f_files | |
3764 | * f_ffree | |
3765 | * | |
3766 | * And these which are not, but which the FS has no other way | |
3767 | * of providing to us: | |
3768 | * f_bsize | |
3769 | * f_iosize | |
3770 | * f_fssubtype | |
3771 | * | |
3772 | */ | |
3773 | if (VFSATTR_IS_SUPPORTED(&va, f_bsize)) { | |
3774 | mp->mnt_vfsstat.f_bsize = va.f_bsize; | |
3775 | } else { | |
3776 | mp->mnt_vfsstat.f_bsize = mp->mnt_devblocksize; /* default from the device block size */ | |
3777 | } | |
3778 | if (VFSATTR_IS_SUPPORTED(&va, f_iosize)) { | |
3779 | mp->mnt_vfsstat.f_iosize = va.f_iosize; | |
3780 | } else { | |
3781 | mp->mnt_vfsstat.f_iosize = 1024 * 1024; /* 1MB sensible I/O size */ | |
3782 | } | |
3783 | if (VFSATTR_IS_SUPPORTED(&va, f_blocks)) | |
3784 | mp->mnt_vfsstat.f_blocks = va.f_blocks; | |
3785 | if (VFSATTR_IS_SUPPORTED(&va, f_bfree)) | |
3786 | mp->mnt_vfsstat.f_bfree = va.f_bfree; | |
3787 | if (VFSATTR_IS_SUPPORTED(&va, f_bavail)) | |
3788 | mp->mnt_vfsstat.f_bavail = va.f_bavail; | |
3789 | if (VFSATTR_IS_SUPPORTED(&va, f_bused)) | |
3790 | mp->mnt_vfsstat.f_bused = va.f_bused; | |
3791 | if (VFSATTR_IS_SUPPORTED(&va, f_files)) | |
3792 | mp->mnt_vfsstat.f_files = va.f_files; | |
3793 | if (VFSATTR_IS_SUPPORTED(&va, f_ffree)) | |
3794 | mp->mnt_vfsstat.f_ffree = va.f_ffree; | |
3795 | ||
3796 | /* this is unlikely to change, but has to be queried for */ | |
3797 | if (VFSATTR_IS_SUPPORTED(&va, f_fssubtype)) | |
3798 | mp->mnt_vfsstat.f_fssubtype = va.f_fssubtype; | |
3799 | ||
3800 | return(0); | |
3801 | } | |
3802 | ||
3803 | void | |
3804 | mount_list_add(mount_t mp) | |
3805 | { | |
3806 | mount_list_lock(); | |
3807 | TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list); | |
3808 | nummounts++; | |
3809 | mount_list_unlock(); | |
3810 | } | |
3811 | ||
3812 | void | |
3813 | mount_list_remove(mount_t mp) | |
3814 | { | |
3815 | mount_list_lock(); | |
3816 | TAILQ_REMOVE(&mountlist, mp, mnt_list); | |
3817 | nummounts--; | |
3818 | mp->mnt_list.tqe_next = 0; | |
3819 | mp->mnt_list.tqe_prev = 0; | |
3820 | mount_list_unlock(); | |
3821 | } | |
3822 | ||
/*
 * Look up a mount point whose fsid's first word matches 'volfs_id'.
 *
 * If 'withref' is set, an iteration reference is taken under the mount
 * list lock and then converted (non-blocking) to a busy reference after
 * the lock is dropped; the iteration reference is always dropped again.
 *
 * Returns the mount, or (mount_t)0 if no valid match exists or the
 * references could not be obtained.
 */
mount_t
mount_lookupby_volfsid(int volfs_id, int withref)
{
	mount_t cur_mount = (mount_t)0;
	mount_t mp ;

	mount_list_lock();
	TAILQ_FOREACH(mp, &mountlist, mnt_list) {
		if (validfsnode(mp) && mp->mnt_vfsstat.f_fsid.val[0] == volfs_id) {
			cur_mount = mp;
			if (withref) {
				/* take the iteration ref before dropping the list lock */
				if (mount_iterref(cur_mount, 1)) {
					cur_mount = (mount_t)0;
					mount_list_unlock();
					goto out;
				}
			}
			break;
		}
	}
	mount_list_unlock();
	if (withref && (cur_mount != (mount_t)0)) {
		mp = cur_mount;
		/* LK_NOWAIT: a mount mid-unmount is treated as "not found" */
		if (vfs_busy(mp, LK_NOWAIT) != 0) {
			cur_mount = (mount_t)0;
		}
		/* iteration ref no longer needed once busy (or failed) */
		mount_iterdrop(mp);
	}
out:
	return(cur_mount);
}
3854 | ||
3855 | ||
3856 | mount_t | |
3857 | mount_list_lookupby_fsid(fsid, locked, withref) | |
3858 | fsid_t *fsid; | |
3859 | int locked; | |
3860 | int withref; | |
3861 | { | |
3862 | mount_t retmp = (mount_t)0; | |
3863 | mount_t mp; | |
3864 | ||
3865 | if (!locked) | |
3866 | mount_list_lock(); | |
3867 | TAILQ_FOREACH(mp, &mountlist, mnt_list) | |
3868 | if (mp->mnt_vfsstat.f_fsid.val[0] == fsid->val[0] && | |
3869 | mp->mnt_vfsstat.f_fsid.val[1] == fsid->val[1]) { | |
3870 | retmp = mp; | |
3871 | if (withref) { | |
3872 | if (mount_iterref(retmp, 1)) | |
3873 | retmp = (mount_t)0; | |
3874 | } | |
3875 | goto out; | |
3876 | } | |
3877 | out: | |
3878 | if (!locked) | |
3879 | mount_list_unlock(); | |
3880 | return (retmp); | |
3881 | } | |
3882 | ||
3883 | errno_t | |
3884 | vnode_lookup(const char *path, int flags, vnode_t *vpp, vfs_context_t context) | |
3885 | { | |
3886 | struct nameidata nd; | |
3887 | int error; | |
3888 | struct vfs_context context2; | |
3889 | vfs_context_t ctx = context; | |
3890 | u_long ndflags = 0; | |
3891 | ||
3892 | if (context == NULL) { /* XXX technically an error */ | |
3893 | context2.vc_proc = current_proc(); | |
3894 | context2.vc_ucred = kauth_cred_get(); | |
3895 | ctx = &context2; | |
3896 | } | |
3897 | ||
3898 | if (flags & VNODE_LOOKUP_NOFOLLOW) | |
3899 | ndflags = NOFOLLOW; | |
3900 | else | |
3901 | ndflags = FOLLOW; | |
3902 | ||
3903 | if (flags & VNODE_LOOKUP_NOCROSSMOUNT) | |
3904 | ndflags |= NOCROSSMOUNT; | |
3905 | if (flags & VNODE_LOOKUP_DOWHITEOUT) | |
3906 | ndflags |= DOWHITEOUT; | |
3907 | ||
3908 | /* XXX AUDITVNPATH1 needed ? */ | |
3909 | NDINIT(&nd, LOOKUP, ndflags, UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx); | |
3910 | ||
3911 | if ((error = namei(&nd))) | |
3912 | return (error); | |
3913 | *vpp = nd.ni_vp; | |
3914 | nameidone(&nd); | |
3915 | ||
3916 | return (0); | |
3917 | } | |
3918 | ||
3919 | errno_t | |
3920 | vnode_open(const char *path, int fmode, int cmode, int flags, vnode_t *vpp, vfs_context_t context) | |
3921 | { | |
3922 | struct nameidata nd; | |
3923 | int error; | |
3924 | struct vfs_context context2; | |
3925 | vfs_context_t ctx = context; | |
3926 | u_long ndflags = 0; | |
3a60a9f5 | 3927 | int lflags = flags; |
91447636 A |
3928 | |
3929 | if (context == NULL) { /* XXX technically an error */ | |
3930 | context2.vc_proc = current_proc(); | |
3931 | context2.vc_ucred = kauth_cred_get(); | |
3932 | ctx = &context2; | |
3933 | } | |
3934 | ||
3a60a9f5 A |
3935 | if (fmode & O_NOFOLLOW) |
3936 | lflags |= VNODE_LOOKUP_NOFOLLOW; | |
3937 | ||
3938 | if (lflags & VNODE_LOOKUP_NOFOLLOW) | |
91447636 A |
3939 | ndflags = NOFOLLOW; |
3940 | else | |
3941 | ndflags = FOLLOW; | |
3942 | ||
3a60a9f5 | 3943 | if (lflags & VNODE_LOOKUP_NOCROSSMOUNT) |
91447636 | 3944 | ndflags |= NOCROSSMOUNT; |
3a60a9f5 | 3945 | if (lflags & VNODE_LOOKUP_DOWHITEOUT) |
91447636 A |
3946 | ndflags |= DOWHITEOUT; |
3947 | ||
3948 | /* XXX AUDITVNPATH1 needed ? */ | |
3949 | NDINIT(&nd, LOOKUP, ndflags, UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx); | |
3950 | ||
3951 | if ((error = vn_open(&nd, fmode, cmode))) | |
3952 | *vpp = NULL; | |
3953 | else | |
3954 | *vpp = nd.ni_vp; | |
3955 | ||
3956 | return (error); | |
3957 | } | |
3958 | ||
3959 | errno_t | |
3960 | vnode_close(vnode_t vp, int flags, vfs_context_t context) | |
3961 | { | |
3962 | kauth_cred_t cred; | |
3963 | struct proc *p; | |
3964 | int error; | |
3965 | ||
3966 | if (context) { | |
3967 | p = context->vc_proc; | |
3968 | cred = context->vc_ucred; | |
3969 | } else { | |
3970 | p = current_proc(); | |
3971 | cred = kauth_cred_get(); | |
3972 | } | |
3973 | ||
3974 | error = vn_close(vp, flags, cred, p); | |
3975 | vnode_put(vp); | |
3976 | return (error); | |
3977 | } | |
3978 | ||
3979 | errno_t | |
3980 | vnode_size(vnode_t vp, off_t *sizep, vfs_context_t ctx) | |
3981 | { | |
3982 | struct vnode_attr va; | |
3983 | int error; | |
3984 | ||
3985 | VATTR_INIT(&va); | |
3986 | VATTR_WANTED(&va, va_data_size); | |
3987 | error = vnode_getattr(vp, &va, ctx); | |
3988 | if (!error) | |
3989 | *sizep = va.va_data_size; | |
3990 | return(error); | |
3991 | } | |
3992 | ||
3993 | errno_t | |
3994 | vnode_setsize(vnode_t vp, off_t size, int ioflag, vfs_context_t ctx) | |
3995 | { | |
3996 | struct vnode_attr va; | |
3997 | ||
3998 | VATTR_INIT(&va); | |
3999 | VATTR_SET(&va, va_data_size, size); | |
4000 | va.va_vaflags = ioflag & 0xffff; | |
4001 | return(vnode_setattr(vp, &va, ctx)); | |
4002 | } | |
4003 | ||
89b3af67 A |
4004 | /* |
4005 | * Create a filesystem object of arbitrary type with arbitrary attributes in | |
4006 | * the specified directory with the specified name. | |
4007 | * | |
4008 | * Parameters: dvp Pointer to the vnode of the directory | |
4009 | * in which to create the object. | |
4010 | * vpp Pointer to the area into which to | |
4011 | * return the vnode of the created object. | |
4012 | * cnp Component name pointer from the namei | |
4013 | * data structure, containing the name to | |
4014 | * use for the create object. | |
4015 | * vap Pointer to the vnode_attr structure | |
4016 | * describing the object to be created, | |
4017 | * including the type of object. | |
4018 | * flags VN_* flags controlling ACL inheritance | |
4019 | * and whether or not authorization is to | |
4020 | * be required for the operation. | |
4021 | * | |
4022 | * Returns: 0 Success | |
4023 | * !0 errno value | |
4024 | * | |
4025 | * Implicit: *vpp Contains the vnode of the object that | |
4026 | * was created, if successful. | |
4027 | * *cnp May be modified by the underlying VFS. | |
4028 | * *vap May be modified by the underlying VFS. | |
4029 | * modified by either ACL inheritance or | |
4030 | * | |
4031 | * | |
4032 | * be modified, even if the operation is | |
4033 | * | |
4034 | * | |
4035 | * Notes: The kauth_filesec_t in 'vap', if any, is in host byte order. | |
4036 | * | |
4037 | * Modification of '*cnp' and '*vap' by the underlying VFS is | |
4038 | * strongly discouraged. | |
4039 | * | |
4040 | * XXX: This function is a 'vn_*' function; it belongs in vfs_vnops.c | |
4041 | * | |
4042 | * XXX: We should enumerate the possible errno values here, and where | |
4043 | * in the code they originated. | |
4044 | */ | |
91447636 A |
4045 | errno_t |
4046 | vn_create(vnode_t dvp, vnode_t *vpp, struct componentname *cnp, struct vnode_attr *vap, int flags, vfs_context_t ctx) | |
4047 | { | |
4048 | kauth_acl_t oacl, nacl; | |
4049 | int initial_acl; | |
4050 | errno_t error; | |
4051 | vnode_t vp = (vnode_t)0; | |
4052 | ||
4053 | error = 0; | |
4054 | oacl = nacl = NULL; | |
4055 | initial_acl = 0; | |
4056 | ||
4057 | KAUTH_DEBUG("%p CREATE - '%s'", dvp, cnp->cn_nameptr); | |
4058 | ||
4059 | /* | |
4060 | * Handle ACL inheritance. | |
4061 | */ | |
4062 | if (!(flags & VN_CREATE_NOINHERIT) && vfs_extendedsecurity(dvp->v_mount)) { | |
4063 | /* save the original filesec */ | |
4064 | if (VATTR_IS_ACTIVE(vap, va_acl)) { | |
4065 | initial_acl = 1; | |
4066 | oacl = vap->va_acl; | |
4067 | } | |
4068 | ||
4069 | vap->va_acl = NULL; | |
4070 | if ((error = kauth_acl_inherit(dvp, | |
4071 | oacl, | |
4072 | &nacl, | |
4073 | vap->va_type == VDIR, | |
4074 | ctx)) != 0) { | |
4075 | KAUTH_DEBUG("%p CREATE - error %d processing inheritance", dvp, error); | |
4076 | return(error); | |
4077 | } | |
4078 | ||
4079 | /* | |
4080 | * If the generated ACL is NULL, then we can save ourselves some effort | |
4081 | * by clearing the active bit. | |
4082 | */ | |
4083 | if (nacl == NULL) { | |
4084 | VATTR_CLEAR_ACTIVE(vap, va_acl); | |
4085 | } else { | |
4086 | VATTR_SET(vap, va_acl, nacl); | |
4087 | } | |
4088 | } | |
4089 | ||
4090 | /* | |
4091 | * Check and default new attributes. | |
4092 | * This will set va_uid, va_gid, va_mode and va_create_time at least, if the caller | |
4093 | * hasn't supplied them. | |
4094 | */ | |
4095 | if ((error = vnode_authattr_new(dvp, vap, flags & VN_CREATE_NOAUTH, ctx)) != 0) { | |
4096 | KAUTH_DEBUG("%p CREATE - error %d handing/defaulting attributes", dvp, error); | |
4097 | goto out; | |
4098 | } | |
4099 | ||
4100 | ||
4101 | /* | |
4102 | * Create the requested node. | |
4103 | */ | |
4104 | switch(vap->va_type) { | |
4105 | case VREG: | |
4106 | error = VNOP_CREATE(dvp, vpp, cnp, vap, ctx); | |
4107 | break; | |
4108 | case VDIR: | |
4109 | error = VNOP_MKDIR(dvp, vpp, cnp, vap, ctx); | |
4110 | break; | |
4111 | case VSOCK: | |
4112 | case VFIFO: | |
4113 | case VBLK: | |
4114 | case VCHR: | |
4115 | error = VNOP_MKNOD(dvp, vpp, cnp, vap, ctx); | |
4116 | break; | |
4117 | default: | |
4118 | panic("vnode_create: unknown vtype %d", vap->va_type); | |
4119 | } | |
4120 | if (error != 0) { | |
4121 | KAUTH_DEBUG("%p CREATE - error %d returned by filesystem", dvp, error); | |
4122 | goto out; | |
4123 | } | |
4124 | ||
4125 | vp = *vpp; | |
4126 | /* | |
4127 | * If some of the requested attributes weren't handled by the VNOP, | |
4128 | * use our fallback code. | |
4129 | */ | |
4130 | if (!VATTR_ALL_SUPPORTED(vap) && *vpp) { | |
4131 | KAUTH_DEBUG(" CREATE - doing fallback with ACL %p", vap->va_acl); | |
4132 | error = vnode_setattr_fallback(*vpp, vap, ctx); | |
4133 | } | |
4134 | if ((error != 0 ) && (vp != (vnode_t)0)) { | |
4135 | *vpp = (vnode_t) 0; | |
4136 | vnode_put(vp); | |
4137 | } | |
4138 | ||
4139 | out: | |
4140 | /* | |
4141 | * If the caller supplied a filesec in vap, it has been replaced | |
4142 | * now by the post-inheritance copy. We need to put the original back | |
4143 | * and free the inherited product. | |
4144 | */ | |
4145 | if (initial_acl) { | |
4146 | VATTR_SET(vap, va_acl, oacl); | |
4147 | } else { | |
4148 | VATTR_CLEAR_ACTIVE(vap, va_acl); | |
4149 | } | |
4150 | if (nacl != NULL) | |
4151 | kauth_acl_free(nacl); | |
4152 | ||
4153 | return(error); | |
4154 | } | |
4155 | ||
/* kauth scope handle for vnode authorization; set by vnode_authorize_init() */
static kauth_scope_t	vnode_scope;
static int	vnode_authorize_callback(kauth_cred_t credential, __unused void *idata, kauth_action_t action,
    uintptr_t arg0, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3);

/*
 * State carried through a single vnode authorization request.  Caches
 * the (potentially expensive) ownership and group-membership answers
 * so each is computed at most once per request.
 */
typedef struct _vnode_authorize_context {
	vnode_t		vp;		/* vnode being authorized */
	struct vnode_attr	*vap;	/* attributes of vp */
	vnode_t		dvp;		/* parent directory, when relevant */
	struct vnode_attr	*dvap;	/* attributes of dvp */
	vfs_context_t	ctx;		/* caller's context (credentials) */
	int		flags;		/* cached ownership/membership answers */
	int		flags_valid;	/* which bits in 'flags' are populated */
#define	_VAC_IS_OWNER		(1<<0)	/* caller owns vp */
#define _VAC_IN_GROUP		(1<<1)	/* caller is in vp's group */
#define _VAC_IS_DIR_OWNER	(1<<2)	/* caller owns dvp */
#define _VAC_IN_DIR_GROUP	(1<<3)	/* caller is in dvp's group */
} *vauth_ctx;
4173 | ||
/*
 * Register the vnode authorization scope with kauth.  Called once at
 * VFS startup so vnode_authorize() can dispatch through the scope.
 */
void
vnode_authorize_init(void)
{
	vnode_scope = kauth_register_scope(KAUTH_SCOPE_VNODE, vnode_authorize_callback, NULL);
}
4179 | ||
4180 | /* | |
4181 | * Authorize an operation on a vnode. | |
4182 | * | |
4183 | * This is KPI, but here because it needs vnode_scope. | |
4184 | */ | |
int
vnode_authorize(vnode_t vp, vnode_t dvp, kauth_action_t action, vfs_context_t context)
{
	int	error, result;

	/*
	 * We can't authorize against a dead vnode; allow all operations through so that
	 * the correct error can be returned.
	 */
	if (vp->v_type == VBAD)
		return(0);

	error = 0;
	/* arg3 lets listeners deposit a more specific errno into 'error' */
	result = kauth_authorize_action(vnode_scope, vfs_context_ucred(context), action,
	    (uintptr_t)context, (uintptr_t)vp, (uintptr_t)dvp, (uintptr_t)&error);
	if (result == EPERM)		/* traditional behaviour */
		result = EACCES;
	/* did the lower layers give a better error return? */
	if ((result != 0) && (error != 0))
		return(error);
	return(result);
}
4207 | ||
4208 | /* | |
4209 | * Test for vnode immutability. | |
4210 | * | |
4211 | * The 'append' flag is set when the authorization request is constrained | |
4212 | * to operations which only request the right to append to a file. | |
4213 | * | |
4214 | * The 'ignore' flag is set when an operation modifying the immutability flags | |
4215 | * is being authorized. We check the system securelevel to determine which | |
4216 | * immutability flags we can ignore. | |
4217 | */ | |
4218 | static int | |
4219 | vnode_immutable(struct vnode_attr *vap, int append, int ignore) | |
4220 | { | |
4221 | int mask; | |
4222 | ||
4223 | /* start with all bits precluding the operation */ | |
4224 | mask = IMMUTABLE | APPEND; | |
4225 | ||
4226 | /* if appending only, remove the append-only bits */ | |
4227 | if (append) | |
4228 | mask &= ~APPEND; | |
4229 | ||
4230 | /* ignore only set when authorizing flags changes */ | |
4231 | if (ignore) { | |
4232 | if (securelevel <= 0) { | |
4233 | /* in insecure state, flags do not inhibit changes */ | |
4234 | mask = 0; | |
4235 | } else { | |
4236 | /* in secure state, user flags don't inhibit */ | |
4237 | mask &= ~(UF_IMMUTABLE | UF_APPEND); | |
4238 | } | |
4239 | } | |
4240 | KAUTH_DEBUG("IMMUTABLE - file flags 0x%x mask 0x%x append = %d ignore = %d", vap->va_flags, mask, append, ignore); | |
4241 | if ((vap->va_flags & mask) != 0) | |
4242 | return(EPERM); | |
4243 | return(0); | |
4244 | } | |
4245 | ||
4246 | static int | |
4247 | vauth_node_owner(struct vnode_attr *vap, kauth_cred_t cred) | |
4248 | { | |
4249 | int result; | |
4250 | ||
4251 | /* default assumption is not-owner */ | |
4252 | result = 0; | |
4253 | ||
4254 | /* | |
4255 | * If the filesystem has given us a UID, we treat this as authoritative. | |
4256 | */ | |
4257 | if (vap && VATTR_IS_SUPPORTED(vap, va_uid)) { | |
4258 | result = (vap->va_uid == kauth_cred_getuid(cred)) ? 1 : 0; | |
4259 | } | |
4260 | /* we could test the owner UUID here if we had a policy for it */ | |
4261 | ||
4262 | return(result); | |
4263 | } | |
4264 | ||
4265 | static int | |
4266 | vauth_node_group(struct vnode_attr *vap, kauth_cred_t cred, int *ismember) | |
4267 | { | |
4268 | int error; | |
4269 | int result; | |
4270 | ||
4271 | error = 0; | |
4272 | result = 0; | |
4273 | ||
4274 | /* the caller is expected to have asked the filesystem for a group at some point */ | |
4275 | if (vap && VATTR_IS_SUPPORTED(vap, va_gid)) { | |
4276 | error = kauth_cred_ismember_gid(cred, vap->va_gid, &result); | |
4277 | } | |
4278 | /* we could test the group UUID here if we had a policy for it */ | |
4279 | ||
4280 | if (!error) | |
4281 | *ismember = result; | |
4282 | return(error); | |
4283 | } | |
4284 | ||
4285 | static int | |
4286 | vauth_file_owner(vauth_ctx vcp) | |
4287 | { | |
4288 | int result; | |
4289 | ||
4290 | if (vcp->flags_valid & _VAC_IS_OWNER) { | |
4291 | result = (vcp->flags & _VAC_IS_OWNER) ? 1 : 0; | |
4292 | } else { | |
4293 | result = vauth_node_owner(vcp->vap, vcp->ctx->vc_ucred); | |
4294 | ||
4295 | /* cache our result */ | |
4296 | vcp->flags_valid |= _VAC_IS_OWNER; | |
4297 | if (result) { | |
4298 | vcp->flags |= _VAC_IS_OWNER; | |
4299 | } else { | |
4300 | vcp->flags &= ~_VAC_IS_OWNER; | |
4301 | } | |
4302 | } | |
4303 | return(result); | |
4304 | } | |
4305 | ||
4306 | static int | |
4307 | vauth_file_ingroup(vauth_ctx vcp, int *ismember) | |
4308 | { | |
4309 | int error; | |
4310 | ||
4311 | if (vcp->flags_valid & _VAC_IN_GROUP) { | |
4312 | *ismember = (vcp->flags & _VAC_IN_GROUP) ? 1 : 0; | |
4313 | error = 0; | |
4314 | } else { | |
4315 | error = vauth_node_group(vcp->vap, vcp->ctx->vc_ucred, ismember); | |
4316 | ||
4317 | if (!error) { | |
4318 | /* cache our result */ | |
4319 | vcp->flags_valid |= _VAC_IN_GROUP; | |
4320 | if (*ismember) { | |
4321 | vcp->flags |= _VAC_IN_GROUP; | |
4322 | } else { | |
4323 | vcp->flags &= ~_VAC_IN_GROUP; | |
4324 | } | |
4325 | } | |
4326 | ||
4327 | } | |
4328 | return(error); | |
4329 | } | |
4330 | ||
4331 | static int | |
4332 | vauth_dir_owner(vauth_ctx vcp) | |
4333 | { | |
4334 | int result; | |
4335 | ||
4336 | if (vcp->flags_valid & _VAC_IS_DIR_OWNER) { | |
4337 | result = (vcp->flags & _VAC_IS_DIR_OWNER) ? 1 : 0; | |
4338 | } else { | |
4339 | result = vauth_node_owner(vcp->dvap, vcp->ctx->vc_ucred); | |
4340 | ||
4341 | /* cache our result */ | |
4342 | vcp->flags_valid |= _VAC_IS_DIR_OWNER; | |
4343 | if (result) { | |
4344 | vcp->flags |= _VAC_IS_DIR_OWNER; | |
4345 | } else { | |
4346 | vcp->flags &= ~_VAC_IS_DIR_OWNER; | |
4347 | } | |
4348 | } | |
4349 | return(result); | |
4350 | } | |
4351 | ||
4352 | static int | |
4353 | vauth_dir_ingroup(vauth_ctx vcp, int *ismember) | |
4354 | { | |
4355 | int error; | |
4356 | ||
4357 | if (vcp->flags_valid & _VAC_IN_DIR_GROUP) { | |
4358 | *ismember = (vcp->flags & _VAC_IN_DIR_GROUP) ? 1 : 0; | |
4359 | error = 0; | |
4360 | } else { | |
4361 | error = vauth_node_group(vcp->dvap, vcp->ctx->vc_ucred, ismember); | |
4362 | ||
4363 | if (!error) { | |
4364 | /* cache our result */ | |
4365 | vcp->flags_valid |= _VAC_IN_DIR_GROUP; | |
4366 | if (*ismember) { | |
4367 | vcp->flags |= _VAC_IN_DIR_GROUP; | |
4368 | } else { | |
4369 | vcp->flags &= ~_VAC_IN_DIR_GROUP; | |
4370 | } | |
4371 | } | |
4372 | } | |
4373 | return(error); | |
4374 | } | |
4375 | ||
4376 | /* | |
4377 | * Test the posix permissions in (vap) to determine whether (credential) | |
4378 | * may perform (action) | |
4379 | */ | |
/*
 * Evaluate the classic owner/group/other mode bits for 'action'
 * (VREAD|VWRITE|VEXEC) against either the node or, when 'on_dir' is
 * set, its parent directory.  Returns 0 if permitted, EACCES if not,
 * or an error from the group-membership lookup.
 */
static int
vnode_authorize_posix(vauth_ctx vcp, int action, int on_dir)
{
	struct vnode_attr *vap;
	int needed, error, owner_ok, group_ok, world_ok, ismember;
#ifdef KAUTH_DEBUG_ENABLE
	const char *where;
# define _SETWHERE(c) where = c;
#else
# define _SETWHERE(c)
#endif

	/* checking file or directory? */
	if (on_dir) {
		vap = vcp->dvap;
	} else {
		vap = vcp->vap;
	}

	error = 0;

	/*
	 * We want to do as little work here as possible.  So first we check
	 * which sets of permissions grant us the access we need, and avoid checking
	 * whether specific permissions grant access when more generic ones would.
	 */

	/* owner permissions */
	needed = 0;
	if (action & VREAD)
		needed |= S_IRUSR;
	if (action & VWRITE)
		needed |= S_IWUSR;
	if (action & VEXEC)
		needed |= S_IXUSR;
	owner_ok = (needed & vap->va_mode) == needed;

	/* group permissions */
	needed = 0;
	if (action & VREAD)
		needed |= S_IRGRP;
	if (action & VWRITE)
		needed |= S_IWGRP;
	if (action & VEXEC)
		needed |= S_IXGRP;
	group_ok = (needed & vap->va_mode) == needed;

	/* world permissions */
	needed = 0;
	if (action & VREAD)
		needed |= S_IROTH;
	if (action & VWRITE)
		needed |= S_IWOTH;
	if (action & VEXEC)
		needed |= S_IXOTH;
	world_ok = (needed & vap->va_mode) == needed;

	/* If granted/denied by all three, we're done */
	if (owner_ok && group_ok && world_ok) {
		_SETWHERE("all");
		goto out;
	}
	if (!owner_ok && !group_ok && !world_ok) {
		_SETWHERE("all");
		error = EACCES;
		goto out;
	}

	/* Check ownership (relatively cheap) */
	if ((on_dir && vauth_dir_owner(vcp)) ||
	    (!on_dir && vauth_file_owner(vcp))) {
		/* owner: the owner bits alone decide */
		_SETWHERE("user");
		if (!owner_ok)
			error = EACCES;
		goto out;
	}

	/* Not owner; if group and world both grant it we're done */
	if (group_ok && world_ok) {
		_SETWHERE("group/world");
		goto out;
	}
	if (!group_ok && !world_ok) {
		_SETWHERE("group/world");
		error = EACCES;
		goto out;
	}

	/* Check group membership (most expensive) */
	ismember = 0;
	if (on_dir) {
		error = vauth_dir_ingroup(vcp, &ismember);
	} else {
		error = vauth_file_ingroup(vcp, &ismember);
	}
	if (error)
		goto out;
	if (ismember) {
		/* group member: the group bits decide */
		_SETWHERE("group");
		if (!group_ok)
			error = EACCES;
		goto out;
	}

	/* Not owner, not in group, use world result */
	_SETWHERE("world");
	if (!world_ok)
		error = EACCES;

	/* FALLTHROUGH */

out:
	KAUTH_DEBUG("%p    %s - posix %s permissions : need %s%s%s %x have %s%s%s%s%s%s%s%s%s UID = %d file = %d,%d",
	    vcp->vp, (error == 0) ? "ALLOWED" : "DENIED", where,
	    (action & VREAD)  ? "r" : "-",
	    (action & VWRITE) ? "w" : "-",
	    (action & VEXEC)  ? "x" : "-",
	    needed,
	    (vap->va_mode & S_IRUSR) ? "r" : "-",
	    (vap->va_mode & S_IWUSR) ? "w" : "-",
	    (vap->va_mode & S_IXUSR) ? "x" : "-",
	    (vap->va_mode & S_IRGRP) ? "r" : "-",
	    (vap->va_mode & S_IWGRP) ? "w" : "-",
	    (vap->va_mode & S_IXGRP) ? "x" : "-",
	    (vap->va_mode & S_IROTH) ? "r" : "-",
	    (vap->va_mode & S_IWOTH) ? "w" : "-",
	    (vap->va_mode & S_IXOTH) ? "x" : "-",
	    kauth_cred_getuid(vcp->ctx->vc_ucred),
	    on_dir ? vcp->dvap->va_uid : vcp->vap->va_uid,
	    on_dir ? vcp->dvap->va_gid : vcp->vap->va_gid);
	return(error);
}
4512 | ||
4513 | /* | |
4514 | * Authorize the deletion of the node vp from the directory dvp. | |
4515 | * | |
4516 | * We assume that: | |
4517 | * - Neither the node nor the directory are immutable. | |
4518 | * - The user is not the superuser. | |
4519 | * | |
4520 | * Deletion is not permitted if the directory is sticky and the caller is not owner of the | |
4521 | * node or directory. | |
4522 | * | |
4523 | * If either the node grants DELETE, or the directory grants DELETE_CHILD, the node may be | |
4524 | * deleted. If neither denies the permission, and the caller has Posix write access to the | |
4525 | * directory, then the node may be deleted. | |
4526 | */ | |
4527 | static int | |
4528 | vnode_authorize_delete(vauth_ctx vcp) | |
4529 | { | |
4530 | struct vnode_attr *vap = vcp->vap; | |
4531 | struct vnode_attr *dvap = vcp->dvap; | |
4532 | kauth_cred_t cred = vcp->ctx->vc_ucred; | |
4533 | struct kauth_acl_eval eval; | |
4534 | int error, delete_denied, delete_child_denied, ismember; | |
4535 | ||
4536 | /* check the ACL on the directory */ | |
4537 | delete_child_denied = 0; | |
4538 | if (VATTR_IS_NOT(dvap, va_acl, NULL)) { | |
4539 | eval.ae_requested = KAUTH_VNODE_DELETE_CHILD; | |
4540 | eval.ae_acl = &dvap->va_acl->acl_ace[0]; | |
4541 | eval.ae_count = dvap->va_acl->acl_entrycount; | |
4542 | eval.ae_options = 0; | |
4543 | if (vauth_dir_owner(vcp)) | |
4544 | eval.ae_options |= KAUTH_AEVAL_IS_OWNER; | |
4545 | if ((error = vauth_dir_ingroup(vcp, &ismember)) != 0) | |
4546 | return(error); | |
4547 | if (ismember) | |
4548 | eval.ae_options |= KAUTH_AEVAL_IN_GROUP; | |
4549 | eval.ae_exp_gall = KAUTH_VNODE_GENERIC_ALL_BITS; | |
4550 | eval.ae_exp_gread = KAUTH_VNODE_GENERIC_READ_BITS; | |
4551 | eval.ae_exp_gwrite = KAUTH_VNODE_GENERIC_WRITE_BITS; | |
4552 | eval.ae_exp_gexec = KAUTH_VNODE_GENERIC_EXECUTE_BITS; | |
4553 | ||
4554 | error = kauth_acl_evaluate(cred, &eval); | |
4555 | ||
4556 | if (error != 0) { | |
4557 | KAUTH_DEBUG("%p ERROR during ACL processing - %d", vcp->vp, error); | |
4558 | return(error); | |
4559 | } | |
4560 | if (eval.ae_result == KAUTH_RESULT_DENY) | |
4561 | delete_child_denied = 1; | |
4562 | if (eval.ae_result == KAUTH_RESULT_ALLOW) { | |
4563 | KAUTH_DEBUG("%p ALLOWED - granted by directory ACL", vcp->vp); | |
4564 | return(0); | |
4565 | } | |
4566 | } | |
4567 | ||
4568 | /* check the ACL on the node */ | |
4569 | delete_denied = 0; | |
4570 | if (VATTR_IS_NOT(vap, va_acl, NULL)) { | |
4571 | eval.ae_requested = KAUTH_VNODE_DELETE; | |
4572 | eval.ae_acl = &vap->va_acl->acl_ace[0]; | |
4573 | eval.ae_count = vap->va_acl->acl_entrycount; | |
4574 | eval.ae_options = 0; | |
4575 | if (vauth_file_owner(vcp)) | |
4576 | eval.ae_options |= KAUTH_AEVAL_IS_OWNER; | |
4577 | if ((error = vauth_file_ingroup(vcp, &ismember)) != 0) | |
4578 | return(error); | |
4579 | if (ismember) | |
4580 | eval.ae_options |= KAUTH_AEVAL_IN_GROUP; | |
4581 | eval.ae_exp_gall = KAUTH_VNODE_GENERIC_ALL_BITS; | |
4582 | eval.ae_exp_gread = KAUTH_VNODE_GENERIC_READ_BITS; | |
4583 | eval.ae_exp_gwrite = KAUTH_VNODE_GENERIC_WRITE_BITS; | |
4584 | eval.ae_exp_gexec = KAUTH_VNODE_GENERIC_EXECUTE_BITS; | |
4585 | ||
4586 | if ((error = kauth_acl_evaluate(cred, &eval)) != 0) { | |
4587 | KAUTH_DEBUG("%p ERROR during ACL processing - %d", vcp->vp, error); | |
4588 | return(error); | |
4589 | } | |
4590 | if (eval.ae_result == KAUTH_RESULT_DENY) | |
4591 | delete_denied = 1; | |
4592 | if (eval.ae_result == KAUTH_RESULT_ALLOW) { | |
4593 | KAUTH_DEBUG("%p ALLOWED - granted by file ACL", vcp->vp); | |
4594 | return(0); | |
4595 | } | |
4596 | } | |
4597 | ||
4598 | /* if denied by ACL on directory or node, return denial */ | |
4599 | if (delete_denied || delete_child_denied) { | |
4600 | KAUTH_DEBUG("%p ALLOWED - denied by ACL", vcp->vp); | |
4601 | return(EACCES); | |
4602 | } | |
4603 | ||
4604 | /* enforce sticky bit behaviour */ | |
4605 | if ((dvap->va_mode & S_ISTXT) && !vauth_file_owner(vcp) && !vauth_dir_owner(vcp)) { | |
4606 | KAUTH_DEBUG("%p DENIED - sticky bit rules (user %d file %d dir %d)", | |
4607 | vcp->vp, cred->cr_uid, vap->va_uid, dvap->va_uid); | |
4608 | return(EACCES); | |
4609 | } | |
4610 | ||
4611 | /* check the directory */ | |
4612 | if ((error = vnode_authorize_posix(vcp, VWRITE, 1 /* on_dir */)) != 0) { | |
4613 | KAUTH_DEBUG("%p ALLOWED - granted by posix permisssions", vcp->vp); | |
4614 | return(error); | |
4615 | } | |
4616 | ||
4617 | /* not denied, must be OK */ | |
4618 | return(0); | |
4619 | } | |
4620 | ||
4621 | ||
4622 | /* | |
4623 | * Authorize an operation based on the node's attributes. | |
4624 | */ | |
4625 | static int | |
4626 | vnode_authorize_simple(vauth_ctx vcp, kauth_ace_rights_t acl_rights, kauth_ace_rights_t preauth_rights) | |
4627 | { | |
4628 | struct vnode_attr *vap = vcp->vap; | |
4629 | kauth_cred_t cred = vcp->ctx->vc_ucred; | |
4630 | struct kauth_acl_eval eval; | |
4631 | int error, ismember; | |
4632 | mode_t posix_action; | |
4633 | ||
4634 | /* | |
4635 | * If we are the file owner, we automatically have some rights. | |
4636 | * | |
4637 | * Do we need to expand this to support group ownership? | |
4638 | */ | |
4639 | if (vauth_file_owner(vcp)) | |
4640 | acl_rights &= ~(KAUTH_VNODE_WRITE_SECURITY); | |
4641 | ||
4642 | /* | |
4643 | * If we are checking both TAKE_OWNERSHIP and WRITE_SECURITY, we can | |
4644 | * mask the latter. If TAKE_OWNERSHIP is requested the caller is about to | |
4645 | * change ownership to themselves, and WRITE_SECURITY is implicitly | |
4646 | * granted to the owner. We need to do this because at this point | |
4647 | * WRITE_SECURITY may not be granted as the caller is not currently | |
4648 | * the owner. | |
4649 | */ | |
4650 | if ((acl_rights & KAUTH_VNODE_TAKE_OWNERSHIP) && | |
4651 | (acl_rights & KAUTH_VNODE_WRITE_SECURITY)) | |
4652 | acl_rights &= ~KAUTH_VNODE_WRITE_SECURITY; | |
4653 | ||
4654 | if (acl_rights == 0) { | |
4655 | KAUTH_DEBUG("%p ALLOWED - implicit or no rights required", vcp->vp); | |
4656 | return(0); | |
4657 | } | |
4658 | ||
4659 | /* if we have an ACL, evaluate it */ | |
4660 | if (VATTR_IS_NOT(vap, va_acl, NULL)) { | |
4661 | eval.ae_requested = acl_rights; | |
4662 | eval.ae_acl = &vap->va_acl->acl_ace[0]; | |
4663 | eval.ae_count = vap->va_acl->acl_entrycount; | |
4664 | eval.ae_options = 0; | |
4665 | if (vauth_file_owner(vcp)) | |
4666 | eval.ae_options |= KAUTH_AEVAL_IS_OWNER; | |
4667 | if ((error = vauth_file_ingroup(vcp, &ismember)) != 0) | |
4668 | return(error); | |
4669 | if (ismember) | |
4670 | eval.ae_options |= KAUTH_AEVAL_IN_GROUP; | |
4671 | eval.ae_exp_gall = KAUTH_VNODE_GENERIC_ALL_BITS; | |
4672 | eval.ae_exp_gread = KAUTH_VNODE_GENERIC_READ_BITS; | |
4673 | eval.ae_exp_gwrite = KAUTH_VNODE_GENERIC_WRITE_BITS; | |
4674 | eval.ae_exp_gexec = KAUTH_VNODE_GENERIC_EXECUTE_BITS; | |
4675 | ||
4676 | if ((error = kauth_acl_evaluate(cred, &eval)) != 0) { | |
4677 | KAUTH_DEBUG("%p ERROR during ACL processing - %d", vcp->vp, error); | |
4678 | return(error); | |
4679 | } | |
4680 | ||
4681 | if (eval.ae_result == KAUTH_RESULT_DENY) { | |
4682 | KAUTH_DEBUG("%p DENIED - by ACL", vcp->vp); | |
4683 | return(EACCES); /* deny, deny, counter-allege */ | |
4684 | } | |
4685 | if (eval.ae_result == KAUTH_RESULT_ALLOW) { | |
4686 | KAUTH_DEBUG("%p ALLOWED - all rights granted by ACL", vcp->vp); | |
4687 | return(0); | |
4688 | } | |
4689 | /* fall through and evaluate residual rights */ | |
4690 | } else { | |
4691 | /* no ACL, everything is residual */ | |
4692 | eval.ae_residual = acl_rights; | |
4693 | } | |
4694 | ||
4695 | /* | |
4696 | * Grant residual rights that have been pre-authorized. | |
4697 | */ | |
4698 | eval.ae_residual &= ~preauth_rights; | |
4699 | ||
4700 | /* | |
4701 | * We grant WRITE_ATTRIBUTES to the owner if it hasn't been denied. | |
4702 | */ | |
4703 | if (vauth_file_owner(vcp)) | |
4704 | eval.ae_residual &= ~KAUTH_VNODE_WRITE_ATTRIBUTES; | |
4705 | ||
4706 | if (eval.ae_residual == 0) { | |
4707 | KAUTH_DEBUG("%p ALLOWED - rights already authorized", vcp->vp); | |
4708 | return(0); | |
4709 | } | |
4710 | ||
4711 | /* | |
4712 | * Bail if we have residual rights that can't be granted by posix permissions, | |
4713 | * or aren't presumed granted at this point. | |
4714 | * | |
4715 | * XXX these can be collapsed for performance | |
4716 | */ | |
4717 | if (eval.ae_residual & KAUTH_VNODE_CHANGE_OWNER) { | |
4718 | KAUTH_DEBUG("%p DENIED - CHANGE_OWNER not permitted", vcp->vp); | |
4719 | return(EACCES); | |
4720 | } | |
4721 | if (eval.ae_residual & KAUTH_VNODE_WRITE_SECURITY) { | |
4722 | KAUTH_DEBUG("%p DENIED - WRITE_SECURITY not permitted", vcp->vp); | |
4723 | return(EACCES); | |
4724 | } | |
4725 | ||
4726 | #if DIAGNOSTIC | |
4727 | if (eval.ae_residual & KAUTH_VNODE_DELETE) | |
4728 | panic("vnode_authorize: can't be checking delete permission here"); | |
4729 | #endif | |
4730 | ||
4731 | /* | |
4732 | * Compute the fallback posix permissions that will satisfy the remaining | |
4733 | * rights. | |
4734 | */ | |
4735 | posix_action = 0; | |
4736 | if (eval.ae_residual & (KAUTH_VNODE_READ_DATA | | |
4737 | KAUTH_VNODE_LIST_DIRECTORY | | |
4738 | KAUTH_VNODE_READ_EXTATTRIBUTES)) | |
4739 | posix_action |= VREAD; | |
4740 | if (eval.ae_residual & (KAUTH_VNODE_WRITE_DATA | | |
4741 | KAUTH_VNODE_ADD_FILE | | |
4742 | KAUTH_VNODE_ADD_SUBDIRECTORY | | |
4743 | KAUTH_VNODE_DELETE_CHILD | | |
4744 | KAUTH_VNODE_WRITE_ATTRIBUTES | | |
4745 | KAUTH_VNODE_WRITE_EXTATTRIBUTES)) | |
4746 | posix_action |= VWRITE; | |
4747 | if (eval.ae_residual & (KAUTH_VNODE_EXECUTE | | |
4748 | KAUTH_VNODE_SEARCH)) | |
4749 | posix_action |= VEXEC; | |
4750 | ||
4751 | if (posix_action != 0) { | |
4752 | return(vnode_authorize_posix(vcp, posix_action, 0 /* !on_dir */)); | |
4753 | } else { | |
4754 | KAUTH_DEBUG("%p ALLOWED - residual rights %s%s%s%s%s%s%s%s%s%s%s%s%s%s granted due to no posix mapping", | |
4755 | vcp->vp, | |
4756 | (eval.ae_residual & KAUTH_VNODE_READ_DATA) | |
4757 | ? vnode_isdir(vcp->vp) ? " LIST_DIRECTORY" : " READ_DATA" : "", | |
4758 | (eval.ae_residual & KAUTH_VNODE_WRITE_DATA) | |
4759 | ? vnode_isdir(vcp->vp) ? " ADD_FILE" : " WRITE_DATA" : "", | |
4760 | (eval.ae_residual & KAUTH_VNODE_EXECUTE) | |
4761 | ? vnode_isdir(vcp->vp) ? " SEARCH" : " EXECUTE" : "", | |
4762 | (eval.ae_residual & KAUTH_VNODE_DELETE) | |
4763 | ? " DELETE" : "", | |
4764 | (eval.ae_residual & KAUTH_VNODE_APPEND_DATA) | |
4765 | ? vnode_isdir(vcp->vp) ? " ADD_SUBDIRECTORY" : " APPEND_DATA" : "", | |
4766 | (eval.ae_residual & KAUTH_VNODE_DELETE_CHILD) | |
4767 | ? " DELETE_CHILD" : "", | |
4768 | (eval.ae_residual & KAUTH_VNODE_READ_ATTRIBUTES) | |
4769 | ? " READ_ATTRIBUTES" : "", | |
4770 | (eval.ae_residual & KAUTH_VNODE_WRITE_ATTRIBUTES) | |
4771 | ? " WRITE_ATTRIBUTES" : "", | |
4772 | (eval.ae_residual & KAUTH_VNODE_READ_EXTATTRIBUTES) | |
4773 | ? " READ_EXTATTRIBUTES" : "", | |
4774 | (eval.ae_residual & KAUTH_VNODE_WRITE_EXTATTRIBUTES) | |
4775 | ? " WRITE_EXTATTRIBUTES" : "", | |
4776 | (eval.ae_residual & KAUTH_VNODE_READ_SECURITY) | |
4777 | ? " READ_SECURITY" : "", | |
4778 | (eval.ae_residual & KAUTH_VNODE_WRITE_SECURITY) | |
4779 | ? " WRITE_SECURITY" : "", | |
4780 | (eval.ae_residual & KAUTH_VNODE_CHECKIMMUTABLE) | |
4781 | ? " CHECKIMMUTABLE" : "", | |
4782 | (eval.ae_residual & KAUTH_VNODE_CHANGE_OWNER) | |
4783 | ? " CHANGE_OWNER" : ""); | |
4784 | } | |
4785 | ||
4786 | /* | |
4787 | * Lack of required Posix permissions implies no reason to deny access. | |
4788 | */ | |
4789 | return(0); | |
4790 | } | |
4791 | ||
4792 | /* | |
4793 | * Check for file immutability. | |
4794 | */ | |
4795 | static int | |
4796 | vnode_authorize_checkimmutable(vnode_t vp, struct vnode_attr *vap, int rights, int ignore) | |
4797 | { | |
4798 | mount_t mp; | |
4799 | int error; | |
4800 | int append; | |
4801 | ||
4802 | /* | |
4803 | * Perform immutability checks for operations that change data. | |
4804 | * | |
4805 | * Sockets, fifos and devices require special handling. | |
4806 | */ | |
4807 | switch(vp->v_type) { | |
4808 | case VSOCK: | |
4809 | case VFIFO: | |
4810 | case VBLK: | |
4811 | case VCHR: | |
4812 | /* | |
4813 | * Writing to these nodes does not change the filesystem data, | |
4814 | * so forget that it's being tried. | |
4815 | */ | |
4816 | rights &= ~KAUTH_VNODE_WRITE_DATA; | |
4817 | break; | |
4818 | default: | |
4819 | break; | |
4820 | } | |
4821 | ||
4822 | error = 0; | |
4823 | if (rights & KAUTH_VNODE_WRITE_RIGHTS) { | |
4824 | ||
4825 | /* check per-filesystem options if possible */ | |
4826 | mp = vnode_mount(vp); | |
4827 | if (mp != NULL) { | |
4828 | ||
4829 | /* check for no-EA filesystems */ | |
4830 | if ((rights & KAUTH_VNODE_WRITE_EXTATTRIBUTES) && | |
4831 | (vfs_flags(mp) & MNT_NOUSERXATTR)) { | |
4832 | KAUTH_DEBUG("%p DENIED - filesystem disallowed extended attributes", vp); | |
4833 | error = EACCES; /* User attributes disabled */ | |
4834 | goto out; | |
4835 | } | |
4836 | } | |
4837 | ||
4838 | /* check for file immutability */ | |
4839 | append = 0; | |
4840 | if (vp->v_type == VDIR) { | |
4841 | if ((rights & (KAUTH_VNODE_ADD_FILE | KAUTH_VNODE_ADD_SUBDIRECTORY)) == rights) | |
4842 | append = 1; | |
4843 | } else { | |
4844 | if ((rights & KAUTH_VNODE_APPEND_DATA) == rights) | |
4845 | append = 1; | |
4846 | } | |
4847 | if ((error = vnode_immutable(vap, append, ignore)) != 0) { | |
4848 | KAUTH_DEBUG("%p DENIED - file is immutable", vp); | |
4849 | goto out; | |
4850 | } | |
4851 | } | |
4852 | out: | |
4853 | return(error); | |
4854 | } | |
4855 | ||
4856 | /* | |
4857 | * Handle authorization actions for filesystems that advertise that the server will | |
4858 | * be enforcing. | |
4859 | */ | |
4860 | static int | |
4861 | vnode_authorize_opaque(vnode_t vp, int *resultp, kauth_action_t action, vfs_context_t ctx) | |
4862 | { | |
4863 | int error; | |
4864 | ||
4865 | /* | |
4866 | * If the vp is a device node, socket or FIFO it actually represents a local | |
4867 | * endpoint, so we need to handle it locally. | |
4868 | */ | |
4869 | switch(vp->v_type) { | |
4870 | case VBLK: | |
4871 | case VCHR: | |
4872 | case VSOCK: | |
4873 | case VFIFO: | |
4874 | return(0); | |
4875 | default: | |
4876 | break; | |
4877 | } | |
4878 | ||
4879 | /* | |
4880 | * In the advisory request case, if the filesystem doesn't think it's reliable | |
4881 | * we will attempt to formulate a result ourselves based on VNOP_GETATTR data. | |
4882 | */ | |
4883 | if ((action & KAUTH_VNODE_ACCESS) && !vfs_authopaqueaccess(vnode_mount(vp))) | |
4884 | return(0); | |
4885 | ||
4886 | /* | |
4887 | * Let the filesystem have a say in the matter. It's OK for it to not implemnent | |
4888 | * VNOP_ACCESS, as most will authorise inline with the actual request. | |
4889 | */ | |
4890 | if ((error = VNOP_ACCESS(vp, action, ctx)) != ENOTSUP) { | |
4891 | *resultp = error; | |
4892 | KAUTH_DEBUG("%p DENIED - opaque filesystem VNOP_ACCESS denied access", vp); | |
4893 | return(1); | |
4894 | } | |
4895 | ||
4896 | /* | |
4897 | * Typically opaque filesystems do authorisation in-line, but exec is a special case. In | |
4898 | * order to be reasonably sure that exec will be permitted, we try a bit harder here. | |
4899 | */ | |
4900 | if ((action & KAUTH_VNODE_EXECUTE) && vnode_isreg(vp)) { | |
4901 | /* try a VNOP_OPEN for readonly access */ | |
4902 | if ((error = VNOP_OPEN(vp, FREAD, ctx)) != 0) { | |
4903 | *resultp = error; | |
4904 | KAUTH_DEBUG("%p DENIED - EXECUTE denied because file could not be opened readonly", vp); | |
4905 | return(1); | |
4906 | } | |
4907 | VNOP_CLOSE(vp, FREAD, ctx); | |
4908 | } | |
4909 | ||
4910 | /* | |
4911 | * We don't have any reason to believe that the request has to be denied at this point, | |
4912 | * so go ahead and allow it. | |
4913 | */ | |
4914 | *resultp = 0; | |
4915 | KAUTH_DEBUG("%p ALLOWED - bypassing access check for non-local filesystem", vp); | |
4916 | return(1); | |
4917 | } | |
4918 | ||
4919 | static int | |
4920 | vnode_authorize_callback(__unused kauth_cred_t unused_cred, __unused void *idata, kauth_action_t action, | |
4921 | uintptr_t arg0, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3) | |
4922 | { | |
4923 | struct _vnode_authorize_context auth_context; | |
4924 | vauth_ctx vcp; | |
4925 | vfs_context_t ctx; | |
4926 | vnode_t vp, dvp; | |
4927 | kauth_cred_t cred; | |
4928 | kauth_ace_rights_t rights; | |
4929 | struct vnode_attr va, dva; | |
4930 | int result; | |
4931 | int *errorp; | |
4932 | int noimmutable; | |
4933 | ||
4934 | vcp = &auth_context; | |
4935 | ctx = vcp->ctx = (vfs_context_t)arg0; | |
4936 | vp = vcp->vp = (vnode_t)arg1; | |
4937 | dvp = vcp->dvp = (vnode_t)arg2; | |
4938 | errorp = (int *)arg3; | |
4939 | /* note that we authorize against the context, not the passed cred (the same thing anyway) */ | |
4940 | cred = ctx->vc_ucred; | |
4941 | ||
4942 | VATTR_INIT(&va); | |
4943 | vcp->vap = &va; | |
4944 | VATTR_INIT(&dva); | |
4945 | vcp->dvap = &dva; | |
4946 | ||
4947 | vcp->flags = vcp->flags_valid = 0; | |
4948 | ||
4949 | #if DIAGNOSTIC | |
4950 | if ((ctx == NULL) || (vp == NULL) || (cred == NULL)) | |
4951 | panic("vnode_authorize: bad arguments (context %p vp %p cred %p)", ctx, vp, cred); | |
4952 | #endif | |
4953 | ||
4954 | KAUTH_DEBUG("%p AUTH - %s %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s on %s '%s' (0x%x:%p/%p)", | |
4955 | vp, vfs_context_proc(ctx)->p_comm, | |
4956 | (action & KAUTH_VNODE_ACCESS) ? "access" : "auth", | |
4957 | (action & KAUTH_VNODE_READ_DATA) ? vnode_isdir(vp) ? " LIST_DIRECTORY" : " READ_DATA" : "", | |
4958 | (action & KAUTH_VNODE_WRITE_DATA) ? vnode_isdir(vp) ? " ADD_FILE" : " WRITE_DATA" : "", | |
4959 | (action & KAUTH_VNODE_EXECUTE) ? vnode_isdir(vp) ? " SEARCH" : " EXECUTE" : "", | |
4960 | (action & KAUTH_VNODE_DELETE) ? " DELETE" : "", | |
4961 | (action & KAUTH_VNODE_APPEND_DATA) ? vnode_isdir(vp) ? " ADD_SUBDIRECTORY" : " APPEND_DATA" : "", | |
4962 | (action & KAUTH_VNODE_DELETE_CHILD) ? " DELETE_CHILD" : "", | |
4963 | (action & KAUTH_VNODE_READ_ATTRIBUTES) ? " READ_ATTRIBUTES" : "", | |
4964 | (action & KAUTH_VNODE_WRITE_ATTRIBUTES) ? " WRITE_ATTRIBUTES" : "", | |
4965 | (action & KAUTH_VNODE_READ_EXTATTRIBUTES) ? " READ_EXTATTRIBUTES" : "", | |
4966 | (action & KAUTH_VNODE_WRITE_EXTATTRIBUTES) ? " WRITE_EXTATTRIBUTES" : "", | |
4967 | (action & KAUTH_VNODE_READ_SECURITY) ? " READ_SECURITY" : "", | |
4968 | (action & KAUTH_VNODE_WRITE_SECURITY) ? " WRITE_SECURITY" : "", | |
4969 | (action & KAUTH_VNODE_CHANGE_OWNER) ? " CHANGE_OWNER" : "", | |
4970 | (action & KAUTH_VNODE_NOIMMUTABLE) ? " (noimmutable)" : "", | |
4971 | vnode_isdir(vp) ? "directory" : "file", | |
4972 | vp->v_name ? vp->v_name : "<NULL>", action, vp, dvp); | |
4973 | ||
4974 | /* | |
4975 | * Extract the control bits from the action, everything else is | |
4976 | * requested rights. | |
4977 | */ | |
4978 | noimmutable = (action & KAUTH_VNODE_NOIMMUTABLE) ? 1 : 0; | |
4979 | rights = action & ~(KAUTH_VNODE_ACCESS | KAUTH_VNODE_NOIMMUTABLE); | |
4980 | ||
4981 | if (rights & KAUTH_VNODE_DELETE) { | |
4982 | #if DIAGNOSTIC | |
4983 | if (dvp == NULL) | |
4984 | panic("vnode_authorize: KAUTH_VNODE_DELETE test requires a directory"); | |
4985 | #endif | |
4986 | } else { | |
4987 | dvp = NULL; | |
4988 | } | |
4989 | ||
4990 | /* | |
4991 | * Check for read-only filesystems. | |
4992 | */ | |
4993 | if ((rights & KAUTH_VNODE_WRITE_RIGHTS) && | |
4994 | (vp->v_mount->mnt_flag & MNT_RDONLY) && | |
4995 | ((vp->v_type == VREG) || (vp->v_type == VDIR) || | |
4996 | (vp->v_type == VLNK) || (vp->v_type == VCPLX) || | |
4997 | (rights & KAUTH_VNODE_DELETE) || (rights & KAUTH_VNODE_DELETE_CHILD))) { | |
4998 | result = EROFS; | |
4999 | goto out; | |
5000 | } | |
5001 | ||
5002 | /* | |
5003 | * Check for noexec filesystems. | |
5004 | */ | |
5005 | if ((rights & KAUTH_VNODE_EXECUTE) && vnode_isreg(vp) && (vp->v_mount->mnt_flag & MNT_NOEXEC)) { | |
5006 | result = EACCES; | |
5007 | goto out; | |
5008 | } | |
5009 | ||
5010 | /* | |
5011 | * Handle cases related to filesystems with non-local enforcement. | |
5012 | * This call can return 0, in which case we will fall through to perform a | |
5013 | * check based on VNOP_GETATTR data. Otherwise it returns 1 and sets | |
5014 | * an appropriate result, at which point we can return immediately. | |
5015 | */ | |
5016 | if (vfs_authopaque(vp->v_mount) && vnode_authorize_opaque(vp, &result, action, ctx)) | |
5017 | goto out; | |
5018 | ||
5019 | /* | |
5020 | * Get vnode attributes and extended security information for the vnode | |
5021 | * and directory if required. | |
5022 | */ | |
5023 | VATTR_WANTED(&va, va_mode); | |
5024 | VATTR_WANTED(&va, va_uid); | |
5025 | VATTR_WANTED(&va, va_gid); | |
5026 | VATTR_WANTED(&va, va_flags); | |
5027 | VATTR_WANTED(&va, va_acl); | |
5028 | if ((result = vnode_getattr(vp, &va, ctx)) != 0) { | |
5029 | KAUTH_DEBUG("%p ERROR - failed to get vnode attributes - %d", vp, result); | |
5030 | goto out; | |
5031 | } | |
5032 | if (dvp) { | |
5033 | VATTR_WANTED(&dva, va_mode); | |
5034 | VATTR_WANTED(&dva, va_uid); | |
5035 | VATTR_WANTED(&dva, va_gid); | |
5036 | VATTR_WANTED(&dva, va_flags); | |
5037 | VATTR_WANTED(&dva, va_acl); | |
5038 | if ((result = vnode_getattr(dvp, &dva, ctx)) != 0) { | |
5039 | KAUTH_DEBUG("%p ERROR - failed to get directory vnode attributes - %d", vp, result); | |
5040 | goto out; | |
5041 | } | |
5042 | } | |
5043 | ||
5044 | /* | |
5045 | * If the vnode is an extended attribute data vnode (eg. a resource fork), *_DATA becomes | |
5046 | * *_EXTATTRIBUTES. | |
5047 | */ | |
5048 | if (S_ISXATTR(va.va_mode)) { | |
5049 | if (rights & KAUTH_VNODE_READ_DATA) { | |
5050 | rights &= ~KAUTH_VNODE_READ_DATA; | |
5051 | rights |= KAUTH_VNODE_READ_EXTATTRIBUTES; | |
5052 | } | |
5053 | if (rights & KAUTH_VNODE_WRITE_DATA) { | |
5054 | rights &= ~KAUTH_VNODE_WRITE_DATA; | |
5055 | rights |= KAUTH_VNODE_WRITE_EXTATTRIBUTES; | |
5056 | } | |
5057 | } | |
5058 | ||
5059 | /* | |
5060 | * Check for immutability. | |
5061 | * | |
5062 | * In the deletion case, parent directory immutability vetoes specific | |
5063 | * file rights. | |
5064 | */ | |
5065 | if ((result = vnode_authorize_checkimmutable(vp, &va, rights, noimmutable)) != 0) | |
5066 | goto out; | |
5067 | if ((rights & KAUTH_VNODE_DELETE) && | |
5068 | ((result = vnode_authorize_checkimmutable(dvp, &dva, KAUTH_VNODE_DELETE_CHILD, 0)) != 0)) | |
5069 | goto out; | |
5070 | ||
5071 | /* | |
5072 | * Clear rights that have been authorized by reaching this point, bail if nothing left to | |
5073 | * check. | |
5074 | */ | |
5075 | rights &= ~(KAUTH_VNODE_LINKTARGET | KAUTH_VNODE_CHECKIMMUTABLE); | |
5076 | if (rights == 0) | |
5077 | goto out; | |
5078 | ||
5079 | /* | |
5080 | * If we're not the superuser, authorize based on file properties. | |
5081 | */ | |
5082 | if (!vfs_context_issuser(ctx)) { | |
5083 | /* process delete rights */ | |
5084 | if ((rights & KAUTH_VNODE_DELETE) && | |
5085 | ((result = vnode_authorize_delete(vcp)) != 0)) | |
5086 | goto out; | |
5087 | ||
5088 | /* process remaining rights */ | |
5089 | if ((rights & ~KAUTH_VNODE_DELETE) && | |
5090 | ((result = vnode_authorize_simple(vcp, rights, rights & KAUTH_VNODE_DELETE)) != 0)) | |
5091 | goto out; | |
5092 | } else { | |
5093 | ||
5094 | /* | |
5095 | * Execute is only granted to root if one of the x bits is set. This check only | |
5096 | * makes sense if the posix mode bits are actually supported. | |
5097 | */ | |
5098 | if ((rights & KAUTH_VNODE_EXECUTE) && | |
5099 | (vp->v_type == VREG) && | |
5100 | VATTR_IS_SUPPORTED(&va, va_mode) && | |
5101 | !(va.va_mode & (S_IXUSR | S_IXGRP | S_IXOTH))) { | |
5102 | result = EPERM; | |
5103 | KAUTH_DEBUG("%p DENIED - root execute requires at least one x bit in 0x%x", vp, va.va_mode); | |
5104 | goto out; | |
5105 | } | |
5106 | ||
5107 | KAUTH_DEBUG("%p ALLOWED - caller is superuser", vp); | |
5108 | } | |
5109 | ||
5110 | out: | |
5111 | if (VATTR_IS_SUPPORTED(&va, va_acl) && (va.va_acl != NULL)) | |
5112 | kauth_acl_free(va.va_acl); | |
5113 | if (VATTR_IS_SUPPORTED(&dva, va_acl) && (dva.va_acl != NULL)) | |
5114 | kauth_acl_free(dva.va_acl); | |
5115 | if (result) { | |
5116 | *errorp = result; | |
5117 | KAUTH_DEBUG("%p DENIED - auth denied", vp); | |
5118 | return(KAUTH_RESULT_DENY); | |
5119 | } | |
5120 | ||
5121 | /* | |
5122 | * Note that this implies that we will allow requests for no rights, as well as | |
5123 | * for rights that we do not recognise. There should be none of these. | |
5124 | */ | |
5125 | KAUTH_DEBUG("%p ALLOWED - auth granted", vp); | |
5126 | return(KAUTH_RESULT_ALLOW); | |
5127 | } | |
5128 | ||
5129 | /* | |
5130 | * Check that the attribute information in vattr can be legally applied to | |
5131 | * a new file by the context. | |
5132 | */ | |
5133 | int | |
5134 | vnode_authattr_new(vnode_t dvp, struct vnode_attr *vap, int noauth, vfs_context_t ctx) | |
5135 | { | |
5136 | int error; | |
5137 | int is_suser, ismember, defaulted_owner, defaulted_group, defaulted_mode; | |
5138 | kauth_cred_t cred; | |
5139 | guid_t changer; | |
5140 | mount_t dmp; | |
5141 | ||
5142 | error = 0; | |
5143 | defaulted_owner = defaulted_group = defaulted_mode = 0; | |
5144 | ||
5145 | /* | |
5146 | * Require that the filesystem support extended security to apply any. | |
5147 | */ | |
5148 | if (!vfs_extendedsecurity(dvp->v_mount) && | |
5149 | (VATTR_IS_ACTIVE(vap, va_acl) || VATTR_IS_ACTIVE(vap, va_uuuid) || VATTR_IS_ACTIVE(vap, va_guuid))) { | |
5150 | error = EINVAL; | |
5151 | goto out; | |
5152 | } | |
5153 | ||
5154 | /* | |
5155 | * Default some fields. | |
5156 | */ | |
5157 | dmp = dvp->v_mount; | |
5158 | ||
5159 | /* | |
5160 | * If the filesystem is mounted IGNORE_OWNERSHIP and an explicit owner is set, that | |
5161 | * owner takes ownership of all new files. | |
5162 | */ | |
5163 | if ((dmp->mnt_flag & MNT_IGNORE_OWNERSHIP) && (dmp->mnt_fsowner != KAUTH_UID_NONE)) { | |
5164 | VATTR_SET(vap, va_uid, dmp->mnt_fsowner); | |
5165 | defaulted_owner = 1; | |
5166 | } else { | |
5167 | if (!VATTR_IS_ACTIVE(vap, va_uid)) { | |
5168 | /* default owner is current user */ | |
5169 | VATTR_SET(vap, va_uid, kauth_cred_getuid(vfs_context_ucred(ctx))); | |
5170 | defaulted_owner = 1; | |
5171 | } | |
5172 | } | |
5173 | ||
5174 | /* | |
5175 | * If the filesystem is mounted IGNORE_OWNERSHIP and an explicit grouo is set, that | |
5176 | * group takes ownership of all new files. | |
5177 | */ | |
5178 | if ((dmp->mnt_flag & MNT_IGNORE_OWNERSHIP) && (dmp->mnt_fsgroup != KAUTH_GID_NONE)) { | |
5179 | VATTR_SET(vap, va_gid, dmp->mnt_fsgroup); | |
5180 | defaulted_group = 1; | |
5181 | } else { | |
5182 | if (!VATTR_IS_ACTIVE(vap, va_gid)) { | |
5183 | /* default group comes from parent object, fallback to current user */ | |
5184 | struct vnode_attr dva; | |
5185 | VATTR_INIT(&dva); | |
5186 | VATTR_WANTED(&dva, va_gid); | |
5187 | if ((error = vnode_getattr(dvp, &dva, ctx)) != 0) | |
5188 | goto out; | |
5189 | if (VATTR_IS_SUPPORTED(&dva, va_gid)) { | |
5190 | VATTR_SET(vap, va_gid, dva.va_gid); | |
5191 | } else { | |
5192 | VATTR_SET(vap, va_gid, kauth_cred_getgid(vfs_context_ucred(ctx))); | |
5193 | } | |
5194 | defaulted_group = 1; | |
5195 | } | |
5196 | } | |
5197 | ||
5198 | if (!VATTR_IS_ACTIVE(vap, va_flags)) | |
5199 | VATTR_SET(vap, va_flags, 0); | |
5200 | ||
5201 | /* default mode is everything, masked with current umask */ | |
5202 | if (!VATTR_IS_ACTIVE(vap, va_mode)) { | |
5203 | VATTR_SET(vap, va_mode, ACCESSPERMS & ~vfs_context_proc(ctx)->p_fd->fd_cmask); | |
5204 | KAUTH_DEBUG("ATTR - defaulting new file mode to %o from umask %o", vap->va_mode, vfs_context_proc(ctx)->p_fd->fd_cmask); | |
5205 | defaulted_mode = 1; | |
5206 | } | |
5207 | /* set timestamps to now */ | |
5208 | if (!VATTR_IS_ACTIVE(vap, va_create_time)) { | |
5209 | nanotime(&vap->va_create_time); | |
5210 | VATTR_SET_ACTIVE(vap, va_create_time); | |
5211 | } | |
5212 | ||
5213 | /* | |
5214 | * Check for attempts to set nonsensical fields. | |
5215 | */ | |
5216 | if (vap->va_active & ~VNODE_ATTR_NEWOBJ) { | |
5217 | error = EINVAL; | |
5218 | KAUTH_DEBUG("ATTR - ERROR - attempt to set unsupported new-file attributes %llx", | |
5219 | vap->va_active & ~VNODE_ATTR_NEWOBJ); | |
5220 | goto out; | |
5221 | } | |
5222 | ||
5223 | /* | |
5224 | * Quickly check for the applicability of any enforcement here. | |
5225 | * Tests below maintain the integrity of the local security model. | |
5226 | */ | |
5227 | if (vfs_authopaque(vnode_mount(dvp))) | |
5228 | goto out; | |
5229 | ||
5230 | /* | |
5231 | * We need to know if the caller is the superuser, or if the work is | |
5232 | * otherwise already authorised. | |
5233 | */ | |
5234 | cred = vfs_context_ucred(ctx); | |
5235 | if (noauth) { | |
5236 | /* doing work for the kernel */ | |
5237 | is_suser = 1; | |
5238 | } else { | |
5239 | is_suser = vfs_context_issuser(ctx); | |
5240 | } | |
5241 | ||
5242 | ||
5243 | if (VATTR_IS_ACTIVE(vap, va_flags)) { | |
5244 | if (is_suser) { | |
5245 | if ((vap->va_flags & (UF_SETTABLE | SF_SETTABLE)) != vap->va_flags) { | |
5246 | error = EPERM; | |
5247 | KAUTH_DEBUG(" DENIED - superuser attempt to set illegal flag(s)"); | |
5248 | goto out; | |
5249 | } | |
5250 | } else { | |
5251 | if ((vap->va_flags & UF_SETTABLE) != vap->va_flags) { | |
5252 | error = EPERM; | |
5253 | KAUTH_DEBUG(" DENIED - user attempt to set illegal flag(s)"); | |
5254 | goto out; | |
5255 | } | |
5256 | } | |
5257 | } | |
5258 | ||
5259 | /* if not superuser, validate legality of new-item attributes */ | |
5260 | if (!is_suser) { | |
5261 | if (!defaulted_mode && VATTR_IS_ACTIVE(vap, va_mode)) { | |
5262 | /* setgid? */ | |
5263 | if (vap->va_mode & S_ISGID) { | |
5264 | if ((error = kauth_cred_ismember_gid(cred, vap->va_gid, &ismember)) != 0) { | |
5265 | KAUTH_DEBUG("ATTR - ERROR: got %d checking for membership in %d", error, vap->va_gid); | |
5266 | goto out; | |
5267 | } | |
5268 | if (!ismember) { | |
5269 | KAUTH_DEBUG(" DENIED - can't set SGID bit, not a member of %d", vap->va_gid); | |
5270 | error = EPERM; | |
5271 | goto out; | |
5272 | } | |
5273 | } | |
5274 | ||
5275 | /* setuid? */ | |
5276 | if ((vap->va_mode & S_ISUID) && (vap->va_uid != kauth_cred_getuid(cred))) { | |
5277 | KAUTH_DEBUG("ATTR - ERROR: illegal attempt to set the setuid bit"); | |
5278 | error = EPERM; | |
5279 | goto out; | |
5280 | } | |
5281 | } | |
5282 | if (!defaulted_owner && (vap->va_uid != kauth_cred_getuid(cred))) { | |
5283 | KAUTH_DEBUG(" DENIED - cannot create new item owned by %d", vap->va_uid); | |
5284 | error = EPERM; | |
5285 | goto out; | |
5286 | } | |
5287 | if (!defaulted_group) { | |
5288 | if ((error = kauth_cred_ismember_gid(cred, vap->va_gid, &ismember)) != 0) { | |
5289 | KAUTH_DEBUG(" ERROR - got %d checking for membership in %d", error, vap->va_gid); | |
5290 | goto out; | |
5291 | } | |
5292 | if (!ismember) { | |
5293 | KAUTH_DEBUG(" DENIED - cannot create new item with group %d - not a member", vap->va_gid); | |
5294 | error = EPERM; | |
5295 | goto out; | |
5296 | } | |
5297 | } | |
5298 | ||
5299 | /* initialising owner/group UUID */ | |
5300 | if (VATTR_IS_ACTIVE(vap, va_uuuid)) { | |
5301 | if ((error = kauth_cred_getguid(cred, &changer)) != 0) { | |
5302 | KAUTH_DEBUG(" ERROR - got %d trying to get caller UUID", error); | |
5303 | /* XXX ENOENT here - no GUID - should perhaps become EPERM */ | |
5304 | goto out; | |
5305 | } | |
5306 | if (!kauth_guid_equal(&vap->va_uuuid, &changer)) { | |
5307 | KAUTH_DEBUG(" ERROR - cannot create item with supplied owner UUID - not us"); | |
5308 | error = EPERM; | |
5309 | goto out; | |
5310 | } | |
5311 | } | |
5312 | if (VATTR_IS_ACTIVE(vap, va_guuid)) { | |
5313 | if ((error = kauth_cred_ismember_guid(cred, &vap->va_guuid, &ismember)) != 0) { | |
5314 | KAUTH_DEBUG(" ERROR - got %d trying to check group membership", error); | |
5315 | goto out; | |
5316 | } | |
5317 | if (!ismember) { | |
5318 | KAUTH_DEBUG(" ERROR - cannot create item with supplied group UUID - not a member"); | |
5319 | error = EPERM; | |
5320 | goto out; | |
5321 | } | |
5322 | } | |
5323 | } | |
5324 | out: | |
5325 | return(error); | |
5326 | } | |
5327 | ||
5328 | /* | |
5329 | * Check that the attribute information in vap can be legally written by the context. | |
5330 | * | |
5331 | * Call this when you're not sure about the vnode_attr; either its contents have come | |
5332 | * from an unknown source, or when they are variable. | |
5333 | * | |
5334 | * Returns errno, or zero and sets *actionp to the KAUTH_VNODE_* actions that | |
5335 | * must be authorized to be permitted to write the vattr. | |
5336 | */ | |
5337 | int | |
5338 | vnode_authattr(vnode_t vp, struct vnode_attr *vap, kauth_action_t *actionp, vfs_context_t ctx) | |
5339 | { | |
5340 | struct vnode_attr ova; | |
5341 | kauth_action_t required_action; | |
5342 | int error, is_suser, ismember, chowner, chgroup; | |
5343 | guid_t changer; | |
5344 | gid_t group; | |
5345 | uid_t owner; | |
5346 | mode_t newmode; | |
5347 | kauth_cred_t cred; | |
5348 | uint32_t fdelta; | |
5349 | ||
5350 | VATTR_INIT(&ova); | |
5351 | required_action = 0; | |
5352 | error = 0; | |
5353 | ||
5354 | /* | |
5355 | * Quickly check for enforcement applicability. | |
5356 | */ | |
5357 | if (vfs_authopaque(vnode_mount(vp))) | |
5358 | goto out; | |
5359 | ||
5360 | /* | |
5361 | * Check for attempts to set nonsensical fields. | |
5362 | */ | |
5363 | if (vap->va_active & VNODE_ATTR_RDONLY) { | |
5364 | KAUTH_DEBUG("ATTR - ERROR: attempt to set readonly attribute(s)"); | |
5365 | error = EINVAL; | |
5366 | goto out; | |
5367 | } | |
5368 | ||
5369 | /* | |
5370 | * We need to know if the caller is the superuser. | |
5371 | */ | |
5372 | cred = vfs_context_ucred(ctx); | |
5373 | is_suser = kauth_cred_issuser(cred); | |
5374 | ||
5375 | /* | |
5376 | * If any of the following are changing, we need information from the old file: | |
5377 | * va_uid | |
5378 | * va_gid | |
5379 | * va_mode | |
5380 | * va_uuuid | |
5381 | * va_guuid | |
5382 | */ | |
5383 | if (VATTR_IS_ACTIVE(vap, va_uid) || | |
5384 | VATTR_IS_ACTIVE(vap, va_gid) || | |
5385 | VATTR_IS_ACTIVE(vap, va_mode) || | |
5386 | VATTR_IS_ACTIVE(vap, va_uuuid) || | |
5387 | VATTR_IS_ACTIVE(vap, va_guuid)) { | |
5388 | VATTR_WANTED(&ova, va_mode); | |
5389 | VATTR_WANTED(&ova, va_uid); | |
5390 | VATTR_WANTED(&ova, va_gid); | |
5391 | VATTR_WANTED(&ova, va_uuuid); | |
5392 | VATTR_WANTED(&ova, va_guuid); | |
5393 | KAUTH_DEBUG("ATTR - security information changing, fetching existing attributes"); | |
5394 | } | |
5395 | ||
5396 | /* | |
5397 | * If timestamps are being changed, we need to know who the file is owned | |
5398 | * by. | |
5399 | */ | |
5400 | if (VATTR_IS_ACTIVE(vap, va_create_time) || | |
5401 | VATTR_IS_ACTIVE(vap, va_change_time) || | |
5402 | VATTR_IS_ACTIVE(vap, va_modify_time) || | |
5403 | VATTR_IS_ACTIVE(vap, va_access_time) || | |
5404 | VATTR_IS_ACTIVE(vap, va_backup_time)) { | |
5405 | ||
5406 | VATTR_WANTED(&ova, va_uid); | |
5407 | #if 0 /* enable this when we support UUIDs as official owners */ | |
5408 | VATTR_WANTED(&ova, va_uuuid); | |
5409 | #endif | |
5410 | KAUTH_DEBUG("ATTR - timestamps changing, fetching uid and GUID"); | |
5411 | } | |
5412 | ||
5413 | /* | |
5414 | * If flags are being changed, we need the old flags. | |
5415 | */ | |
5416 | if (VATTR_IS_ACTIVE(vap, va_flags)) { | |
5417 | KAUTH_DEBUG("ATTR - flags changing, fetching old flags"); | |
5418 | VATTR_WANTED(&ova, va_flags); | |
5419 | } | |
5420 | ||
5421 | /* | |
5422 | * If the size is being set, make sure it's not a directory. | |
5423 | */ | |
5424 | if (VATTR_IS_ACTIVE(vap, va_data_size)) { | |
5425 | /* size is meaningless on a directory, don't permit this */ | |
5426 | if (vnode_isdir(vp)) { | |
5427 | KAUTH_DEBUG("ATTR - ERROR: size change requested on a directory"); | |
5428 | error = EISDIR; | |
5429 | goto out; | |
5430 | } | |
5431 | } | |
5432 | ||
5433 | /* | |
5434 | * Get old data. | |
5435 | */ | |
5436 | KAUTH_DEBUG("ATTR - fetching old attributes %016llx", ova.va_active); | |
5437 | if ((error = vnode_getattr(vp, &ova, ctx)) != 0) { | |
5438 | KAUTH_DEBUG(" ERROR - got %d trying to get attributes", error); | |
5439 | goto out; | |
5440 | } | |
5441 | ||
5442 | /* | |
5443 | * Size changes require write access to the file data. | |
5444 | */ | |
5445 | if (VATTR_IS_ACTIVE(vap, va_data_size)) { | |
5446 | /* if we can't get the size, or it's different, we need write access */ | |
5447 | KAUTH_DEBUG("ATTR - size change, requiring WRITE_DATA"); | |
5448 | required_action |= KAUTH_VNODE_WRITE_DATA; | |
5449 | } | |
5450 | ||
5451 | /* | |
5452 | * Changing timestamps? | |
5453 | * | |
5454 | * Note that we are only called to authorize user-requested time changes; | |
5455 | * side-effect time changes are not authorized. Authorisation is only | |
5456 | * required for existing files. | |
5457 | * | |
5458 | * Non-owners are not permitted to change the time on an existing | |
5459 | * file to anything other than the current time. | |
5460 | */ | |
5461 | if (VATTR_IS_ACTIVE(vap, va_create_time) || | |
5462 | VATTR_IS_ACTIVE(vap, va_change_time) || | |
5463 | VATTR_IS_ACTIVE(vap, va_modify_time) || | |
5464 | VATTR_IS_ACTIVE(vap, va_access_time) || | |
5465 | VATTR_IS_ACTIVE(vap, va_backup_time)) { | |
5466 | /* | |
5467 | * The owner and root may set any timestamps they like, | |
5468 | * provided that the file is not immutable. The owner still needs | |
5469 | * WRITE_ATTRIBUTES (implied by ownership but still deniable). | |
5470 | */ | |
5471 | if (is_suser || vauth_node_owner(&ova, cred)) { | |
5472 | KAUTH_DEBUG("ATTR - root or owner changing timestamps"); | |
5473 | required_action |= KAUTH_VNODE_CHECKIMMUTABLE | KAUTH_VNODE_WRITE_ATTRIBUTES; | |
5474 | } else { | |
5475 | /* just setting the current time? */ | |
5476 | if (vap->va_vaflags & VA_UTIMES_NULL) { | |
5477 | KAUTH_DEBUG("ATTR - non-root/owner changing timestamps, requiring WRITE_ATTRIBUTES"); | |
5478 | required_action |= KAUTH_VNODE_WRITE_ATTRIBUTES; | |
5479 | } else { | |
5480 | KAUTH_DEBUG("ATTR - ERROR: illegal timestamp modification attempted"); | |
5481 | error = EACCES; | |
5482 | goto out; | |
5483 | } | |
5484 | } | |
5485 | } | |
5486 | ||
5487 | /* | |
5488 | * Changing file mode? | |
5489 | */ | |
5490 | if (VATTR_IS_ACTIVE(vap, va_mode) && VATTR_IS_SUPPORTED(&ova, va_mode) && (ova.va_mode != vap->va_mode)) { | |
5491 | KAUTH_DEBUG("ATTR - mode change from %06o to %06o", ova.va_mode, vap->va_mode); | |
5492 | ||
5493 | /* | |
5494 | * Mode changes always have the same basic auth requirements. | |
5495 | */ | |
5496 | if (is_suser) { | |
5497 | KAUTH_DEBUG("ATTR - superuser mode change, requiring immutability check"); | |
5498 | required_action |= KAUTH_VNODE_CHECKIMMUTABLE; | |
5499 | } else { | |
5500 | /* need WRITE_SECURITY */ | |
5501 | KAUTH_DEBUG("ATTR - non-superuser mode change, requiring WRITE_SECURITY"); | |
5502 | required_action |= KAUTH_VNODE_WRITE_SECURITY; | |
5503 | } | |
5504 | ||
5505 | /* | |
5506 | * Can't set the setgid bit if you're not in the group and not root. Have to have | |
5507 | * existing group information in the case we're not setting it right now. | |
5508 | */ | |
5509 | if (vap->va_mode & S_ISGID) { | |
5510 | required_action |= KAUTH_VNODE_CHECKIMMUTABLE; /* always required */ | |
5511 | if (!is_suser) { | |
5512 | if (VATTR_IS_ACTIVE(vap, va_gid)) { | |
5513 | group = vap->va_gid; | |
5514 | } else if (VATTR_IS_SUPPORTED(&ova, va_gid)) { | |
5515 | group = ova.va_gid; | |
5516 | } else { | |
5517 | KAUTH_DEBUG("ATTR - ERROR: setgid but no gid available"); | |
5518 | error = EINVAL; | |
5519 | goto out; | |
5520 | } | |
5521 | /* | |
5522 | * This might be too restrictive; WRITE_SECURITY might be implied by | |
5523 | * membership in this case, rather than being an additional requirement. | |
5524 | */ | |
5525 | if ((error = kauth_cred_ismember_gid(cred, group, &ismember)) != 0) { | |
5526 | KAUTH_DEBUG("ATTR - ERROR: got %d checking for membership in %d", error, vap->va_gid); | |
5527 | goto out; | |
5528 | } | |
5529 | if (!ismember) { | |
5530 | KAUTH_DEBUG(" DENIED - can't set SGID bit, not a member of %d", group); | |
5531 | error = EPERM; | |
5532 | goto out; | |
5533 | } | |
5534 | } | |
5535 | } | |
5536 | ||
5537 | /* | |
5538 | * Can't set the setuid bit unless you're root or the file's owner. | |
5539 | */ | |
5540 | if (vap->va_mode & S_ISUID) { | |
5541 | required_action |= KAUTH_VNODE_CHECKIMMUTABLE; /* always required */ | |
5542 | if (!is_suser) { | |
5543 | if (VATTR_IS_ACTIVE(vap, va_uid)) { | |
5544 | owner = vap->va_uid; | |
5545 | } else if (VATTR_IS_SUPPORTED(&ova, va_uid)) { | |
5546 | owner = ova.va_uid; | |
5547 | } else { | |
5548 | KAUTH_DEBUG("ATTR - ERROR: setuid but no uid available"); | |
5549 | error = EINVAL; | |
5550 | goto out; | |
5551 | } | |
5552 | if (owner != kauth_cred_getuid(cred)) { | |
5553 | /* | |
5554 | * We could allow this if WRITE_SECURITY is permitted, perhaps. | |
5555 | */ | |
5556 | KAUTH_DEBUG("ATTR - ERROR: illegal attempt to set the setuid bit"); | |
5557 | error = EPERM; | |
5558 | goto out; | |
5559 | } | |
5560 | } | |
5561 | } | |
5562 | } | |
5563 | ||
5564 | /* | |
5565 | * Validate/mask flags changes. This checks that only the flags in | |
5566 | * the UF_SETTABLE mask are being set, and preserves the flags in | |
5567 | * the SF_SETTABLE case. | |
5568 | * | |
5569 | * Since flags changes may be made in conjunction with other changes, | |
5570 | * we will ask the auth code to ignore immutability in the case that | |
5571 | * the SF_* flags are not set and we are only manipulating the file flags. | |
5572 | * | |
5573 | */ | |
5574 | if (VATTR_IS_ACTIVE(vap, va_flags)) { | |
5575 | /* compute changing flags bits */ | |
5576 | if (VATTR_IS_SUPPORTED(&ova, va_flags)) { | |
5577 | fdelta = vap->va_flags ^ ova.va_flags; | |
5578 | } else { | |
5579 | fdelta = vap->va_flags; | |
5580 | } | |
5581 | ||
5582 | if (fdelta != 0) { | |
5583 | KAUTH_DEBUG("ATTR - flags changing, requiring WRITE_SECURITY"); | |
5584 | required_action |= KAUTH_VNODE_WRITE_SECURITY; | |
5585 | ||
5586 | /* check that changing bits are legal */ | |
5587 | if (is_suser) { | |
5588 | /* | |
5589 | * The immutability check will prevent us from clearing the SF_* | |
5590 | * flags unless the system securelevel permits it, so just check | |
5591 | * for legal flags here. | |
5592 | */ | |
5593 | if (fdelta & ~(UF_SETTABLE | SF_SETTABLE)) { | |
5594 | error = EPERM; | |
5595 | KAUTH_DEBUG(" DENIED - superuser attempt to set illegal flag(s)"); | |
5596 | goto out; | |
5597 | } | |
5598 | } else { | |
5599 | if (fdelta & ~UF_SETTABLE) { | |
5600 | error = EPERM; | |
5601 | KAUTH_DEBUG(" DENIED - user attempt to set illegal flag(s)"); | |
5602 | goto out; | |
5603 | } | |
5604 | } | |
5605 | /* | |
5606 | * If the caller has the ability to manipulate file flags, | |
5607 | * security is not reduced by ignoring them for this operation. | |
5608 | * | |
5609 | * A more complete test here would consider the 'after' states of the flags | |
5610 | * to determine whether it would permit the operation, but this becomes | |
5611 | * very complex. | |
5612 | * | |
5613 | * Ignoring immutability is conditional on securelevel; this does not bypass | |
5614 | * the SF_* flags if securelevel > 0. | |
5615 | */ | |
5616 | required_action |= KAUTH_VNODE_NOIMMUTABLE; | |
5617 | } | |
5618 | } | |
5619 | ||
5620 | /* | |
5621 | * Validate ownership information. | |
5622 | */ | |
5623 | chowner = 0; | |
5624 | chgroup = 0; | |
5625 | ||
5626 | /* | |
5627 | * uid changing | |
5628 | * Note that if the filesystem didn't give us a UID, we expect that it doesn't | |
5629 | * support them in general, and will ignore it if/when we try to set it. | |
5630 | * We might want to clear the uid out of vap completely here. | |
5631 | */ | |
5632 | if (VATTR_IS_ACTIVE(vap, va_uid) && VATTR_IS_SUPPORTED(&ova, va_uid) && (vap->va_uid != ova.va_uid)) { | |
5633 | if (!is_suser && (kauth_cred_getuid(cred) != vap->va_uid)) { | |
5634 | KAUTH_DEBUG(" DENIED - non-superuser cannot change ownershipt to a third party"); | |
5635 | error = EPERM; | |
5636 | goto out; | |
5637 | } | |
5638 | chowner = 1; | |
5639 | } | |
5640 | ||
5641 | /* | |
5642 | * gid changing | |
5643 | * Note that if the filesystem didn't give us a GID, we expect that it doesn't | |
5644 | * support them in general, and will ignore it if/when we try to set it. | |
5645 | * We might want to clear the gid out of vap completely here. | |
5646 | */ | |
5647 | if (VATTR_IS_ACTIVE(vap, va_gid) && VATTR_IS_SUPPORTED(&ova, va_gid) && (vap->va_gid != ova.va_gid)) { | |
5648 | if (!is_suser) { | |
5649 | if ((error = kauth_cred_ismember_gid(cred, vap->va_gid, &ismember)) != 0) { | |
5650 | KAUTH_DEBUG(" ERROR - got %d checking for membership in %d", error, vap->va_gid); | |
5651 | goto out; | |
5652 | } | |
5653 | if (!ismember) { | |
5654 | KAUTH_DEBUG(" DENIED - group change from %d to %d but not a member of target group", | |
5655 | ova.va_gid, vap->va_gid); | |
5656 | error = EPERM; | |
5657 | goto out; | |
5658 | } | |
5659 | } | |
5660 | chgroup = 1; | |
5661 | } | |
5662 | ||
5663 | /* | |
5664 | * Owner UUID being set or changed. | |
5665 | */ | |
5666 | if (VATTR_IS_ACTIVE(vap, va_uuuid)) { | |
5667 | /* if the owner UUID is not actually changing ... */ | |
5668 | if (VATTR_IS_SUPPORTED(&ova, va_uuuid) && kauth_guid_equal(&vap->va_uuuid, &ova.va_uuuid)) | |
5669 | goto no_uuuid_change; | |
5670 | ||
5671 | /* | |
5672 | * The owner UUID cannot be set by a non-superuser to anything other than | |
5673 | * their own. | |
5674 | */ | |
5675 | if (!is_suser) { | |
5676 | if ((error = kauth_cred_getguid(cred, &changer)) != 0) { | |
5677 | KAUTH_DEBUG(" ERROR - got %d trying to get caller UUID", error); | |
5678 | /* XXX ENOENT here - no UUID - should perhaps become EPERM */ | |
5679 | goto out; | |
5680 | } | |
5681 | if (!kauth_guid_equal(&vap->va_uuuid, &changer)) { | |
5682 | KAUTH_DEBUG(" ERROR - cannot set supplied owner UUID - not us"); | |
5683 | error = EPERM; | |
5684 | goto out; | |
5685 | } | |
5686 | } | |
5687 | chowner = 1; | |
5688 | } | |
5689 | no_uuuid_change: | |
5690 | /* | |
5691 | * Group UUID being set or changed. | |
5692 | */ | |
5693 | if (VATTR_IS_ACTIVE(vap, va_guuid)) { | |
5694 | /* if the group UUID is not actually changing ... */ | |
5695 | if (VATTR_IS_SUPPORTED(&ova, va_guuid) && kauth_guid_equal(&vap->va_guuid, &ova.va_guuid)) | |
5696 | goto no_guuid_change; | |
5697 | ||
5698 | /* | |
5699 | * The group UUID cannot be set by a non-superuser to anything other than | |
5700 | * one of which they are a member. | |
5701 | */ | |
5702 | if (!is_suser) { | |
5703 | if ((error = kauth_cred_ismember_guid(cred, &vap->va_guuid, &ismember)) != 0) { | |
5704 | KAUTH_DEBUG(" ERROR - got %d trying to check group membership", error); | |
5705 | goto out; | |
5706 | } | |
5707 | if (!ismember) { | |
5708 | KAUTH_DEBUG(" ERROR - cannot create item with supplied group UUID - not a member"); | |
5709 | error = EPERM; | |
5710 | goto out; | |
5711 | } | |
5712 | } | |
5713 | chgroup = 1; | |
5714 | } | |
5715 | no_guuid_change: | |
5716 | ||
5717 | /* | |
5718 | * Compute authorisation for group/ownership changes. | |
5719 | */ | |
5720 | if (chowner || chgroup) { | |
5721 | if (is_suser) { | |
5722 | KAUTH_DEBUG("ATTR - superuser changing file owner/group, requiring immutability check"); | |
5723 | required_action |= KAUTH_VNODE_CHECKIMMUTABLE; | |
5724 | } else { | |
5725 | if (chowner) { | |
5726 | KAUTH_DEBUG("ATTR - ownership change, requiring TAKE_OWNERSHIP"); | |
5727 | required_action |= KAUTH_VNODE_TAKE_OWNERSHIP; | |
5728 | } | |
5729 | if (chgroup && !chowner) { | |
5730 | KAUTH_DEBUG("ATTR - group change, requiring WRITE_SECURITY"); | |
5731 | required_action |= KAUTH_VNODE_WRITE_SECURITY; | |
5732 | } | |
5733 | ||
5734 | /* clear set-uid and set-gid bits as required by Posix */ | |
5735 | if (VATTR_IS_ACTIVE(vap, va_mode)) { | |
5736 | newmode = vap->va_mode; | |
5737 | } else if (VATTR_IS_SUPPORTED(&ova, va_mode)) { | |
5738 | newmode = ova.va_mode; | |
5739 | } else { | |
5740 | KAUTH_DEBUG("CHOWN - trying to change owner but cannot get mode from filesystem to mask setugid bits"); | |
5741 | newmode = 0; | |
5742 | } | |
5743 | if (newmode & (S_ISUID | S_ISGID)) { | |
5744 | VATTR_SET(vap, va_mode, newmode & ~(S_ISUID | S_ISGID)); | |
5745 | KAUTH_DEBUG("CHOWN - masking setugid bits from mode %o to %o", newmode, vap->va_mode); | |
5746 | } | |
5747 | } | |
5748 | } | |
5749 | ||
5750 | /* | |
5751 | * Authorise changes in the ACL. | |
5752 | */ | |
5753 | if (VATTR_IS_ACTIVE(vap, va_acl)) { | |
5754 | ||
5755 | /* no existing ACL */ | |
5756 | if (!VATTR_IS_ACTIVE(&ova, va_acl) || (ova.va_acl == NULL)) { | |
5757 | ||
5758 | /* adding an ACL */ | |
5759 | if (vap->va_acl != NULL) { | |
5760 | required_action |= KAUTH_VNODE_WRITE_SECURITY; | |
5761 | KAUTH_DEBUG("CHMOD - adding ACL"); | |
5762 | } | |
5763 | ||
5764 | /* removing an existing ACL */ | |
5765 | } else if (vap->va_acl == NULL) { | |
5766 | required_action |= KAUTH_VNODE_WRITE_SECURITY; | |
5767 | KAUTH_DEBUG("CHMOD - removing ACL"); | |
5768 | ||
5769 | /* updating an existing ACL */ | |
5770 | } else { | |
5771 | if (vap->va_acl->acl_entrycount != ova.va_acl->acl_entrycount) { | |
5772 | /* entry count changed, must be different */ | |
5773 | required_action |= KAUTH_VNODE_WRITE_SECURITY; | |
5774 | KAUTH_DEBUG("CHMOD - adding/removing ACL entries"); | |
5775 | } else if (vap->va_acl->acl_entrycount > 0) { | |
5776 | /* both ACLs have the same ACE count, said count is 1 or more, bitwise compare ACLs */ | |
5777 | if (!memcmp(&vap->va_acl->acl_ace[0], &ova.va_acl->acl_ace[0], | |
5778 | sizeof(struct kauth_ace) * vap->va_acl->acl_entrycount)) { | |
5779 | required_action |= KAUTH_VNODE_WRITE_SECURITY; | |
5780 | KAUTH_DEBUG("CHMOD - changing ACL entries"); | |
5781 | } | |
5782 | } | |
5783 | } | |
5784 | } | |
5785 | ||
5786 | /* | |
5787 | * Other attributes that require authorisation. | |
5788 | */ | |
5789 | if (VATTR_IS_ACTIVE(vap, va_encoding)) | |
5790 | required_action |= KAUTH_VNODE_WRITE_ATTRIBUTES; | |
5791 | ||
5792 | out: | |
5793 | if (VATTR_IS_SUPPORTED(&ova, va_acl) && (ova.va_acl != NULL)) | |
5794 | kauth_acl_free(ova.va_acl); | |
5795 | if (error == 0) | |
5796 | *actionp = required_action; | |
5797 | return(error); | |
5798 | } | |
5799 | ||
5800 | ||
5801 | void | |
5802 | vfs_setlocklocal(mount_t mp) | |
5803 | { | |
5804 | vnode_t vp; | |
5805 | ||
5806 | mount_lock(mp); | |
5807 | mp->mnt_kern_flag |= MNTK_LOCK_LOCAL; | |
5808 | ||
5809 | /* | |
5810 | * We do not expect anyone to be using any vnodes at the | |
5811 | * time this routine is called. So no need for vnode locking | |
5812 | */ | |
5813 | TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) { | |
5814 | vp->v_flag |= VLOCKLOCAL; | |
5815 | } | |
5816 | TAILQ_FOREACH(vp, &mp->mnt_workerqueue, v_mntvnodes) { | |
5817 | vp->v_flag |= VLOCKLOCAL; | |
5818 | } | |
5819 | TAILQ_FOREACH(vp, &mp->mnt_newvnodes, v_mntvnodes) { | |
5820 | vp->v_flag |= VLOCKLOCAL; | |
5821 | } | |
5822 | mount_unlock(mp); | |
5823 | } | |
5824 | ||
5825 | ||
#ifdef JOE_DEBUG

/*
 * Debug-only (JOE_DEBUG) bookkeeping of per-thread vnode iocount traffic.
 *
 * Adds 'count' (positive on grab, negative on release) to the current
 * uthread's running iocount tally, and records up to 32 distinct vnode
 * pointers touched by the thread in uu_vps[] so a leaked iocount can be
 * traced back to the vnode that took it.  A vnode already present in the
 * array is not recorded twice; once 32 slots are used, further vnodes are
 * counted but not recorded.
 *
 * System vnodes (VSYSTEM) are skipped entirely.
 *
 * The original definition relied on an implicit 'int' return type, which
 * is invalid in C99 and later; the function never returned a value, so it
 * is declared 'void' here.
 */
void
record_vp(vnode_t vp, int count) {
        struct uthread *ut;
        int  i;

	if ((vp->v_flag & VSYSTEM))
	        return;

	ut = get_bsdthread_info(current_thread());
        ut->uu_iocount += count;

	if (ut->uu_vpindex < 32) {
	        /* record each vnode at most once per thread */
	        for (i = 0; i < ut->uu_vpindex; i++) {
		        if (ut->uu_vps[i] == vp)
			        return;
		}
		ut->uu_vps[ut->uu_vpindex] = vp;
		ut->uu_vpindex++;
	}
}
#endif