2 * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
30 * Copyright (c) 1989, 1993
31 * The Regents of the University of California. All rights reserved.
33 * This code is derived from software contributed
34 * to Berkeley by John Heidemann of the UCLA Ficus project.
36 * Source: * @(#)i405_init.c 2.10 92/04/27 UCLA Ficus project
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
41 * 1. Redistributions of source code must retain the above copyright
42 * notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in the
45 * documentation and/or other materials provided with the distribution.
46 * 3. All advertising materials mentioning features or use of this software
47 * must display the following acknowledgement:
48 * This product includes software developed by the University of
49 * California, Berkeley and its contributors.
50 * 4. Neither the name of the University nor the names of its contributors
51 * may be used to endorse or promote products derived from this software
52 * without specific prior written permission.
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
66 * @(#)vfs_init.c 8.5 (Berkeley) 5/11/95
69 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
70 * support for mandatory and extensible security protections. This notice
71 * is included in support of clause 2.2 (b) of the Apple Public License,
76 #include <sys/param.h>
77 #include <sys/mount_internal.h>
80 #include <sys/vnode_internal.h>
82 #include <sys/namei.h>
83 #include <sys/ucred.h>
84 #include <sys/errno.h>
85 #include <sys/malloc.h>
87 #include <vfs/vfs_journal.h> /* journal_init() */
89 #include <security/mac_framework.h>
90 #include <sys/kauth.h>
93 #include <sys/quota.h>
97 * Sigh, such primitive tools are these...
105 __private_extern__
void vntblinit(void) __attribute__((section("__TEXT, initcode")));
107 extern struct vnodeopv_desc
*vfs_opv_descs
[];
108 /* a list of lists of vnodeops defns */
109 extern struct vnodeop_desc
*vfs_op_descs
[];
110 /* and the operations they perform */
112 * This code doesn't work if the defn is **vnodop_defns with cc.
113 * The problem is because of the compiler sometimes putting in an
114 * extra level of indirection for arrays. It's an interesting
119 typedef int (*PFIvp
)(void *);
122 * A miscellaneous routine.
123 * A generic "default" routine that just returns an error.
126 vn_default_error(void)
135 * Allocate and fill in operations vectors.
137 * An undocumented feature of this approach to defining operations is that
138 * there can be multiple entries in vfs_opv_descs for the same operations
139 * vector. This allows third parties to extend the set of operations
140 * supported by another layer in a binary compatible way. For example,
141 * assume that NFS needed to be modified to support Ficus. NFS has an entry
142 * (probably nfs_vnodeop_decls) declaring all the operations NFS supports by
143 * default. Ficus could add another entry (ficus_nfs_vnodeop_decl_extensions)
144 * listing those new operations Ficus adds to NFS, all without modifying the
145 * NFS code. (Of course, the OTW NFS protocol still needs to be munged, but
146 * that is a(whole)nother story.) This is a feature.
152 int (***opv_desc_vector_p
)(void *);
153 int (**opv_desc_vector
)(void *);
154 struct vnodeopv_entry_desc
*opve_descp
;
157 * Allocate the dynamic vectors and fill them in.
159 for (i
=0; vfs_opv_descs
[i
]; i
++) {
160 opv_desc_vector_p
= vfs_opv_descs
[i
]->opv_desc_vector_p
;
162 * Allocate and init the vector, if it needs it.
163 * Also handle backwards compatibility.
165 if (*opv_desc_vector_p
== NULL
) {
166 MALLOC(*opv_desc_vector_p
, PFIvp
*,
167 vfs_opv_numops
*sizeof(PFIvp
), M_TEMP
, M_WAITOK
);
168 bzero (*opv_desc_vector_p
, vfs_opv_numops
*sizeof(PFIvp
));
169 DODEBUG(printf("vector at %x allocated\n",
172 opv_desc_vector
= *opv_desc_vector_p
;
173 for (j
=0; vfs_opv_descs
[i
]->opv_desc_ops
[j
].opve_op
; j
++) {
174 opve_descp
= &(vfs_opv_descs
[i
]->opv_desc_ops
[j
]);
177 * Sanity check: is this operation listed
178 * in the list of operations? We check this
179 * by seeing if its offset is zero. Since
180 * the default routine should always be listed
181 * first, it should be the only one with a zero
182 * offset. Any other operation with a zero
183 * offset is probably not listed in
184 * vfs_op_descs, and so is probably an error.
186 * A panic here means the layer programmer
187 * has committed the all-too common bug
188 * of adding a new operation to the layer's
189 * list of vnode operations but
190 * not adding the operation to the system-wide
191 * list of supported operations.
193 if (opve_descp
->opve_op
->vdesc_offset
== 0 &&
194 opve_descp
->opve_op
->vdesc_offset
!=
195 VOFFSET(vnop_default
)) {
196 printf("operation %s not listed in %s.\n",
197 opve_descp
->opve_op
->vdesc_name
,
199 panic ("vfs_opv_init: bad operation");
202 * Fill in this entry.
204 opv_desc_vector
[opve_descp
->opve_op
->vdesc_offset
] =
205 opve_descp
->opve_impl
;
209 * Finally, go back and replace unfilled routines
210 * with their default. (Sigh, an O(n^3) algorithm. I
211 * could make it better, but that'd be work, and n is small.)
213 for (i
= 0; vfs_opv_descs
[i
]; i
++) {
214 opv_desc_vector
= *(vfs_opv_descs
[i
]->opv_desc_vector_p
);
216 * Force every operations vector to have a default routine.
218 if (opv_desc_vector
[VOFFSET(vnop_default
)]==NULL
) {
219 panic("vfs_opv_init: operation vector without default routine.");
221 for (k
= 0; k
<vfs_opv_numops
; k
++)
222 if (opv_desc_vector
[k
] == NULL
)
224 opv_desc_vector
[VOFFSET(vnop_default
)];
229 * Initialize known vnode operations vectors.
236 DODEBUG(printf("Vnode_interface_init.\n"));
238 * Set all vnode vectors to a well known value.
240 for (i
= 0; vfs_opv_descs
[i
]; i
++)
241 *(vfs_opv_descs
[i
]->opv_desc_vector_p
) = NULL
;
243 * Figure out how many ops there are by counting the table,
244 * and assign each its offset.
246 for (vfs_opv_numops
= 0, i
= 0; vfs_op_descs
[i
]; i
++) {
247 vfs_op_descs
[i
]->vdesc_offset
= vfs_opv_numops
;
250 DODEBUG(printf ("vfs_opv_numops=%d\n", vfs_opv_numops
));
254 * Routines having to do with the management of the vnode table.
256 extern struct vnodeops dead_vnodeops
;
257 extern struct vnodeops spec_vnodeops
;
259 /* vars for vnode lock */
260 lck_grp_t
* vnode_lck_grp
;
261 lck_grp_attr_t
* vnode_lck_grp_attr
;
262 lck_attr_t
* vnode_lck_attr
;
265 /* vars for vnode trigger resolver */
266 lck_grp_t
* trigger_vnode_lck_grp
;
267 lck_grp_attr_t
* trigger_vnode_lck_grp_attr
;
268 lck_attr_t
* trigger_vnode_lck_attr
;
271 /* vars for vnode list lock */
272 lck_grp_t
* vnode_list_lck_grp
;
273 lck_grp_attr_t
* vnode_list_lck_grp_attr
;
274 lck_attr_t
* vnode_list_lck_attr
;
275 lck_spin_t
* vnode_list_spin_lock
;
276 lck_mtx_t
* spechash_mtx_lock
;
278 /* vars for vfsconf lock */
279 lck_grp_t
* fsconf_lck_grp
;
280 lck_grp_attr_t
* fsconf_lck_grp_attr
;
281 lck_attr_t
* fsconf_lck_attr
;
284 /* vars for mount lock */
285 lck_grp_t
* mnt_lck_grp
;
286 lck_grp_attr_t
* mnt_lck_grp_attr
;
287 lck_attr_t
* mnt_lck_attr
;
289 /* vars for mount list lock */
290 lck_grp_t
* mnt_list_lck_grp
;
291 lck_grp_attr_t
* mnt_list_lck_grp_attr
;
292 lck_attr_t
* mnt_list_lck_attr
;
293 lck_mtx_t
* mnt_list_mtx_lock
;
295 lck_mtx_t
*pkg_extensions_lck
;
297 struct mount
* dead_mountp
;
299 extern void nspace_handler_init(void);
302 * Initialize the vnode structures and initialize each file system type.
307 struct vfstable
*vfsp
;
311 /* Allocate vnode list lock group attribute and group */
312 vnode_list_lck_grp_attr
= lck_grp_attr_alloc_init();
314 vnode_list_lck_grp
= lck_grp_alloc_init("vnode list", vnode_list_lck_grp_attr
);
316 /* Allocate vnode list lock attribute */
317 vnode_list_lck_attr
= lck_attr_alloc_init();
319 /* Allocate vnode list lock */
320 vnode_list_spin_lock
= lck_spin_alloc_init(vnode_list_lck_grp
, vnode_list_lck_attr
);
322 /* Allocate spec hash list lock */
323 spechash_mtx_lock
= lck_mtx_alloc_init(vnode_list_lck_grp
, vnode_list_lck_attr
);
325 /* Allocate the package extensions table lock */
326 pkg_extensions_lck
= lck_mtx_alloc_init(vnode_list_lck_grp
, vnode_list_lck_attr
);
328 /* allocate vnode lock group attribute and group */
329 vnode_lck_grp_attr
= lck_grp_attr_alloc_init();
331 vnode_lck_grp
= lck_grp_alloc_init("vnode", vnode_lck_grp_attr
);
333 /* Allocate vnode lock attribute */
334 vnode_lck_attr
= lck_attr_alloc_init();
337 trigger_vnode_lck_grp_attr
= lck_grp_attr_alloc_init();
338 trigger_vnode_lck_grp
= lck_grp_alloc_init("trigger_vnode", trigger_vnode_lck_grp_attr
);
339 trigger_vnode_lck_attr
= lck_attr_alloc_init();
342 /* Allocate fs config lock group attribute and group */
343 fsconf_lck_grp_attr
= lck_grp_attr_alloc_init();
345 fsconf_lck_grp
= lck_grp_alloc_init("fs conf", fsconf_lck_grp_attr
);
347 /* Allocate fs config lock attribute */
348 fsconf_lck_attr
= lck_attr_alloc_init();
350 /* Allocate mount point related lock structures */
352 /* Allocate mount list lock group attribute and group */
353 mnt_list_lck_grp_attr
= lck_grp_attr_alloc_init();
355 mnt_list_lck_grp
= lck_grp_alloc_init("mount list", mnt_list_lck_grp_attr
);
357 /* Allocate mount list lock attribute */
358 mnt_list_lck_attr
= lck_attr_alloc_init();
360 /* Allocate mount list lock */
361 mnt_list_mtx_lock
= lck_mtx_alloc_init(mnt_list_lck_grp
, mnt_list_lck_attr
);
364 /* allocate mount lock group attribute and group */
365 mnt_lck_grp_attr
= lck_grp_attr_alloc_init();
367 mnt_lck_grp
= lck_grp_alloc_init("mount", mnt_lck_grp_attr
);
369 /* Allocate mount lock attribute */
370 mnt_lck_attr
= lck_attr_alloc_init();
373 * Initialize the vnode table
377 * Initialize the filesystem event mechanism.
381 * Initialize the vnode name cache
387 * Initialize the journaling locks
391 nspace_handler_init();
394 * Build vnode operation vectors.
397 vfs_opv_init(); /* finish the job */
399 * Initialize each file system type in the static list,
400 * until the first NULL ->vfs_vfsops is encountered.
402 numused_vfsslots
= maxtypenum
= 0;
403 for (vfsp
= vfsconf
, i
= 0; i
< maxvfsslots
; i
++, vfsp
++) {
405 if (vfsp
->vfc_vfsops
== (struct vfsops
*)0)
407 if (i
) vfsconf
[i
-1].vfc_next
= vfsp
;
408 if (maxtypenum
<= vfsp
->vfc_typenum
)
409 maxtypenum
= vfsp
->vfc_typenum
+ 1;
411 bzero(&vfsc
, sizeof(struct vfsconf
));
412 vfsc
.vfc_reserved1
= 0;
413 bcopy(vfsp
->vfc_name
, vfsc
.vfc_name
, sizeof(vfsc
.vfc_name
));
414 vfsc
.vfc_typenum
= vfsp
->vfc_typenum
;
415 vfsc
.vfc_refcount
= vfsp
->vfc_refcount
;
416 vfsc
.vfc_flags
= vfsp
->vfc_flags
;
417 vfsc
.vfc_reserved2
= 0;
418 vfsc
.vfc_reserved3
= 0;
420 (*vfsp
->vfc_vfsops
->vfs_init
)(&vfsc
);
424 /* next vfc_typenum to be used */
425 maxvfsconf
= maxtypenum
;
428 * Initialize the vnop authorization scope.
430 vnode_authorize_init();
433 * Initialize the quota system.
440 * create a mount point for dead vnodes
442 MALLOC_ZONE(mp
, struct mount
*, sizeof(struct mount
),
444 bzero((char *)mp
, sizeof(struct mount
));
445 /* Initialize the default IO constraints */
446 mp
->mnt_maxreadcnt
= mp
->mnt_maxwritecnt
= MAXPHYS
;
447 mp
->mnt_segreadcnt
= mp
->mnt_segwritecnt
= 32;
448 mp
->mnt_maxsegreadsize
= mp
->mnt_maxreadcnt
;
449 mp
->mnt_maxsegwritesize
= mp
->mnt_maxwritecnt
;
450 mp
->mnt_devblocksize
= DEV_BSIZE
;
451 mp
->mnt_alignmentmask
= PAGE_MASK
;
452 mp
->mnt_ioqueue_depth
= MNT_DEFAULT_IOQUEUE_DEPTH
;
455 mp
->mnt_realrootvp
= NULLVP
;
456 mp
->mnt_authcache_ttl
= CACHED_LOOKUP_RIGHT_TTL
;
458 TAILQ_INIT(&mp
->mnt_vnodelist
);
459 TAILQ_INIT(&mp
->mnt_workerqueue
);
460 TAILQ_INIT(&mp
->mnt_newvnodes
);
461 mp
->mnt_flag
= MNT_LOCAL
;
462 mp
->mnt_lflag
= MNT_LDEAD
;
466 mac_mount_label_init(mp
);
467 mac_mount_label_associate(vfs_context_kernel(), mp
);
473 vnode_list_lock(void)
475 lck_spin_lock(vnode_list_spin_lock
);
479 vnode_list_unlock(void)
481 lck_spin_unlock(vnode_list_spin_lock
);
485 mount_list_lock(void)
487 lck_mtx_lock(mnt_list_mtx_lock
);
491 mount_list_unlock(void)
493 lck_mtx_unlock(mnt_list_mtx_lock
);
497 mount_lock_init(mount_t mp
)
499 lck_mtx_init(&mp
->mnt_mlock
, mnt_lck_grp
, mnt_lck_attr
);
500 lck_mtx_init(&mp
->mnt_renamelock
, mnt_lck_grp
, mnt_lck_attr
);
501 lck_rw_init(&mp
->mnt_rwlock
, mnt_lck_grp
, mnt_lck_attr
);
505 mount_lock_destroy(mount_t mp
)
507 lck_mtx_destroy(&mp
->mnt_mlock
, mnt_lck_grp
);
508 lck_mtx_destroy(&mp
->mnt_renamelock
, mnt_lck_grp
);
509 lck_rw_destroy(&mp
->mnt_rwlock
, mnt_lck_grp
);
516 * Description: Add a filesystem to the vfsconf list at the first
517 * unused slot. If no slots are available, return an
520 * Parameter: nvfsp vfsconf for VFS to add
525 * Notes: The vfsconf should be treated as a linked list by
526 * all external references, as the implementation is
527 * expected to change in the future. The linkage is
528 * through ->vfc_next, and the list is NULL terminated.
530 * Warning: This code assumes that vfsconf[0] is non-empty.
533 vfstable_add(struct vfstable
*nvfsp
)
536 struct vfstable
*slotp
, *allocated
= NULL
;
539 * Find the next empty slot; we recognize an empty slot by a
540 * NULL-valued ->vfc_vfsops, so if we delete a VFS, we must
541 * ensure we set the entry back to NULL.
545 for (slot
= 0; slot
< maxvfsslots
; slot
++) {
546 if (vfsconf
[slot
].vfc_vfsops
== NULL
)
549 if (slot
== maxvfsslots
) {
550 if (allocated
== NULL
) {
552 /* out of static slots; allocate one instead */
553 MALLOC(allocated
, struct vfstable
*, sizeof(struct vfstable
),
561 slotp
= &vfsconf
[slot
];
565 * Replace the contents of the next empty slot with the contents
566 * of the provided nvfsp.
568 * Note: Takes advantage of the fact that 'slot' was left
569 * with the value of 'maxvfsslots' in the allocation case.
571 bcopy(nvfsp
, slotp
, sizeof(struct vfstable
));
573 slotp
->vfc_next
= vfsconf
[slot
- 1].vfc_next
;
574 vfsconf
[slot
- 1].vfc_next
= slotp
;
576 slotp
->vfc_next
= NULL
;
582 if (allocated
!= NULL
) {
583 FREE(allocated
, M_TEMP
);
592 * Description: Remove a filesystem's vfstable entry from the vfsconf list.
593 * If no such entry is on the list, return an error.
595 * Parameter: vtbl vfstable entry of the VFS to remove
600 * Notes: Hopefully all filesystems have unique names.
603 vfstable_del(struct vfstable
* vtbl
)
605 struct vfstable
**vcpp
;
606 struct vfstable
*vcdelp
;
609 lck_mtx_assert(mnt_list_mtx_lock
, LCK_MTX_ASSERT_OWNED
);
613 * Traverse the list looking for vtbl; if found, *vcpp
614 * will contain the address of the pointer to the entry to
617 for( vcpp
= &vfsconf
; *vcpp
; vcpp
= &(*vcpp
)->vfc_next
) {
623 return(ESRCH
); /* vtbl not on vfsconf list */
627 *vcpp
= (*vcpp
)->vfc_next
;
630 * Is this an entry from our static table? We find out by
631 * seeing if the pointer to the object to be deleted places
632 * the object in the address space containing the table (or not).
634 if (vcdelp
>= vfsconf
&& vcdelp
< (vfsconf
+ maxvfsslots
)) { /* Y */
635 /* Mark as empty for vfstable_add() */
636 bzero(vcdelp
, sizeof(struct vfstable
));
640 * This entry was dynamically allocated; we must free it;
641 * we would prefer to have just linked the caller's
642 * vfsconf onto our list, but it may not be persistent
643 * because of the previous (copying) implementation.
646 FREE(vcdelp
, M_TEMP
);
651 lck_mtx_assert(mnt_list_mtx_lock
, LCK_MTX_ASSERT_OWNED
);
660 lck_mtx_lock(spechash_mtx_lock
);
664 SPECHASH_UNLOCK(void)
666 lck_mtx_unlock(spechash_mtx_lock
);