2  * Copyright (c) 2000-2010 Apple Inc. All rights reserved. 
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 
   6  * This file contains Original Code and/or Modifications of Original Code 
   7  * as defined in and that are subject to the Apple Public Source License 
   8  * Version 2.0 (the 'License'). You may not use this file except in 
   9  * compliance with the License. The rights granted to you under the License 
  10  * may not be used to create, or enable the creation or redistribution of, 
  11  * unlawful or unlicensed copies of an Apple operating system, or to 
  12  * circumvent, violate, or enable the circumvention or violation of, any 
  13  * terms of an Apple operating system software license agreement. 
  15  * Please obtain a copy of the License at 
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file. 
  18  * The Original Code and all software distributed under the License are 
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 
  23  * Please see the License for the specific language governing rights and 
  24  * limitations under the License. 
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 
  28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ 
  30  * Copyright (c) 1989, 1993 
  31  *      The Regents of the University of California.  All rights reserved. 
  33  * This code is derived from software contributed 
  34  * to Berkeley by John Heidemann of the UCLA Ficus project. 
  36  * Source: * @(#)i405_init.c 2.10 92/04/27 UCLA Ficus project 
  38  * Redistribution and use in source and binary forms, with or without 
  39  * modification, are permitted provided that the following conditions 
  41  * 1. Redistributions of source code must retain the above copyright 
  42  *    notice, this list of conditions and the following disclaimer. 
  43  * 2. Redistributions in binary form must reproduce the above copyright 
  44  *    notice, this list of conditions and the following disclaimer in the 
  45  *    documentation and/or other materials provided with the distribution. 
  46  * 3. All advertising materials mentioning features or use of this software 
  47  *    must display the following acknowledgement: 
  48  *      This product includes software developed by the University of 
  49  *      California, Berkeley and its contributors. 
  50  * 4. Neither the name of the University nor the names of its contributors 
  51  *    may be used to endorse or promote products derived from this software 
  52  *    without specific prior written permission. 
  54  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 
  55  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
  56  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
  57  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 
  58  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 
  59  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 
  60  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 
  61  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 
  62  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 
  63  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 
  66  *      @(#)vfs_init.c  8.5 (Berkeley) 5/11/95 
  69  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce 
  70  * support for mandatory and extensible security protections.  This notice 
  71  * is included in support of clause 2.2 (b) of the Apple Public License, 
  76 #include <sys/param.h> 
  77 #include <sys/mount_internal.h> 
  80 #include <sys/vnode_internal.h> 
  82 #include <sys/namei.h> 
  83 #include <sys/ucred.h> 
  84 #include <sys/errno.h> 
  85 #include <sys/malloc.h> 
  87 #include <vfs/vfs_journal.h>    /* journal_init() */ 
  89 #include <security/mac_framework.h> 
  90 #include <sys/kauth.h> 
  93 #include <sys/quota.h> 
  97  * Sigh, such primitive tools are these... 
 105 __private_extern__ 
void vntblinit(void) __attribute__((section("__TEXT, initcode"))); 
 107 extern struct vnodeopv_desc 
*vfs_opv_descs
[]; 
 108                                 /* a list of lists of vnodeops defns */ 
 109 extern struct vnodeop_desc 
*vfs_op_descs
[]; 
 110                                 /* and the operations they perform */ 
 112  * This code doesn't work if the defn is **vnodop_defns with cc. 
 113  * The problem is because of the compiler sometimes putting in an 
 114  * extra level of indirection for arrays.  It's an interesting 
 119 typedef int (*PFIvp
)(void *);  
 122  * A miscellaneous routine. 
 123  * A generic "default" routine that just returns an error. 
 126 vn_default_error(void) 
 135  * Allocate and fill in operations vectors. 
 137  * An undocumented feature of this approach to defining operations is that 
 138  * there can be multiple entries in vfs_opv_descs for the same operations 
 139  * vector. This allows third parties to extend the set of operations 
 140  * supported by another layer in a binary-compatible way. For example, 
 141  * assume that NFS needed to be modified to support Ficus. NFS has an entry 
 142  * (probably nfs_vnodeop_decls) declaring all the operations NFS supports by 
 143  * default. Ficus could add another entry (ficus_nfs_vnodeop_decl_extensions) 
 144  * listing those new operations Ficus adds to NFS, all without modifying the 
 145  * NFS code. (Of course, the OTW NFS protocol still needs to be munged, but 
 146  * that is a(whole)nother story.) This is a feature. 
 152         int (***opv_desc_vector_p
)(void *); 
 153         int (**opv_desc_vector
)(void *); 
 154         struct vnodeopv_entry_desc 
*opve_descp
; 
 157          * Allocate the dynamic vectors and fill them in. 
 159         for (i
=0; vfs_opv_descs
[i
]; i
++) { 
 160                 opv_desc_vector_p 
= vfs_opv_descs
[i
]->opv_desc_vector_p
; 
 162                  * Allocate and init the vector, if it needs it. 
 163                  * Also handle backwards compatibility. 
 165                 if (*opv_desc_vector_p 
== NULL
) { 
 166                         MALLOC(*opv_desc_vector_p
, PFIvp
*, 
 167                                vfs_opv_numops
*sizeof(PFIvp
), M_TEMP
, M_WAITOK
); 
 168                         bzero (*opv_desc_vector_p
, vfs_opv_numops
*sizeof(PFIvp
)); 
 169                         DODEBUG(printf("vector at %x allocated\n", 
 172                 opv_desc_vector 
= *opv_desc_vector_p
; 
 173                 for (j
=0; vfs_opv_descs
[i
]->opv_desc_ops
[j
].opve_op
; j
++) { 
 174                         opve_descp 
= &(vfs_opv_descs
[i
]->opv_desc_ops
[j
]); 
 177                          * Sanity check:  is this operation listed 
 178                          * in the list of operations?  We check this 
 179                          * by seeing if its offset is zero.  Since 
 180                          * the default routine should always be listed 
 181                          * first, it should be the only one with a zero 
 182                          * offset.  Any other operation with a zero 
 183                          * offset is probably not listed in 
 184                          * vfs_op_descs, and so is probably an error. 
 186                          * A panic here means the layer programmer 
 187                          * has committed the all-too common bug 
 188                          * of adding a new operation to the layer's 
 189                          * list of vnode operations but 
 190                          * not adding the operation to the system-wide 
 191                          * list of supported operations. 
 193                         if (opve_descp
->opve_op
->vdesc_offset 
== 0 && 
 194                                     opve_descp
->opve_op
->vdesc_offset 
!= 
 195                                         VOFFSET(vnop_default
)) { 
 196                                 printf("operation %s not listed in %s.\n", 
 197                                     opve_descp
->opve_op
->vdesc_name
, 
 199                                 panic ("vfs_opv_init: bad operation"); 
 202                          * Fill in this entry. 
 204                         opv_desc_vector
[opve_descp
->opve_op
->vdesc_offset
] = 
 205                                         opve_descp
->opve_impl
; 
 209          * Finally, go back and replace unfilled routines 
 210          * with their default.  (Sigh, an O(n^3) algorithm.  I 
 211          * could make it better, but that'd be work, and n is small.) 
 213         for (i 
= 0; vfs_opv_descs
[i
]; i
++) { 
 214                 opv_desc_vector 
= *(vfs_opv_descs
[i
]->opv_desc_vector_p
); 
 216                  * Force every operations vector to have a default routine. 
 218                 if (opv_desc_vector
[VOFFSET(vnop_default
)]==NULL
) { 
 219                         panic("vfs_opv_init: operation vector without default routine."); 
 221                 for (k 
= 0; k
<vfs_opv_numops
; k
++) 
 222                         if (opv_desc_vector
[k
] == NULL
) 
 224                                         opv_desc_vector
[VOFFSET(vnop_default
)]; 
 229  * Initialize known vnode operations vectors. 
 236         DODEBUG(printf("Vnode_interface_init.\n")); 
 238          * Set all vnode vectors to a well known value. 
 240         for (i 
= 0; vfs_opv_descs
[i
]; i
++) 
 241                 *(vfs_opv_descs
[i
]->opv_desc_vector_p
) = NULL
; 
 243          * Figure out how many ops there are by counting the table, 
 244          * and assign each its offset. 
 246         for (vfs_opv_numops 
= 0, i 
= 0; vfs_op_descs
[i
]; i
++) { 
 247                 vfs_op_descs
[i
]->vdesc_offset 
= vfs_opv_numops
; 
 250         DODEBUG(printf ("vfs_opv_numops=%d\n", vfs_opv_numops
)); 
 254  * Routines having to do with the management of the vnode table. 
 256 extern struct vnodeops dead_vnodeops
; 
 257 extern struct vnodeops spec_vnodeops
; 
 259 /* vars for vnode lock */ 
 260 lck_grp_t 
* vnode_lck_grp
; 
 261 lck_grp_attr_t 
* vnode_lck_grp_attr
; 
 262 lck_attr_t 
* vnode_lck_attr
; 
 265 /* vars for vnode trigger resolver */ 
 266 lck_grp_t 
* trigger_vnode_lck_grp
; 
 267 lck_grp_attr_t 
* trigger_vnode_lck_grp_attr
; 
 268 lck_attr_t 
* trigger_vnode_lck_attr
; 
 271 /* vars for vnode list lock */ 
 272 lck_grp_t 
* vnode_list_lck_grp
; 
 273 lck_grp_attr_t 
* vnode_list_lck_grp_attr
; 
 274 lck_attr_t 
* vnode_list_lck_attr
; 
 275 lck_spin_t 
* vnode_list_spin_lock
; 
 276 lck_mtx_t 
* spechash_mtx_lock
; 
 278 /* vars for vfsconf lock */ 
 279 lck_grp_t 
* fsconf_lck_grp
; 
 280 lck_grp_attr_t 
* fsconf_lck_grp_attr
; 
 281 lck_attr_t 
* fsconf_lck_attr
; 
 284 /* vars for mount lock */ 
 285 lck_grp_t 
* mnt_lck_grp
; 
 286 lck_grp_attr_t 
* mnt_lck_grp_attr
; 
 287 lck_attr_t 
* mnt_lck_attr
; 
 289 /* vars for mount list lock */ 
 290 lck_grp_t 
* mnt_list_lck_grp
; 
 291 lck_grp_attr_t 
* mnt_list_lck_grp_attr
; 
 292 lck_attr_t 
* mnt_list_lck_attr
; 
 293 lck_mtx_t 
* mnt_list_mtx_lock
; 
 295 lck_mtx_t 
*pkg_extensions_lck
; 
 297 struct mount 
* dead_mountp
; 
 299 extern void nspace_handler_init(void); 
 302  * Initialize the vnode structures and initialize each file system type. 
 307         struct vfstable 
*vfsp
; 
 311         /* Allocate vnode list lock group attribute and group */ 
 312         vnode_list_lck_grp_attr 
= lck_grp_attr_alloc_init(); 
 314         vnode_list_lck_grp 
= lck_grp_alloc_init("vnode list",  vnode_list_lck_grp_attr
); 
 316         /* Allocate vnode list lock attribute */ 
 317         vnode_list_lck_attr 
= lck_attr_alloc_init(); 
 319         /* Allocate vnode list lock */ 
 320         vnode_list_spin_lock 
= lck_spin_alloc_init(vnode_list_lck_grp
, vnode_list_lck_attr
); 
 322         /* Allocate spec hash list lock */ 
 323         spechash_mtx_lock 
= lck_mtx_alloc_init(vnode_list_lck_grp
, vnode_list_lck_attr
); 
 325         /* Allocate the package extensions table lock */ 
 326         pkg_extensions_lck 
= lck_mtx_alloc_init(vnode_list_lck_grp
, vnode_list_lck_attr
); 
 328         /* allocate vnode lock group attribute and group */ 
 329         vnode_lck_grp_attr
= lck_grp_attr_alloc_init(); 
 331         vnode_lck_grp 
= lck_grp_alloc_init("vnode",  vnode_lck_grp_attr
); 
 333         /* Allocate vnode lock attribute */ 
 334         vnode_lck_attr 
= lck_attr_alloc_init(); 
 337         trigger_vnode_lck_grp_attr 
= lck_grp_attr_alloc_init(); 
 338         trigger_vnode_lck_grp 
= lck_grp_alloc_init("trigger_vnode", trigger_vnode_lck_grp_attr
); 
 339         trigger_vnode_lck_attr 
= lck_attr_alloc_init(); 
 342         /* Allocate fs config lock group attribute and group */ 
 343         fsconf_lck_grp_attr
= lck_grp_attr_alloc_init(); 
 345         fsconf_lck_grp 
= lck_grp_alloc_init("fs conf",  fsconf_lck_grp_attr
); 
 347         /* Allocate fs config lock attribute */ 
 348         fsconf_lck_attr 
= lck_attr_alloc_init(); 
 350         /* Allocate mount point related lock structures  */ 
 352         /* Allocate mount list lock group attribute and group */ 
 353         mnt_list_lck_grp_attr
= lck_grp_attr_alloc_init(); 
 355         mnt_list_lck_grp 
= lck_grp_alloc_init("mount list",  mnt_list_lck_grp_attr
); 
 357         /* Allocate mount list lock attribute */ 
 358         mnt_list_lck_attr 
= lck_attr_alloc_init(); 
 360         /* Allocate mount list lock */ 
 361         mnt_list_mtx_lock 
= lck_mtx_alloc_init(mnt_list_lck_grp
, mnt_list_lck_attr
); 
 364         /* allocate mount lock group attribute and group */ 
 365         mnt_lck_grp_attr
= lck_grp_attr_alloc_init(); 
 367         mnt_lck_grp 
= lck_grp_alloc_init("mount",  mnt_lck_grp_attr
); 
 369         /* Allocate mount lock attribute */ 
 370         mnt_lck_attr 
= lck_attr_alloc_init(); 
 373          * Initialize the vnode table 
 377          * Initialize the filesystem event mechanism. 
 381          * Initialize the vnode name cache 
 387          * Initialize the journaling locks 
 391         nspace_handler_init(); 
 394          * Build vnode operation vectors. 
 397         vfs_opv_init();   /* finish the job */ 
 399          * Initialize each file system type in the static list, 
 400          * until the first NULL ->vfs_vfsops is encountered. 
 402         numused_vfsslots 
= maxtypenum 
= 0; 
 403         for (vfsp 
= vfsconf
, i 
= 0; i 
< maxvfsslots
; i
++, vfsp
++) { 
 405                 if (vfsp
->vfc_vfsops 
== (struct vfsops 
*)0) 
 407                 if (i
) vfsconf
[i
-1].vfc_next 
= vfsp
; 
 408                 if (maxtypenum 
<= vfsp
->vfc_typenum
) 
 409                         maxtypenum 
= vfsp
->vfc_typenum 
+ 1; 
 411                 bzero(&vfsc
, sizeof(struct vfsconf
)); 
 412                 vfsc
.vfc_reserved1 
= 0; 
 413                 bcopy(vfsp
->vfc_name
, vfsc
.vfc_name
, sizeof(vfsc
.vfc_name
)); 
 414                 vfsc
.vfc_typenum 
= vfsp
->vfc_typenum
; 
 415                 vfsc
.vfc_refcount 
= vfsp
->vfc_refcount
; 
 416                 vfsc
.vfc_flags 
= vfsp
->vfc_flags
; 
 417                 vfsc
.vfc_reserved2 
= 0; 
 418                 vfsc
.vfc_reserved3 
= 0; 
 420                 (*vfsp
->vfc_vfsops
->vfs_init
)(&vfsc
); 
 424         /* next vfc_typenum to be used */ 
 425         maxvfsconf 
= maxtypenum
; 
 428          * Initialize the vnop authorization scope. 
 430         vnode_authorize_init(); 
 433          * Initialize the quota system. 
 440          * create a mount point for dead vnodes 
 442         MALLOC_ZONE(mp
, struct mount 
*, sizeof(struct mount
), 
 444         bzero((char *)mp
, sizeof(struct mount
)); 
 445         /* Initialize the default IO constraints */ 
 446         mp
->mnt_maxreadcnt 
= mp
->mnt_maxwritecnt 
= MAXPHYS
; 
 447         mp
->mnt_segreadcnt 
= mp
->mnt_segwritecnt 
= 32; 
 448         mp
->mnt_maxsegreadsize 
= mp
->mnt_maxreadcnt
; 
 449         mp
->mnt_maxsegwritesize 
= mp
->mnt_maxwritecnt
; 
 450         mp
->mnt_devblocksize 
= DEV_BSIZE
; 
 451         mp
->mnt_alignmentmask 
= PAGE_MASK
; 
 452         mp
->mnt_ioqueue_depth 
= MNT_DEFAULT_IOQUEUE_DEPTH
; 
 455         mp
->mnt_realrootvp 
= NULLVP
; 
 456         mp
->mnt_authcache_ttl 
= CACHED_LOOKUP_RIGHT_TTL
; 
 458         TAILQ_INIT(&mp
->mnt_vnodelist
); 
 459         TAILQ_INIT(&mp
->mnt_workerqueue
); 
 460         TAILQ_INIT(&mp
->mnt_newvnodes
); 
 461         mp
->mnt_flag 
= MNT_LOCAL
; 
 462         mp
->mnt_lflag 
= MNT_LDEAD
; 
 466         mac_mount_label_init(mp
); 
 467         mac_mount_label_associate(vfs_context_kernel(), mp
); 
 473 vnode_list_lock(void) 
 475         lck_spin_lock(vnode_list_spin_lock
); 
 479 vnode_list_unlock(void) 
 481         lck_spin_unlock(vnode_list_spin_lock
); 
 485 mount_list_lock(void) 
 487         lck_mtx_lock(mnt_list_mtx_lock
); 
 491 mount_list_unlock(void) 
 493         lck_mtx_unlock(mnt_list_mtx_lock
); 
 497 mount_lock_init(mount_t mp
) 
 499         lck_mtx_init(&mp
->mnt_mlock
, mnt_lck_grp
, mnt_lck_attr
); 
 500         lck_mtx_init(&mp
->mnt_renamelock
, mnt_lck_grp
, mnt_lck_attr
); 
 501         lck_rw_init(&mp
->mnt_rwlock
, mnt_lck_grp
, mnt_lck_attr
); 
 505 mount_lock_destroy(mount_t mp
) 
 507         lck_mtx_destroy(&mp
->mnt_mlock
, mnt_lck_grp
); 
 508         lck_mtx_destroy(&mp
->mnt_renamelock
, mnt_lck_grp
); 
 509         lck_rw_destroy(&mp
->mnt_rwlock
, mnt_lck_grp
); 
 516  * Description: Add a filesystem to the vfsconf list at the first 
 517  *              unused slot.  If no slots are available, return an 
 520  * Parameter:   nvfsp           vfsconf for VFS to add 
 525  * Notes:       The vfsconf should be treated as a linked list by 
 526  *              all external references, as the implementation is 
 527  *              expected to change in the future.  The linkage is 
 528  *              through ->vfc_next, and the list is NULL terminated. 
 530  * Warning:     This code assumes that vfsconf[0] is non-empty. 
 533 vfstable_add(struct vfstable  
*nvfsp
) 
 536         struct vfstable 
*slotp
, *allocated 
= NULL
; 
 539          * Find the next empty slot; we recognize an empty slot by a 
 540          * NULL-valued ->vfc_vfsops, so if we delete a VFS, we must 
 541          * ensure we set the entry back to NULL. 
 545         for (slot 
= 0; slot 
< maxvfsslots
; slot
++) { 
 546                 if (vfsconf
[slot
].vfc_vfsops 
== NULL
) 
 549         if (slot 
== maxvfsslots
) { 
 550                 if (allocated 
== NULL
) { 
 552                         /* out of static slots; allocate one instead */ 
 553                         MALLOC(allocated
, struct vfstable 
*, sizeof(struct vfstable
), 
 561                 slotp 
= &vfsconf
[slot
]; 
 565          * Replace the contents of the next empty slot with the contents 
 566          * of the provided nvfsp. 
 568          * Note: Takes advantage of the fact that 'slot' was left 
 569          * with the value of 'maxvfsslots' in the allocation case. 
 571         bcopy(nvfsp
, slotp
, sizeof(struct vfstable
)); 
 573                 slotp
->vfc_next 
= vfsconf
[slot 
- 1].vfc_next
; 
 574                 vfsconf
[slot 
- 1].vfc_next 
= slotp
; 
 576                 slotp
->vfc_next 
= NULL
; 
 582         if (allocated 
!= NULL
) { 
 583                 FREE(allocated
, M_TEMP
); 
 592  * Description: Remove the filesystem entry vtbl from the vfsconf list. 
 593  *              If no such entry is on the list, return an error. 
 595  * Parameter:   vtbl            vfstable entry of the VFS to remove 
 600  * Notes:       Hopefully all filesystems have unique names. 
 603 vfstable_del(struct vfstable  
* vtbl
) 
 605         struct vfstable 
**vcpp
; 
 606         struct vfstable 
*vcdelp
; 
 609         lck_mtx_assert(mnt_list_mtx_lock
, LCK_MTX_ASSERT_OWNED
); 
 613          * Traverse the list looking for vtbl; if found, *vcpp 
 614          * will contain the address of the pointer to the entry to 
 617         for( vcpp 
= &vfsconf
; *vcpp
; vcpp 
= &(*vcpp
)->vfc_next
) { 
 623            return(ESRCH
);       /* vtbl not on vfsconf list */ 
 627         *vcpp 
= (*vcpp
)->vfc_next
; 
 630          * Is this an entry from our static table?  We find out by 
 631          * seeing if the pointer to the object to be deleted places 
 632          * the object in the address space containing the table (or not). 
 634         if (vcdelp 
>= vfsconf 
&& vcdelp 
< (vfsconf 
+ maxvfsslots
)) {    /* Y */ 
 635                 /* Mark as empty for vfstable_add() */ 
 636                 bzero(vcdelp
, sizeof(struct vfstable
)); 
 640                  * This entry was dynamically allocated; we must free it; 
 641                  * we would prefer to have just linked the caller's 
 642                  * vfsconf onto our list, but it may not be persistent 
 643                  * because of the previous (copying) implementation. 
 646                 FREE(vcdelp
, M_TEMP
); 
 651         lck_mtx_assert(mnt_list_mtx_lock
, LCK_MTX_ASSERT_OWNED
); 
 660         lck_mtx_lock(spechash_mtx_lock
); 
 664 SPECHASH_UNLOCK(void) 
 666         lck_mtx_unlock(spechash_mtx_lock
);