bsd/vfs/vfs_init.c

   1 /*
   2  * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
  29 /*
  30  * Copyright (c) 1989, 1993
  31  *      The Regents of the University of California.  All rights reserved.
  32  *
  33  * This code is derived from software contributed
  34  * to Berkeley by John Heidemann of the UCLA Ficus project.
  35  *
  36  * Source: * @(#)i405_init.c 2.10 92/04/27 UCLA Ficus project
  37  *
  38  * Redistribution and use in source and binary forms, with or without
  39  * modification, are permitted provided that the following conditions
  40  * are met:
  41  * 1. Redistributions of source code must retain the above copyright
  42  *    notice, this list of conditions and the following disclaimer.
  43  * 2. Redistributions in binary form must reproduce the above copyright
  44  *    notice, this list of conditions and the following disclaimer in the
  45  *    documentation and/or other materials provided with the distribution.
  46  * 3. All advertising materials mentioning features or use of this software
  47  *    must display the following acknowledgement:
  48  *      This product includes software developed by the University of
  49  *      California, Berkeley and its contributors.
  50  * 4. Neither the name of the University nor the names of its contributors
  51  *    may be used to endorse or promote products derived from this software
  52  *    without specific prior written permission.
  53  *
  54  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  55  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  56  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  57  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  58  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  59  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  60  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  61  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  62  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  63  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  64  * SUCH DAMAGE.
  65  *
  66  *      @(#)vfs_init.c  8.5 (Berkeley) 5/11/95
  67  */
  68
  69
  70 #include <sys/param.h>
  71 #include <sys/mount_internal.h>
  72 #include <sys/time.h>
  73 #include <sys/vm.h>
  74 #include <sys/vnode_internal.h>
  75 #include <sys/stat.h>
  76 #include <sys/namei.h>
  77 #include <sys/ucred.h>
  78 #include <sys/errno.h>
  79 #include <sys/malloc.h>
  80
  81
  82 /*
  83  * Sigh, such primitive tools are these...
  84  */
  85 #if 0
  86 #define DODEBUG(A) A
  87 #else
  88 #define DODEBUG(A)
  89 #endif
  90
  91 extern uid_t console_user;
  92 extern struct vnodeopv_desc *vfs_opv_descs[];
  93                                 /* a list of lists of vnodeops defns */
  94 extern struct vnodeop_desc *vfs_op_descs[];
  95                                 /* and the operations they perform */
  96 /*
  97  * This code doesn't work if the defn is **vnodop_defns with cc.
  98  * The problem is because of the compiler sometimes putting in an
  99  * extra level of indirection for arrays.  It's an interesting
 100  * "feature" of C.
 101  */
 102 int vfs_opv_numops;
 103
 104 typedef (*PFI)();   /* the standard Pointer to a Function returning an Int */
 105
 106 /*
 107  * A miscellaneous routine.
 108  * A generic "default" routine that just returns an error.
 109  */
 110 int
 111 vn_default_error()
 112 {
 113
 114         return (ENOTSUP);
 115 }
 116
 117 /*
 118  * vfs_init.c
 119  *
 120  * Allocate and fill in operations vectors.
 121  *
 122  * An undocumented feature of this approach to defining operations is that
 123  * there can be multiple entries in vfs_opv_descs for the same operations
 124  * vector. This allows third parties to extend the set of operations
 125  * supported by another layer in a binary compatibile way. For example,
 126  * assume that NFS needed to be modified to support Ficus. NFS has an entry
 127  * (probably nfs_vnopdeop_decls) declaring all the operations NFS supports by
 128  * default. Ficus could add another entry (ficus_nfs_vnodeop_decl_entensions)
 129  * listing those new operations Ficus adds to NFS, all without modifying the
 130  * NFS code. (Of couse, the OTW NFS protocol still needs to be munged, but
 131  * that is a(whole)nother story.) This is a feature.
 132  */
 133 void
 134 vfs_opv_init()
 135 {
 136         int i, j, k;
 137         int (***opv_desc_vector_p)(void *);
 138         int (**opv_desc_vector)(void *);
 139         struct vnodeopv_entry_desc *opve_descp;
 140
 141         /*
 142          * Allocate the dynamic vectors and fill them in.
 143          */
 144         for (i=0; vfs_opv_descs[i]; i++) {
 145                 opv_desc_vector_p = vfs_opv_descs[i]->opv_desc_vector_p;
 146                 /*
 147                  * Allocate and init the vector, if it needs it.
 148                  * Also handle backwards compatibility.
 149                  */
 150                 if (*opv_desc_vector_p == NULL) {
 151                         MALLOC(*opv_desc_vector_p, PFI*,
 152                                vfs_opv_numops*sizeof(PFI), M_TEMP, M_WAITOK);
 153                         bzero (*opv_desc_vector_p, vfs_opv_numops*sizeof(PFI));
 154                         DODEBUG(printf("vector at %x allocated\n",
 155                             opv_desc_vector_p));
 156                 }
 157                 opv_desc_vector = *opv_desc_vector_p;
 158                 for (j=0; vfs_opv_descs[i]->opv_desc_ops[j].opve_op; j++) {
 159                         opve_descp = &(vfs_opv_descs[i]->opv_desc_ops[j]);
 160
 161                         /*
 162                          * Sanity check:  is this operation listed
 163                          * in the list of operations?  We check this
 164                          * by seeing if its offest is zero.  Since
 165                          * the default routine should always be listed
 166                          * first, it should be the only one with a zero
 167                          * offset.  Any other operation with a zero
 168                          * offset is probably not listed in
 169                          * vfs_op_descs, and so is probably an error.
 170                          *
 171                          * A panic here means the layer programmer
 172                          * has committed the all-too common bug
 173                          * of adding a new operation to the layer's
 174                          * list of vnode operations but
 175                          * not adding the operation to the system-wide
 176                          * list of supported operations.
 177                          */
 178                         if (opve_descp->opve_op->vdesc_offset == 0 &&
 179                                     opve_descp->opve_op->vdesc_offset !=
 180                                         VOFFSET(vnop_default)) {
 181                                 printf("operation %s not listed in %s.\n",
 182                                     opve_descp->opve_op->vdesc_name,
 183                                     "vfs_op_descs");
 184                                 panic ("vfs_opv_init: bad operation");
 185                         }
 186                         /*
 187                          * Fill in this entry.
 188                          */
 189                         opv_desc_vector[opve_descp->opve_op->vdesc_offset] =
 190                                         opve_descp->opve_impl;
 191                 }
 192         }
 193         /*
 194          * Finally, go back and replace unfilled routines
 195          * with their default.  (Sigh, an O(n^3) algorithm.  I
 196          * could make it better, but that'd be work, and n is small.)
 197          */
 198         for (i = 0; vfs_opv_descs[i]; i++) {
 199                 opv_desc_vector = *(vfs_opv_descs[i]->opv_desc_vector_p);
 200                 /*
 201                  * Force every operations vector to have a default routine.
 202                  */
 203                 if (opv_desc_vector[VOFFSET(vnop_default)]==NULL) {
 204                         panic("vfs_opv_init: operation vector without default routine.");
 205                 }
 206                 for (k = 0; k<vfs_opv_numops; k++)
 207                         if (opv_desc_vector[k] == NULL)
 208                                 opv_desc_vector[k] =
 209                                         opv_desc_vector[VOFFSET(vnop_default)];
 210         }
 211 }
 212
 213 /*
 214  * Initialize known vnode operations vectors.
 215  */
 216 void
 217 vfs_op_init()
 218 {
 219         int i;
 220
 221         DODEBUG(printf("Vnode_interface_init.\n"));
 222         /*
 223          * Set all vnode vectors to a well known value.
 224          */
 225         for (i = 0; vfs_opv_descs[i]; i++)
 226                 *(vfs_opv_descs[i]->opv_desc_vector_p) = NULL;
 227         /*
 228          * Figure out how many ops there are by counting the table,
 229          * and assign each its offset.
 230          */
 231         for (vfs_opv_numops = 0, i = 0; vfs_op_descs[i]; i++) {
 232                 vfs_op_descs[i]->vdesc_offset = vfs_opv_numops;
 233                 vfs_opv_numops++;
 234         }
 235         DODEBUG(printf ("vfs_opv_numops=%d\n", vfs_opv_numops));
 236 }
 237
 238 /*
 239  * Routines having to do with the management of the vnode table.
 240  */
 241 extern struct vnodeops dead_vnodeops;
 242 extern struct vnodeops spec_vnodeops;
 243
 244 /* vars for vnode lock */
 245 lck_grp_t * vnode_lck_grp;
 246 lck_grp_attr_t * vnode_lck_grp_attr;
 247 lck_attr_t * vnode_lck_attr;
 248
 249
 250 /* vars for vnode list lock */
 251 lck_grp_t * vnode_list_lck_grp;
 252 lck_grp_attr_t * vnode_list_lck_grp_attr;
 253 lck_attr_t * vnode_list_lck_attr;
 254 lck_mtx_t * vnode_list_mtx_lock;
 255 lck_mtx_t * spechash_mtx_lock;
 256 /* Routine to lock and unlock the  vnode lists */
 257 void vnode_list_lock(void);
 258 void vnode_list_unlock(void);
 259
 260 /* vars for vfsconf lock */
 261 lck_grp_t * fsconf_lck_grp;
 262 lck_grp_attr_t * fsconf_lck_grp_attr;
 263 lck_attr_t * fsconf_lck_attr;
 264
 265
 266 /* vars for mount lock */
 267 lck_grp_t * mnt_lck_grp;
 268 lck_grp_attr_t * mnt_lck_grp_attr;
 269 lck_attr_t * mnt_lck_attr;
 270
 271 /* vars for mount list lock */
 272 lck_grp_t * mnt_list_lck_grp;
 273 lck_grp_attr_t * mnt_list_lck_grp_attr;
 274 lck_attr_t * mnt_list_lck_attr;
 275 lck_mtx_t * mnt_list_mtx_lock;
 276
 277 extern void journal_init();
 278
 279 struct mount * dead_mountp;
 280 /*
 281  * Initialize the vnode structures and initialize each file system type.
 282  */
 283 void
 284 vfsinit()
 285 {
 286         struct vfstable *vfsp;
 287         int i, maxtypenum;
 288         struct mount * mp;
 289
 290         /* Allocate vnode list lock group attribute and group */
 291         vnode_list_lck_grp_attr = lck_grp_attr_alloc_init();
 292
 293         vnode_list_lck_grp = lck_grp_alloc_init("vnode list",  vnode_list_lck_grp_attr);
 294
 295         /* Allocate vnode list lock attribute */
 296         vnode_list_lck_attr = lck_attr_alloc_init();
 297
 298         /* Allocate vnode list lock */
 299         vnode_list_mtx_lock = lck_mtx_alloc_init(vnode_list_lck_grp, vnode_list_lck_attr);
 300
 301         /* Allocate spec hash list lock */
 302         spechash_mtx_lock = lck_mtx_alloc_init(vnode_list_lck_grp, vnode_list_lck_attr);
 303
 304         /* allocate vnode lock group attribute and group */
 305         vnode_lck_grp_attr= lck_grp_attr_alloc_init();
 306
 307         vnode_lck_grp = lck_grp_alloc_init("vnode",  vnode_lck_grp_attr);
 308
 309         /* Allocate vnode lock attribute */
 310         vnode_lck_attr = lck_attr_alloc_init();
 311
 312         /* Allocate fs config lock group attribute and group */
 313         fsconf_lck_grp_attr= lck_grp_attr_alloc_init();
 314
 315         fsconf_lck_grp = lck_grp_alloc_init("fs conf",  fsconf_lck_grp_attr);
 316
 317         /* Allocate fs config lock attribute */
 318         fsconf_lck_attr = lck_attr_alloc_init();
 319
 320         /* Allocate mount point related lock structures  */
 321
 322         /* Allocate mount list lock group attribute and group */
 323         mnt_list_lck_grp_attr= lck_grp_attr_alloc_init();
 324
 325         mnt_list_lck_grp = lck_grp_alloc_init("mount list",  mnt_list_lck_grp_attr);
 326
 327         /* Allocate mount list lock attribute */
 328         mnt_list_lck_attr = lck_attr_alloc_init();
 329
 330         /* Allocate mount list lock */
 331         mnt_list_mtx_lock = lck_mtx_alloc_init(mnt_list_lck_grp, mnt_list_lck_attr);
 332
 333
 334         /* allocate mount lock group attribute and group */
 335         mnt_lck_grp_attr= lck_grp_attr_alloc_init();
 336
 337         mnt_lck_grp = lck_grp_alloc_init("mount",  mnt_lck_grp_attr);
 338
 339         /* Allocate mount lock attribute */
 340         mnt_lck_attr = lck_attr_alloc_init();
 341
 342         /*
 343          * Initialize the "console user" for access purposes:
 344          */
 345         console_user = (uid_t)0;
 346
 347         /*
 348          * Initialize the vnode table
 349          */
 350         vntblinit();
 351         /*
 352          * Initialize the filesystem event mechanism.
 353          */
 354         vfs_event_init();
 355         /*
 356          * Initialize the vnode name cache
 357          */
 358         nchinit();
 359         /*
 360          * Initialize the journaling locks
 361          */
 362         journal_init();
 363         /*
 364          * Build vnode operation vectors.
 365          */
 366         vfs_op_init();
 367         vfs_opv_init();   /* finish the job */
 368         /*
 369          * Initialize each file system type in the static list,
 370          * until the first NULL ->vfs_vfsops is encountered.
 371          */
 372         numused_vfsslots = maxtypenum = 0;
 373         for (vfsp = vfsconf, i = 0; i < maxvfsconf; i++, vfsp++) {
 374                 if (vfsp->vfc_vfsops == (struct vfsops *)0)
 375                         break;
 376                 if (i) vfsconf[i-1].vfc_next = vfsp;
 377                 if (maxtypenum <= vfsp->vfc_typenum)
 378                         maxtypenum = vfsp->vfc_typenum + 1;
 379                 (*vfsp->vfc_vfsops->vfs_init)(vfsp);
 380
 381                 lck_mtx_init(&vfsp->vfc_lock, fsconf_lck_grp, fsconf_lck_attr);
 382
 383                 numused_vfsslots++;
 384         }
 385         /* next vfc_typenum to be used */
 386         maxvfsconf = maxtypenum;
 387
 388         /*
 389          * Initialize the vnop authorization scope.
 390          */
 391         vnode_authorize_init();
 392
 393         /*
 394          * create a mount point for dead vnodes
 395          */
 396         MALLOC_ZONE(mp, struct mount *, (u_long)sizeof(struct mount),
 397                 M_MOUNT, M_WAITOK);
 398         bzero((char *)mp, (u_long)sizeof(struct mount));
 399         /* Initialize the default IO constraints */
 400         mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS;
 401         mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32;
 402         mp->mnt_maxsegreadsize = mp->mnt_maxreadcnt;
 403         mp->mnt_maxsegwritesize = mp->mnt_maxwritecnt;
 404         mp->mnt_devblocksize = DEV_BSIZE;
 405
 406         TAILQ_INIT(&mp->mnt_vnodelist);
 407         TAILQ_INIT(&mp->mnt_workerqueue);
 408         TAILQ_INIT(&mp->mnt_newvnodes);
 409         mp->mnt_flag = MNT_LOCAL;
 410         mp->mnt_lflag = MNT_LDEAD;
 411         mount_lock_init(mp);
 412         dead_mountp = mp;
 413 }
 414
 415 void
 416 vnode_list_lock()
 417 {
 418         lck_mtx_lock(vnode_list_mtx_lock);
 419 }
 420
 421 void
 422 vnode_list_unlock()
 423 {
 424         lck_mtx_unlock(vnode_list_mtx_lock);
 425 }
 426
 427 void
 428 mount_list_lock()
 429 {
 430         lck_mtx_lock(mnt_list_mtx_lock);
 431 }
 432
 433 void
 434 mount_list_unlock()
 435 {
 436         lck_mtx_unlock(mnt_list_mtx_lock);
 437 }
 438
 439 void
 440 mount_lock_init(mount_t mp)
 441 {
 442         lck_mtx_init(&mp->mnt_mlock, mnt_lck_grp, mnt_lck_attr);
 443         lck_mtx_init(&mp->mnt_renamelock, mnt_lck_grp, mnt_lck_attr);
 444         lck_rw_init(&mp->mnt_rwlock, mnt_lck_grp, mnt_lck_attr);
 445 }
 446
 447 void
 448 mount_lock_destroy(mount_t mp)
 449 {
 450         lck_mtx_destroy(&mp->mnt_mlock, mnt_lck_grp);
 451         lck_mtx_destroy(&mp->mnt_renamelock, mnt_lck_grp);
 452         lck_rw_destroy(&mp->mnt_rwlock, mnt_lck_grp);
 453 }
 454
 455
 456 /*
 457  * Name:        vfstable_add
 458  *
 459  * Description: Add a filesystem to the vfsconf list at the first
 460  *              unused slot.  If no slots are available, return an
 461  *              error.
 462  *
 463  * Parameter:   nvfsp           vfsconf for VFS to add
 464  *
 465  * Returns:     0               Success
 466  *              -1              Failure
 467  *
 468  * Notes:       The vfsconf should be treated as a linked list by
 469  *              all external references, as the implementation is
 470  *              expected to change in the future.  The linkage is
 471  *              through ->vfc_next, and the list is NULL terminated.
 472  *
 473  * Warning:     This code assumes that vfsconf[0] is non-empty.
 474  */
 475 struct vfstable *
 476 vfstable_add(struct vfstable  *nvfsp)
 477 {
 478         int slot;
 479         struct vfstable *slotp;
 480
 481         /*
 482          * Find the next empty slot; we recognize an empty slot by a
 483          * NULL-valued ->vfc_vfsops, so if we delete a VFS, we must
 484          * ensure we set the entry back to NULL.
 485          */
 486         for (slot = 0; slot < maxvfsslots; slot++) {
 487                 if (vfsconf[slot].vfc_vfsops == NULL)
 488                         break;
 489         }
 490         if (slot == maxvfsslots) {
 491                 /* out of static slots; allocate one instead */
 492                 MALLOC(slotp, struct vfstable *, sizeof(struct vfstable),
 493                                                         M_TEMP, M_WAITOK);
 494         } else {
 495                 slotp = &vfsconf[slot];
 496         }
 497
 498         /*
 499          * Replace the contents of the next empty slot with the contents
 500          * of the provided nvfsp.
 501          *
 502          * Note; Takes advantage of the fact that 'slot' was left
 503          * with the value of 'maxvfslots' in the allocation case.
 504          */
 505         bcopy(nvfsp, slotp, sizeof(struct vfstable));
 506         lck_mtx_init(&slotp->vfc_lock, fsconf_lck_grp, fsconf_lck_attr);
 507         if (slot != 0) {
 508                 slotp->vfc_next = vfsconf[slot - 1].vfc_next;
 509                 vfsconf[slot - 1].vfc_next = slotp;
 510         } else {
 511                 slotp->vfc_next = NULL;
 512         }
 513         numused_vfsslots++;
 514
 515         return(slotp);
 516 }
 517
 518 /*
 519  * Name:        vfstable_del
 520  *
 521  * Description: Remove a filesystem from the vfsconf list by name.
 522  *              If no such filesystem exists, return an error.
 523  *
 524  * Parameter:   fs_name         name of VFS to remove
 525  *
 526  * Returns:     0               Success
 527  *              -1              Failure
 528  *
 529  * Notes:       Hopefully all filesystems have unique names.
 530  */
 531 int
 532 vfstable_del(struct vfstable  * vtbl)
 533 {
 534         struct vfstable **vcpp;
 535         struct vfstable *vcdelp;
 536
 537         /*
 538          * Traverse the list looking for vtbl; if found, *vcpp
 539          * will contain the address of the pointer to the entry to
 540          * be removed.
 541          */
 542         for( vcpp = &vfsconf; *vcpp; vcpp = &(*vcpp)->vfc_next) {
 543                 if (*vcpp == vtbl)
 544             break;
 545         }
 546
 547         if (*vcpp == NULL)
 548            return(ESRCH);       /* vtbl not on vfsconf list */
 549
 550         /* Unlink entry */
 551         vcdelp = *vcpp;
 552         *vcpp = (*vcpp)->vfc_next;
 553
 554         lck_mtx_destroy(&vcdelp->vfc_lock, fsconf_lck_grp);
 555
 556         /*
 557          * Is this an entry from our static table?  We find out by
 558          * seeing if the pointer to the object to be deleted places
 559          * the object in the address space containing the table (or not).
 560          */
 561         if (vcdelp >= vfsconf && vcdelp < (vfsconf + maxvfsslots)) {    /* Y */
 562                 /* Mark as empty for vfscon_add() */
 563                 bzero(vcdelp, sizeof(struct vfstable));
 564                 numused_vfsslots--;
 565         } else {                                                        /* N */
 566                 /*
 567                  * This entry was dynamically allocated; we must free it;
 568                  * we would prefer to have just linked the caller's
 569                  * vfsconf onto our list, but it may not be persistent
 570                  * because of the previous (copying) implementation.
 571                  */
 572                  FREE(vcdelp, M_TEMP);
 573         }
 574
 575         return(0);
 576 }
 577
 578 void
 579 SPECHASH_LOCK(void)
 580 {
 581         lck_mtx_lock(spechash_mtx_lock);
 582 }
 583
 584 void
 585 SPECHASH_UNLOCK(void)
 586 {
 587         lck_mtx_unlock(spechash_mtx_lock);
 588 }
 589