]> git.saurik.com Git - apple/xnu.git/blame_incremental - bsd/vfs/vfs_init.c
xnu-1456.1.26.tar.gz
[apple/xnu.git] / bsd / vfs / vfs_init.c
... / ...
CommitLineData
1/*
2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29/*
30 * Copyright (c) 1989, 1993
31 * The Regents of the University of California. All rights reserved.
32 *
33 * This code is derived from software contributed
34 * to Berkeley by John Heidemann of the UCLA Ficus project.
35 *
36 * Source: * @(#)i405_init.c 2.10 92/04/27 UCLA Ficus project
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 * notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in the
45 * documentation and/or other materials provided with the distribution.
46 * 3. All advertising materials mentioning features or use of this software
47 * must display the following acknowledgement:
48 * This product includes software developed by the University of
49 * California, Berkeley and its contributors.
50 * 4. Neither the name of the University nor the names of its contributors
51 * may be used to endorse or promote products derived from this software
52 * without specific prior written permission.
53 *
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE.
65 *
66 * @(#)vfs_init.c 8.5 (Berkeley) 5/11/95
67 */
68/*
69 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
70 * support for mandatory and extensible security protections. This notice
71 * is included in support of clause 2.2 (b) of the Apple Public License,
72 * Version 2.0.
73 */
74
75
76#include <sys/param.h>
77#include <sys/mount_internal.h>
78#include <sys/time.h>
79#include <sys/vm.h>
80#include <sys/vnode_internal.h>
81#include <sys/stat.h>
82#include <sys/namei.h>
83#include <sys/ucred.h>
84#include <sys/errno.h>
85#include <sys/malloc.h>
86
87#include <vfs/vfs_journal.h> /* journal_init() */
88#if CONFIG_MACF
89#include <security/mac_framework.h>
90#include <sys/kauth.h>
91#endif
92#if QUOTA
93#include <sys/quota.h>
94#endif
95
96/*
97 * Sigh, such primitive tools are these...
98 */
99#if 0
100#define DODEBUG(A) A
101#else
102#define DODEBUG(A)
103#endif
104
105__private_extern__ void vntblinit(void) __attribute__((section("__TEXT, initcode")));
106
107extern struct vnodeopv_desc *vfs_opv_descs[];
108 /* a list of lists of vnodeops defns */
109extern struct vnodeop_desc *vfs_op_descs[];
110 /* and the operations they perform */
111/*
112 * This code doesn't work if the defn is **vnodop_defns with cc.
113 * The problem is because of the compiler sometimes putting in an
114 * extra level of indirection for arrays. It's an interesting
115 * "feature" of C.
116 */
117int vfs_opv_numops;
118
119typedef int (*PFIvp)(void *);
120
/*
 * vn_default_error
 *
 * A miscellaneous routine: the generic "default" operation.  It does no
 * work and unconditionally reports that the operation is not supported.
 *
 * Returns:	ENOTSUP		Always
 */
int
vn_default_error(void)
{
	const int unsupported = ENOTSUP;

	return unsupported;
}
131
132/*
133 * vfs_init.c
134 *
135 * Allocate and fill in operations vectors.
136 *
137 * An undocumented feature of this approach to defining operations is that
138 * there can be multiple entries in vfs_opv_descs for the same operations
139 * vector. This allows third parties to extend the set of operations
140 * supported by another layer in a binary compatibile way. For example,
141 * assume that NFS needed to be modified to support Ficus. NFS has an entry
142 * (probably nfs_vnopdeop_decls) declaring all the operations NFS supports by
143 * default. Ficus could add another entry (ficus_nfs_vnodeop_decl_entensions)
144 * listing those new operations Ficus adds to NFS, all without modifying the
145 * NFS code. (Of couse, the OTW NFS protocol still needs to be munged, but
146 * that is a(whole)nother story.) This is a feature.
147 */
148void
149vfs_opv_init(void)
150{
151 int i, j, k;
152 int (***opv_desc_vector_p)(void *);
153 int (**opv_desc_vector)(void *);
154 struct vnodeopv_entry_desc *opve_descp;
155
156 /*
157 * Allocate the dynamic vectors and fill them in.
158 */
159 for (i=0; vfs_opv_descs[i]; i++) {
160 opv_desc_vector_p = vfs_opv_descs[i]->opv_desc_vector_p;
161 /*
162 * Allocate and init the vector, if it needs it.
163 * Also handle backwards compatibility.
164 */
165 if (*opv_desc_vector_p == NULL) {
166 MALLOC(*opv_desc_vector_p, PFIvp*,
167 vfs_opv_numops*sizeof(PFIvp), M_TEMP, M_WAITOK);
168 bzero (*opv_desc_vector_p, vfs_opv_numops*sizeof(PFIvp));
169 DODEBUG(printf("vector at %x allocated\n",
170 opv_desc_vector_p));
171 }
172 opv_desc_vector = *opv_desc_vector_p;
173 for (j=0; vfs_opv_descs[i]->opv_desc_ops[j].opve_op; j++) {
174 opve_descp = &(vfs_opv_descs[i]->opv_desc_ops[j]);
175
176 /*
177 * Sanity check: is this operation listed
178 * in the list of operations? We check this
179 * by seeing if its offest is zero. Since
180 * the default routine should always be listed
181 * first, it should be the only one with a zero
182 * offset. Any other operation with a zero
183 * offset is probably not listed in
184 * vfs_op_descs, and so is probably an error.
185 *
186 * A panic here means the layer programmer
187 * has committed the all-too common bug
188 * of adding a new operation to the layer's
189 * list of vnode operations but
190 * not adding the operation to the system-wide
191 * list of supported operations.
192 */
193 if (opve_descp->opve_op->vdesc_offset == 0 &&
194 opve_descp->opve_op->vdesc_offset !=
195 VOFFSET(vnop_default)) {
196 printf("operation %s not listed in %s.\n",
197 opve_descp->opve_op->vdesc_name,
198 "vfs_op_descs");
199 panic ("vfs_opv_init: bad operation");
200 }
201 /*
202 * Fill in this entry.
203 */
204 opv_desc_vector[opve_descp->opve_op->vdesc_offset] =
205 opve_descp->opve_impl;
206 }
207 }
208 /*
209 * Finally, go back and replace unfilled routines
210 * with their default. (Sigh, an O(n^3) algorithm. I
211 * could make it better, but that'd be work, and n is small.)
212 */
213 for (i = 0; vfs_opv_descs[i]; i++) {
214 opv_desc_vector = *(vfs_opv_descs[i]->opv_desc_vector_p);
215 /*
216 * Force every operations vector to have a default routine.
217 */
218 if (opv_desc_vector[VOFFSET(vnop_default)]==NULL) {
219 panic("vfs_opv_init: operation vector without default routine.");
220 }
221 for (k = 0; k<vfs_opv_numops; k++)
222 if (opv_desc_vector[k] == NULL)
223 opv_desc_vector[k] =
224 opv_desc_vector[VOFFSET(vnop_default)];
225 }
226}
227
228/*
229 * Initialize known vnode operations vectors.
230 */
231void
232vfs_op_init(void)
233{
234 int i;
235
236 DODEBUG(printf("Vnode_interface_init.\n"));
237 /*
238 * Set all vnode vectors to a well known value.
239 */
240 for (i = 0; vfs_opv_descs[i]; i++)
241 *(vfs_opv_descs[i]->opv_desc_vector_p) = NULL;
242 /*
243 * Figure out how many ops there are by counting the table,
244 * and assign each its offset.
245 */
246 for (vfs_opv_numops = 0, i = 0; vfs_op_descs[i]; i++) {
247 vfs_op_descs[i]->vdesc_offset = vfs_opv_numops;
248 vfs_opv_numops++;
249 }
250 DODEBUG(printf ("vfs_opv_numops=%d\n", vfs_opv_numops));
251}
252
253/*
254 * Routines having to do with the management of the vnode table.
255 */
256extern struct vnodeops dead_vnodeops;
257extern struct vnodeops spec_vnodeops;
258
259/* vars for vnode lock */
260lck_grp_t * vnode_lck_grp;
261lck_grp_attr_t * vnode_lck_grp_attr;
262lck_attr_t * vnode_lck_attr;
263
264
265/* vars for vnode list lock */
266lck_grp_t * vnode_list_lck_grp;
267lck_grp_attr_t * vnode_list_lck_grp_attr;
268lck_attr_t * vnode_list_lck_attr;
269lck_spin_t * vnode_list_spin_lock;
270lck_mtx_t * spechash_mtx_lock;
271
272/* vars for vfsconf lock */
273lck_grp_t * fsconf_lck_grp;
274lck_grp_attr_t * fsconf_lck_grp_attr;
275lck_attr_t * fsconf_lck_attr;
276
277
278/* vars for mount lock */
279lck_grp_t * mnt_lck_grp;
280lck_grp_attr_t * mnt_lck_grp_attr;
281lck_attr_t * mnt_lck_attr;
282
283/* vars for mount list lock */
284lck_grp_t * mnt_list_lck_grp;
285lck_grp_attr_t * mnt_list_lck_grp_attr;
286lck_attr_t * mnt_list_lck_attr;
287lck_mtx_t * mnt_list_mtx_lock;
288
289lck_mtx_t *pkg_extensions_lck;
290
291struct mount * dead_mountp;
292/*
293 * Initialize the vnode structures and initialize each file system type.
294 */
295void
296vfsinit(void)
297{
298 struct vfstable *vfsp;
299 int i, maxtypenum;
300 struct mount * mp;
301
302 /* Allocate vnode list lock group attribute and group */
303 vnode_list_lck_grp_attr = lck_grp_attr_alloc_init();
304
305 vnode_list_lck_grp = lck_grp_alloc_init("vnode list", vnode_list_lck_grp_attr);
306
307 /* Allocate vnode list lock attribute */
308 vnode_list_lck_attr = lck_attr_alloc_init();
309
310 /* Allocate vnode list lock */
311 vnode_list_spin_lock = lck_spin_alloc_init(vnode_list_lck_grp, vnode_list_lck_attr);
312
313 /* Allocate spec hash list lock */
314 spechash_mtx_lock = lck_mtx_alloc_init(vnode_list_lck_grp, vnode_list_lck_attr);
315
316 /* Allocate the package extensions table lock */
317 pkg_extensions_lck = lck_mtx_alloc_init(vnode_list_lck_grp, vnode_list_lck_attr);
318
319 /* allocate vnode lock group attribute and group */
320 vnode_lck_grp_attr= lck_grp_attr_alloc_init();
321
322 vnode_lck_grp = lck_grp_alloc_init("vnode", vnode_lck_grp_attr);
323
324 /* Allocate vnode lock attribute */
325 vnode_lck_attr = lck_attr_alloc_init();
326
327 /* Allocate fs config lock group attribute and group */
328 fsconf_lck_grp_attr= lck_grp_attr_alloc_init();
329
330 fsconf_lck_grp = lck_grp_alloc_init("fs conf", fsconf_lck_grp_attr);
331
332 /* Allocate fs config lock attribute */
333 fsconf_lck_attr = lck_attr_alloc_init();
334
335 /* Allocate mount point related lock structures */
336
337 /* Allocate mount list lock group attribute and group */
338 mnt_list_lck_grp_attr= lck_grp_attr_alloc_init();
339
340 mnt_list_lck_grp = lck_grp_alloc_init("mount list", mnt_list_lck_grp_attr);
341
342 /* Allocate mount list lock attribute */
343 mnt_list_lck_attr = lck_attr_alloc_init();
344
345 /* Allocate mount list lock */
346 mnt_list_mtx_lock = lck_mtx_alloc_init(mnt_list_lck_grp, mnt_list_lck_attr);
347
348
349 /* allocate mount lock group attribute and group */
350 mnt_lck_grp_attr= lck_grp_attr_alloc_init();
351
352 mnt_lck_grp = lck_grp_alloc_init("mount", mnt_lck_grp_attr);
353
354 /* Allocate mount lock attribute */
355 mnt_lck_attr = lck_attr_alloc_init();
356
357 /*
358 * Initialize the vnode table
359 */
360 vntblinit();
361 /*
362 * Initialize the filesystem event mechanism.
363 */
364 vfs_event_init();
365 /*
366 * Initialize the vnode name cache
367 */
368 nchinit();
369
370#if JOURNALING
371 /*
372 * Initialize the journaling locks
373 */
374 journal_init();
375#endif
376
377 /*
378 * Build vnode operation vectors.
379 */
380 vfs_op_init();
381 vfs_opv_init(); /* finish the job */
382 /*
383 * Initialize each file system type in the static list,
384 * until the first NULL ->vfs_vfsops is encountered.
385 */
386 numused_vfsslots = maxtypenum = 0;
387 for (vfsp = vfsconf, i = 0; i < maxvfsslots; i++, vfsp++) {
388 struct vfsconf vfsc;
389 if (vfsp->vfc_vfsops == (struct vfsops *)0)
390 break;
391 if (i) vfsconf[i-1].vfc_next = vfsp;
392 if (maxtypenum <= vfsp->vfc_typenum)
393 maxtypenum = vfsp->vfc_typenum + 1;
394
395 bzero(&vfsc, sizeof(struct vfsconf));
396 vfsc.vfc_reserved1 = 0;
397 bcopy(vfsp->vfc_name, vfsc.vfc_name, sizeof(vfsc.vfc_name));
398 vfsc.vfc_typenum = vfsp->vfc_typenum;
399 vfsc.vfc_refcount = vfsp->vfc_refcount;
400 vfsc.vfc_flags = vfsp->vfc_flags;
401 vfsc.vfc_reserved2 = 0;
402 vfsc.vfc_reserved3 = 0;
403
404 (*vfsp->vfc_vfsops->vfs_init)(&vfsc);
405
406 numused_vfsslots++;
407 }
408 /* next vfc_typenum to be used */
409 maxvfsconf = maxtypenum;
410
411 /*
412 * Initialize the vnop authorization scope.
413 */
414 vnode_authorize_init();
415
416 /*
417 * Initialiize the quota system.
418 */
419#if QUOTA
420 dqinit();
421#endif
422
423 /*
424 * create a mount point for dead vnodes
425 */
426 MALLOC_ZONE(mp, struct mount *, sizeof(struct mount),
427 M_MOUNT, M_WAITOK);
428 bzero((char *)mp, sizeof(struct mount));
429 /* Initialize the default IO constraints */
430 mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS;
431 mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32;
432 mp->mnt_maxsegreadsize = mp->mnt_maxreadcnt;
433 mp->mnt_maxsegwritesize = mp->mnt_maxwritecnt;
434 mp->mnt_devblocksize = DEV_BSIZE;
435 mp->mnt_alignmentmask = PAGE_MASK;
436 mp->mnt_ioqueue_depth = MNT_DEFAULT_IOQUEUE_DEPTH;
437 mp->mnt_ioscale = 1;
438 mp->mnt_ioflags = 0;
439 mp->mnt_realrootvp = NULLVP;
440 mp->mnt_authcache_ttl = CACHED_LOOKUP_RIGHT_TTL;
441
442 TAILQ_INIT(&mp->mnt_vnodelist);
443 TAILQ_INIT(&mp->mnt_workerqueue);
444 TAILQ_INIT(&mp->mnt_newvnodes);
445 mp->mnt_flag = MNT_LOCAL;
446 mp->mnt_lflag = MNT_LDEAD;
447 mount_lock_init(mp);
448
449#if CONFIG_MACF
450 mac_mount_label_init(mp);
451 mac_mount_label_associate(vfs_context_kernel(), mp);
452#endif
453 dead_mountp = mp;
454}
455
456void
457vnode_list_lock(void)
458{
459 lck_spin_lock(vnode_list_spin_lock);
460}
461
462void
463vnode_list_unlock(void)
464{
465 lck_spin_unlock(vnode_list_spin_lock);
466}
467
468void
469mount_list_lock(void)
470{
471 lck_mtx_lock(mnt_list_mtx_lock);
472}
473
474void
475mount_list_unlock(void)
476{
477 lck_mtx_unlock(mnt_list_mtx_lock);
478}
479
480void
481mount_lock_init(mount_t mp)
482{
483 lck_mtx_init(&mp->mnt_mlock, mnt_lck_grp, mnt_lck_attr);
484 lck_mtx_init(&mp->mnt_renamelock, mnt_lck_grp, mnt_lck_attr);
485 lck_rw_init(&mp->mnt_rwlock, mnt_lck_grp, mnt_lck_attr);
486}
487
488void
489mount_lock_destroy(mount_t mp)
490{
491 lck_mtx_destroy(&mp->mnt_mlock, mnt_lck_grp);
492 lck_mtx_destroy(&mp->mnt_renamelock, mnt_lck_grp);
493 lck_rw_destroy(&mp->mnt_rwlock, mnt_lck_grp);
494}
495
496
497/*
498 * Name: vfstable_add
499 *
500 * Description: Add a filesystem to the vfsconf list at the first
501 * unused slot. If no slots are available, return an
502 * error.
503 *
504 * Parameter: nvfsp vfsconf for VFS to add
505 *
506 * Returns: 0 Success
507 * -1 Failure
508 *
509 * Notes: The vfsconf should be treated as a linked list by
510 * all external references, as the implementation is
511 * expected to change in the future. The linkage is
512 * through ->vfc_next, and the list is NULL terminated.
513 *
514 * Warning: This code assumes that vfsconf[0] is non-empty.
515 */
516struct vfstable *
517vfstable_add(struct vfstable *nvfsp)
518{
519 int slot;
520 struct vfstable *slotp, *allocated = NULL;
521
522 /*
523 * Find the next empty slot; we recognize an empty slot by a
524 * NULL-valued ->vfc_vfsops, so if we delete a VFS, we must
525 * ensure we set the entry back to NULL.
526 */
527findslot:
528 mount_list_lock();
529 for (slot = 0; slot < maxvfsslots; slot++) {
530 if (vfsconf[slot].vfc_vfsops == NULL)
531 break;
532 }
533 if (slot == maxvfsslots) {
534 if (allocated == NULL) {
535 mount_list_unlock();
536 /* out of static slots; allocate one instead */
537 MALLOC(allocated, struct vfstable *, sizeof(struct vfstable),
538 M_TEMP, M_WAITOK);
539 goto findslot;
540 } else {
541 slotp = allocated;
542 allocated = NULL;
543 }
544 } else {
545 slotp = &vfsconf[slot];
546 }
547
548 /*
549 * Replace the contents of the next empty slot with the contents
550 * of the provided nvfsp.
551 *
552 * Note; Takes advantage of the fact that 'slot' was left
553 * with the value of 'maxvfslots' in the allocation case.
554 */
555 bcopy(nvfsp, slotp, sizeof(struct vfstable));
556 if (slot != 0) {
557 slotp->vfc_next = vfsconf[slot - 1].vfc_next;
558 vfsconf[slot - 1].vfc_next = slotp;
559 } else {
560 slotp->vfc_next = NULL;
561 }
562 numused_vfsslots++;
563
564 mount_list_unlock();
565
566 if (allocated != NULL) {
567 FREE(allocated, M_TEMP);
568 }
569
570 return(slotp);
571}
572
573/*
574 * Name: vfstable_del
575 *
576 * Description: Remove a filesystem from the vfsconf list by name.
577 * If no such filesystem exists, return an error.
578 *
579 * Parameter: fs_name name of VFS to remove
580 *
581 * Returns: 0 Success
582 * -1 Failure
583 *
584 * Notes: Hopefully all filesystems have unique names.
585 */
586int
587vfstable_del(struct vfstable * vtbl)
588{
589 struct vfstable **vcpp;
590 struct vfstable *vcdelp;
591
592#if DEBUG
593 lck_mtx_assert(mnt_list_mtx_lock, LCK_MTX_ASSERT_OWNED);
594#endif /* DEBUG */
595
596 /*
597 * Traverse the list looking for vtbl; if found, *vcpp
598 * will contain the address of the pointer to the entry to
599 * be removed.
600 */
601 for( vcpp = &vfsconf; *vcpp; vcpp = &(*vcpp)->vfc_next) {
602 if (*vcpp == vtbl)
603 break;
604 }
605
606 if (*vcpp == NULL)
607 return(ESRCH); /* vtbl not on vfsconf list */
608
609 /* Unlink entry */
610 vcdelp = *vcpp;
611 *vcpp = (*vcpp)->vfc_next;
612
613 /*
614 * Is this an entry from our static table? We find out by
615 * seeing if the pointer to the object to be deleted places
616 * the object in the address space containing the table (or not).
617 */
618 if (vcdelp >= vfsconf && vcdelp < (vfsconf + maxvfsslots)) { /* Y */
619 /* Mark as empty for vfscon_add() */
620 bzero(vcdelp, sizeof(struct vfstable));
621 numused_vfsslots--;
622 } else { /* N */
623 /*
624 * This entry was dynamically allocated; we must free it;
625 * we would prefer to have just linked the caller's
626 * vfsconf onto our list, but it may not be persistent
627 * because of the previous (copying) implementation.
628 */
629 mount_list_unlock();
630 FREE(vcdelp, M_TEMP);
631 mount_list_lock();
632 }
633
634#if DEBUG
635 lck_mtx_assert(mnt_list_mtx_lock, LCK_MTX_ASSERT_OWNED);
636#endif /* DEBUG */
637
638 return(0);
639}
640
641void
642SPECHASH_LOCK(void)
643{
644 lck_mtx_lock(spechash_mtx_lock);
645}
646
647void
648SPECHASH_UNLOCK(void)
649{
650 lck_mtx_unlock(spechash_mtx_lock);
651}
652