bsd/miscfs/nullfs/null_vnops.c

   1 /*
   2  * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
   3  *
   4  * @APPLE_LICENSE_OSREFERENCE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License.  The rights granted to you under the
  10  * License may not be used to create, or enable the creation or
  11  * redistribution of, unlawful or unlicensed copies of an Apple operating
  12  * system, or to circumvent, violate, or enable the circumvention or
  13  * violation of, any terms of an Apple operating system software license
  14  * agreement.
  15  *
  16  * Please obtain a copy of the License at
  17  * http://www.opensource.apple.com/apsl/ and read it before using this
  18  * file.
  19  *
  20  * The Original Code and all software distributed under the License are
  21  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  22  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  23  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  24  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  25  * Please see the License for the specific language governing rights and
  26  * limitations under the License.
  27  *
  28  * @APPLE_LICENSE_OSREFERENCE_HEADER_END@
  29  */
  30 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
  31 /*
  32  * Copyright (c) 1992, 1993
  33  *      The Regents of the University of California.  All rights reserved.
  34  *
  35  * This code is derived from software contributed to Berkeley by
  36  * John Heidemann of the UCLA Ficus project.
  37  *
  38  * Redistribution and use in source and binary forms, with or without
  39  * modification, are permitted provided that the following conditions
  40  * are met:
  41  * 1. Redistributions of source code must retain the above copyright
  42  *    notice, this list of conditions and the following disclaimer.
  43  * 2. Redistributions in binary form must reproduce the above copyright
  44  *    notice, this list of conditions and the following disclaimer in the
  45  *    documentation and/or other materials provided with the distribution.
  46  * 3. All advertising materials mentioning features or use of this software
  47  *    must display the following acknowledgement:
  48  *      This product includes software developed by the University of
  49  *      California, Berkeley and its contributors.
  50  * 4. Neither the name of the University nor the names of its contributors
  51  *    may be used to endorse or promote products derived from this software
  52  *    without specific prior written permission.
  53  *
  54  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  55  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  56  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  57  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  58  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  59  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  60  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  61  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  62  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  63  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  64  * SUCH DAMAGE.
  65  *
  66  *      @(#)null_vnops.c        8.6 (Berkeley) 5/27/95
  67  *
  68  * Ancestors:
  69  *      @(#)lofs_vnops.c        1.2 (Berkeley) 6/18/92
  70  *      ...and...
  71  *      @(#)null_vnodeops.c 1.20 92/07/07 UCLA Ficus project
  72  */
  73
  74 /*
  75  * Null Layer
  76  *
  77  * (See mount_null(8) for more information.)
  78  *
  79  * The null layer duplicates a portion of the file system
  80  * name space under a new name.  In this respect, it is
  81  * similar to the loopback file system.  It differs from
  82  * the loopback fs in two respects:  it is implemented using
  83  * a stackable layers technique, and its "null-node"s stack above
  84  * all lower-layer vnodes, not just over directory vnodes.
  85  *
  86  * The null layer has two purposes.  First, it serves as a demonstration
  87  * of layering by providing a layer which does nothing.  (It actually
  88  * does everything the loopback file system does, which is slightly
  89  * more than nothing.)  Second, the null layer can serve as a prototype
  90  * layer.  Since it provides all necessary layer framework,
  91  * new file system layers can be created very easily by starting
  92  * with a null layer.
  93  *
  94  * The remainder of this man page examines the null layer as a basis
  95  * for constructing new layers.
  96  *
  97  *
  98  * INSTANTIATING NEW NULL LAYERS
  99  *
 100  * New null layers are created with mount_null(8).
 101  * Mount_null(8) takes two arguments, the pathname
 102  * of the lower vfs (target-pn) and the pathname where the null
 103  * layer will appear in the namespace (alias-pn).  After
 104  * the null layer is put into place, the contents
 105  * of target-pn subtree will be aliased under alias-pn.
 106  *
 107  *
 108  * OPERATION OF A NULL LAYER
 109  *
 110  * The null layer is the minimum file system layer,
 111  * simply bypassing all possible operations to the lower layer
 112  * for processing there.  The majority of its activity centers
 113  * on the bypass routine, through which nearly all vnode operations
 114  * pass.
 115  *
 116  * The bypass routine accepts arbitrary vnode operations for
 117  * handling by the lower layer.  It begins by examining vnode
 118  * operation arguments and replacing any null-nodes by their
 119  * lower-layer equivalents.  It then invokes the operation
 120  * on the lower layer.  Finally, it replaces the null-nodes
 121  * in the arguments and, if a vnode is returned by the operation,
 122  * stacks a null-node on top of the returned vnode.
 123  *
 124  * Although bypass handles most operations, vnop_getattr, vnop_lock,
 125  * vnop_unlock, vnop_inactive, vnop_reclaim, and vnop_print are not
 126  * bypassed. Vop_getattr must change the fsid being returned.
 127  * Vop_lock and vnop_unlock must handle any locking for the
 128  * current vnode as well as pass the lock request down.
 129  * Vop_inactive and vnop_reclaim are not bypassed so that
 130  * they can handle freeing null-layer specific data. Vop_print
 131  * is not bypassed to avoid excessive debugging information.
 132  * Also, certain vnode operations change the locking state within
 133  * the operation (create, mknod, remove, link, rename, mkdir, rmdir,
 134  * and symlink). Ideally these operations should not change the
 135  * lock state, but should be changed to let the caller of the
 136  * function unlock them. Otherwise all intermediate vnode layers
 137  * (such as union, umapfs, etc) must catch these functions to do
 138  * the necessary locking at their layer.
 139  *
 140  *
 141  * INSTANTIATING VNODE STACKS
 142  *
 143  * Mounting associates the null layer with a lower layer,
 144  * effectively stacking two VFSes.  Vnode stacks are instead
 145  * created on demand as files are accessed.
 146  *
 147  * The initial mount creates a single vnode stack for the
 148  * root of the new null layer.  All other vnode stacks
 149  * are created as a result of vnode operations on
 150  * this or other null vnode stacks.
 151  *
 152  * New vnode stacks come into existence as a result of
 153  * an operation which returns a vnode.
 154  * The bypass routine stacks a null-node above the new
 155  * vnode before returning it to the caller.
 156  *
 157  * For example, imagine mounting a null layer with
 158  * "mount_null /usr/include /dev/layer/null".
 159  * Changing directory to /dev/layer/null will assign
 160  * the root null-node (which was created when the null layer was mounted).
 161  * Now consider opening "sys".  A vnop_lookup would be
 162  * done on the root null-node.  This operation would bypass through
 163  * to the lower layer which would return a vnode representing
 164  * the UFS "sys".  Null_bypass then builds a null-node
 165  * aliasing the UFS "sys" and returns this to the caller.
 166  * Later operations on the null-node "sys" will repeat this
 167  * process when constructing other vnode stacks.
 168  *
 169  *
 170  * CREATING OTHER FILE SYSTEM LAYERS
 171  *
 172  * One of the easiest ways to construct new file system layers is to make
 173  * a copy of the null layer, rename all files and variables, and
 174  * then begin modifying the copy.  Sed can be used to easily rename
 175  * all variables.
 176  *
 177  * The umap layer is an example of a layer descended from the
 178  * null layer.
 179  *
 180  *
 181  * INVOKING OPERATIONS ON LOWER LAYERS
 182  *
 183  * There are two techniques to invoke operations on a lower layer
 184  * when the operation cannot be completely bypassed.  Each method
 185  * is appropriate in different situations.  In both cases,
 186  * it is the responsibility of the aliasing layer to make
 187  * the operation arguments "correct" for the lower layer
 188  * by mapping any vnode arguments to the lower layer.
 189  *
 190  * The first approach is to call the aliasing layer's bypass routine.
 191  * This method is most suitable when you wish to invoke the operation
 192  * currently being handled on the lower layer.  It has the advantage
 193  * that the bypass routine already must do argument mapping.
 194  * An example of this is null_getattr in the null layer.
 195  *
 196  * A second approach is to directly invoke vnode operations on
 197  * the lower layer with the VOP_OPERATIONNAME interface.
 198  * The advantage of this method is that it is easy to invoke
 199  * arbitrary operations on the lower layer.  The disadvantage
 200  * is that vnode arguments must be manually mapped.
 201  *
 202  */
 203
 204 #include <sys/param.h>
 205 #include <sys/systm.h>
 206 #include <sys/proc.h>
 207 #include <sys/kauth.h>
 208 #include <sys/time.h>
 209 #include <sys/types.h>
 210 #include <sys/vnode.h>
 211 #include <sys/mount_internal.h>
 212 #include <sys/namei.h>
 213 #include <sys/malloc.h>
 214 #include <sys/buf.h>
 215 #include <miscfs/nullfs/null.h>
 216
 217
 218 int null_bug_bypass = 0;   /* for debugging: enables bypass printf'ing */
 219
 220 /*
 221  * This is the 10-Apr-92 bypass routine.
 222  *    This version has been optimized for speed, throwing away some
 223  * safety checks.  It should still always work, but it's not as
 224  * robust to programmer errors.
 225  *    Define SAFETY to include some error checking code.
 226  *
 227  * In general, we map all vnodes going down and unmap them on the way back.
 228  * As an exception to this, vnodes can be marked "unmapped" by setting
 229  * the Nth bit in operation's vdesc_flags.
 230  *
 231  * Also, some BSD vnode operations have the side effect of node_put'ing
 232  * their arguments.  With stacking, the reference counts are held
 233  * by the upper node, not the lower one, so we must handle these
 234  * side-effects here.  This is not of concern in Sun-derived systems
 235  * since there are no such side-effects.
 236  *
 237  * This makes the following assumptions:
 238  * - only one returned vpp
 239  * - no INOUT vpp's (Sun's vnop_open has one of these)
 240  * - the vnode operation vector of the first vnode should be used
 241  *   to determine what implementation of the op should be invoked
 242  * - all mapped vnodes are of our vnode-type (NEEDSWORK:
 243  *   problems on rmdir'ing mount points and renaming?)
 244  */
 245 int
 246 null_bypass(ap)
 247         struct vnop_generic_args /* {
 248                 struct vnodeop_desc *a_desc;
 249                 <other random data follows, presumably>
 250         } */ *ap;
 251 {
 252         extern int (**null_vnodeop_p)(void *);  /* not extern, really "forward" */
 253         register struct vnode **this_vp_p;
 254         int error;
 255         struct vnode *old_vps[VDESC_MAX_VPS];
 256         struct vnode **vps_p[VDESC_MAX_VPS];
 257         struct vnode ***vppp;
 258         struct vnodeop_desc *descp = ap->a_desc;
 259         int reles, i;
 260
 261         if (null_bug_bypass)
 262                 printf ("null_bypass: %s\n", descp->vdesc_name);
 263
 264 #ifdef SAFETY
 265         /*
 266          * We require at least one vp.
 267          */
 268         if (descp->vdesc_vp_offsets == NULL ||
 269             descp->vdesc_vp_offsets[0] == VDESC_NO_OFFSET)
 270                 panic ("null_bypass: no vp's in map.\n");
 271 #endif
 272
 273         /*
 274          * Map the vnodes going in.
 275          * Later, we'll invoke the operation based on
 276          * the first mapped vnode's operation vector.
 277          */
 278         reles = descp->vdesc_flags;
 279         for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
 280                 if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
 281                         break;   /* bail out at end of list */
 282                 vps_p[i] = this_vp_p =
 283                         VOPARG_OFFSETTO(struct vnode**,descp->vdesc_vp_offsets[i],ap);
 284                 /*
 285                  * We're not guaranteed that any but the first vnode
 286                  * are of our type.  Check for and don't map any
 287                  * that aren't.  (We must always map first vp or vclean fails.)
 288                  */
 289                 if (i && (*this_vp_p == NULL ||
 290                     (*this_vp_p)->v_op != null_vnodeop_p)) {
 291                         old_vps[i] = NULL;
 292                 } else {
 293                         old_vps[i] = *this_vp_p;
 294                         *(vps_p[i]) = NULLVPTOLOWERVP(*this_vp_p);
 295                         /*
 296                          * XXX - Several operations have the side effect
 297                          * of vnode_put'ing their vp's.  We must account for
 298                          * that.  (This should go away in the future.)
 299                          */
 300                         if (reles & 1)
 301                                 vnode_get(*this_vp_p);
 302                 }
 303
 304         }
 305
 306         /*
 307          * Call the operation on the lower layer
 308          * with the modified argument structure.
 309          */
 310         error = VCALL(*(vps_p[0]), descp->vdesc_offset, ap);
 311
 312         /*
 313          * Maintain the illusion of call-by-value
 314          * by restoring vnodes in the argument structure
 315          * to their original value.
 316          */
 317         reles = descp->vdesc_flags;
 318         for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
 319                 if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
 320                         break;   /* bail out at end of list */
 321                 if (old_vps[i]) {
 322                         *(vps_p[i]) = old_vps[i];
 323                         if (reles & 1)
 324                                 vnode_put(*(vps_p[i]));
 325                 }
 326         }
 327
 328         /*
 329          * Map the possible out-going vpp
 330          * (Assumes that the lower layer always returns
 331          * a vnode_get'ed vpp unless it gets an error.)
 332          */
 333         if (descp->vdesc_vpp_offset != VDESC_NO_OFFSET &&
 334             !(descp->vdesc_flags & VDESC_NOMAP_VPP) &&
 335             !error) {
 336                 /*
 337                  * XXX - even though some ops have vpp returned vp's,
 338                  * several ops actually vnode_put this before returning.
 339                  * We must avoid these ops.
 340                  * (This should go away when these ops are regularized.)
 341                  */
 342                 if (descp->vdesc_flags & VDESC_VPP_WILLRELE)
 343                         goto out;
 344                 vppp = VOPARG_OFFSETTO(struct vnode***,
 345                                  descp->vdesc_vpp_offset,ap);
 346                 error = null_node_create(old_vps[0]->v_mount, **vppp, *vppp);
 347         }
 348
 349  out:
 350         return (error);
 351 }
 352
 353 /*
 354  * We have to carry on the locking protocol on the null layer vnodes
 355  * as we progress through the tree. We also have to enforce read-only
 356  * if this layer is mounted read-only.
 357  */
 358 null_lookup(ap)
 359         struct vnop_lookup_args /* {
 360                 struct vnode * a_dvp;
 361                 struct vnode ** a_vpp;
 362                 struct componentname * a_cnp;
 363                 vfs_context_t a_context;
 364         } */ *ap;
 365 {
 366         struct componentname *cnp = ap->a_cnp;
 367         struct proc *p = cnp->cn_proc;
 368         int flags = cnp->cn_flags;
 369         struct vnode *dvp, *vp;
 370         int error;
 371
 372         error = null_bypass(ap);
 373
 374         /*
 375          * We must do the same locking and unlocking at this layer as
 376          * is done in the layers below us. We could figure this out
 377          * based on the error return and the LASTCN, LOCKPARENT, and
 378          * LOCKLEAF flags. However, it is more expidient to just find
 379          * out the state of the lower level vnodes and set ours to the
 380          * same state.
 381          */
 382         dvp = ap->a_dvp;
 383         vp = *ap->a_vpp;
 384         if (dvp == vp)
 385                 return (error);
 386         return (error);
 387 }
 388
 389 /*
 390  * Setattr call.
 391  */
 392 int
 393 null_setattr(
 394         struct vnop_setattr_args /* {
 395                 struct vnodeop_desc *a_desc;
 396                 struct vnode *a_vp;
 397                 struct vnode_attr *a_vap;
 398                 kauth_cred_t a_cred;
 399                 struct proc *a_p;
 400         } */ *ap)
 401 {
 402         struct vnode *vp = ap->a_vp;
 403         struct vnode_attr *vap = ap->a_vap;
 404
 405         if (VATTR_IS_ACTIVE(vap, va_data_size)) {
 406                 switch (vp->v_type) {
 407                 case VDIR:
 408                         return (EISDIR);
 409                 case VCHR:
 410                 case VBLK:
 411                 case VSOCK:
 412                 case VFIFO:
 413                         return (0);
 414                 case VREG:
 415                 case VLNK:
 416                 default:
 417                 }
 418         }
 419         return (null_bypass(ap));
 420 }
 421
 422 /*
 423  *  We handle getattr only to change the fsid.
 424  */
 425 int
 426 null_getattr(ap)
 427         struct vnop_getattr_args /* {
 428                 struct vnode *a_vp;
 429                 struct vnode_attr *a_vap;
 430                 vfs_context_t a_context;
 431         } */ *ap;
 432 {
 433         int error;
 434
 435         if (error = null_bypass(ap))
 436                 return (error);
 437         /* Requires that arguments be restored. */
 438         VATTR_RETURN(ap->a_vap, va_fsid, ap->a_vp->v_mount->mnt_vfsstat.f_fsid.val[0]);
 439         return (0);
 440 }
 441
 442 int
 443 null_access(ap)
 444         struct vnop_access_args /* {
 445                 struct vnode *a_vp;
 446                 int  a_action;
 447                 vfs_context_t a_context;
 448         } */ *ap;
 449 {
 450         return (null_bypass(ap));
 451 }
 452
 453 int
 454 null_inactive(ap)
 455         struct vnop_inactive_args /* {
 456                 struct vnode *a_vp;
 457                 vfs_context_t a_context;
 458         } */ *ap;
 459 {
 460         /*
 461          * Do nothing (and _don't_ bypass).
 462          * Wait to vnode_put lowervp until reclaim,
 463          * so that until then our null_node is in the
 464          * cache and reusable.
 465          *
 466          * NEEDSWORK: Someday, consider inactive'ing
 467          * the lowervp and then trying to reactivate it
 468          * with capabilities (v_id)
 469          * like they do in the name lookup cache code.
 470          * That's too much work for now.
 471          */
 472         return (0);
 473 }
 474
 475 int
 476 null_reclaim(ap)
 477         struct vnop_reclaim_args /* {
 478                 struct vnode *a_vp;
 479                 vfs_context_t a_context;
 480         } */ *ap;
 481 {
 482         struct vnode *vp = ap->a_vp;
 483         struct null_node *xp = VTONULL(vp);
 484         struct vnode *lowervp = xp->null_lowervp;
 485
 486         /*
 487          * Note: in vnop_reclaim, vp->v_op == dead_vnodeop_p,
 488          * so we can't call VOPs on ourself.
 489          */
 490         /* After this assignment, this node will not be re-used. */
 491         xp->null_lowervp = NULL;
 492         LIST_REMOVE(xp, null_hash);
 493         FREE(vp->v_data, M_TEMP);
 494         vp->v_data = NULL;
 495         vnode_put (lowervp);
 496         return (0);
 497 }
 498
 499 /*
 500  * XXX - vnop_strategy must be hand coded because it has no
 501  * vnode in its arguments.
 502  * This goes away with a merged VM/buffer cache.
 503  */
 504 int
 505 null_strategy(ap)
 506         struct vnop_strategy_args /* {
 507                 struct buf *a_bp;
 508         } */ *ap;
 509 {
 510         struct buf *bp = ap->a_bp;
 511         int error;
 512         struct vnode *savedvp;
 513
 514         savedvp = vnode(bp);
 515         buf_setvnode(bp, NULLVPTOLOWERVP(savedvp));
 516
 517         error = VNOP_STRATEGY(bp);
 518
 519         buf_setvnode(bp, savedvp);
 520
 521         return (error);
 522 }
 523
 524 /*
 525  * XXX - like vnop_strategy, vnop_bwrite must be hand coded because it has no
 526  * vnode in its arguments.
 527  * This goes away with a merged VM/buffer cache.
 528  */
 529 int
 530 null_bwrite(ap)
 531         struct vnop_bwrite_args /* {
 532                 struct buf *a_bp;
 533         } */ *ap;
 534 {
 535         struct buf *bp = ap->a_bp;
 536         int error;
 537         struct vnode *savedvp;
 538
 539         savedvp = buf_vnode(bp);
 540         buf_setvnode(bp, NULLVPTOLOWERVP(savedvp));
 541
 542         error = VNOP_BWRITE(bp);
 543
 544         buf_setvnode(bp, savedvp);
 545
 546         return (error);
 547 }
 548
 549 /*
 550  * Global vfs data structures
 551  */
 552
 553 #define VOPFUNC int (*)(void *)
 554
 555 int (**null_vnodeop_p)(void *);
 556 struct vnodeopv_entry_desc null_vnodeop_entries[] = {
 557         { &vnop_default_desc, (VOPFUNC)null_bypass },
 558
 559         { &vnop_lookup_desc, (VOPFUNC)null_lookup },
 560         { &vnop_setattr_desc, (VOPFUNC)null_setattr },
 561         { &vnop_getattr_desc, (VOPFUNC)null_getattr },
 562         { &vnop_access_desc, (VOPFUNC)null_access },
 563         { &vnop_inactive_desc, (VOPFUNC)null_inactive },
 564         { &vnop_reclaim_desc, (VOPFUNC)null_reclaim },
 565
 566         { &vnop_strategy_desc, (VOPFUNC)null_strategy },
 567         { &vnop_bwrite_desc, (VOPFUNC)null_bwrite },
 568
 569         { (struct vnodeop_desc*)NULL, (int(*)())NULL }
 570 };
 571 struct vnodeopv_desc null_vnodeop_opv_desc =
 572         { &null_vnodeop_p, null_vnodeop_entries };