bsd/miscfs/nullfs/null_vnops.c

   1 /*
   2  * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
   3  *
   4  * @APPLE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. Please obtain a copy of the License at
  10  * http://www.opensource.apple.com/apsl/ and read it before using this
  11  * file.
  12  *
  13  * The Original Code and all software distributed under the License are
  14  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  15  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  16  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  18  * Please see the License for the specific language governing rights and
  19  * limitations under the License.
  20  *
  21  * @APPLE_LICENSE_HEADER_END@
  22  */
  23 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
  24 /*
  25  * Copyright (c) 1992, 1993
  26  *      The Regents of the University of California.  All rights reserved.
  27  *
  28  * This code is derived from software contributed to Berkeley by
  29  * John Heidemann of the UCLA Ficus project.
  30  *
  31  * Redistribution and use in source and binary forms, with or without
  32  * modification, are permitted provided that the following conditions
  33  * are met:
  34  * 1. Redistributions of source code must retain the above copyright
  35  *    notice, this list of conditions and the following disclaimer.
  36  * 2. Redistributions in binary form must reproduce the above copyright
  37  *    notice, this list of conditions and the following disclaimer in the
  38  *    documentation and/or other materials provided with the distribution.
  39  * 3. All advertising materials mentioning features or use of this software
  40  *    must display the following acknowledgement:
  41  *      This product includes software developed by the University of
  42  *      California, Berkeley and its contributors.
  43  * 4. Neither the name of the University nor the names of its contributors
  44  *    may be used to endorse or promote products derived from this software
  45  *    without specific prior written permission.
  46  *
  47  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  48  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  49  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  50  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  51  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  52  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  53  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  54  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  55  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  56  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  57  * SUCH DAMAGE.
  58  *
  59  *      @(#)null_vnops.c        8.6 (Berkeley) 5/27/95
  60  *
  61  * Ancestors:
  62  *      @(#)lofs_vnops.c        1.2 (Berkeley) 6/18/92
  63  *      ...and...
  64  *      @(#)null_vnodeops.c 1.20 92/07/07 UCLA Ficus project
  65  */
  66
  67 /*
  68  * Null Layer
  69  *
  70  * (See mount_null(8) for more information.)
  71  *
  72  * The null layer duplicates a portion of the file system
  73  * name space under a new name.  In this respect, it is
  74  * similar to the loopback file system.  It differs from
  75  * the loopback fs in two respects:  it is implemented using
  76  * stackable-layer techniques, and its "null-node"s stack above
  77  * all lower-layer vnodes, not just over directory vnodes.
  78  *
  79  * The null layer has two purposes.  First, it serves as a demonstration
  80  * of layering by providing a layer which does nothing.  (It actually
  81  * does everything the loopback file system does, which is slightly
  82  * more than nothing.)  Second, the null layer can serve as a prototype
  83  * layer.  Since it provides all necessary layer framework,
  84  * new file system layers can be created very easily by starting
  85  * with a null layer.
  86  *
  87  * The remainder of this man page examines the null layer as a basis
  88  * for constructing new layers.
  89  *
  90  *
  91  * INSTANTIATING NEW NULL LAYERS
  92  *
  93  * New null layers are created with mount_null(8).
  94  * Mount_null(8) takes two arguments, the pathname
  95  * of the lower vfs (target-pn) and the pathname where the null
  96  * layer will appear in the namespace (alias-pn).  After
  97  * the null layer is put into place, the contents
  98  * of target-pn subtree will be aliased under alias-pn.
  99  *
 100  *
 101  * OPERATION OF A NULL LAYER
 102  *
 103  * The null layer is the minimum file system layer,
 104  * simply bypassing all possible operations to the lower layer
 105  * for processing there.  The majority of its activity centers
 106  * on the bypass routine, through which nearly all vnode operations
 107  * pass.
 108  *
 109  * The bypass routine accepts arbitrary vnode operations for
 110  * handling by the lower layer.  It begins by examining vnode
 111  * operation arguments and replacing any null-nodes by their
 112  * lower-layer equivalents.  It then invokes the operation
 113  * on the lower layer.  Finally, it replaces the null-nodes
 114  * in the arguments and, if a vnode is returned by the operation,
 115  * stacks a null-node on top of the returned vnode.
 116  *
 117  * Although bypass handles most operations, vnop_getattr, vnop_lock,
 118  * vnop_unlock, vnop_inactive, vnop_reclaim, and vnop_print are not
 119  * bypassed. Vop_getattr must change the fsid being returned.
 120  * Vop_lock and vnop_unlock must handle any locking for the
 121  * current vnode as well as pass the lock request down.
 122  * Vop_inactive and vnop_reclaim are not bypassed so that
 123  * they can handle freeing null-layer specific data. Vop_print
 124  * is not bypassed to avoid excessive debugging information.
 125  * Also, certain vnode operations change the locking state within
 126  * the operation (create, mknod, remove, link, rename, mkdir, rmdir,
 127  * and symlink). Ideally these operations should not change the
 128  * lock state, but should be changed to let the caller of the
 129  * function unlock them. Otherwise all intermediate vnode layers
 130  * (such as union, umapfs, etc) must catch these functions to do
 131  * the necessary locking at their layer.
 132  *
 133  *
 134  * INSTANTIATING VNODE STACKS
 135  *
 136  * Mounting associates the null layer with a lower layer,
 137  * in effect stacking two VFSes.  Vnode stacks are instead
 138  * created on demand as files are accessed.
 139  *
 140  * The initial mount creates a single vnode stack for the
 141  * root of the new null layer.  All other vnode stacks
 142  * are created as a result of vnode operations on
 143  * this or other null vnode stacks.
 144  *
 145  * New vnode stacks come into existence as a result of
 146  * an operation which returns a vnode.
 147  * The bypass routine stacks a null-node above the new
 148  * vnode before returning it to the caller.
 149  *
 150  * For example, imagine mounting a null layer with
 151  * "mount_null /usr/include /dev/layer/null".
 152  * Changing directory to /dev/layer/null will assign
 153  * the root null-node (which was created when the null layer was mounted).
 154  * Now consider opening "sys".  A vnop_lookup would be
 155  * done on the root null-node.  This operation would bypass through
 156  * to the lower layer which would return a vnode representing
 157  * the UFS "sys".  Null_bypass then builds a null-node
 158  * aliasing the UFS "sys" and returns this to the caller.
 159  * Later operations on the null-node "sys" will repeat this
 160  * process when constructing other vnode stacks.
 161  *
 162  *
 163  * CREATING OTHER FILE SYSTEM LAYERS
 164  *
 165  * One of the easiest ways to construct new file system layers is to make
 166  * a copy of the null layer, rename all files and variables, and
 167  * then begin modifying the copy.  Sed can be used to easily rename
 168  * all variables.
 169  *
 170  * The umap layer is an example of a layer descended from the
 171  * null layer.
 172  *
 173  *
 174  * INVOKING OPERATIONS ON LOWER LAYERS
 175  *
 176  * There are two techniques to invoke operations on a lower layer
 177  * when the operation cannot be completely bypassed.  Each method
 178  * is appropriate in different situations.  In both cases,
 179  * it is the responsibility of the aliasing layer to make
 180  * the operation arguments "correct" for the lower layer
 181  * by mapping any vnode arguments to the lower layer.
 182  *
 183  * The first approach is to call the aliasing layer's bypass routine.
 184  * This method is most suitable when you wish to invoke the operation
 185  * currently being handled on the lower layer.  It has the advantage
 186  * that the bypass routine already must do argument mapping.
 187  * An example of this is null_getattr in the null layer.
 188  *
 189  * A second approach is to directly invoke vnode operations on
 190  * the lower layer with the VOP_OPERATIONNAME interface.
 191  * The advantage of this method is that it is easy to invoke
 192  * arbitrary operations on the lower layer.  The disadvantage
 193  * is that vnode arguments must be manually mapped.
 194  *
 195  */
 196
 197 #include <sys/param.h>
 198 #include <sys/systm.h>
 199 #include <sys/proc.h>
 200 #include <sys/kauth.h>
 201 #include <sys/time.h>
 202 #include <sys/types.h>
 203 #include <sys/vnode.h>
 204 #include <sys/mount_internal.h>
 205 #include <sys/namei.h>
 206 #include <sys/malloc.h>
 207 #include <sys/buf.h>
 208 #include <miscfs/nullfs/null.h>
 209
 210
 211 int null_bug_bypass = 0;   /* for debugging: enables bypass printf'ing */
 212
 213 /*
 214  * This is the 10-Apr-92 bypass routine.
 215  *    This version has been optimized for speed, throwing away some
 216  * safety checks.  It should still always work, but it's not as
 217  * robust to programmer errors.
 218  *    Define SAFETY to include some error checking code.
 219  *
 220  * In general, we map all vnodes going down and unmap them on the way back.
 221  * As an exception to this, vnodes can be marked "unmapped" by setting
 222  * the Nth bit in operation's vdesc_flags.
 223  *
 224  * Also, some BSD vnode operations have the side effect of node_put'ing
 225  * their arguments.  With stacking, the reference counts are held
 226  * by the upper node, not the lower one, so we must handle these
 227  * side-effects here.  This is not of concern in Sun-derived systems
 228  * since there are no such side-effects.
 229  *
 230  * This makes the following assumptions:
 231  * - only one returned vpp
 232  * - no INOUT vpp's (Sun's vnop_open has one of these)
 233  * - the vnode operation vector of the first vnode should be used
 234  *   to determine what implementation of the op should be invoked
 235  * - all mapped vnodes are of our vnode-type (NEEDSWORK:
 236  *   problems on rmdir'ing mount points and renaming?)
 237  */
 238 int
 239 null_bypass(ap)
 240         struct vnop_generic_args /* {
 241                 struct vnodeop_desc *a_desc;
 242                 <other random data follows, presumably>
 243         } */ *ap;
 244 {
 245         extern int (**null_vnodeop_p)(void *);  /* not extern, really "forward" */
 246         register struct vnode **this_vp_p;
 247         int error;
 248         struct vnode *old_vps[VDESC_MAX_VPS];
 249         struct vnode **vps_p[VDESC_MAX_VPS];
 250         struct vnode ***vppp;
 251         struct vnodeop_desc *descp = ap->a_desc;
 252         int reles, i;
 253
 254         if (null_bug_bypass)
 255                 printf ("null_bypass: %s\n", descp->vdesc_name);
 256
 257 #ifdef SAFETY
 258         /*
 259          * We require at least one vp.
 260          */
 261         if (descp->vdesc_vp_offsets == NULL ||
 262             descp->vdesc_vp_offsets[0] == VDESC_NO_OFFSET)
 263                 panic ("null_bypass: no vp's in map.\n");
 264 #endif
 265
 266         /*
 267          * Map the vnodes going in.
 268          * Later, we'll invoke the operation based on
 269          * the first mapped vnode's operation vector.
 270          */
 271         reles = descp->vdesc_flags;
 272         for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
 273                 if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
 274                         break;   /* bail out at end of list */
 275                 vps_p[i] = this_vp_p =
 276                         VOPARG_OFFSETTO(struct vnode**,descp->vdesc_vp_offsets[i],ap);
 277                 /*
 278                  * We're not guaranteed that any but the first vnode
 279                  * are of our type.  Check for and don't map any
 280                  * that aren't.  (We must always map first vp or vclean fails.)
 281                  */
 282                 if (i && (*this_vp_p == NULL ||
 283                     (*this_vp_p)->v_op != null_vnodeop_p)) {
 284                         old_vps[i] = NULL;
 285                 } else {
 286                         old_vps[i] = *this_vp_p;
 287                         *(vps_p[i]) = NULLVPTOLOWERVP(*this_vp_p);
 288                         /*
 289                          * XXX - Several operations have the side effect
 290                          * of vnode_put'ing their vp's.  We must account for
 291                          * that.  (This should go away in the future.)
 292                          */
 293                         if (reles & 1)
 294                                 vnode_get(*this_vp_p);
 295                 }
 296
 297         }
 298
 299         /*
 300          * Call the operation on the lower layer
 301          * with the modified argument structure.
 302          */
 303         error = VCALL(*(vps_p[0]), descp->vdesc_offset, ap);
 304
 305         /*
 306          * Maintain the illusion of call-by-value
 307          * by restoring vnodes in the argument structure
 308          * to their original value.
 309          */
 310         reles = descp->vdesc_flags;
 311         for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
 312                 if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
 313                         break;   /* bail out at end of list */
 314                 if (old_vps[i]) {
 315                         *(vps_p[i]) = old_vps[i];
 316                         if (reles & 1)
 317                                 vnode_put(*(vps_p[i]));
 318                 }
 319         }
 320
 321         /*
 322          * Map the possible out-going vpp
 323          * (Assumes that the lower layer always returns
 324          * a vnode_get'ed vpp unless it gets an error.)
 325          */
 326         if (descp->vdesc_vpp_offset != VDESC_NO_OFFSET &&
 327             !(descp->vdesc_flags & VDESC_NOMAP_VPP) &&
 328             !error) {
 329                 /*
 330                  * XXX - even though some ops have vpp returned vp's,
 331                  * several ops actually vnode_put this before returning.
 332                  * We must avoid these ops.
 333                  * (This should go away when these ops are regularized.)
 334                  */
 335                 if (descp->vdesc_flags & VDESC_VPP_WILLRELE)
 336                         goto out;
 337                 vppp = VOPARG_OFFSETTO(struct vnode***,
 338                                  descp->vdesc_vpp_offset,ap);
 339                 error = null_node_create(old_vps[0]->v_mount, **vppp, *vppp);
 340         }
 341
 342  out:
 343         return (error);
 344 }
 345
 346 /*
 347  * We have to carry on the locking protocol on the null layer vnodes
 348  * as we progress through the tree. We also have to enforce read-only
 349  * if this layer is mounted read-only.
 350  */
 351 null_lookup(ap)
 352         struct vnop_lookup_args /* {
 353                 struct vnode * a_dvp;
 354                 struct vnode ** a_vpp;
 355                 struct componentname * a_cnp;
 356                 vfs_context_t a_context;
 357         } */ *ap;
 358 {
 359         struct componentname *cnp = ap->a_cnp;
 360         struct proc *p = cnp->cn_proc;
 361         int flags = cnp->cn_flags;
 362         struct vnode *dvp, *vp;
 363         int error;
 364
 365         error = null_bypass(ap);
 366
 367         /*
 368          * We must do the same locking and unlocking at this layer as
 369          * is done in the layers below us. We could figure this out
 370          * based on the error return and the LASTCN, LOCKPARENT, and
 371          * LOCKLEAF flags. However, it is more expidient to just find
 372          * out the state of the lower level vnodes and set ours to the
 373          * same state.
 374          */
 375         dvp = ap->a_dvp;
 376         vp = *ap->a_vpp;
 377         if (dvp == vp)
 378                 return (error);
 379         return (error);
 380 }
 381
 382 /*
 383  * Setattr call.
 384  */
 385 int
 386 null_setattr(
 387         struct vnop_setattr_args /* {
 388                 struct vnodeop_desc *a_desc;
 389                 struct vnode *a_vp;
 390                 struct vnode_attr *a_vap;
 391                 kauth_cred_t a_cred;
 392                 struct proc *a_p;
 393         } */ *ap)
 394 {
 395         struct vnode *vp = ap->a_vp;
 396         struct vnode_attr *vap = ap->a_vap;
 397
 398         if (VATTR_IS_ACTIVE(vap, va_data_size)) {
 399                 switch (vp->v_type) {
 400                 case VDIR:
 401                         return (EISDIR);
 402                 case VCHR:
 403                 case VBLK:
 404                 case VSOCK:
 405                 case VFIFO:
 406                         return (0);
 407                 case VREG:
 408                 case VLNK:
 409                 default:
 410                 }
 411         }
 412         return (null_bypass(ap));
 413 }
 414
 415 /*
 416  *  We handle getattr only to change the fsid.
 417  */
 418 int
 419 null_getattr(ap)
 420         struct vnop_getattr_args /* {
 421                 struct vnode *a_vp;
 422                 struct vnode_attr *a_vap;
 423                 vfs_context_t a_context;
 424         } */ *ap;
 425 {
 426         int error;
 427
 428         if (error = null_bypass(ap))
 429                 return (error);
 430         /* Requires that arguments be restored. */
 431         VATTR_RETURN(ap->a_vap, va_fsid, ap->a_vp->v_mount->mnt_vfsstat.f_fsid.val[0]);
 432         return (0);
 433 }
 434
 435 int
 436 null_access(ap)
 437         struct vnop_access_args /* {
 438                 struct vnode *a_vp;
 439                 int  a_action;
 440                 vfs_context_t a_context;
 441         } */ *ap;
 442 {
 443         return (null_bypass(ap));
 444 }
 445
 446 int
 447 null_inactive(ap)
 448         struct vnop_inactive_args /* {
 449                 struct vnode *a_vp;
 450                 vfs_context_t a_context;
 451         } */ *ap;
 452 {
 453         /*
 454          * Do nothing (and _don't_ bypass).
 455          * Wait to vnode_put lowervp until reclaim,
 456          * so that until then our null_node is in the
 457          * cache and reusable.
 458          *
 459          * NEEDSWORK: Someday, consider inactive'ing
 460          * the lowervp and then trying to reactivate it
 461          * with capabilities (v_id)
 462          * like they do in the name lookup cache code.
 463          * That's too much work for now.
 464          */
 465         return (0);
 466 }
 467
 468 int
 469 null_reclaim(ap)
 470         struct vnop_reclaim_args /* {
 471                 struct vnode *a_vp;
 472                 vfs_context_t a_context;
 473         } */ *ap;
 474 {
 475         struct vnode *vp = ap->a_vp;
 476         struct null_node *xp = VTONULL(vp);
 477         struct vnode *lowervp = xp->null_lowervp;
 478
 479         /*
 480          * Note: in vnop_reclaim, vp->v_op == dead_vnodeop_p,
 481          * so we can't call VOPs on ourself.
 482          */
 483         /* After this assignment, this node will not be re-used. */
 484         xp->null_lowervp = NULL;
 485         LIST_REMOVE(xp, null_hash);
 486         FREE(vp->v_data, M_TEMP);
 487         vp->v_data = NULL;
 488         vnode_put (lowervp);
 489         return (0);
 490 }
 491
 492 /*
 493  * XXX - vnop_strategy must be hand coded because it has no
 494  * vnode in its arguments.
 495  * This goes away with a merged VM/buffer cache.
 496  */
 497 int
 498 null_strategy(ap)
 499         struct vnop_strategy_args /* {
 500                 struct buf *a_bp;
 501         } */ *ap;
 502 {
 503         struct buf *bp = ap->a_bp;
 504         int error;
 505         struct vnode *savedvp;
 506
 507         savedvp = vnode(bp);
 508         buf_setvnode(bp, NULLVPTOLOWERVP(savedvp));
 509
 510         error = VNOP_STRATEGY(bp);
 511
 512         buf_setvnode(bp, savedvp);
 513
 514         return (error);
 515 }
 516
 517 /*
 518  * XXX - like vnop_strategy, vnop_bwrite must be hand coded because it has no
 519  * vnode in its arguments.
 520  * This goes away with a merged VM/buffer cache.
 521  */
 522 int
 523 null_bwrite(ap)
 524         struct vnop_bwrite_args /* {
 525                 struct buf *a_bp;
 526         } */ *ap;
 527 {
 528         struct buf *bp = ap->a_bp;
 529         int error;
 530         struct vnode *savedvp;
 531
 532         savedvp = buf_vnode(bp);
 533         buf_setvnode(bp, NULLVPTOLOWERVP(savedvp));
 534
 535         error = VNOP_BWRITE(bp);
 536
 537         buf_setvnode(bp, savedvp);
 538
 539         return (error);
 540 }
 541
 542 /*
 543  * Global vfs data structures
 544  */
 545
 546 #define VOPFUNC int (*)(void *)
 547
 548 int (**null_vnodeop_p)(void *);
 549 struct vnodeopv_entry_desc null_vnodeop_entries[] = {
 550         { &vnop_default_desc, (VOPFUNC)null_bypass },
 551
 552         { &vnop_lookup_desc, (VOPFUNC)null_lookup },
 553         { &vnop_setattr_desc, (VOPFUNC)null_setattr },
 554         { &vnop_getattr_desc, (VOPFUNC)null_getattr },
 555         { &vnop_access_desc, (VOPFUNC)null_access },
 556         { &vnop_inactive_desc, (VOPFUNC)null_inactive },
 557         { &vnop_reclaim_desc, (VOPFUNC)null_reclaim },
 558
 559         { &vnop_strategy_desc, (VOPFUNC)null_strategy },
 560         { &vnop_bwrite_desc, (VOPFUNC)null_bwrite },
 561
 562         { (struct vnodeop_desc*)NULL, (int(*)())NULL }
 563 };
 564 struct vnodeopv_desc null_vnodeop_opv_desc =
 565         { &null_vnodeop_p, null_vnodeop_entries };