bsd/miscfs/union/union_subr.c

   1 /*
   2  * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
  29 /*
  30  * Copyright (c) 1994 Jan-Simon Pendry
  31  * Copyright (c) 1994
  32  *      The Regents of the University of California.  All rights reserved.
  33  *
  34  * This code is derived from software contributed to Berkeley by
  35  * Jan-Simon Pendry.
  36  *
  37  * Redistribution and use in source and binary forms, with or without
  38  * modification, are permitted provided that the following conditions
  39  * are met:
  40  * 1. Redistributions of source code must retain the above copyright
  41  *    notice, this list of conditions and the following disclaimer.
  42  * 2. Redistributions in binary form must reproduce the above copyright
  43  *    notice, this list of conditions and the following disclaimer in the
  44  *    documentation and/or other materials provided with the distribution.
  45  * 3. All advertising materials mentioning features or use of this software
  46  *    must display the following acknowledgement:
  47  *      This product includes software developed by the University of
  48  *      California, Berkeley and its contributors.
  49  * 4. Neither the name of the University nor the names of its contributors
  50  *    may be used to endorse or promote products derived from this software
  51  *    without specific prior written permission.
  52  *
  53  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  56  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  57  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  58  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  59  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  63  * SUCH DAMAGE.
  64  *
  65  *      @(#)union_subr.c        8.20 (Berkeley) 5/20/95
  66  */
  67 /*
  68  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
  69  * support for mandatory and extensible security protections.  This notice
  70  * is included in support of clause 2.2 (b) of the Apple Public License,
  71  * Version 2.0.
  72  */
  73
  74 #include <sys/param.h>
  75 #include <sys/systm.h>
  76 #include <sys/proc_internal.h>
  77 #include <sys/kauth.h>
  78 #include <sys/time.h>
  79 #include <sys/kernel.h>
  80 #include <sys/vnode_internal.h>
  81 #include <sys/namei.h>
  82 #include <sys/malloc.h>
  83 #include <sys/file_internal.h>
  84 #include <sys/filedesc.h>
  85 #include <sys/queue.h>
  86 #include <sys/mount_internal.h>
  87 #include <sys/stat.h>
  88 #include <sys/ubc.h>
  89 #include <sys/uio_internal.h>
  90 #include <miscfs/union/union.h>
  91 #include <sys/lock.h>
  92 #include <sys/kdebug.h>
  93 #if CONFIG_MACF
  94 #include <security/mac_framework.h>
  95 #endif
  96
  97
/*
 * Forward declaration of a file-local helper.  union_copyup() calls it
 * to close the upper vnode it obtained from union_vn_create().
 */
static int union_vn_close(struct vnode *vp, int fmode, vfs_context_t ctx);
  99
 100 /* must be power of two, otherwise change UNION_HASH() */
 101 #define NHASH 32
 102
 103 /* unsigned int ... */
 104 #define UNION_HASH(u, l) \
 105         (((((unsigned long) (u)) + ((unsigned long) l)) >> 8) & (NHASH-1))
 106
/*
 * Hash table of union_node chains.  union_updatevp() and union_allocvp()
 * select a chain with UNION_HASH().
 */
static LIST_HEAD(unhead, union_node) unhead[NHASH];
/*
 * One flag word per chain, manipulated by union_list_lock() and
 * union_list_unlock() through the UNVP_LOCKED and UNVP_WANT bits.
 */
static int unvplock[NHASH];

/* Lock group, lock attributes and mutex; all allocated in union_init(). */
static lck_grp_t * union_lck_grp;
static lck_grp_attr_t * union_lck_grp_attr;
static lck_attr_t * union_lck_attr;
/* Global mutex taken by union_lock() and released by union_unlock(). */
static lck_mtx_t *  union_mtxp;

/* Installed into union_dircheckp by union_init() (getdirentries hook). */
static int union_dircheck(struct vnode **, struct fileproc *, vfs_context_t ctx);
/* Wrappers around union_updatevp() that replace one layer's vnode. */
static void union_newlower(struct union_node *, struct vnode *);
static void union_newupper(struct union_node *, struct vnode *);
 118
 119
 120 int
 121 union_init(__unused struct vfsconf *vfsp)
 122 {
 123         int i;
 124
 125         union_lck_grp_attr= lck_grp_attr_alloc_init();
 126 #if DIAGNOSTIC
 127         lck_grp_attr_setstat(union_lck_grp_attr);
 128 #endif
 129         union_lck_grp = lck_grp_alloc_init("union",  union_lck_grp_attr);
 130         union_lck_attr = lck_attr_alloc_init();
 131 #if DIAGNOSTIC
 132         lck_attr_setdebug(union_lck_attr);
 133 #endif
 134         union_mtxp = lck_mtx_alloc_init(union_lck_grp, union_lck_attr);
 135
 136         for (i = 0; i < NHASH; i++)
 137                 LIST_INIT(&unhead[i]);
 138         bzero((caddr_t) unvplock, sizeof(unvplock));
 139         /* add the hook for getdirentries */
 140         union_dircheckp = union_dircheck;
 141
 142         return (0);
 143 }
 144
 145 void
 146 union_lock()
 147 {
 148         lck_mtx_lock(union_mtxp);
 149 }
 150
 151 void
 152 union_unlock()
 153 {
 154         lck_mtx_unlock(union_mtxp);
 155 }
 156
 157
 158 static int
 159 union_list_lock(int ix)
 160 {
 161
 162         if (unvplock[ix] & UNVP_LOCKED) {
 163                 unvplock[ix] |= UNVP_WANT;
 164                 msleep((caddr_t) &unvplock[ix], union_mtxp, PINOD, "union_list_lock", NULL);
 165                 return (1);
 166         }
 167
 168         unvplock[ix] |= UNVP_LOCKED;
 169
 170         return (0);
 171 }
 172
 173 static void
 174 union_list_unlock(int ix)
 175 {
 176
 177         unvplock[ix] &= ~UNVP_LOCKED;
 178
 179         if (unvplock[ix] & UNVP_WANT) {
 180                 unvplock[ix] &= ~UNVP_WANT;
 181                 wakeup((caddr_t) &unvplock[ix]);
 182         }
 183 }
 184
 185 /*
 186  *      union_updatevp:
 187  *
 188  *      The uppervp, if not NULL, must be referenced and not locked by us
 189  *      The lowervp, if not NULL, must be referenced.
 190  *
 191  *      If uppervp and lowervp match pointers already installed, then
 192  *      nothing happens. The passed vp's (when matching) are not adjusted.
 193  *
 194  *      This routine may only be called by union_newupper() and
 195  *      union_newlower().
 196  */
 197
 198 /* always called with union lock held */
 199 void
 200 union_updatevp(struct union_node *un, struct vnode *uppervp,
 201                 struct vnode *lowervp)
 202 {
 203         int ohash = UNION_HASH(un->un_uppervp, un->un_lowervp);
 204         int nhash = UNION_HASH(uppervp, lowervp);
 205         int docache = (lowervp != NULLVP || uppervp != NULLVP);
 206         int lhash, uhash;
 207         vnode_t freevp;
 208         vnode_t freedirvp;
 209         caddr_t freepath;
 210
 211         /*
 212          * Ensure locking is ordered from lower to higher
 213          * to avoid deadlocks.
 214          */
 215         if (nhash < ohash) {
 216                 lhash = nhash;
 217                 uhash = ohash;
 218         } else {
 219                 lhash = ohash;
 220                 uhash = nhash;
 221         }
 222
 223         if (lhash != uhash) {
 224                 while (union_list_lock(lhash))
 225                         continue;
 226         }
 227
 228         while (union_list_lock(uhash))
 229                 continue;
 230
 231         if (ohash != nhash || !docache) {
 232                 if (un->un_flags & UN_CACHED) {
 233                         un->un_flags &= ~UN_CACHED;
 234                         LIST_REMOVE(un, un_cache);
 235                 }
 236         }
 237
 238         if (ohash != nhash)
 239                 union_list_unlock(ohash);
 240
 241         if (un->un_lowervp != lowervp) {
 242                 freevp = freedirvp = NULLVP;
 243                 freepath = (caddr_t)0;
 244                 if (un->un_lowervp) {
 245                         freevp = un->un_lowervp;
 246                         un->un_lowervp = lowervp;
 247                         if (un->un_path) {
 248                                 freepath = un->un_path;
 249                                 un->un_path = 0;
 250                         }
 251                         if (un->un_dirvp) {
 252                                 freedirvp = un->un_dirvp;
 253                                 un->un_dirvp = NULLVP;
 254                         }
 255                         union_unlock();
 256                         if (freevp)
 257                                 vnode_put(freevp);
 258                         if (freedirvp)
 259                                 vnode_put(freedirvp);
 260                         if (freepath)
 261                                 _FREE(un->un_path, M_TEMP);
 262                         union_lock();
 263                 } else
 264                         un->un_lowervp = lowervp;
 265                 if (lowervp != NULLVP)
 266                         un->un_lowervid = vnode_vid(lowervp);
 267                 un->un_lowersz = VNOVAL;
 268         }
 269
 270         if (un->un_uppervp != uppervp) {
 271                 freevp = NULLVP;
 272                 if (un->un_uppervp) {
 273                         freevp = un->un_uppervp;
 274                 }
 275                 un->un_uppervp = uppervp;
 276                 if (uppervp != NULLVP)
 277                         un->un_uppervid = vnode_vid(uppervp);
 278                 un->un_uppersz = VNOVAL;
 279                 union_unlock();
 280                 if (freevp)
 281                         vnode_put(freevp);
 282                 union_lock();
 283         }
 284
 285         if (docache && (ohash != nhash)) {
 286                 LIST_INSERT_HEAD(&unhead[nhash], un, un_cache);
 287                 un->un_flags |= UN_CACHED;
 288         }
 289
 290         union_list_unlock(nhash);
 291 }
 292
 293 /*
 294  * Set a new lowervp.  The passed lowervp must be referenced and will be
 295  * stored in the vp in a referenced state.
 296  */
 297 /* always called with union lock held */
 298
 299 static void
 300 union_newlower(un, lowervp)
 301         struct union_node *un;
 302         struct vnode *lowervp;
 303 {
 304         union_updatevp(un, un->un_uppervp, lowervp);
 305 }
 306
 307 /*
 308  * Set a new uppervp.  The passed uppervp must be locked and will be
 309  * stored in the vp in a locked state.  The caller should not unlock
 310  * uppervp.
 311  */
 312
 313 /* always called with union lock held */
 314 static void
 315 union_newupper(un, uppervp)
 316         struct union_node *un;
 317         struct vnode *uppervp;
 318 {
 319         union_updatevp(un, uppervp, un->un_lowervp);
 320 }
 321
 322 /*
 323  * Keep track of size changes in the underlying vnodes.
 324  * If the size changes, then callback to the vm layer
 325  * giving priority to the upper layer size.
 326  */
 327 /* always called with union lock held */
 328 void
 329 union_newsize(vp, uppersz, lowersz)
 330         struct vnode *vp;
 331         off_t uppersz, lowersz;
 332 {
 333         struct union_node *un;
 334         off_t sz;
 335
 336         /* only interested in regular files */
 337         if (vp->v_type != VREG)
 338                 return;
 339
 340         un = VTOUNION(vp);
 341         sz = VNOVAL;
 342
 343         if ((uppersz != VNOVAL) && (un->un_uppersz != uppersz)) {
 344                 un->un_uppersz = uppersz;
 345                 if (sz == VNOVAL)
 346                         sz = un->un_uppersz;
 347         }
 348
 349         if ((lowersz != VNOVAL) && (un->un_lowersz != lowersz)) {
 350                 un->un_lowersz = lowersz;
 351                 if (sz == VNOVAL)
 352                         sz = un->un_lowersz;
 353         }
 354
 355         if (sz != VNOVAL) {
 356 #ifdef UNION_DIAGNOSTIC
 357                 printf("union: %s size now %ld\n",
 358                         uppersz != VNOVAL ? "upper" : "lower", (long) sz);
 359 #endif
 360                 union_unlock();
 361                 ubc_setsize(vp, sz);
 362                 union_lock();
 363         }
 364 }
 365
 366 /*
 367  *      union_allocvp:  allocate a union_node and associate it with a
 368  *                      parent union_node and one or two vnodes.
 369  *
 370  *      vpp     Holds the returned vnode locked and referenced if no
 371  *              error occurs.
 372  *
 373  *      mp      Holds the mount point.  mp may or may not be busied.
 374  *              allocvp() makes no changes to mp.
 375  *
 376  *      dvp     Holds the parent union_node to the one we wish to create.
 377  *              XXX may only be used to traverse an uncopied lowervp-based
 378  *              tree?  XXX
 379  *
 380  *              dvp may or may not be locked.  allocvp() makes no changes
 381  *              to dvp.
 382  *
 383  *      upperdvp Holds the parent vnode to uppervp, generally used along
 384  *              with path component information to create a shadow of
 385  *              lowervp when uppervp does not exist.
 386  *
 387  *              upperdvp is referenced but unlocked on entry, and will be
 388  *              dereferenced on return.
 389  *
 390  *      uppervp Holds the new uppervp vnode to be stored in the
 391  *              union_node we are allocating.  uppervp is referenced but
 392  *              not locked, and will be dereferenced on return.
 393  *
 394  *      lowervp Holds the new lowervp vnode to be stored in the
 395  *              union_node we are allocating.  lowervp is referenced but
 396  *              not locked, and will be dereferenced on return.
 397  *
 398  *      cnp     Holds path component information to be coupled with
 399  *              lowervp and upperdvp to allow unionfs to create an uppervp
 400  *              later on.  Only used if lowervp is valid.  The contents
 401  *              of cnp is only valid for the duration of the call.
 402  *
 403  *      docache Determine whether this node should be entered in the
 404  *              cache or whether it should be destroyed as soon as possible.
 405  *
 406  * All union_nodes are maintained on a singly-linked
 407  * list.  New nodes are only allocated when they cannot
 408  * be found on this list.  Entries on the list are
 409  * removed when the vfs reclaim entry is called.
 410  *
 411  * A single lock is kept for the entire list.  This is
 412  * needed because the getnewvnode() function can block
 413  * waiting for a vnode to become free, in which case there
 414  * may be more than one process trying to get the same
 415  * vnode.  This lock is only taken if we are going to
 416  * call getnewvnode(), since the kernel itself is single-threaded.
 417  *
 418  * If an entry is found on the list, then call vget() to
 419  * take a reference.  This is done because there may be
 420  * zero references to it and so it needs to removed from
 421  * the vnode free list.
 422  */
 423
 424 /* always called with union lock held */
 425
 426 int
 427 union_allocvp(struct vnode **vpp,
 428         struct mount *mp,
 429         struct vnode *undvp,
 430         struct vnode *dvp,
 431         struct componentname *cnp,
 432         struct vnode *uppervp,
 433         struct vnode *lowervp,
 434         int docache)
 435 {
 436         int error;
 437         struct union_node *un = NULL;
 438         struct union_node *unp;
 439         struct vnode *xlowervp = NULLVP;
 440         struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
 441         int hash = 0;           /* protected by docache */
 442         int markroot;
 443         int try;
 444         struct vnode_fsparam vfsp;
 445         enum vtype vtype;
 446
 447         if (uppervp == NULLVP && lowervp == NULLVP)
 448                 panic("union: unidentifiable allocation");
 449
 450         /*
 451          * if both upper and lower vp are provided and are off different type
 452          * consider lowervp as NULL
 453          */
 454         if (uppervp && lowervp && (uppervp->v_type != lowervp->v_type)) {
 455                 xlowervp = lowervp;
 456                 lowervp = NULLVP;
 457         }
 458
 459         /* detect the root vnode (and aliases) */
 460         markroot = 0;
 461         if ((uppervp == um->um_uppervp) &&
 462             ((lowervp == NULLVP) || lowervp == um->um_lowervp)) {
 463                 if (lowervp == NULLVP) {
 464                         lowervp = um->um_lowervp;
 465                         if (lowervp != NULLVP) {
 466                                 union_unlock();
 467                                 vnode_get(lowervp);
 468                                 union_lock();
 469                         }
 470                 }
 471                 markroot = VROOT;
 472         }
 473
 474 loop:
 475         if (!docache) {
 476                 un = NULL;
 477         } else for (try = 0; try < 3; try++) {
 478                 switch (try) {
 479                 case 0:
 480                         if (lowervp == NULLVP)
 481                                 continue;
 482                         hash = UNION_HASH(uppervp, lowervp);
 483                         break;
 484
 485                 case 1:
 486                         if (uppervp == NULLVP)
 487                                 continue;
 488                         hash = UNION_HASH(uppervp, NULLVP);
 489                         break;
 490
 491                 case 2:
 492                         if (lowervp == NULLVP)
 493                                 continue;
 494                         /* Not sure how this path gets exercised ? */
 495                         hash = UNION_HASH(NULLVP, lowervp);
 496                         break;
 497                 }
 498
 499                 while (union_list_lock(hash))
 500                         continue;
 501
 502                 for (un = unhead[hash].lh_first; un != 0;
 503                                         un = un->un_cache.le_next) {
 504                         if ((un->un_lowervp == lowervp ||
 505                              un->un_lowervp == NULLVP) &&
 506                             (un->un_uppervp == uppervp ||
 507                              un->un_uppervp == NULLVP) &&
 508                             (un->un_mount == mp)) {
 509                                 break;
 510                         }
 511                 }
 512
 513                 union_list_unlock(hash);
 514
 515                 if (un)
 516                         break;
 517         }
 518
 519         if (un) {
 520                 /*
 521                  * Obtain a lock on the union_node.
 522                  * uppervp is locked, though un->un_uppervp
 523                  * may not be.  this doesn't break the locking
 524                  * hierarchy since in the case that un->un_uppervp
 525                  * is not yet locked it will be vnode_put'd and replaced
 526                  * with uppervp.
 527                  */
 528
 529                 if (un->un_flags & UN_LOCKED) {
 530                         un->un_flags |= UN_WANT;
 531                         msleep((caddr_t) &un->un_flags, union_mtxp, PINOD, "union node locked", 0);
 532                         goto loop;
 533                 }
 534                 un->un_flags |= UN_LOCKED;
 535
 536                 union_unlock();
 537                 if (UNIONTOV(un) == NULLVP)
 538                         panic("null vnode in union node\n");
 539                 if (vnode_get(UNIONTOV(un))) {
 540                         union_lock();
 541                         un->un_flags &= ~UN_LOCKED;
 542                         if ((un->un_flags & UN_WANT) == UN_WANT) {
 543                                 un->un_flags &=  ~UN_LOCKED;
 544                                 wakeup(&un->un_flags);
 545                         }
 546                         goto loop;
 547                 }
 548                 union_lock();
 549
 550                 /*
 551                  * At this point, the union_node is locked,
 552                  * un->un_uppervp may not be locked, and uppervp
 553                  * is locked or nil.
 554                  */
 555
 556                 /*
 557                  * Save information about the upper layer.
 558                  */
 559                 if (uppervp != un->un_uppervp) {
 560                         union_newupper(un, uppervp);
 561                 } else if (uppervp) {
 562                         union_unlock();
 563                         vnode_put(uppervp);
 564                         union_lock();
 565                 }
 566
 567                 /*
 568                  * Save information about the lower layer.
 569                  * This needs to keep track of pathname
 570                  * and directory information which union_vn_create
 571                  * might need.
 572                  */
 573                 if (lowervp != un->un_lowervp) {
 574                         union_newlower(un, lowervp);
 575                         if (cnp && (lowervp != NULLVP)) {
 576                                 un->un_hash = cnp->cn_hash;
 577                                 union_unlock();
 578                                 MALLOC(un->un_path, caddr_t, cnp->cn_namelen+1,
 579                                                 M_TEMP, M_WAITOK);
 580                                 bcopy(cnp->cn_nameptr, un->un_path,
 581                                                 cnp->cn_namelen);
 582                                 vnode_get(dvp);
 583                                 union_lock();
 584                                 un->un_path[cnp->cn_namelen] = '\0';
 585                                 un->un_dirvp = dvp;
 586                         }
 587                 } else if (lowervp) {
 588                         union_unlock();
 589                         vnode_put(lowervp);
 590                         union_lock();
 591                 }
 592                 *vpp = UNIONTOV(un);
 593                 un->un_flags &= ~UN_LOCKED;
 594                 if ((un->un_flags & UN_WANT) == UN_WANT) {
 595                         un->un_flags &= ~UN_WANT;
 596                         wakeup(&un->un_flags);
 597                 }
 598                 return (0);
 599         }
 600
 601         if (docache) {
 602                 /*
 603                  * otherwise lock the vp list while we call getnewvnode
 604                  * since that can block.
 605                  */
 606                 hash = UNION_HASH(uppervp, lowervp);
 607
 608                 if (union_list_lock(hash))
 609                         goto loop;
 610         }
 611
 612         union_unlock();
 613         MALLOC(unp, void *, sizeof(struct union_node), M_TEMP, M_WAITOK);
 614         union_lock();
 615
 616         bzero(unp, sizeof(struct union_node));
 617         un = unp;
 618         un->un_uppervp = uppervp;
 619         if (uppervp != NULLVP)
 620                 un->un_uppervid = vnode_vid(uppervp);
 621         un->un_uppersz = VNOVAL;
 622         un->un_lowervp = lowervp;
 623         if (lowervp != NULLVP)
 624                 un->un_lowervid = vnode_vid(lowervp);
 625         un->un_lowersz = VNOVAL;
 626         un->un_pvp = undvp;
 627         if (undvp != NULLVP)
 628                 vnode_get(undvp);
 629         un->un_dircache = 0;
 630         un->un_openl = 0;
 631         un->un_mount = mp;
 632         un->un_flags = UN_LOCKED;
 633 #ifdef FAULTFS
 634         if (UNION_FAULTIN(um))
 635                 un->un_flags |= UN_FAULTFS;
 636 #endif
 637
 638         if (docache) {
 639                 /* Insert with lock held */
 640                 LIST_INSERT_HEAD(&unhead[hash], un, un_cache);
 641                 un->un_flags |= UN_CACHED;
 642                 union_list_unlock(hash);
 643         }
 644
 645         union_unlock();
 646
 647         if (uppervp)
 648                 vtype = uppervp->v_type;
 649         else
 650                 vtype = lowervp->v_type;
 651
 652         bzero(&vfsp, sizeof(struct vnode_fsparam));
 653         vfsp.vnfs_mp = mp;
 654         vfsp.vnfs_vtype = vtype;
 655         vfsp.vnfs_str = "unionfs";
 656         vfsp.vnfs_dvp = undvp;
 657         vfsp.vnfs_fsnode = unp;
 658         vfsp.vnfs_cnp = cnp;
 659         vfsp.vnfs_vops = union_vnodeop_p;
 660         vfsp.vnfs_rdev = 0;
 661         vfsp.vnfs_filesize = 0;
 662         vfsp.vnfs_flags = VNFS_NOCACHE | VNFS_CANTCACHE;
 663         vfsp.vnfs_marksystem = 0;
 664         vfsp.vnfs_markroot = markroot;
 665
 666         error = vnode_create(VNCREATE_FLAVOR, VCREATESIZE, &vfsp, vpp);
 667         if (error) {
 668                 /*  XXXXX Is this right ????  XXXXXXX */
 669                 if (uppervp) {
 670                         vnode_put(uppervp);
 671                 }
 672                 if (lowervp)
 673                         vnode_put(lowervp);
 674
 675                 union_lock();
 676                 if (un->un_flags & UN_CACHED) {
 677                         un->un_flags &= ~UN_CACHED;
 678                         LIST_REMOVE(un, un_cache);
 679                 }
 680                 if (docache)
 681                         union_list_unlock(hash);
 682
 683                 FREE(unp, M_TEMP);
 684
 685                 return (error);
 686         }
 687
 688         if (cnp && (lowervp != NULLVP)) {
 689                 un->un_hash = cnp->cn_hash;
 690                 un->un_path = _MALLOC(cnp->cn_namelen+1, M_TEMP, M_WAITOK);
 691                 bcopy(cnp->cn_nameptr, un->un_path, cnp->cn_namelen);
 692                 un->un_path[cnp->cn_namelen] = '\0';
 693                 vnode_get(dvp);
 694                 un->un_dirvp = dvp;
 695         } else {
 696                 un->un_hash = 0;
 697                 un->un_path = 0;
 698                 un->un_dirvp = 0;
 699         }
 700
 701         if (xlowervp)
 702                 vnode_put(xlowervp);
 703
 704         union_lock();
 705
 706         vnode_settag(*vpp, VT_UNION);
 707         un->un_vnode = *vpp;
 708         if (un->un_vnode->v_type == VDIR) {
 709                 if (un->un_uppervp == NULLVP) {
 710                         panic("faulting fs and no upper vp for dir?");
 711                 }
 712
 713         }
 714
 715
 716         un->un_flags &= ~UN_LOCKED;
 717         if ((un->un_flags & UN_WANT) == UN_WANT) {
 718                 un->un_flags &=  ~UN_WANT;
 719                 wakeup(&un->un_flags);
 720         }
 721
 722         return(error);
 723
 724 }
 725
 726 /* always called with union lock held */
 727 int
 728 union_freevp(struct vnode *vp)
 729 {
 730         struct union_node *un = VTOUNION(vp);
 731
 732         if (un->un_flags & UN_CACHED) {
 733                 un->un_flags &= ~UN_CACHED;
 734                 LIST_REMOVE(un, un_cache);
 735         }
 736
 737         union_unlock();
 738         if (un->un_pvp != NULLVP)
 739                 vnode_put(un->un_pvp);
 740         if (un->un_uppervp != NULLVP)
 741                 vnode_put(un->un_uppervp);
 742         if (un->un_lowervp != NULLVP)
 743                 vnode_put(un->un_lowervp);
 744         if (un->un_dirvp != NULLVP)
 745                 vnode_put(un->un_dirvp);
 746         if (un->un_path)
 747                 _FREE(un->un_path, M_TEMP);
 748
 749         FREE(vp->v_data, M_TEMP);
 750         vp->v_data = 0;
 751         union_lock();
 752
 753         return (0);
 754 }
 755
 756 /*
 757  * copyfile.  copy the vnode (fvp) to the vnode (tvp)
 758  * using a sequence of reads and writes.  both (fvp)
 759  * and (tvp) are locked on entry and exit.
 760  */
 761 /* called with no union lock held */
 762 int
 763 union_copyfile(struct vnode *fvp, struct vnode *tvp, vfs_context_t context)
 764 {
 765         char *bufp;
 766         struct uio uio;
 767         struct iovec_32 iov;
 768         int error = 0;
 769
 770         /*
 771          * strategy:
 772          * allocate a buffer of size MAXPHYSIO.
 773          * loop doing reads and writes, keeping track
 774          * of the current uio offset.
 775          * give up at the first sign of trouble.
 776          */
 777
 778
 779 #if 1   /* LP64todo - can't use new segment flags until the drivers are ready */
 780         uio.uio_segflg = UIO_SYSSPACE;
 781 #else
 782         uio.uio_segflg = UIO_SYSSPACE32;
 783 #endif
 784         uio.uio_offset = 0;
 785
 786         bufp = _MALLOC(MAXPHYSIO, M_TEMP, M_WAITOK);
 787
 788         /* ugly loop follows... */
 789         do {
 790                 off_t offset = uio.uio_offset;
 791
 792                 uio.uio_iovs.iov32p = &iov;
 793                 uio.uio_iovcnt = 1;
 794                 iov.iov_base = (uintptr_t)bufp;
 795                 iov.iov_len = MAXPHYSIO;
 796                 uio_setresid(&uio, iov.iov_len);
 797                 uio.uio_rw = UIO_READ;
 798                 error = VNOP_READ(fvp, &uio, 0, context);
 799
 800                 if (error == 0) {
 801                         uio.uio_iovs.iov32p = &iov;
 802                         uio.uio_iovcnt = 1;
 803                         iov.iov_base = (uintptr_t)bufp;
 804                         iov.iov_len = MAXPHYSIO - uio_resid(&uio);
 805                         uio.uio_offset = offset;
 806                         uio.uio_rw = UIO_WRITE;
 807                         uio_setresid(&uio, iov.iov_len);
 808
 809                         if (uio_resid(&uio) == 0)
 810                                 break;
 811
 812                         do {
 813                                 error = VNOP_WRITE(tvp, &uio, 0, context);
 814                         } while ((uio_resid(&uio) > 0) && (error == 0));
 815                 }
 816
 817         } while (error == 0);
 818
 819         _FREE(bufp, M_TEMP);
 820         return (error);
 821 }
 822
 823 /*
 824  * (un) is assumed to be locked on entry and remains
 825  * locked on exit.
 826  */
 827 /* always called with union lock held */
 828 int
 829 union_copyup(struct union_node *un, int docopy, vfs_context_t context)
 830 {
 831         int error;
 832         struct vnode *lvp, *uvp;
 833         struct vnode_attr vattr;
 834         mode_t  cmode = 0;
 835
 836
 837         lvp = un->un_lowervp;
 838
 839         union_unlock();
 840
 841         if (UNNODE_FAULTIN(un)) {
 842                 /* Need to inherit exec mode in faulting fs */
 843                 VATTR_INIT(&vattr);
 844                 VATTR_WANTED(&vattr, va_flags);
 845                 if (vnode_getattr(lvp, &vattr, context) == 0 )
 846                         cmode = vattr.va_mode;
 847
 848         }
 849         error = union_vn_create(&uvp, un, cmode, context);
 850         if (error) {
 851                 union_lock();
 852                 if (error == EEXIST) {
 853                         if (uvp != NULLVP) {
 854                                 union_newupper(un, uvp);
 855                                 error = 0;
 856                         }
 857                 }
 858                 return (error);
 859         }
 860
 861         union_lock();
 862         /* at this point, uppervp is locked */
 863         union_newupper(un, uvp);
 864         union_unlock();
 865
 866
 867         if (docopy) {
 868                 /*
 869                  * XX - should not ignore errors
 870                  * from vnop_close
 871                  */
 872                 error = VNOP_OPEN(lvp, FREAD, context);
 873                 if (error == 0) {
 874                         error = union_copyfile(lvp, uvp, context);
 875                         (void) VNOP_CLOSE(lvp, FREAD, context);
 876                 }
 877 #ifdef UNION_DIAGNOSTIC
 878                 if (error == 0)
 879                         uprintf("union: copied up %s\n", un->un_path);
 880 #endif
 881
 882         }
 883         union_vn_close(uvp, FWRITE, context);
 884
 885         /*
 886          * Subsequent IOs will go to the top layer, so
 887          * call close on the lower vnode and open on the
 888          * upper vnode to ensure that the filesystem keeps
 889          * its references counts right.  This doesn't do
 890          * the right thing with (cred) and (FREAD) though.
 891          * Ignoring error returns is not right, either.
 892          */
 893
 894         /* No need to hold the lock as the union node should be locked for this(it is in faultin mode) */
 895         if (error == 0) {
 896                 int i;
 897
 898                 for (i = 0; i < un->un_openl; i++) {
 899                         (void) VNOP_CLOSE(lvp, FREAD, context);
 900                         (void) VNOP_OPEN(uvp, FREAD, context);
 901                 }
 902                 un->un_openl = 0;
 903         }
 904
 905         union_lock();
 906
 907         return (error);
 908
 909 }
 910
 911
 912 int
 913 union_faultin_copyup(struct vnode **vpp, vnode_t udvp, vnode_t lvp, struct componentname * cnp, vfs_context_t context)
 914 {
 915         int error;
 916         struct vnode *uvp;
 917         struct vnode_attr vattr;
 918         struct vnode_attr *vap;
 919         mode_t  cmode = 0;
 920         int fmode = FFLAGS(O_WRONLY|O_CREAT|O_TRUNC|O_EXCL);
 921         struct proc * p = vfs_context_proc(context);
 922         struct componentname cn;
 923
 924
 925         vap = &vattr;
 926         VATTR_INIT(vap);
 927         VATTR_WANTED(vap, va_flags);
 928         if (vnode_getattr(lvp, vap, context) == 0 )
 929                 cmode = vattr.va_mode;
 930
 931         *vpp = NULLVP;
 932
 933
 934         if (cmode == (mode_t)0)
 935                 cmode = UN_FILEMODE & ~p->p_fd->fd_cmask;
 936         else
 937                 cmode = cmode & ~p->p_fd->fd_cmask;
 938
 939
 940         /*
 941          * Build a new componentname structure (for the same
 942          * reasons outlines in union_mkshadow()).
 943          * The difference here is that the file is owned by
 944          * the current user, rather than by the person who
 945          * did the mount, since the current user needs to be
 946          * able to write the file (that's why it is being
 947          * copied in the first place).
 948          */
 949         bzero(&cn, sizeof(struct componentname));
 950
 951         cn.cn_namelen = cnp->cn_namelen;
 952         cn.cn_pnbuf = (caddr_t) _MALLOC_ZONE(cn.cn_namelen+1,
 953                                                 M_NAMEI, M_WAITOK);
 954         cn.cn_pnlen = cn.cn_namelen+1;
 955         bcopy(cnp->cn_nameptr, cn.cn_pnbuf, cn.cn_namelen+1);
 956         cn.cn_nameiop = CREATE;
 957         cn.cn_flags = (HASBUF|SAVENAME|SAVESTART|ISLASTCN|UNIONCREATED);
 958         cn.cn_context = context;
 959         cn.cn_nameptr = cn.cn_pnbuf;
 960         cn.cn_hash = 0;
 961         cn.cn_consume = 0;
 962
 963         /*
 964          * Pass dvp unlocked and referenced on call to relookup().
 965          *
 966          * If an error occurs, dvp will be returned unlocked and dereferenced.
 967          */
 968         if ((error = relookup(udvp, &uvp, &cn)) != 0) {
 969                 goto out;
 970         }
 971
 972         /*
 973          * If no error occurs, dvp will be returned locked with the reference
 974          * left as before, and vpp will be returned referenced and locked.
 975          */
 976         if (uvp) {
 977                 *vpp = uvp;
 978                 error = EEXIST;
 979                 goto out;
 980         }
 981
 982         /*
 983          * Good - there was no race to create the file
 984          * so go ahead and create it.  The permissions
 985          * on the file will be 0666 modified by the
 986          * current user's umask.  Access to the file, while
 987          * it is unioned, will require access to the top *and*
 988          * bottom files.  Access when not unioned will simply
 989          * require access to the top-level file.
 990          *
 991          * TODO: confirm choice of access permissions.
 992          *       decide on authorisation behaviour
 993          */
 994
 995         VATTR_INIT(vap);
 996         VATTR_SET(vap, va_type, VREG);
 997         VATTR_SET(vap, va_mode, cmode);
 998
 999         cn.cn_flags |= (UNIONCREATED);
1000         if ((error = vn_create(udvp, &uvp, &cn, vap, 0, context)) != 0) {
1001                 goto out;
1002         }
1003
1004
1005         if ((error = VNOP_OPEN(uvp, fmode, context)) != 0) {
1006                 vn_clearunionwait(uvp, 0);
1007                 vnode_recycle(uvp);
1008                 vnode_put(uvp);
1009                 goto out;
1010         }
1011
1012         error = vnode_ref_ext(uvp, fmode);
1013         if (error ) {
1014                 vn_clearunionwait(uvp, 0);
1015                 VNOP_CLOSE(uvp, fmode, context);
1016                 vnode_recycle(uvp);
1017                 vnode_put(uvp);
1018                 goto out;
1019         }
1020
1021
1022         /*
1023          * XX - should not ignore errors
1024          * from vnop_close
1025          */
1026         error = VNOP_OPEN(lvp, FREAD, context);
1027         if (error == 0) {
1028                 error = union_copyfile(lvp, uvp, context);
1029                 (void) VNOP_CLOSE(lvp, FREAD, context);
1030         }
1031
1032         VNOP_CLOSE(uvp, fmode, context);
1033         vnode_rele_ext(uvp, fmode, 0);
1034         vn_clearunionwait(uvp, 0);
1035
1036         *vpp = uvp;
1037 out:
1038         if ((cn.cn_flags & HASBUF) == HASBUF) {
1039                 FREE_ZONE(cn.cn_pnbuf, cn.cn_pnlen, M_NAMEI);
1040                 cn.cn_flags &= ~HASBUF;
1041         }
1042         return (error);
1043 }
1044
1045
1046 /*
1047  *      union_relookup:
1048  *
1049  *      dvp should be locked on entry and will be locked on return.  No
1050  *      net change in the ref count will occur.
1051  *
1052  *      If an error is returned, *vpp will be invalid, otherwise it
1053  *      will hold a locked, referenced vnode.  If *vpp == dvp then
1054  *      remember that only one exclusive lock is held.
1055  */
1056
1057 /* No union lock held for this call */
1058 static int
1059 union_relookup(
1060 #ifdef XXX_HELP_ME
1061         struct union_mount *um,
1062 #else   /* !XXX_HELP_ME */
1063         __unused struct union_mount *um,
1064 #endif  /* !XXX_HELP_ME */
1065         struct vnode *dvp,
1066         struct vnode **vpp,
1067         struct componentname *cnp,
1068         struct componentname *cn,
1069         char *path,
1070         int pathlen)
1071 {
1072         int error;
1073
1074         /*
1075          * A new componentname structure must be faked up because
1076          * there is no way to know where the upper level cnp came
1077          * from or what it is being used for.  This must duplicate
1078          * some of the work done by NDINIT, some of the work done
1079          * by namei, some of the work done by lookup and some of
1080          * the work done by vnop_lookup when given a CREATE flag.
1081          * Conclusion: Horrible.
1082          */
1083         cn->cn_namelen = pathlen;
1084         cn->cn_pnbuf = _MALLOC_ZONE(cn->cn_namelen+1, M_NAMEI, M_WAITOK);
1085         cn->cn_pnlen = cn->cn_namelen+1;
1086         bcopy(path, cn->cn_pnbuf, cn->cn_namelen);
1087         cn->cn_pnbuf[cn->cn_namelen] = '\0';
1088
1089         cn->cn_nameiop = CREATE;
1090         cn->cn_flags = (HASBUF|SAVENAME|SAVESTART|ISLASTCN );
1091 #ifdef XXX_HELP_ME
1092         cn->cn_proc = cnp->cn_proc;
1093         if (um->um_op == UNMNT_ABOVE)
1094                 cn->cn_cred = cnp->cn_cred;
1095         else
1096                 cn->cn_cred = um->um_cred;
1097 #endif
1098         cn->cn_context = cnp->cn_context;       /* XXX !UNMNT_ABOVE  case ??? */
1099         cn->cn_nameptr = cn->cn_pnbuf;
1100         cn->cn_hash = 0;
1101         cn->cn_consume = cnp->cn_consume;
1102
1103         vnode_get(dvp);
1104         error = relookup(dvp, vpp, cn);
1105         vnode_put(dvp);
1106
1107         return (error);
1108 }
1109
1110 /*
1111  * Create a shadow directory in the upper layer.
1112  * The new vnode is returned locked.
1113  *
1114  * (um) points to the union mount structure for access to the
1115  * the mounting process's credentials.
1116  * (dvp) is the directory in which to create the shadow directory,
1117  * It is locked (but not ref'd) on entry and return.
1118  * (cnp) is the component name to be created.
1119  * (vpp) is the returned newly created shadow directory, which
1120  * is returned locked and ref'd
1121  */
1122 /* No union lock held for this call */
1123 int
1124 union_mkshadow(um, dvp, cnp, vpp)
1125         struct union_mount *um;
1126         struct vnode *dvp;
1127         struct componentname *cnp;
1128         struct vnode **vpp;
1129 {
1130         int error;
1131         struct vnode_attr va;
1132         struct componentname cn;
1133
1134         bzero(&cn, sizeof(struct componentname));
1135
1136
1137         error = union_relookup(um, dvp, vpp, cnp, &cn,
1138                         cnp->cn_nameptr, cnp->cn_namelen);
1139         if (error)
1140                 goto out;
1141
1142         if (*vpp) {
1143                 error = EEXIST;
1144                 goto out;
1145         }
1146
1147         /*
1148          * Policy: when creating the shadow directory in the
1149          * upper layer, create it owned by the user who did
1150          * the mount, group from parent directory, and mode
1151          * 777 modified by umask (ie mostly identical to the
1152          * mkdir syscall).  (jsp, kb)
1153          */
1154
1155         VATTR_INIT(&va);
1156         VATTR_SET(&va, va_type, VDIR);
1157         VATTR_SET(&va, va_mode, um->um_cmode);
1158
1159         error = vn_create(dvp, vpp, &cn, &va, 0, cnp->cn_context);
1160 out:
1161         if ((cn.cn_flags & HASBUF) == HASBUF) {
1162                 FREE_ZONE(cn.cn_pnbuf, cn.cn_pnlen, M_NAMEI);
1163                 cn.cn_flags &= ~HASBUF;
1164         }
1165         return (error);
1166 }
1167
1168 /*
1169  * Create a whiteout entry in the upper layer.
1170  *
1171  * (um) points to the union mount structure for access to the
1172  * the mounting process's credentials.
1173  * (dvp) is the directory in which to create the whiteout.
1174  * it is locked on entry and exit.
1175  * (cnp) is the componentname to be created.
1176  */
1177 /* No union lock held for this call */
1178 int
1179 union_mkwhiteout(um, dvp, cnp, path)
1180         struct union_mount *um;
1181         struct vnode *dvp;
1182         struct componentname *cnp;
1183         char *path;
1184 {
1185         int error;
1186         struct vnode *wvp;
1187         struct componentname cn;
1188
1189         bzero(&cn, sizeof(struct componentname));
1190
1191         error = union_relookup(um, dvp, &wvp, cnp, &cn, path, strlen(path));
1192         if (error) {
1193                 goto out;
1194         }
1195         if (wvp) {
1196                 error = EEXIST;
1197                 goto out;
1198         }
1199
1200         error = VNOP_WHITEOUT(dvp, &cn, CREATE, cnp->cn_context);
1201
1202 out:
1203         if ((cn.cn_flags & HASBUF) == HASBUF) {
1204                 FREE_ZONE(cn.cn_pnbuf, cn.cn_pnlen, M_NAMEI);
1205                 cn.cn_flags &= ~HASBUF;
1206         }
1207         return (error);
1208 }
1209
1210
1211 /*
1212  * union_vn_create: creates and opens a new shadow file
1213  * on the upper union layer.  This function is similar
1214  * in spirit to calling vn_open() but it avoids calling namei().
1215  * The problem with calling namei() is that a) it locks too many
1216  * things, and b) it doesn't start at the "right" directory,
1217  * whereas relookup() is told where to start.
1218  *
1219  * On entry, the vnode associated with un is locked.  It remains locked
1220  * on return.
1221  *
1222  * If no error occurs, *vpp contains a locked referenced vnode for your
1223  * use.  If an error occurs *vpp iis undefined.
1224  */
1225 /* called with no union lock held */
1226 int
1227 union_vn_create(struct vnode **vpp, struct union_node *un, mode_t cmode, vfs_context_t  context)
1228 {
1229         struct vnode *vp;
1230         struct vnode_attr vat;
1231         struct vnode_attr *vap = &vat;
1232         int fmode = FFLAGS(O_WRONLY|O_CREAT|O_TRUNC|O_EXCL);
1233         int error;
1234         struct proc * p = vfs_context_proc(context);
1235         struct componentname cn;
1236
1237         bzero(&cn, sizeof(struct componentname));
1238         *vpp = NULLVP;
1239
1240         if (cmode == (mode_t)0)
1241                 cmode = UN_FILEMODE & ~p->p_fd->fd_cmask;
1242         else
1243                 cmode = cmode & ~p->p_fd->fd_cmask;
1244
1245
1246         /*
1247          * Build a new componentname structure (for the same
1248          * reasons outlines in union_mkshadow()).
1249          * The difference here is that the file is owned by
1250          * the current user, rather than by the person who
1251          * did the mount, since the current user needs to be
1252          * able to write the file (that's why it is being
1253          * copied in the first place).
1254          */
1255         cn.cn_namelen = strlen(un->un_path);
1256         cn.cn_pnbuf = (caddr_t) _MALLOC_ZONE(cn.cn_namelen+1,
1257                                                 M_NAMEI, M_WAITOK);
1258         cn.cn_pnlen = cn.cn_namelen+1;
1259         bcopy(un->un_path, cn.cn_pnbuf, cn.cn_namelen+1);
1260         cn.cn_nameiop = CREATE;
1261         if (UNNODE_FAULTIN(un))
1262                 cn.cn_flags = (HASBUF|SAVENAME|SAVESTART|ISLASTCN|UNIONCREATED);
1263         else
1264                 cn.cn_flags = (HASBUF|SAVENAME|SAVESTART|ISLASTCN);
1265         cn.cn_context = context;
1266         cn.cn_nameptr = cn.cn_pnbuf;
1267         cn.cn_hash = un->un_hash;
1268         cn.cn_consume = 0;
1269
1270         /*
1271          * Pass dvp unlocked and referenced on call to relookup().
1272          *
1273          * If an error occurs, dvp will be returned unlocked and dereferenced.
1274          */
1275         vnode_get(un->un_dirvp);
1276         if ((error = relookup(un->un_dirvp, &vp, &cn)) != 0) {
1277                 vnode_put(un->un_dirvp);
1278                 goto out;
1279         }
1280         vnode_put(un->un_dirvp);
1281
1282         /*
1283          * If no error occurs, dvp will be returned locked with the reference
1284          * left as before, and vpp will be returned referenced and locked.
1285          */
1286         if (vp) {
1287                 *vpp = vp;
1288                 error = EEXIST;
1289                 goto out;
1290         }
1291
1292         /*
1293          * Good - there was no race to create the file
1294          * so go ahead and create it.  The permissions
1295          * on the file will be 0666 modified by the
1296          * current user's umask.  Access to the file, while
1297          * it is unioned, will require access to the top *and*
1298          * bottom files.  Access when not unioned will simply
1299          * require access to the top-level file.
1300          *
1301          * TODO: confirm choice of access permissions.
1302          *       decide on authorisation behaviour
1303          */
1304
1305         VATTR_INIT(vap);
1306         VATTR_SET(vap, va_type, VREG);
1307         VATTR_SET(vap, va_mode, cmode);
1308
1309         if ((error = vn_create(un->un_dirvp, &vp, &cn, vap, 0, context)) != 0) {
1310                 goto out;
1311         }
1312
1313         if ((error = VNOP_OPEN(vp, fmode, context)) != 0) {
1314                 vnode_put(vp);
1315                 goto out;
1316         }
1317
1318         vnode_lock(vp);
1319         if (++vp->v_writecount <= 0)
1320                 panic("union: v_writecount");
1321         vnode_unlock(vp);
1322         *vpp = vp;
1323         error = 0;
1324
1325 out:
1326         if ((cn.cn_flags & HASBUF) == HASBUF) {
1327                 FREE_ZONE(cn.cn_pnbuf, cn.cn_pnlen, M_NAMEI);
1328                 cn.cn_flags &= ~HASBUF;
1329         }
1330         return(error);
1331 }
1332
1333 /* called with no union lock held */
1334 static int
1335 union_vn_close(struct vnode *vp, int fmode, vfs_context_t context)
1336 {
1337
1338         if (fmode & FWRITE) {
1339                 vnode_lock(vp);
1340                 --vp->v_writecount;
1341                 vnode_unlock(vp);
1342         }
1343         return (VNOP_CLOSE(vp, fmode, context));
1344 }
1345
1346 /*
1347  *      union_removed_upper:
1348  *
1349  *      An upper-only file/directory has been removed; un-cache it so
1350  *      that unionfs vnode gets reclaimed and the last uppervp reference
1351  *      disappears.
1352  *
1353  *      Called with union_node unlocked.
1354  */
1355 /* always called with union lock held */
1356 void
1357 union_removed_upper(un)
1358         struct union_node *un;
1359 {
1360         union_newupper(un, NULLVP);
1361         if (un->un_flags & UN_CACHED) {
1362                 un->un_flags &= ~UN_CACHED;
1363                 LIST_REMOVE(un, un_cache);
1364         }
1365
1366 }
1367
#if 0
/*
 * Disabled code, kept for reference only.
 *
 * Return the lower vnode of (vp) when it exists, has the same v_type
 * as (vp) and vnode_get() on it succeeds; otherwise return NULLVP.
 */
struct vnode *
union_lowervp(vp)
	struct vnode *vp;
{
	struct union_node *un = VTOUNION(vp);

	if (un->un_lowervp == NULLVP)
		return (NULLVP);
	if (vp->v_type != un->un_lowervp->v_type)
		return (NULLVP);
	if (vnode_get(un->un_lowervp) != 0)
		return (NULLVP);

	return (un->un_lowervp);
}
#endif
1384
1385 /*
1386  * Determine whether a whiteout is needed
1387  * during a remove/rmdir operation.
1388  */
1389 /* called with no union lock held */
1390 int
1391 union_dowhiteout(struct union_node *un, vfs_context_t ctx)
1392 {
1393         struct vnode_attr va;
1394
1395         if (UNNODE_FAULTIN(un))
1396                 return(0);
1397
1398         if ((un->un_lowervp != NULLVP) )
1399                 return (1);
1400
1401         VATTR_INIT(&va);
1402         VATTR_WANTED(&va, va_flags);
1403         if (vnode_getattr(un->un_uppervp, &va, ctx) == 0 &&
1404             (va.va_flags & OPAQUE))
1405                 return (1);
1406
1407         return (0);
1408 }
1409
1410 /* called with no union lock held */
1411 static void
1412 union_dircache_r(struct vnode *vp, struct vnode ***vppp, int *cntp)
1413 {
1414         struct union_node *un;
1415
1416         if (vp->v_op != union_vnodeop_p) {
1417                 if (vppp) {
1418                         vnode_get(vp);
1419                         *(*vppp)++ = vp;
1420                         if (--(*cntp) == 0)
1421                                 panic("union: dircache table too small");
1422                 } else {
1423                         (*cntp)++;
1424                 }
1425
1426                 return;
1427         }
1428
1429         un = VTOUNION(vp);
1430         if (un->un_uppervp != NULLVP)
1431                 union_dircache_r(un->un_uppervp, vppp, cntp);
1432         if (un->un_lowervp != NULLVP)
1433                 union_dircache_r(un->un_lowervp, vppp, cntp);
1434 }
1435
1436 /* called with no union lock held */
1437 struct vnode *
1438 union_dircache(struct vnode *vp, __unused vfs_context_t context)
1439 {
1440         int count;
1441         struct vnode *nvp, *lvp;
1442         struct vnode **vpp;
1443         struct vnode **dircache, **newdircache;
1444         struct union_node *un;
1445         int error;
1446         int alloced = 0;
1447
1448         union_lock();
1449         newdircache = NULL;
1450
1451         nvp = NULLVP;
1452         un = VTOUNION(vp);
1453
1454         dircache = un->un_dircache;
1455         if (dircache == 0) {
1456                 union_unlock();
1457                 count = 0;
1458                 union_dircache_r(vp, 0, &count);
1459                 count++;
1460 #if 0
1461                 /* too bad; we need Union now! */
1462 #if MAC_XXX
1463                 panic("MAC Framework doesn't support unionfs (yet)\n");
1464 #endif /* MAC */
1465 #endif
1466
1467                 dircache = (struct vnode **)
1468                                 _MALLOC(count * sizeof(struct vnode *),
1469                                         M_TEMP, M_WAITOK);
1470                 newdircache = dircache;
1471                 alloced = 1;
1472                 vpp = dircache;
1473                 union_dircache_r(vp, &vpp, &count);
1474                 *vpp = NULLVP;
1475                 vpp = dircache + 1;
1476                 union_lock();
1477         } else {
1478                 vpp = dircache;
1479                 do {
1480                         if (*vpp++ == un->un_uppervp)
1481                                 break;
1482                 } while (*vpp != NULLVP);
1483         }
1484
1485         lvp = *vpp;
1486         union_unlock();
1487         if (lvp == NULLVP) {
1488                 goto out;
1489         }
1490
1491         vnode_get(lvp);
1492         union_lock();
1493
1494         error = union_allocvp(&nvp, vp->v_mount, NULLVP, NULLVP, 0, lvp, NULLVP, 0);
1495         if (error) {
1496                 union_unlock();
1497                 vnode_put(lvp);
1498                 goto out;
1499         }
1500
1501         un->un_dircache = 0;
1502         un = VTOUNION(nvp);
1503 #if 0
1504         if ((alloced != 0) && (un->un_dircache != 0)) {
1505                 union_unlock();
1506                 for (vpp = newdircache; *vpp != NULLVP; vpp++)
1507                         vnode_put(*vpp);
1508                 _FREE(newdircache, M_TEMP);
1509                 newdircache = NULL;
1510                 union_lock();
1511                 if (nvp != NULLVP)
1512                         union_freevp(nvp);
1513                 goto loop;
1514         }
1515 #endif
1516         un->un_dircache = dircache;
1517         un->un_flags |= UN_DIRENVN;
1518
1519         newdircache = NULL;
1520         union_unlock();
1521         return (nvp);
1522
1523 out:
1524         /*
1525          * If we allocated a new dircache and couldn't attach
1526          * it to a new vp, free the resources we allocated.
1527          */
1528         if (newdircache) {
1529                 for (vpp = newdircache; *vpp != NULLVP; vpp++)
1530                         vnode_put(*vpp);
1531                 _FREE(newdircache, M_TEMP);
1532         }
1533         return (NULLVP);
1534 }
1535
1536 /*
1537  * Module glue to remove #ifdef UNION from vfs_syscalls.c
1538  */
1539 /* Called with no union lock, the union_dircache takes locks when necessary */
1540 static int
1541 union_dircheck(struct vnode **vpp, struct fileproc *fp, vfs_context_t ctx)
1542 {
1543         int error = 0;
1544         vnode_t vp = *vpp;
1545
1546         if (vp->v_op == union_vnodeop_p) {
1547                 struct vnode *lvp;
1548
1549                 lvp = union_dircache(vp, ctx);
1550                 if (lvp != NULLVP) {
1551                         struct vnode_attr va;
1552                         /*
1553                          * If the directory is opaque,
1554                          * then don't show lower entries
1555                          */
1556                         VATTR_INIT(&va);
1557                         VATTR_WANTED(&va, va_flags);
1558                         error = vnode_getattr(vp, &va, ctx);
1559                         if (va.va_flags & OPAQUE) {
1560                                 vnode_put(lvp);
1561                                 lvp = NULL;
1562                         }
1563                 }
1564
1565                 if (lvp != NULLVP) {
1566 #if CONFIG_MACF
1567                         error = mac_vnode_check_open(ctx, lvp, FREAD);
1568                         if (error) {
1569                                 vnode_put(lvp);
1570                                 return(error);
1571                         }
1572 #endif /* MAC */
1573                         error = VNOP_OPEN(lvp, FREAD, ctx);
1574                         if (error) {
1575                                 vnode_put(lvp);
1576                                 return(error);
1577                         }
1578                         vnode_ref(lvp);
1579                         fp->f_fglob->fg_data = (caddr_t) lvp;
1580                         fp->f_fglob->fg_offset = 0;
1581
1582                         error = VNOP_CLOSE(vp, FREAD, ctx);
1583                         vnode_rele(vp);
1584                         vnode_put(vp);
1585                         if (error)
1586                                 return(error);
1587
1588                         *vpp = lvp;
1589                         return -1;      /* goto unionread */
1590                 }
1591         }
1592         return error;
1593 }
1594
1595 /*  called from inactive with union lock held */
1596 void
1597 union_dircache_free(struct union_node *un)
1598 {
1599         struct vnode **vpp;
1600
1601         vpp = un->un_dircache;
1602         un->un_dircache = NULL;
1603         union_unlock();
1604
1605         for (; *vpp != NULLVP; vpp++)
1606                 vnode_put(*vpp);
1607         _FREE(un->un_dircache, M_TEMP);
1608         union_lock();
1609 }
1610