]> git.saurik.com Git - apple/xnu.git/blob - bsd/miscfs/union/union_subr.c
xnu-792.tar.gz
[apple/xnu.git] / bsd / miscfs / union / union_subr.c
1 /*
2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
23 /*
24 * Copyright (c) 1994 Jan-Simon Pendry
25 * Copyright (c) 1994
26 * The Regents of the University of California. All rights reserved.
27 *
28 * This code is derived from software contributed to Berkeley by
29 * Jan-Simon Pendry.
30 *
31 * Redistribution and use in source and binary forms, with or without
32 * modification, are permitted provided that the following conditions
33 * are met:
34 * 1. Redistributions of source code must retain the above copyright
35 * notice, this list of conditions and the following disclaimer.
36 * 2. Redistributions in binary form must reproduce the above copyright
37 * notice, this list of conditions and the following disclaimer in the
38 * documentation and/or other materials provided with the distribution.
39 * 3. All advertising materials mentioning features or use of this software
40 * must display the following acknowledgement:
41 * This product includes software developed by the University of
42 * California, Berkeley and its contributors.
43 * 4. Neither the name of the University nor the names of its contributors
44 * may be used to endorse or promote products derived from this software
45 * without specific prior written permission.
46 *
47 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
48 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
49 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
50 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
51 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
52 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
53 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
55 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
56 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
57 * SUCH DAMAGE.
58 *
59 * @(#)union_subr.c 8.20 (Berkeley) 5/20/95
60 */
61
62 #include <sys/param.h>
63 #include <sys/systm.h>
64 #include <sys/proc_internal.h>
65 #include <sys/kauth.h>
66 #include <sys/time.h>
67 #include <sys/kernel.h>
68 #include <sys/vnode_internal.h>
69 #include <sys/namei.h>
70 #include <sys/malloc.h>
71 #include <sys/file.h>
72 #include <sys/filedesc.h>
73 #include <sys/queue.h>
74 #include <sys/mount_internal.h>
75 #include <sys/stat.h>
76 #include <sys/ubc.h>
77 #include <sys/uio_internal.h>
78 #include <miscfs/union/union.h>
79
80 #if DIAGNOSTIC
81 #include <sys/proc.h>
82 #endif
83
84 /* must be power of two, otherwise change UNION_HASH() */
85 #define NHASH 32
86
87 /* unsigned int ... */
88 #define UNION_HASH(u, l) \
89 (((((unsigned long) (u)) + ((unsigned long) l)) >> 8) & (NHASH-1))
90
91 static LIST_HEAD(unhead, union_node) unhead[NHASH];
92 static int unvplock[NHASH];
93
94 int
95 union_init()
96 {
97 int i;
98
99 for (i = 0; i < NHASH; i++)
100 LIST_INIT(&unhead[i]);
101 bzero((caddr_t) unvplock, sizeof(unvplock));
102 }
103
104 static int
105 union_list_lock(ix)
106 int ix;
107 {
108
109 if (unvplock[ix] & UN_LOCKED) {
110 unvplock[ix] |= UN_WANT;
111 sleep((caddr_t) &unvplock[ix], PINOD);
112 return (1);
113 }
114
115 unvplock[ix] |= UN_LOCKED;
116
117 return (0);
118 }
119
120 static void
121 union_list_unlock(ix)
122 int ix;
123 {
124
125 unvplock[ix] &= ~UN_LOCKED;
126
127 if (unvplock[ix] & UN_WANT) {
128 unvplock[ix] &= ~UN_WANT;
129 wakeup((caddr_t) &unvplock[ix]);
130 }
131 }
132
/*
 * Re-point a union node at a new (uppervp, lowervp) pair.  Moves the
 * node between hash chains when the pair's hash changes, and drops the
 * references the node held on any vnodes being replaced.  The node
 * falls out of the cache entirely when both new vnodes are nil.
 */
void
union_updatevp(un, uppervp, lowervp)
	struct union_node *un;
	struct vnode *uppervp;
	struct vnode *lowervp;
{
	int ohash = UNION_HASH(un->un_uppervp, un->un_lowervp);
	int nhash = UNION_HASH(uppervp, lowervp);
	int docache = (lowervp != NULLVP || uppervp != NULLVP);
	int lhash, uhash;

	/*
	 * Ensure locking is ordered from lower to higher
	 * to avoid deadlocks.
	 */
	if (nhash < ohash) {
		lhash = nhash;
		uhash = ohash;
	} else {
		lhash = ohash;
		uhash = nhash;
	}

	if (lhash != uhash)
		while (union_list_lock(lhash))
			continue;

	while (union_list_lock(uhash))
		continue;

	/* Leaving the old chain, or dropping out of the cache altogether. */
	if (ohash != nhash || !docache) {
		if (un->un_flags & UN_CACHED) {
			un->un_flags &= ~UN_CACHED;
			LIST_REMOVE(un, un_cache);
		}
	}

	if (ohash != nhash)
		union_list_unlock(ohash);

	/*
	 * Replace the lower vnode.  The saved pathname and directory
	 * reference (used by union_vn_create) described the old lower
	 * object, so release them along with it.
	 */
	if (un->un_lowervp != lowervp) {
		if (un->un_lowervp) {
			vnode_put(un->un_lowervp);
			if (un->un_path) {
				_FREE(un->un_path, M_TEMP);
				un->un_path = 0;
			}
			if (un->un_dirvp) {
				vnode_put(un->un_dirvp);
				un->un_dirvp = NULLVP;
			}
		}
		un->un_lowervp = lowervp;
		un->un_lowersz = VNOVAL;	/* cached size now unknown */
	}

	if (un->un_uppervp != uppervp) {
		if (un->un_uppervp)
			vnode_put(un->un_uppervp);

		un->un_uppervp = uppervp;
		un->un_uppersz = VNOVAL;	/* cached size now unknown */
	}

	/* Re-insert on the new chain when the node is still cacheable. */
	if (docache && (ohash != nhash)) {
		LIST_INSERT_HEAD(&unhead[nhash], un, un_cache);
		un->un_flags |= UN_CACHED;
	}

	union_list_unlock(nhash);
}
204
205 void
206 union_newlower(un, lowervp)
207 struct union_node *un;
208 struct vnode *lowervp;
209 {
210
211 union_updatevp(un, un->un_uppervp, lowervp);
212 }
213
214 void
215 union_newupper(un, uppervp)
216 struct union_node *un;
217 struct vnode *uppervp;
218 {
219
220 union_updatevp(un, uppervp, un->un_lowervp);
221 }
222
223 /*
224 * Keep track of size changes in the underlying vnodes.
225 * If the size changes, then callback to the vm layer
226 * giving priority to the upper layer size.
227 */
228 void
229 union_newsize(vp, uppersz, lowersz)
230 struct vnode *vp;
231 off_t uppersz, lowersz;
232 {
233 struct union_node *un;
234 off_t sz;
235
236 /* only interested in regular files */
237 if (vp->v_type != VREG)
238 return;
239
240 un = VTOUNION(vp);
241 sz = VNOVAL;
242
243 if ((uppersz != VNOVAL) && (un->un_uppersz != uppersz)) {
244 un->un_uppersz = uppersz;
245 if (sz == VNOVAL)
246 sz = un->un_uppersz;
247 }
248
249 if ((lowersz != VNOVAL) && (un->un_lowersz != lowersz)) {
250 un->un_lowersz = lowersz;
251 if (sz == VNOVAL)
252 sz = un->un_lowersz;
253 }
254
255 if (sz != VNOVAL) {
256 #ifdef UNION_DIAGNOSTIC
257 printf("union: %s size now %ld\n",
258 uppersz != VNOVAL ? "upper" : "lower", (long) sz);
259 #endif
260 ubc_setsize(vp, sz);
261 }
262 }
263
264 /*
265 * allocate a union_node/vnode pair. the vnode is
266 * referenced and locked. the new vnode is returned
267 * via (vpp). (mp) is the mountpoint of the union filesystem,
268 * (dvp) is the parent directory where the upper layer object
269 * should exist (but doesn't) and (cnp) is the componentname
270 * information which is partially copied to allow the upper
271 * layer object to be created at a later time. (uppervp)
272 * and (lowervp) reference the upper and lower layer objects
273 * being mapped. either, but not both, can be nil.
274 * if supplied, (uppervp) is locked.
275 * the reference is either maintained in the new union_node
276 * object which is allocated, or they are vnode_put'd.
277 *
278 * all union_nodes are maintained on a singly-linked
279 * list. new nodes are only allocated when they cannot
280 * be found on this list. entries on the list are
281 * removed when the vfs reclaim entry is called.
282 *
283 * a single lock is kept for the entire list. this is
284 * needed because the getnewvnode() function can block
285 * waiting for a vnode to become free, in which case there
286 * may be more than one process trying to get the same
287 * vnode. this lock is only taken if we are going to
288 * call getnewvnode, since the kernel itself is single-threaded.
289 *
290 * if an entry is found on the list, then call vnode_get() to
291 * take a reference. this is done because there may be
292 * zero references to it and so it needs to removed from
293 * the vnode free list.
294 */
int
union_allocvp(vpp, mp, undvp, dvp, cnp, uppervp, lowervp, docache)
	struct vnode **vpp;
	struct mount *mp;
	struct vnode *undvp;		/* parent union vnode */
	struct vnode *dvp;		/* may be null */
	struct componentname *cnp;	/* may be null */
	struct vnode *uppervp;		/* may be null */
	struct vnode *lowervp;		/* may be null */
	int docache;
{
	int error;
	struct union_node *un;
	struct union_node **pp;
	struct vnode *xlowervp = NULLVP;
	struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
	int hash;
	int markroot;
	int try;
	struct union_node *unp;
	struct vnode_fsparam vfsp;
	enum vtype vtype;

	if (uppervp == NULLVP && lowervp == NULLVP)
		panic("union: unidentifiable allocation");

	/*
	 * If the two layers disagree on type, ignore the lower layer
	 * but remember it in xlowervp so its reference can be released
	 * at the end.
	 */
	if (uppervp && lowervp && (uppervp->v_type != lowervp->v_type)) {
		xlowervp = lowervp;
		lowervp = NULLVP;
	}

	/* detect the root vnode (and aliases) */
	markroot = 0;
	if ((uppervp == um->um_uppervp) &&
	    ((lowervp == NULLVP) || lowervp == um->um_lowervp)) {
		if (lowervp == NULLVP) {
			lowervp = um->um_lowervp;
			if (lowervp != NULLVP)
				vnode_get(lowervp);
		}
		markroot = VROOT;
	}

loop:
	/*
	 * Look for an existing cached node.  Three probes are tried:
	 * the exact (upper, lower) pair, then upper-only, then
	 * lower-only, so partially-constructed nodes can be matched.
	 */
	if (!docache) {
		un = 0;
	} else for (try = 0; try < 3; try++) {
		switch (try) {
		case 0:
			if (lowervp == NULLVP)
				continue;
			hash = UNION_HASH(uppervp, lowervp);
			break;

		case 1:
			if (uppervp == NULLVP)
				continue;
			hash = UNION_HASH(uppervp, NULLVP);
			break;

		case 2:
			if (lowervp == NULLVP)
				continue;
			hash = UNION_HASH(NULLVP, lowervp);
			break;
		}

		while (union_list_lock(hash))
			continue;

		for (un = unhead[hash].lh_first; un != 0;
			un = un->un_cache.le_next) {
			if ((un->un_lowervp == lowervp ||
			    un->un_lowervp == NULLVP) &&
			    (un->un_uppervp == uppervp ||
			    un->un_uppervp == NULLVP) &&
			    (UNIONTOV(un)->v_mount == mp)) {
				/* A failed vnode_get means the vnode is in
				 * transition: restart the whole lookup. */
				if (vnode_get(UNIONTOV(un))) {
					union_list_unlock(hash);
					goto loop;
				}
				break;
			}
		}

		union_list_unlock(hash);

		if (un)
			break;
	}

	if (un) {
		/*
		 * Obtain a lock on the union_node.
		 * uppervp is locked, though un->un_uppervp
		 * may not be.  this doesn't break the locking
		 * hierarchy since in the case that un->un_uppervp
		 * is not yet locked it will be vnode_put'd and replaced
		 * with uppervp.
		 */

		if ((dvp != NULLVP) && (uppervp == dvp)) {
			/*
			 * Access ``.'', so (un) will already
			 * be locked.  Since this process has
			 * the lock on (uppervp) no other
			 * process can hold the lock on (un).
			 */
#if DIAGNOSTIC
			if ((un->un_flags & UN_LOCKED) == 0)
				panic("union: . not locked");
			else if (current_proc() && un->un_pid != current_proc()->p_pid &&
				    un->un_pid > -1 && current_proc()->p_pid > -1)
				panic("union: allocvp not lock owner");
#endif
		} else {
			if (un->un_flags & UN_LOCKED) {
				/* Node busy: drop our hold, wait for the
				 * owner's wakeup, and retry from scratch. */
				vnode_put(UNIONTOV(un));
				un->un_flags |= UN_WANT;
				sleep((caddr_t) &un->un_flags, PINOD);
				goto loop;
			}
			un->un_flags |= UN_LOCKED;

#if DIAGNOSTIC
			if (current_proc())
				un->un_pid = current_proc()->p_pid;
			else
				un->un_pid = -1;
#endif
		}

		/*
		 * At this point, the union_node is locked,
		 * un->un_uppervp may not be locked, and uppervp
		 * is locked or nil.
		 */

		/*
		 * Save information about the upper layer.
		 */
		if (uppervp != un->un_uppervp) {
			union_newupper(un, uppervp);
		} else if (uppervp) {
			/* Node already holds a reference; drop the caller's. */
			vnode_put(uppervp);
		}

		if (un->un_uppervp) {
			un->un_flags |= UN_ULOCK;
			un->un_flags &= ~UN_KLOCK;
		}

		/*
		 * Save information about the lower layer.
		 * This needs to keep track of pathname
		 * and directory information which union_vn_create
		 * might need.
		 */
		if (lowervp != un->un_lowervp) {
			union_newlower(un, lowervp);
			if (cnp && (lowervp != NULLVP)) {
				un->un_hash = cnp->cn_hash;
				MALLOC(un->un_path, caddr_t, cnp->cn_namelen+1,
						M_TEMP, M_WAITOK);
				bcopy(cnp->cn_nameptr, un->un_path,
						cnp->cn_namelen);
				un->un_path[cnp->cn_namelen] = '\0';
				vnode_get(dvp);
				un->un_dirvp = dvp;
			}
		} else if (lowervp) {
			/* Node already holds a reference; drop the caller's. */
			vnode_put(lowervp);
		}
		*vpp = UNIONTOV(un);
		return (0);
	}

	if (docache) {
		/*
		 * otherwise lock the vp list while we call getnewvnode
		 * since that can block.
		 */
		hash = UNION_HASH(uppervp, lowervp);

		if (union_list_lock(hash))
			goto loop;
	}

	MALLOC(unp, void *, sizeof(struct union_node), M_TEMP, M_WAITOK);

	if (uppervp)
		vtype = uppervp->v_type;
	else
		vtype = lowervp->v_type;
	//bzero(&vfsp, sizeof(struct vnode_fsparam));
	vfsp.vnfs_mp = mp;
	vfsp.vnfs_vtype = vtype;
	vfsp.vnfs_str = "unionfs";
	vfsp.vnfs_dvp = dvp;
	vfsp.vnfs_fsnode = unp;
	vfsp.vnfs_cnp = cnp;
	vfsp.vnfs_vops = union_vnodeop_p;
	vfsp.vnfs_rdev = 0;
	vfsp.vnfs_filesize = 0;
	vfsp.vnfs_flags = VNFS_NOCACHE | VNFS_CANTCACHE;
	vfsp.vnfs_marksystem = 0;
	vfsp.vnfs_markroot = markroot;

	error = vnode_create(VNCREATE_FLAVOR, VCREATESIZE, &vfsp, vpp);
	if (error) {
		/* Creation failed: release everything we were handed. */
		FREE(unp, M_TEMP);
		if (uppervp) {
			vnode_put(uppervp);
		}
		if (lowervp)
			vnode_put(lowervp);

		goto out;
	}

	/* New node: the vnode inherits the references to both layers. */
	(*vpp)->v_tag = VT_UNION;
	un = VTOUNION(*vpp);
	un->un_vnode = *vpp;
	un->un_uppervp = uppervp;
	un->un_uppersz = VNOVAL;
	un->un_lowervp = lowervp;
	un->un_lowersz = VNOVAL;
	un->un_pvp = undvp;
	if (undvp != NULLVP)
		vnode_get(undvp);
	un->un_dircache = 0;
	un->un_openl = 0;
	un->un_flags = UN_LOCKED;
	if (un->un_uppervp)
		un->un_flags |= UN_ULOCK;
#if DIAGNOSTIC
	if (current_proc())
		un->un_pid = current_proc()->p_pid;
	else
		un->un_pid = -1;
#endif
	/* Remember name/directory info so a copy-up can create the file. */
	if (cnp && (lowervp != NULLVP)) {
		un->un_hash = cnp->cn_hash;
		un->un_path = _MALLOC(cnp->cn_namelen+1, M_TEMP, M_WAITOK);
		bcopy(cnp->cn_nameptr, un->un_path, cnp->cn_namelen);
		un->un_path[cnp->cn_namelen] = '\0';
		vnode_get(dvp);
		un->un_dirvp = dvp;
	} else {
		un->un_hash = 0;
		un->un_path = 0;
		un->un_dirvp = 0;
	}

	if (docache) {
		LIST_INSERT_HEAD(&unhead[hash], un, un_cache);
		un->un_flags |= UN_CACHED;
	}

	/* Drop the type-mismatched lower vnode remembered earlier. */
	if (xlowervp)
		vnode_put(xlowervp);

out:
	if (docache)
		union_list_unlock(hash);

	return (error);
}
563
564 int
565 union_freevp(vp)
566 struct vnode *vp;
567 {
568 struct union_node *un = VTOUNION(vp);
569
570 if (un->un_flags & UN_CACHED) {
571 un->un_flags &= ~UN_CACHED;
572 LIST_REMOVE(un, un_cache);
573 }
574
575 if (un->un_pvp != NULLVP)
576 vnode_put(un->un_pvp);
577 if (un->un_uppervp != NULLVP)
578 vnode_put(un->un_uppervp);
579 if (un->un_lowervp != NULLVP)
580 vnode_put(un->un_lowervp);
581 if (un->un_dirvp != NULLVP)
582 vnode_put(un->un_dirvp);
583 if (un->un_path)
584 _FREE(un->un_path, M_TEMP);
585
586 FREE(vp->v_data, M_TEMP);
587 vp->v_data = 0;
588
589 return (0);
590 }
591
/*
 * copyfile.  copy the vnode (fvp) to the vnode (tvp)
 * using a sequence of reads and writes.  both (fvp)
 * and (tvp) are locked on entry and exit.
 *
 * Returns 0 on success or the first VNOP_READ/VNOP_WRITE error.
 */
int
union_copyfile(struct vnode *fvp, struct vnode *tvp, kauth_cred_t cred,
	struct proc *p)
{
	char *bufp;
	struct uio uio;
	struct iovec_32 iov;
	struct vfs_context context;
	int error = 0;

	/*
	 * strategy:
	 * allocate a buffer of size MAXPHYSIO.
	 * loop doing reads and writes, keeping track
	 * of the current uio offset.
	 * give up at the first sign of trouble.
	 */

	context.vc_proc = p;
	context.vc_ucred = cred;

#if 1	/* LP64todo - can't use new segment flags until the drivers are ready */
	uio.uio_segflg = UIO_SYSSPACE;
#else
	uio.uio_segflg = UIO_SYSSPACE32;
#endif
	uio.uio_offset = 0;

	bufp = _MALLOC(MAXPHYSIO, M_TEMP, M_WAITOK);

	/* ugly loop follows... */
	do {
		/* Remember where this chunk started; the read advances
		 * uio_offset and we must rewind before writing. */
		off_t offset = uio.uio_offset;

		/* The iovec must be re-initialized each pass: the uio
		 * machinery consumes it during the transfer. */
		uio.uio_iovs.iov32p = &iov;
		uio.uio_iovcnt = 1;
		iov.iov_base = (uintptr_t)bufp;
		iov.iov_len = MAXPHYSIO;
		uio_setresid(&uio, iov.iov_len);
		uio.uio_rw = UIO_READ;
		error = VNOP_READ(fvp, &uio, 0, &context);

		if (error == 0) {
			/* Write back exactly the bytes the read produced:
			 * MAXPHYSIO minus whatever was left unread. */
			uio.uio_iovs.iov32p = &iov;
			uio.uio_iovcnt = 1;
			iov.iov_base = (uintptr_t)bufp;
			iov.iov_len = MAXPHYSIO - uio_resid(&uio);
			uio.uio_offset = offset;
			uio.uio_rw = UIO_WRITE;
			uio_setresid(&uio, iov.iov_len);

			/* Zero-length read means EOF: copy complete. */
			if (uio_resid(&uio) == 0)
				break;

			do {
				error = VNOP_WRITE(tvp, &uio, 0, &context);
			} while ((uio_resid(&uio) > 0) && (error == 0));
		}

	} while (error == 0);

	_FREE(bufp, M_TEMP);
	return (error);
}
661
/*
 * Copy a lower-layer object up to a freshly created shadow file in the
 * upper layer, so that subsequent writes go to the writable layer.
 *
 * (un) is assumed to be locked on entry and remains
 * locked on exit.
 *
 * When (docopy) is set the lower file's contents are copied into the
 * new upper file; otherwise only the empty shadow file is created.
 */
int
union_copyup(struct union_node *un, int docopy, kauth_cred_t cred,
	struct proc *p)
{
	int error;
	struct vnode *lvp, *uvp;
	struct vfs_context context;

	error = union_vn_create(&uvp, un, p);
	if (error)
		return (error);

	context.vc_proc = p;
	context.vc_ucred = cred;

	/* at this point, uppervp is locked */
	union_newupper(un, uvp);
	un->un_flags |= UN_ULOCK;

	lvp = un->un_lowervp;

	if (docopy) {
		/*
		 * XX - should not ignore errors
		 * from vnop_close
		 */
		error = VNOP_OPEN(lvp, FREAD, &context);
		if (error == 0) {
			error = union_copyfile(lvp, uvp, cred, p);
			(void) VNOP_CLOSE(lvp, FREAD, &context);
		}
#ifdef UNION_DIAGNOSTIC
		if (error == 0)
			uprintf("union: copied up %s\n", un->un_path);
#endif

	}
	/* Drop the upper-lock flag around the close, then restore it. */
	un->un_flags &= ~UN_ULOCK;
	union_vn_close(uvp, FWRITE, cred, p);
	un->un_flags |= UN_ULOCK;

	/*
	 * Subsequent IOs will go to the top layer, so
	 * call close on the lower vnode and open on the
	 * upper vnode to ensure that the filesystem keeps
	 * its references counts right.  This doesn't do
	 * the right thing with (cred) and (FREAD) though.
	 * Ignoring error returns is not right, either.
	 */
	if (error == 0) {
		int i;

		for (i = 0; i < un->un_openl; i++) {
			(void) VNOP_CLOSE(lvp, FREAD, &context);
			(void) VNOP_OPEN(uvp, FREAD, &context);
		}
		un->un_openl = 0;
	}

	return (error);

}
728
/*
 * Fake up a componentname for (path) and re-run the lookup in (dvp)
 * with CREATE intent, on behalf of union_mkshadow/union_mkwhiteout.
 * On success *vpp holds the looked-up vnode (or NULL if the name does
 * not yet exist) and the dvp reference taken here has been dropped.
 */
static int
union_relookup(um, dvp, vpp, cnp, cn, path, pathlen)
	struct union_mount *um;
	struct vnode *dvp;
	struct vnode **vpp;
	struct componentname *cnp;
	struct componentname *cn;
	char *path;
	int pathlen;
{
	int error;

	/*
	 * A new componentname structure must be faked up because
	 * there is no way to know where the upper level cnp came
	 * from or what it is being used for.  This must duplicate
	 * some of the work done by NDINIT, some of the work done
	 * by namei, some of the work done by lookup and some of
	 * the work done by vnop_lookup when given a CREATE flag.
	 * Conclusion: Horrible.
	 */
	cn->cn_namelen = pathlen;
	cn->cn_pnbuf = _MALLOC_ZONE(cn->cn_namelen+1, M_NAMEI, M_WAITOK);
	cn->cn_pnlen = cn->cn_namelen+1;
	bcopy(path, cn->cn_pnbuf, cn->cn_namelen);
	cn->cn_pnbuf[cn->cn_namelen] = '\0';

	cn->cn_nameiop = CREATE;
	cn->cn_flags = (LOCKPARENT|HASBUF|SAVENAME|SAVESTART|ISLASTCN);
#ifdef XXX_HELP_ME
	cn->cn_proc = cnp->cn_proc;
	if (um->um_op == UNMNT_ABOVE)
		cn->cn_cred = cnp->cn_cred;
	else
		cn->cn_cred = um->um_cred;
#endif
	cn->cn_context = cnp->cn_context;	/* XXX !UNMNT_ABOVE case ??? */
	cn->cn_nameptr = cn->cn_pnbuf;
	cn->cn_hash = cnp->cn_hash;
	cn->cn_consume = cnp->cn_consume;

	vnode_get(dvp);
	error = relookup(dvp, vpp, cn);
	/*
	 * NOTE(review): on relookup() failure the reference taken by the
	 * vnode_get() above -- and apparently cn_pnbuf -- are never
	 * released here; verify against relookup()'s reference/buffer
	 * semantics (HASBUF) before calling this a leak.
	 */
	if (!error)
		vnode_put(dvp);

	return (error);
}
777
778 /*
779 * Create a shadow directory in the upper layer.
780 * The new vnode is returned locked.
781 *
782 * (um) points to the union mount structure for access to the
783 * the mounting process's credentials.
784 * (dvp) is the directory in which to create the shadow directory.
785 * it is unlocked on entry and exit.
786 * (cnp) is the componentname to be created.
787 * (vpp) is the returned newly created shadow directory, which
788 * is returned locked.
789 */
790 int
791 union_mkshadow(um, dvp, cnp, vpp)
792 struct union_mount *um;
793 struct vnode *dvp;
794 struct componentname *cnp;
795 struct vnode **vpp;
796 {
797 int error;
798 struct vnode_attr va;
799 struct componentname cn;
800
801 error = union_relookup(um, dvp, vpp, cnp, &cn,
802 cnp->cn_nameptr, cnp->cn_namelen);
803 if (error)
804 return (error);
805
806 if (*vpp) {
807 vnode_put(*vpp);
808 *vpp = NULLVP;
809 return (EEXIST);
810 }
811
812 /*
813 * policy: when creating the shadow directory in the
814 * upper layer, create it owned by the user who did
815 * the mount, group from parent directory, and mode
816 * 777 modified by umask (ie mostly identical to the
817 * mkdir syscall). (jsp, kb)
818 */
819 VATTR_INIT(&va);
820 VATTR_SET(&va, va_type, VDIR);
821 VATTR_SET(&va, va_mode, um->um_cmode);
822
823 error = vn_create(dvp, vpp, &cn, &va, 0, cnp->cn_context);
824 return (error);
825 }
826
827 /*
828 * Create a whiteout entry in the upper layer.
829 *
830 * (um) points to the union mount structure for access to the
831 * the mounting process's credentials.
832 * (dvp) is the directory in which to create the whiteout.
833 * it is locked on entry and exit.
834 * (cnp) is the componentname to be created.
835 */
836 int
837 union_mkwhiteout(um, dvp, cnp, path)
838 struct union_mount *um;
839 struct vnode *dvp;
840 struct componentname *cnp;
841 char *path;
842 {
843 int error;
844 struct vnode *wvp;
845 struct componentname cn;
846
847 error = union_relookup(um, dvp, &wvp, cnp, &cn, path, strlen(path));
848 if (error) {
849 return (error);
850 }
851 if (wvp) {
852 vnode_put(dvp);
853 vnode_put(wvp);
854 return (EEXIST);
855 }
856
857 error = VNOP_WHITEOUT(dvp, &cn, CREATE, cnp->cn_context);
858
859 vnode_put(dvp);
860
861 return (error);
862 }
863
864 /*
865 * union_vn_create: creates and opens a new shadow file
866 * on the upper union layer. this function is similar
867 * in spirit to calling vn_open but it avoids calling namei().
868 * the problem with calling namei is that a) it locks too many
869 * things, and b) it doesn't start at the "right" directory,
870 * whereas relookup is told where to start.
871 */
872 int
873 union_vn_create(vpp, un, p)
874 struct vnode **vpp;
875 struct union_node *un;
876 struct proc *p;
877 {
878 struct vnode *vp;
879 kauth_cred_t cred = p->p_ucred;
880 struct vnode_attr vat;
881 struct vnode_attr *vap = &vat;
882 struct vfs_context context;
883 int fmode = FFLAGS(O_WRONLY|O_CREAT|O_TRUNC|O_EXCL);
884 int error;
885 int cmode = UN_FILEMODE & ~p->p_fd->fd_cmask;
886 char *cp;
887 struct componentname cn;
888
889 *vpp = NULLVP;
890
891 context.vc_proc = p;
892 context.vc_ucred = p->p_ucred;
893
894 /*
895 * Build a new componentname structure (for the same
896 * reasons outlines in union_mkshadow).
897 * The difference here is that the file is owned by
898 * the current user, rather than by the person who
899 * did the mount, since the current user needs to be
900 * able to write the file (that's why it is being
901 * copied in the first place).
902 */
903 cn.cn_namelen = strlen(un->un_path);
904 cn.cn_pnbuf = (caddr_t) _MALLOC_ZONE(cn.cn_namelen+1,
905 M_NAMEI, M_WAITOK);
906 cn.cn_pnlen = cn.cn_namelen+1;
907 bcopy(un->un_path, cn.cn_pnbuf, cn.cn_namelen+1);
908 cn.cn_nameiop = CREATE;
909 cn.cn_flags = (LOCKPARENT|HASBUF|SAVENAME|SAVESTART|ISLASTCN);
910 cn.cn_context = &context;
911 cn.cn_nameptr = cn.cn_pnbuf;
912 cn.cn_hash = un->un_hash;
913 cn.cn_consume = 0;
914
915 vnode_get(un->un_dirvp);
916 if (error = relookup(un->un_dirvp, &vp, &cn))
917 return (error);
918 vnode_put(un->un_dirvp);
919
920 if (vp) {
921 vnode_put(un->un_dirvp);
922 vnode_put(vp);
923 return (EEXIST);
924 }
925
926 /*
927 * Good - there was no race to create the file
928 * so go ahead and create it. The permissions
929 * on the file will be 0666 modified by the
930 * current user's umask. Access to the file, while
931 * it is unioned, will require access to the top *and*
932 * bottom files. Access when not unioned will simply
933 * require access to the top-level file.
934 *
935 * TODO: confirm choice of access permissions.
936 * decide on authorisation behaviour
937 */
938
939 VATTR_INIT(vap);
940 VATTR_SET(vap, va_type, VREG);
941 VATTR_SET(vap, va_mode, cmode);
942
943 if (error = vn_create(un->un_dirvp, &vp, &cn, vap, 0, &context))
944 return (error);
945
946 if (error = VNOP_OPEN(vp, fmode, &context)) {
947 vnode_put(vp);
948 return (error);
949 }
950
951 vnode_lock(vp);
952 if (++vp->v_writecount <= 0)
953 panic("union: v_writecount");
954 vnode_unlock(vp);
955 *vpp = vp;
956 return (0);
957 }
958
959 int
960 union_vn_close(struct vnode *vp, int fmode, kauth_cred_t cred,
961 struct proc *p)
962 {
963 struct vfs_context context;
964
965 context.vc_proc = p;
966 context.vc_ucred = cred;
967
968 if (fmode & FWRITE) {
969 vnode_lock(vp);
970 --vp->v_writecount;
971 vnode_unlock(vp);
972 }
973 return (VNOP_CLOSE(vp, fmode, &context));
974 }
975
976 void
977 union_removed_upper(un)
978 struct union_node *un;
979 {
980 struct proc *p = current_proc(); /* XXX */
981
982 union_newupper(un, NULLVP);
983 if (un->un_flags & UN_CACHED) {
984 un->un_flags &= ~UN_CACHED;
985 LIST_REMOVE(un, un_cache);
986 }
987
988 if (un->un_flags & UN_ULOCK) {
989 un->un_flags &= ~UN_ULOCK;
990 }
991 }
992
993 #if 0
994 struct vnode *
995 union_lowervp(vp)
996 struct vnode *vp;
997 {
998 struct union_node *un = VTOUNION(vp);
999
1000 if ((un->un_lowervp != NULLVP) &&
1001 (vp->v_type == un->un_lowervp->v_type)) {
1002 if (vnode_get(un->un_lowervp) == 0)
1003 return (un->un_lowervp);
1004 }
1005
1006 return (NULLVP);
1007 }
1008 #endif
1009
1010 /*
1011 * determine whether a whiteout is needed
1012 * during a remove/rmdir operation.
1013 */
1014 int
1015 union_dowhiteout(struct union_node *un, vfs_context_t ctx)
1016 {
1017 struct vnode_attr va;
1018
1019 if (un->un_lowervp != NULLVP)
1020 return (1);
1021
1022 VATTR_INIT(&va);
1023 VATTR_WANTED(&va, va_flags);
1024 if (vnode_getattr(un->un_uppervp, &va, ctx) == 0 &&
1025 (va.va_flags & OPAQUE))
1026 return (1);
1027
1028 return (0);
1029 }
1030
1031 static void
1032 union_dircache_r(vp, vppp, cntp)
1033 struct vnode *vp;
1034 struct vnode ***vppp;
1035 int *cntp;
1036 {
1037 struct union_node *un;
1038
1039 if (vp->v_op != union_vnodeop_p) {
1040 if (vppp) {
1041 vnode_get(vp);
1042 *(*vppp)++ = vp;
1043 if (--(*cntp) == 0)
1044 panic("union: dircache table too small");
1045 } else {
1046 (*cntp)++;
1047 }
1048
1049 return;
1050 }
1051
1052 un = VTOUNION(vp);
1053 if (un->un_uppervp != NULLVP)
1054 union_dircache_r(un->un_uppervp, vppp, cntp);
1055 if (un->un_lowervp != NULLVP)
1056 union_dircache_r(un->un_lowervp, vppp, cntp);
1057 }
1058
/*
 * Build (or advance through) the directory cache for readdir on a
 * union directory: a NULL-terminated table of referenced vnodes for
 * every layer under (vp).  On the first call the table is built; on
 * subsequent calls the entry after the current upper vnode is used.
 * Returns a new union vnode wrapping the next layer, or NULLVP when
 * the layers are exhausted or allocation fails.
 */
struct vnode *
union_dircache(vp, p)
	struct vnode *vp;
	struct proc *p;
{
	int count;
	struct vnode *nvp;
	struct vnode **vpp;
	struct vnode **dircache;
	struct union_node *un;
	int error;

	dircache = VTOUNION(vp)->un_dircache;

	nvp = NULLVP;

	if (dircache == 0) {
		/* First pass: size the table, then fill it. */
		count = 0;
		union_dircache_r(vp, 0, &count);
		count++;	/* one extra slot for the NULLVP terminator */
		dircache = (struct vnode **)
				_MALLOC(count * sizeof(struct vnode *),
					M_TEMP, M_WAITOK);
		vpp = dircache;
		union_dircache_r(vp, &vpp, &count);
		*vpp = NULLVP;
		vpp = dircache + 1;	/* start with the second entry */
	} else {
		/* Resume: advance past the current upper vnode. */
		vpp = dircache;
		do {
			if (*vpp++ == VTOUNION(vp)->un_uppervp)
				break;
		} while (*vpp != NULLVP);
	}

	if (*vpp == NULLVP)
		goto out;

	/* Wrap the next layer's vnode in a fresh (uncached) union vnode. */
	vnode_get(*vpp);
	error = union_allocvp(&nvp, vp->v_mount, NULLVP, NULLVP, 0, *vpp, NULLVP, 0);
	if (error)
		goto out;

	/* Hand ownership of the dircache table to the new node. */
	VTOUNION(vp)->un_dircache = 0;
	un = VTOUNION(nvp);
	un->un_dircache = dircache;

out:
	return (nvp);
}