bsd/ufs/ufs/ufs_lookup.c

   1 /*
   2  * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
   3  *
   4  * @APPLE_LICENSE_HEADER_START@
   5  *
   6  * The contents of this file constitute Original Code as defined in and
   7  * are subject to the Apple Public Source License Version 1.1 (the
   8  * "License").  You may not use this file except in compliance with the
   9  * License.  Please obtain a copy of the License at
  10  * http://www.apple.com/publicsource and read it before using this file.
  11  *
  12  * This Original Code and all software distributed under the License are
  13  * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  14  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  15  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
  17  * License for the specific language governing rights and limitations
  18  * under the License.
  19  *
  20  * @APPLE_LICENSE_HEADER_END@
  21  */
  22 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
  23 /*
  24  * Copyright (c) 1989, 1993
  25  *      The Regents of the University of California.  All rights reserved.
  26  * (c) UNIX System Laboratories, Inc.
  27  * All or some portions of this file are derived from material licensed
  28  * to the University of California by American Telephone and Telegraph
  29  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  30  * the permission of UNIX System Laboratories, Inc.
  31  *
  32  * Redistribution and use in source and binary forms, with or without
  33  * modification, are permitted provided that the following conditions
  34  * are met:
  35  * 1. Redistributions of source code must retain the above copyright
  36  *    notice, this list of conditions and the following disclaimer.
  37  * 2. Redistributions in binary form must reproduce the above copyright
  38  *    notice, this list of conditions and the following disclaimer in the
  39  *    documentation and/or other materials provided with the distribution.
  40  * 3. All advertising materials mentioning features or use of this software
  41  *    must display the following acknowledgement:
  42  *      This product includes software developed by the University of
  43  *      California, Berkeley and its contributors.
  44  * 4. Neither the name of the University nor the names of its contributors
  45  *    may be used to endorse or promote products derived from this software
  46  *    without specific prior written permission.
  47  *
  48  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  58  * SUCH DAMAGE.
  59  *
  60  *      @(#)ufs_lookup.c        8.15 (Berkeley) 6/16/95
  61  */
  62 #include <rev_endian_fs.h>
  63 #include <sys/param.h>
  64 #include <sys/namei.h>
  65 #include <sys/buf.h>
  66 #include <sys/file.h>
  67 #include <sys/mount.h>
  68 #include <sys/vnode.h>
  69 #include <sys/quota.h>
  70
  71 #include <ufs/ufs/quota.h>
  72 #include <ufs/ufs/inode.h>
  73 #include <ufs/ufs/dir.h>
  74 #include <ufs/ufs/ufsmount.h>
  75 #include <ufs/ufs/ufs_extern.h>
  76 #if REV_ENDIAN_FS
  77 #include <ufs/ufs/ufs_byte_order.h>
  78 #include <architecture/byte_order.h>
  79 #endif /* REV_ENDIAN_FS */
  80
  81 extern struct   nchstats nchstats;
  82 #if DIAGNOSTIC
  83 int     dirchk = 1;
  84 #else
  85 int     dirchk = 0;
  86 #endif
  87
  88 #define FSFMT(vp)       ((vp)->v_mount->mnt_maxsymlinklen <= 0)
  89
  90 /*
  91  * Convert a component of a pathname into a pointer to a locked inode.
  92  * This is a very central and rather complicated routine.
  93  * If the file system is not maintained in a strict tree hierarchy,
  94  * this can result in a deadlock situation (see comments in code below).
  95  *
  96  * The cnp->cn_nameiop argument is LOOKUP, CREATE, RENAME, or DELETE depending
  97  * on whether the name is to be looked up, created, renamed, or deleted.
  98  * When CREATE, RENAME, or DELETE is specified, information usable in
  99  * creating, renaming, or deleting a directory entry may be calculated.
 100  * If flag has LOCKPARENT or'ed into it and the target of the pathname
 101  * exists, lookup returns both the target and its parent directory locked.
 102  * When creating or renaming and LOCKPARENT is specified, the target may
 103  * not be ".".  When deleting and LOCKPARENT is specified, the target may
 104  * be "."., but the caller must check to ensure it does an vrele and vput
 105  * instead of two vputs.
 106  *
 107  * Overall outline of ufs_lookup:
 108  *
 109  *      check accessibility of directory
 110  *      look for name in cache, if found, then if at end of path
 111  *        and deleting or creating, drop it, else return name
 112  *      search for name in directory, to found or notfound
 113  * notfound:
 114  *      if creating, return locked directory, leaving info on available slots
 115  *      else return error
 116  * found:
 117  *      if at end of path and deleting, return information to allow delete
 118  *      if at end of path and rewriting (RENAME and LOCKPARENT), lock target
 119  *        inode and return info to allow rewrite
 120  *      if not at end, add name to cache; if at end and neither creating
 121  *        nor deleting, add name to cache
 122  */
 123 int
 124 ufs_lookup(ap)
 125         struct vop_lookup_args /* {
 126                 struct vnode *a_dvp;
 127                 struct vnode **a_vpp;
 128                 struct componentname *a_cnp;
 129         } */ *ap;
 130 {
 131         register struct vnode *vdp;     /* vnode for directory being searched */
 132         register struct inode *dp;      /* inode for directory being searched */
 133         struct buf *bp;                 /* a buffer of directory entries */
 134         register struct direct *ep;     /* the current directory entry */
 135         int entryoffsetinblock;         /* offset of ep in bp's buffer */
 136         enum {NONE, COMPACT, FOUND} slotstatus;
 137         doff_t slotoffset;              /* offset of area with free space */
 138         int slotsize;                   /* size of area at slotoffset */
 139         int slotfreespace;              /* amount of space free in slot */
 140         int slotneeded;                 /* size of the entry we're seeking */
 141         int numdirpasses;               /* strategy for directory search */
 142         doff_t endsearch;               /* offset to end directory search */
 143         doff_t prevoff;                 /* prev entry dp->i_offset */
 144         struct vnode *pdp;              /* saved dp during symlink work */
 145         struct vnode *tdp;              /* returned by VFS_VGET */
 146         doff_t enduseful;               /* pointer past last used dir slot */
 147         u_long bmask;                   /* block offset mask */
 148         int lockparent;                 /* 1 => lockparent flag is set */
 149         int wantparent;                 /* 1 => wantparent or lockparent flag */
 150         int namlen, error;
 151         struct vnode **vpp = ap->a_vpp;
 152         struct componentname *cnp = ap->a_cnp;
 153         struct ucred *cred = cnp->cn_cred;
 154         int flags = cnp->cn_flags;
 155         int nameiop = cnp->cn_nameiop;
 156         struct proc *p = cnp->cn_proc;
 157 #if REV_ENDIAN_FS
 158         int rev_endian=0;
 159 #endif /* REV_ENDIAN_FS */
 160
 161
 162         bp = NULL;
 163         slotoffset = -1;
 164         *vpp = NULL;
 165         vdp = ap->a_dvp;
 166         dp = VTOI(vdp);
 167         lockparent = flags & LOCKPARENT;
 168         wantparent = flags & (LOCKPARENT|WANTPARENT);
 169 #if REV_ENDIAN_FS
 170         rev_endian=(vdp->v_mount->mnt_flag & MNT_REVEND);
 171 #endif /* REV_ENDIAN_FS */
 172
 173         /*
 174          * Check accessiblity of directory.
 175          */
 176         if ((dp->i_mode & IFMT) != IFDIR)
 177                 return (ENOTDIR);
 178         if (error = VOP_ACCESS(vdp, VEXEC, cred, cnp->cn_proc))
 179                 return (error);
 180         if ((flags & ISLASTCN) && (vdp->v_mount->mnt_flag & MNT_RDONLY) &&
 181             (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
 182                 return (EROFS);
 183
 184         /*
 185          * We now have a segment name to search for, and a directory to search.
 186          *
 187          * Before tediously performing a linear scan of the directory,
 188          * check the name cache to see if the directory/name pair
 189          * we are looking for is known already.
 190          */
 191         if (error = cache_lookup(vdp, vpp, cnp)) {
 192                 int vpid;       /* capability number of vnode */
 193
 194                 if (error == ENOENT)
 195                         return (error);
 196                 /*
 197                  * Get the next vnode in the path.
 198                  * See comment below starting `Step through' for
 199                  * an explaination of the locking protocol.
 200                  */
 201                 pdp = vdp;
 202                 dp = VTOI(*vpp);
 203                 vdp = *vpp;
 204                 vpid = vdp->v_id;
 205                 if (pdp == vdp) {   /* lookup on "." */
 206                         VREF(vdp);
 207                         error = 0;
 208                 } else if (flags & ISDOTDOT) {
 209                         VOP_UNLOCK(pdp, 0, p);
 210                         error = vget(vdp, LK_EXCLUSIVE, p);
 211                         if (!error && lockparent && (flags & ISLASTCN))
 212                                 error = vn_lock(pdp, LK_EXCLUSIVE, p);
 213                 } else {
 214                         error = vget(vdp, LK_EXCLUSIVE, p);
 215                         if (!lockparent || error || !(flags & ISLASTCN))
 216                                 VOP_UNLOCK(pdp, 0, p);
 217                 }
 218                 /*
 219                  * Check that the capability number did not change
 220                  * while we were waiting for the lock.
 221                  */
 222                 if (!error) {
 223                         if (vpid == vdp->v_id)
 224                                 return (0);
 225                         vput(vdp);
 226                         if (lockparent && pdp != vdp && (flags & ISLASTCN))
 227                                 VOP_UNLOCK(pdp, 0, p);
 228                 }
 229                 if (error = vn_lock(pdp, LK_EXCLUSIVE, p))
 230                         return (error);
 231                 vdp = pdp;
 232                 dp = VTOI(pdp);
 233                 *vpp = NULL;
 234         }
 235
 236         /*
 237          * Suppress search for slots unless creating
 238          * file and at end of pathname, in which case
 239          * we watch for a place to put the new file in
 240          * case it doesn't already exist.
 241          */
 242         slotstatus = FOUND;
 243         slotfreespace = slotsize = slotneeded = 0;
 244         if ((nameiop == CREATE || nameiop == RENAME) &&
 245             (flags & ISLASTCN)) {
 246                 slotstatus = NONE;
 247                 slotneeded = (sizeof(struct direct) - MAXNAMLEN +
 248                         cnp->cn_namelen + 3) &~ 3;
 249         }
 250
 251         /*
 252          * If there is cached information on a previous search of
 253          * this directory, pick up where we last left off.
 254          * We cache only lookups as these are the most common
 255          * and have the greatest payoff. Caching CREATE has little
 256          * benefit as it usually must search the entire directory
 257          * to determine that the entry does not exist. Caching the
 258          * location of the last DELETE or RENAME has not reduced
 259          * profiling time and hence has been removed in the interest
 260          * of simplicity.
 261          */
 262         bmask = VFSTOUFS(vdp->v_mount)->um_mountp->mnt_stat.f_iosize - 1;
 263         if (nameiop != LOOKUP || dp->i_diroff == 0 ||
 264             dp->i_diroff > dp->i_size) {
 265                 entryoffsetinblock = 0;
 266                 dp->i_offset = 0;
 267                 numdirpasses = 1;
 268         } else {
 269                 dp->i_offset = dp->i_diroff;
 270                 if ((entryoffsetinblock = dp->i_offset & bmask) &&
 271                     (error = VOP_BLKATOFF(vdp, (off_t)dp->i_offset, NULL, &bp)))
 272                         return (error);
 273                 numdirpasses = 2;
 274                 nchstats.ncs_2passes++;
 275         }
 276         prevoff = dp->i_offset;
 277         endsearch = roundup(dp->i_size, DIRBLKSIZ);
 278         enduseful = 0;
 279
 280 searchloop:
 281         while (dp->i_offset < endsearch) {
 282                 /*
 283                  * If necessary, get the next directory block.
 284                  */
 285                 if ((dp->i_offset & bmask) == 0) {
 286                         if (bp != NULL)  {
 287 #if REV_ENDIAN_FS
 288                                 if (rev_endian)
 289                                         byte_swap_dir_block_out(bp);
 290 #endif /* REV_ENDIAN_FS */
 291                                 brelse(bp);
 292                         }
 293                         if (error =
 294                             VOP_BLKATOFF(vdp, (off_t)dp->i_offset, NULL, &bp))
 295                                 return (error);
 296                         entryoffsetinblock = 0;
 297                 }
 298                 /*
 299                  * If still looking for a slot, and at a DIRBLKSIZE
 300                  * boundary, have to start looking for free space again.
 301                  */
 302                 if (slotstatus == NONE &&
 303                     (entryoffsetinblock & (DIRBLKSIZ - 1)) == 0) {
 304                         slotoffset = -1;
 305                         slotfreespace = 0;
 306                 }
 307                 /*
 308                  * Get pointer to next entry.
 309                  * Full validation checks are slow, so we only check
 310                  * enough to insure forward progress through the
 311                  * directory. Complete checks can be run by patching
 312                  * "dirchk" to be true.
 313                  */
 314                 ep = (struct direct *)((char *)bp->b_data + entryoffsetinblock);
 315                 if (ep->d_reclen == 0 ||
 316                     dirchk && ufs_dirbadentry(vdp, ep, entryoffsetinblock)) {
 317                         int i;
 318
 319                         ufs_dirbad(dp, dp->i_offset, "mangled entry");
 320                         i = DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1));
 321                         dp->i_offset += i;
 322                         entryoffsetinblock += i;
 323                         continue;
 324                 }
 325
 326                 /*
 327                  * If an appropriate sized slot has not yet been found,
 328                  * check to see if one is available. Also accumulate space
 329                  * in the current block so that we can determine if
 330                  * compaction is viable.
 331                  */
 332                 if (slotstatus != FOUND) {
 333                         int size = ep->d_reclen;
 334
 335                         if (ep->d_ino != 0)
 336                                 size -= DIRSIZ(FSFMT(vdp), ep);
 337                         if (size > 0) {
 338                                 if (size >= slotneeded) {
 339                                         slotstatus = FOUND;
 340                                         slotoffset = dp->i_offset;
 341                                         slotsize = ep->d_reclen;
 342                                 } else if (slotstatus == NONE) {
 343                                         slotfreespace += size;
 344                                         if (slotoffset == -1)
 345                                                 slotoffset = dp->i_offset;
 346                                         if (slotfreespace >= slotneeded) {
 347                                                 slotstatus = COMPACT;
 348                                                 slotsize = dp->i_offset +
 349                                                       ep->d_reclen - slotoffset;
 350                                         }
 351                                 }
 352                         }
 353                 }
 354
 355                 /*
 356                  * Check for a name match.
 357                  */
 358                 if (ep->d_ino) {
 359 #                       if (BYTE_ORDER == LITTLE_ENDIAN)
 360                                 if (vdp->v_mount->mnt_maxsymlinklen > 0)
 361                                         namlen = ep->d_namlen;
 362                                 else
 363                                         namlen = ep->d_type;
 364 #                       else
 365                                 namlen = ep->d_namlen;
 366 #                       endif
 367                         if (namlen == cnp->cn_namelen &&
 368                             !bcmp(cnp->cn_nameptr, ep->d_name,
 369                                 (unsigned)namlen)) {
 370                                 /*
 371                                  * Save directory entry's inode number and
 372                                  * reclen in ndp->ni_ufs area, and release
 373                                  * directory buffer.
 374                                  */
 375                                 if (vdp->v_mount->mnt_maxsymlinklen > 0 &&
 376                                     ep->d_type == DT_WHT) {
 377                                         slotstatus = FOUND;
 378                                         slotoffset = dp->i_offset;
 379                                         slotsize = ep->d_reclen;
 380                                         dp->i_reclen = slotsize;
 381                                         enduseful = dp->i_size;
 382                                         ap->a_cnp->cn_flags |= ISWHITEOUT;
 383                                         numdirpasses--;
 384                                         goto notfound;
 385                                 }
 386                                 dp->i_ino = ep->d_ino;
 387                                 dp->i_reclen = ep->d_reclen;
 388 #if REV_ENDIAN_FS
 389                                 if (rev_endian)
 390                                         byte_swap_dir_block_out(bp);
 391 #endif /* REV_ENDIAN_FS */
 392                                 brelse(bp);
 393                                 goto found;
 394                         }
 395                 }
 396                 prevoff = dp->i_offset;
 397                 dp->i_offset += ep->d_reclen;
 398                 entryoffsetinblock += ep->d_reclen;
 399                 if (ep->d_ino)
 400                         enduseful = dp->i_offset;
 401         }
 402 notfound:
 403         /*
 404          * If we started in the middle of the directory and failed
 405          * to find our target, we must check the beginning as well.
 406          */
 407         if (numdirpasses == 2) {
 408                 numdirpasses--;
 409                 dp->i_offset = 0;
 410                 endsearch = dp->i_diroff;
 411                 goto searchloop;
 412         }
 413         if (bp != NULL) {
 414 #if REV_ENDIAN_FS
 415                 if (rev_endian)
 416                         byte_swap_dir_block_out(bp);
 417 #endif /* REV_ENDIAN_FS */
 418                 brelse(bp);
 419         }
 420         /*
 421          * If creating, and at end of pathname and current
 422          * directory has not been removed, then can consider
 423          * allowing file to be created.
 424          */
 425         if ((nameiop == CREATE || nameiop == RENAME ||
 426              (nameiop == DELETE &&
 427               (ap->a_cnp->cn_flags & DOWHITEOUT) &&
 428               (ap->a_cnp->cn_flags & ISWHITEOUT))) &&
 429             (flags & ISLASTCN) && dp->i_nlink != 0) {
 430                 /*
 431                  * Access for write is interpreted as allowing
 432                  * creation of files in the directory.
 433                  */
 434                 if (error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_proc))
 435                         return (error);
 436                 /*
 437                  * Return an indication of where the new directory
 438                  * entry should be put.  If we didn't find a slot,
 439                  * then set dp->i_count to 0 indicating
 440                  * that the new slot belongs at the end of the
 441                  * directory. If we found a slot, then the new entry
 442                  * can be put in the range from dp->i_offset to
 443                  * dp->i_offset + dp->i_count.
 444                  */
 445                 if (slotstatus == NONE) {
 446                         dp->i_offset = roundup(dp->i_size, DIRBLKSIZ);
 447                         dp->i_count = 0;
 448                         enduseful = dp->i_offset;
 449                 } else if (nameiop == DELETE) {
 450                         dp->i_offset = slotoffset;
 451                         if ((dp->i_offset & (DIRBLKSIZ - 1)) == 0)
 452                                 dp->i_count = 0;
 453                         else
 454                                 dp->i_count = dp->i_offset - prevoff;
 455                 } else {
 456                         dp->i_offset = slotoffset;
 457                         dp->i_count = slotsize;
 458                         if (enduseful < slotoffset + slotsize)
 459                                 enduseful = slotoffset + slotsize;
 460                 }
 461                 dp->i_endoff = roundup(enduseful, DIRBLKSIZ);
 462                 dp->i_flag |= IN_CHANGE | IN_UPDATE;
 463                 /*
 464                  * We return with the directory locked, so that
 465                  * the parameters we set up above will still be
 466                  * valid if we actually decide to do a direnter().
 467                  * We return ni_vp == NULL to indicate that the entry
 468                  * does not currently exist; we leave a pointer to
 469                  * the (locked) directory inode in ndp->ni_dvp.
 470                  * The pathname buffer is saved so that the name
 471                  * can be obtained later.
 472                  *
 473                  * NB - if the directory is unlocked, then this
 474                  * information cannot be used.
 475                  */
 476                 cnp->cn_flags |= SAVENAME;
 477                 if (!lockparent)
 478                         VOP_UNLOCK(vdp, 0, p);
 479                 return (EJUSTRETURN);
 480         }
 481         /*
 482          * Insert name into cache (as non-existent) if appropriate.
 483          */
 484         if ((cnp->cn_flags & MAKEENTRY) && nameiop != CREATE)
 485                 cache_enter(vdp, *vpp, cnp);
 486         return (ENOENT);
 487
 488 found:
 489         if (numdirpasses == 2)
 490                 nchstats.ncs_pass2++;
 491         /*
 492          * Check that directory length properly reflects presence
 493          * of this entry.
 494          */
 495         if (entryoffsetinblock + DIRSIZ(FSFMT(vdp), ep) > dp->i_size) {
 496                 ufs_dirbad(dp, dp->i_offset, "i_size too small");
 497                 dp->i_size = entryoffsetinblock + DIRSIZ(FSFMT(vdp), ep);
 498                 dp->i_flag |= IN_CHANGE | IN_UPDATE;
 499         }
 500
 501         /*
 502          * Found component in pathname.
 503          * If the final component of path name, save information
 504          * in the cache as to where the entry was found.
 505          */
 506         if ((flags & ISLASTCN) && nameiop == LOOKUP)
 507                 dp->i_diroff = dp->i_offset &~ (DIRBLKSIZ - 1);
 508
 509         /*
 510          * If deleting, and at end of pathname, return
 511          * parameters which can be used to remove file.
 512          * If the wantparent flag isn't set, we return only
 513          * the directory (in ndp->ni_dvp), otherwise we go
 514          * on and lock the inode, being careful with ".".
 515          */
 516         if (nameiop == DELETE && (flags & ISLASTCN)) {
 517                 /*
 518                  * Write access to directory required to delete files.
 519                  */
 520                 if (error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_proc))
 521                         return (error);
 522                 /*
 523                  * Return pointer to current entry in dp->i_offset,
 524                  * and distance past previous entry (if there
 525                  * is a previous entry in this block) in dp->i_count.
 526                  * Save directory inode pointer in ndp->ni_dvp for dirremove().
 527                  */
 528                 if ((dp->i_offset & (DIRBLKSIZ - 1)) == 0)
 529                         dp->i_count = 0;
 530                 else
 531                         dp->i_count = dp->i_offset - prevoff;
 532                 if (dp->i_number == dp->i_ino) {
 533                         VREF(vdp);
 534                         *vpp = vdp;
 535                         return (0);
 536                 }
 537                 if (error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp))
 538                         return (error);
 539                 /*
 540                  * If directory is "sticky", then user must own
 541                  * the directory, or the file in it, else she
 542                  * may not delete it (unless she's root). This
 543                  * implements append-only directories.
 544                  */
 545                 if ((dp->i_mode & ISVTX) &&
 546                     cred->cr_uid != 0 &&
 547                     cred->cr_uid != dp->i_uid &&
 548                     tdp->v_type != VLNK &&
 549                     VTOI(tdp)->i_uid != cred->cr_uid) {
 550                         vput(tdp);
 551                         return (EPERM);
 552                 }
 553                 *vpp = tdp;
 554                 if (!lockparent)
 555                         VOP_UNLOCK(vdp, 0, p);
 556                 return (0);
 557         }
 558
 559         /*
 560          * If rewriting (RENAME), return the inode and the
 561          * information required to rewrite the present directory
 562          * Must get inode of directory entry to verify it's a
 563          * regular file, or empty directory.
 564          */
 565         if (nameiop == RENAME && wantparent && (flags & ISLASTCN)) {
 566                 if (error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_proc))
 567                         return (error);
 568                 /*
 569                  * Careful about locking second inode.
 570                  * This can only occur if the target is ".".
 571                  */
 572                 if (dp->i_number == dp->i_ino)
 573                         return (EISDIR);
 574                 if (error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp))
 575                         return (error);
 576                 *vpp = tdp;
 577                 cnp->cn_flags |= SAVENAME;
 578                 if (!lockparent)
 579                         VOP_UNLOCK(vdp, 0, p);
 580                 return (0);
 581         }
 582
 583         /*
 584          * Step through the translation in the name.  We do not `vput' the
 585          * directory because we may need it again if a symbolic link
 586          * is relative to the current directory.  Instead we save it
 587          * unlocked as "pdp".  We must get the target inode before unlocking
 588          * the directory to insure that the inode will not be removed
 589          * before we get it.  We prevent deadlock by always fetching
 590          * inodes from the root, moving down the directory tree. Thus
 591          * when following backward pointers ".." we must unlock the
 592          * parent directory before getting the requested directory.
 593          * There is a potential race condition here if both the current
 594          * and parent directories are removed before the VFS_VGET for the
 595          * inode associated with ".." returns.  We hope that this occurs
 596          * infrequently since we cannot avoid this race condition without
 597          * implementing a sophisticated deadlock detection algorithm.
 598          * Note also that this simple deadlock detection scheme will not
 599          * work if the file system has any hard links other than ".."
 600          * that point backwards in the directory structure.
 601          */
 602         pdp = vdp;
 603         if (flags & ISDOTDOT) {
 604                 VOP_UNLOCK(pdp, 0, p);  /* race to get the inode */
 605                 if (error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp)) {
 606                         vn_lock(pdp, LK_EXCLUSIVE | LK_RETRY, p);
 607                         return (error);
 608                 }
 609                 if (lockparent && (flags & ISLASTCN) &&
 610                     (error = vn_lock(pdp, LK_EXCLUSIVE, p))) {
 611                         vput(tdp);
 612                         return (error);
 613                 }
 614                 *vpp = tdp;
 615         } else if (dp->i_number == dp->i_ino) {
 616                 VREF(vdp);      /* we want ourself, ie "." */
 617                 *vpp = vdp;
 618         } else {
 619                 if (error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp))
 620                         return (error);
 621                 if (!lockparent || !(flags & ISLASTCN))
 622                         VOP_UNLOCK(pdp, 0, p);
 623                 *vpp = tdp;
 624         }
 625
 626         /*
 627          * Insert name into cache if appropriate.
 628          */
 629         if (cnp->cn_flags & MAKEENTRY)
 630                 cache_enter(vdp, *vpp, cnp);
 631         return (0);
 632 }
 633
 634 void
 635 ufs_dirbad(ip, offset, how)
 636         struct inode *ip;
 637         doff_t offset;
 638         char *how;
 639 {
 640         struct mount *mp;
 641
 642         mp = ITOV(ip)->v_mount;
 643         (void)printf("%s: bad dir ino %d at offset %d: %s\n",
 644             mp->mnt_stat.f_mntonname, ip->i_number, offset, how);
 645         if ((mp->mnt_stat.f_flags & MNT_RDONLY) == 0)
 646                 panic("bad dir");
 647 }
 648
 649 /*
 650  * Do consistency checking on a directory entry:
 651  *      record length must be multiple of 4
 652  *      entry must fit in rest of its DIRBLKSIZ block
 653  *      record must be large enough to contain entry
 654  *      name is not longer than MAXNAMLEN
 655  *      name must be as long as advertised, and null terminated
 656  */
 657 int
 658 ufs_dirbadentry(dp, ep, entryoffsetinblock)
 659         struct vnode *dp;
 660         register struct direct *ep;
 661         int entryoffsetinblock;
 662 {
 663         register int i;
 664         int namlen;
 665
 666 #       if (BYTE_ORDER == LITTLE_ENDIAN)
 667                 if (dp->v_mount->mnt_maxsymlinklen > 0)
 668                         namlen = ep->d_namlen;
 669                 else
 670                         namlen = ep->d_type;
 671 #       else
 672                 namlen = ep->d_namlen;
 673 #       endif
 674         if ((ep->d_reclen & 0x3) != 0 ||
 675             ep->d_reclen > DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)) ||
 676             ep->d_reclen < DIRSIZ(FSFMT(dp), ep) || namlen > MAXNAMLEN) {
 677                 /*return (1); */
 678                 printf("First bad\n");
 679                 goto bad;
 680         }
 681         if (ep->d_ino == 0)
 682                 return (0);
 683         for (i = 0; i < namlen; i++)
 684                 if (ep->d_name[i] == '\0') {
 685                         /*return (1); */
 686                         printf("Second bad\n");
 687                         goto bad;
 688         }
 689         if (ep->d_name[i])
 690                 goto bad;
 691         return (0);
 692 bad:
 693         return (1);
 694 }
 695
 696 /*
 697  * Write a directory entry after a call to namei, using the parameters
 698  * that it left in nameidata.  The argument ip is the inode which the new
 699  * directory entry will refer to.  Dvp is a pointer to the directory to
 700  * be written, which was left locked by namei. Remaining parameters
 701  * (dp->i_offset, dp->i_count) indicate how the space for the new
 702  * entry is to be obtained.
 703  */
 704 int
 705 ufs_direnter(ip, dvp, cnp)
 706         struct inode *ip;
 707         struct vnode *dvp;
 708         register struct componentname *cnp;
 709 {
 710         register struct inode *dp;
 711         struct direct newdir;
 712
 713 #if DIAGNOSTIC
 714         if ((cnp->cn_flags & SAVENAME) == 0)
 715                 panic("direnter: missing name");
 716 #endif
 717         dp = VTOI(dvp);
 718         newdir.d_ino = ip->i_number;
 719         newdir.d_namlen = cnp->cn_namelen;
 720         bcopy(cnp->cn_nameptr, newdir.d_name, (unsigned)cnp->cn_namelen + 1);
 721         if (dvp->v_mount->mnt_maxsymlinklen > 0)
 722                 newdir.d_type = IFTODT(ip->i_mode);
 723         else {
 724                 newdir.d_type = 0;
 725 #               if (BYTE_ORDER == LITTLE_ENDIAN)
 726                         { u_char tmp = newdir.d_namlen;
 727                         newdir.d_namlen = newdir.d_type;
 728                         newdir.d_type = tmp; }
 729 #               endif
 730         }
 731         return (ufs_direnter2(dvp, &newdir, cnp->cn_cred, cnp->cn_proc));
 732 }
 733
 734 /*
 735  * Common entry point for directory entry removal used by ufs_direnter
 736  * and ufs_whiteout
 737  */
 738 ufs_direnter2(dvp, dirp, cr, p)
 739         struct vnode *dvp;
 740         struct direct *dirp;
 741         struct ucred *cr;
 742         struct proc *p;
 743 {
 744         int newentrysize;
 745         struct inode *dp;
 746         struct buf *bp;
 747         struct iovec aiov;
 748         struct uio auio;
 749         u_int dsize;
 750         struct direct *ep, *nep;
 751         int error, loc, spacefree;
 752         char *dirbuf;
 753 #if REV_ENDIAN_FS
 754         struct mount *mp=dvp->v_mount;
 755         int rev_endian=(mp->mnt_flag & MNT_REVEND);
 756 #endif /* REV_ENDIAN_FS */
 757
 758         dp = VTOI(dvp);
 759         newentrysize = DIRSIZ(FSFMT(dvp), dirp);
 760
 761         if (dp->i_count == 0) {
 762                 /*
 763                  * If dp->i_count is 0, then namei could find no
 764                  * space in the directory. Here, dp->i_offset will
 765                  * be on a directory block boundary and we will write the
 766                  * new entry into a fresh block.
 767                  */
 768                 if (dp->i_offset & (DIRBLKSIZ - 1))
 769                         panic("ufs_direnter2: newblk");
 770                 auio.uio_offset = dp->i_offset;
 771                 dirp->d_reclen = DIRBLKSIZ;
 772                 auio.uio_resid = newentrysize;
 773                 aiov.iov_len = newentrysize;
 774                 aiov.iov_base = (caddr_t)dirp;
 775                 auio.uio_iov = &aiov;
 776                 auio.uio_iovcnt = 1;
 777                 auio.uio_rw = UIO_WRITE;
 778                 auio.uio_segflg = UIO_SYSSPACE;
 779                 auio.uio_procp = (struct proc *)0;
 780                 error = VOP_WRITE(dvp, &auio, IO_SYNC, cr);
 781                 if (DIRBLKSIZ >
 782                     VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_bsize)
 783                         /* XXX should grow with balloc() */
 784                         panic("ufs_direnter2: frag size");
 785                 else if (!error) {
 786                         dp->i_size = roundup(dp->i_size, DIRBLKSIZ);
 787                         dp->i_flag |= IN_CHANGE;
 788                 }
 789                 return (error);
 790         }
 791
 792         /*
 793          * If dp->i_count is non-zero, then namei found space
 794          * for the new entry in the range dp->i_offset to
 795          * dp->i_offset + dp->i_count in the directory.
 796          * To use this space, we may have to compact the entries located
 797          * there, by copying them together towards the beginning of the
 798          * block, leaving the free space in one usable chunk at the end.
 799          */
 800
 801         /*
 802          * Increase size of directory if entry eats into new space.
 803          * This should never push the size past a new multiple of
 804          * DIRBLKSIZE.
 805          *
 806          * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN.
 807          */
 808         if (dp->i_offset + dp->i_count > dp->i_size)
 809                 dp->i_size = dp->i_offset + dp->i_count;
 810         /*
 811          * Get the block containing the space for the new directory entry.
 812          */
 813         if (error = VOP_BLKATOFF(dvp, (off_t)dp->i_offset, &dirbuf, &bp))
 814                 return (error);
 815         /*
 816          * Find space for the new entry. In the simple case, the entry at
 817          * offset base will have the space. If it does not, then namei
 818          * arranged that compacting the region dp->i_offset to
 819          * dp->i_offset + dp->i_count would yield the
 820          * space.
 821          */
 822         ep = (struct direct *)dirbuf;
 823         dsize = DIRSIZ(FSFMT(dvp), ep);
 824         spacefree = ep->d_reclen - dsize;
 825         for (loc = ep->d_reclen; loc < dp->i_count; ) {
 826                 nep = (struct direct *)(dirbuf + loc);
 827                 if (ep->d_ino) {
 828                         /* trim the existing slot */
 829                         ep->d_reclen = dsize;
 830                         ep = (struct direct *)((char *)ep + dsize);
 831                 } else {
 832                         /* overwrite; nothing there; header is ours */
 833                         spacefree += dsize;
 834                 }
 835                 dsize = DIRSIZ(FSFMT(dvp), nep);
 836                 spacefree += nep->d_reclen - dsize;
 837                 loc += nep->d_reclen;
 838                 bcopy((caddr_t)nep, (caddr_t)ep, dsize);
 839         }
 840         /*
 841          * Update the pointer fields in the previous entry (if any),
 842          * copy in the new entry, and write out the block.
 843          */
 844         if (ep->d_ino == 0 ||
 845             (ep->d_ino == WINO &&
 846              bcmp(ep->d_name, dirp->d_name, dirp->d_namlen) == 0)) {
 847                 if (spacefree + dsize < newentrysize)
 848                         panic("ufs_direnter2: compact1");
 849                 dirp->d_reclen = spacefree + dsize;
 850         } else {
 851                 if (spacefree < newentrysize)
 852                         panic("ufs_direnter2: compact2");
 853                 dirp->d_reclen = spacefree;
 854                 ep->d_reclen = dsize;
 855                 ep = (struct direct *)((char *)ep + dsize);
 856         }
 857         bcopy((caddr_t)dirp, (caddr_t)ep, (u_int)newentrysize);
 858 #if REV_ENDIAN_FS
 859         if (rev_endian)
 860                 byte_swap_dir_block_out(bp);
 861 #endif /* REV_ENDIAN_FS */
 862         error = VOP_BWRITE(bp);
 863         dp->i_flag |= IN_CHANGE | IN_UPDATE;
 864         if (!error && dp->i_endoff && dp->i_endoff < dp->i_size)
 865                 error = VOP_TRUNCATE(dvp, (off_t)dp->i_endoff, IO_SYNC, cr, p);
 866         return (error);
 867 }
 868
 869 /*
 870  * Remove a directory entry after a call to namei, using
 871  * the parameters which it left in nameidata. The entry
 872  * dp->i_offset contains the offset into the directory of the
 873  * entry to be eliminated.  The dp->i_count field contains the
 874  * size of the previous record in the directory.  If this
 875  * is 0, the first entry is being deleted, so we need only
 876  * zero the inode number to mark the entry as free.  If the
 877  * entry is not the first in the directory, we must reclaim
 878  * the space of the now empty record by adding the record size
 879  * to the size of the previous entry.
 880  */
 881 int
 882 ufs_dirremove(dvp, cnp)
 883         struct vnode *dvp;
 884         struct componentname *cnp;
 885 {
 886         register struct inode *dp;
 887         struct direct *ep;
 888         struct buf *bp;
 889         int error;
 890 #if REV_ENDIAN_FS
 891         struct mount *mp=dvp->v_mount;
 892         int rev_endian=(mp->mnt_flag & MNT_REVEND);
 893 #endif /* REV_ENDIAN_FS */
 894
 895         dp = VTOI(dvp);
 896
 897         if (cnp->cn_flags & DOWHITEOUT) {
 898                 /*
 899                  * Whiteout entry: set d_ino to WINO.
 900                  */
 901                 if (error =
 902                     VOP_BLKATOFF(dvp, (off_t)dp->i_offset, (char **)&ep, &bp))
 903                         return (error);
 904                 ep->d_ino = WINO;
 905                 ep->d_type = DT_WHT;
 906 #if REV_ENDIAN_FS
 907                 if (rev_endian)
 908                         byte_swap_dir_block_out(bp);
 909 #endif /* REV_ENDIAN_FS */
 910                 error = VOP_BWRITE(bp);
 911                 dp->i_flag |= IN_CHANGE | IN_UPDATE;
 912                 return (error);
 913         }
 914
 915         if (dp->i_count == 0) {
 916                 /*
 917                  * First entry in block: set d_ino to zero.
 918                  */
 919                 if (error =
 920                     VOP_BLKATOFF(dvp, (off_t)dp->i_offset, (char **)&ep, &bp))
 921                         return (error);
 922                 ep->d_ino = 0;
 923 #if REV_ENDIAN_FS
 924                 if (rev_endian)
 925                         byte_swap_dir_block_out(bp);
 926 #endif /* REV_ENDIAN_FS */
 927                 error = VOP_BWRITE(bp);
 928                 dp->i_flag |= IN_CHANGE | IN_UPDATE;
 929                 return (error);
 930         }
 931         /*
 932          * Collapse new free space into previous entry.
 933          */
 934         if (error = VOP_BLKATOFF(dvp, (off_t)(dp->i_offset - dp->i_count),
 935             (char **)&ep, &bp))
 936                 return (error);
 937         ep->d_reclen += dp->i_reclen;
 938 #if REV_ENDIAN_FS
 939         if (rev_endian)
 940                 byte_swap_dir_block_out(bp);
 941 #endif /* REV_ENDIAN_FS */
 942         error = VOP_BWRITE(bp);
 943         dp->i_flag |= IN_CHANGE | IN_UPDATE;
 944         return (error);
 945 }
 946
 947 /*
 948  * Rewrite an existing directory entry to point at the inode
 949  * supplied.  The parameters describing the directory entry are
 950  * set up by a call to namei.
 951  */
 952 int
 953 ufs_dirrewrite(dp, ip, cnp)
 954         struct inode *dp, *ip;
 955         struct componentname *cnp;
 956 {
 957         struct buf *bp;
 958         struct direct *ep;
 959         struct vnode *vdp = ITOV(dp);
 960         int error;
 961
 962         if (error = VOP_BLKATOFF(vdp, (off_t)dp->i_offset, (char **)&ep, &bp))
 963                 return (error);
 964         ep->d_ino = ip->i_number;
 965         if (vdp->v_mount->mnt_maxsymlinklen > 0)
 966                 ep->d_type = IFTODT(ip->i_mode);
 967 #if REV_ENDIAN_FS
 968         if (vdp->v_mount->mnt_flag & MNT_REVEND)
 969                 byte_swap_dir_block_out(bp);
 970 #endif /* REV_ENDIAN_FS */
 971         error = VOP_BWRITE(bp);
 972         dp->i_flag |= IN_CHANGE | IN_UPDATE;
 973         return (error);
 974 }
 975
 976 /*
 977  * Check if a directory is empty or not.
 978  * Inode supplied must be locked.
 979  *
 980  * Using a struct dirtemplate here is not precisely
 981  * what we want, but better than using a struct direct.
 982  *
 983  * NB: does not handle corrupted directories.
 984  */
 985 int
 986 ufs_dirempty(ip, parentino, cred)
 987         register struct inode *ip;
 988         ino_t parentino;
 989         struct ucred *cred;
 990 {
 991         register off_t off;
 992         struct dirtemplate dbuf;
 993         register struct direct *dp = (struct direct *)&dbuf;
 994         int error, count, namlen;
 995 #if REV_ENDIAN_FS
 996         struct vnode *vp=ITOV(ip);
 997         struct mount *mp=vp->v_mount;
 998         int rev_endian=(mp->mnt_flag & MNT_REVEND);
 999 #endif /* REV_ENDIAN_FS */
1000
1001 #define MINDIRSIZ (sizeof (struct dirtemplate) / 2)
1002
1003         for (off = 0; off < ip->i_size; off += dp->d_reclen) {
1004                 error = vn_rdwr(UIO_READ, ITOV(ip), (caddr_t)dp, MINDIRSIZ, off,
1005                    UIO_SYSSPACE, IO_NODELOCKED, cred, &count, (struct proc *)0);
1006                 /*
1007                  * Since we read MINDIRSIZ, residual must
1008                  * be 0 unless we're at end of file.
1009                  */
1010                 if (error || count != 0)
1011                         return (0);
1012 #if 0 /*REV_ENDIAN_FS */
1013                 if (rev_endian)
1014                         byte_swap_minidir_in(dp);
1015 #endif /* REV_ENDIAN_FS */
1016                 /* avoid infinite loops */
1017                 if (dp->d_reclen == 0)
1018                         return (0);
1019                 /* skip empty entries */
1020                 if (dp->d_ino == 0 || dp->d_ino == WINO)
1021                         continue;
1022                 /* accept only "." and ".." */
1023 #               if (BYTE_ORDER == LITTLE_ENDIAN)
1024                         if (ITOV(ip)->v_mount->mnt_maxsymlinklen > 0)
1025                                 namlen = dp->d_namlen;
1026                         else
1027                                 namlen = dp->d_type;
1028 #               else
1029                         namlen = dp->d_namlen;
1030 #               endif
1031                 if (namlen > 2)
1032                         return (0);
1033                 if (dp->d_name[0] != '.')
1034                         return (0);
1035                 /*
1036                  * At this point namlen must be 1 or 2.
1037                  * 1 implies ".", 2 implies ".." if second
1038                  * char is also "."
1039                  */
1040                 if (namlen == 1)
1041                         continue;
1042                 if (dp->d_name[1] == '.' && dp->d_ino == parentino)
1043                         continue;
1044                 return (0);
1045         }
1046         return (1);
1047 }
1048
1049 /*
1050  * Check if source directory is in the path of the target directory.
1051  * Target is supplied locked, source is unlocked.
1052  * The target is always vput before returning.
1053  */
1054 int
1055 ufs_checkpath(source, target, cred)
1056         struct inode *source, *target;
1057         struct ucred *cred;
1058 {
1059         struct vnode *vp;
1060         int error, rootino, namlen;
1061         struct dirtemplate dirbuf;
1062
1063         vp = ITOV(target);
1064         if (target->i_number == source->i_number) {
1065                 error = EEXIST;
1066                 goto out;
1067         }
1068         rootino = ROOTINO;
1069         error = 0;
1070         if (target->i_number == rootino)
1071                 goto out;
1072
1073         for (;;) {
1074                 if (vp->v_type != VDIR) {
1075                         error = ENOTDIR;
1076                         break;
1077                 }
1078                 error = vn_rdwr(UIO_READ, vp, (caddr_t)&dirbuf,
1079                         sizeof (struct dirtemplate), (off_t)0, UIO_SYSSPACE,
1080                         IO_NODELOCKED, cred, (int *)0, (struct proc *)0);
1081                 if (error != 0)
1082                         break;
1083 #               if (BYTE_ORDER == LITTLE_ENDIAN)
1084                         if (vp->v_mount->mnt_maxsymlinklen > 0)
1085                                 namlen = dirbuf.dotdot_namlen;
1086                         else
1087                                 namlen = dirbuf.dotdot_type;
1088 #               else
1089                         namlen = dirbuf.dotdot_namlen;
1090 #               endif
1091                 if (namlen != 2 ||
1092                     dirbuf.dotdot_name[0] != '.' ||
1093                     dirbuf.dotdot_name[1] != '.') {
1094                         error = ENOTDIR;
1095                         break;
1096                 }
1097                 if (dirbuf.dotdot_ino == source->i_number) {
1098                         error = EINVAL;
1099                         break;
1100                 }
1101                 if (dirbuf.dotdot_ino == rootino)
1102                         break;
1103                 vput(vp);
1104                 if (error = VFS_VGET(vp->v_mount, dirbuf.dotdot_ino, &vp)) {
1105                         vp = NULL;
1106                         break;
1107                 }
1108         }
1109
1110 out:
1111         if (error == ENOTDIR)
1112                 printf("checkpath: .. not a directory\n");
1113         if (vp != NULL)
1114                 vput(vp);
1115         return (error);
1116 }