bsd/ufs/ufs/ufs_lookup.c

   1 /*
   2  * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
   3  *
   4  * @APPLE_LICENSE_HEADER_START@
   5  *
   6  * The contents of this file constitute Original Code as defined in and
   7  * are subject to the Apple Public Source License Version 1.1 (the
   8  * "License").  You may not use this file except in compliance with the
   9  * License.  Please obtain a copy of the License at
  10  * http://www.apple.com/publicsource and read it before using this file.
  11  *
  12  * This Original Code and all software distributed under the License are
  13  * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  14  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  15  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
  17  * License for the specific language governing rights and limitations
  18  * under the License.
  19  *
  20  * @APPLE_LICENSE_HEADER_END@
  21  */
  22 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
  23 /*
  24  * Copyright (c) 1989, 1993
  25  *      The Regents of the University of California.  All rights reserved.
  26  * (c) UNIX System Laboratories, Inc.
  27  * All or some portions of this file are derived from material licensed
  28  * to the University of California by American Telephone and Telegraph
  29  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  30  * the permission of UNIX System Laboratories, Inc.
  31  *
  32  * Redistribution and use in source and binary forms, with or without
  33  * modification, are permitted provided that the following conditions
  34  * are met:
  35  * 1. Redistributions of source code must retain the above copyright
  36  *    notice, this list of conditions and the following disclaimer.
  37  * 2. Redistributions in binary form must reproduce the above copyright
  38  *    notice, this list of conditions and the following disclaimer in the
  39  *    documentation and/or other materials provided with the distribution.
  40  * 3. All advertising materials mentioning features or use of this software
  41  *    must display the following acknowledgement:
  42  *      This product includes software developed by the University of
  43  *      California, Berkeley and its contributors.
  44  * 4. Neither the name of the University nor the names of its contributors
  45  *    may be used to endorse or promote products derived from this software
  46  *    without specific prior written permission.
  47  *
  48  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  58  * SUCH DAMAGE.
  59  *
  60  *      @(#)ufs_lookup.c        8.15 (Berkeley) 6/16/95
  61  */
  62 #include <rev_endian_fs.h>
  63 #include <sys/param.h>
  64 #include <sys/namei.h>
  65 #include <sys/buf.h>
  66 #include <sys/file.h>
  67 #include <sys/mount_internal.h>
  68 #include <sys/vnode_internal.h>
  69 #include <sys/quota.h>
  70 #include <sys/kauth.h>
  71 #include <sys/uio_internal.h>
  72
  73 #include <ufs/ufs/quota.h>
  74 #include <ufs/ufs/inode.h>
  75 #include <ufs/ufs/dir.h>
  76 #include <ufs/ufs/ufsmount.h>
  77 #include <ufs/ufs/ufs_extern.h>
  78 #include <ufs/ffs/ffs_extern.h>
  79 #if REV_ENDIAN_FS
  80 #include <ufs/ufs/ufs_byte_order.h>
  81 #endif /* REV_ENDIAN_FS */
  82
  83 struct  nchstats ufs_nchstats;
  84 #if DIAGNOSTIC
  85 int     dirchk = 1;
  86 #else
  87 int     dirchk = 0;
  88 #endif
  89
  90 #define FSFMT(vp)       ((vp)->v_mount->mnt_maxsymlinklen <= 0)
  91
  92 /*
  93  * Convert a component of a pathname into a pointer to a locked inode.
  94  * This is a very central and rather complicated routine.
  95  * If the file system is not maintained in a strict tree hierarchy,
  96  * this can result in a deadlock situation (see comments in code below).
  97  *
  98  * The cnp->cn_nameiop argument is LOOKUP, CREATE, RENAME, or DELETE depending
  99  * on whether the name is to be looked up, created, renamed, or deleted.
 100  * When CREATE, RENAME, or DELETE is specified, information usable in
 101  * creating, renaming, or deleting a directory entry may be calculated.
 102  * If flag has LOCKPARENT or'ed into it and the target of the pathname
 103  * exists, lookup returns both the target and its parent directory locked.
 104  * When creating or renaming and LOCKPARENT is specified, the target may
 105  * not be ".".  When deleting and LOCKPARENT is specified, the target may
 106  * be ".".,
 107  *
 108  * Overall outline of ufs_lookup:
 109  *
 110  *      check accessibility of directory
 111  *      look for name in cache, if found, then if at end of path
 112  *        and deleting or creating, drop it, else return name
 113  *      search for name in directory, to found or notfound
 114  * notfound:
 115  *      if creating, return locked directory, leaving info on available slots
 116  *      else return error
 117  * found:
 118  *      if at end of path and deleting, return information to allow delete
 119  *      if at end of path and rewriting (RENAME and LOCKPARENT), lock target
 120  *        inode and return info to allow rewrite
 121  *      if not at end, add name to cache; if at end and neither creating
 122  *        nor deleting, add name to cache
 123  */
 124 int
 125 ufs_lookup(ap)
 126         struct vnop_lookup_args /* {
 127                 struct vnode *a_dvp;
 128                 struct vnode **a_vpp;
 129                 struct componentname *a_cnp;
 130                 vfs_context_t a_context
 131         } */ *ap;
 132 {
 133         register struct vnode *vdp;     /* vnode for directory being searched */
 134         register struct inode *dp;      /* inode for directory being searched */
 135         struct buf *bp;                 /* a buffer of directory entries */
 136         register struct direct *ep;     /* the current directory entry */
 137         int entryoffsetinblock;         /* offset of ep in bp's buffer */
 138         enum {NONE, COMPACT, FOUND} slotstatus;
 139         doff_t slotoffset;              /* offset of area with free space */
 140         int slotsize;                   /* size of area at slotoffset */
 141         int slotfreespace;              /* amount of space free in slot */
 142         int slotneeded;                 /* size of the entry we're seeking */
 143         int numdirpasses;               /* strategy for directory search */
 144         doff_t endsearch;               /* offset to end directory search */
 145         doff_t prevoff;                 /* prev entry dp->i_offset */
 146         struct vnode *pdp;              /* saved dp during symlink work */
 147         struct vnode *tdp;              /* returned by VFS_VGET */
 148         doff_t enduseful;               /* pointer past last used dir slot */
 149         u_long bmask;                   /* block offset mask */
 150         int wantparent;                 /* 1 => wantparent or lockparent flag */
 151         int namlen, error;
 152         struct vnode **vpp = ap->a_vpp;
 153         struct componentname *cnp = ap->a_cnp;
 154         int flags = cnp->cn_flags;
 155         int nameiop = cnp->cn_nameiop;
 156         vfs_context_t context = ap->a_context;
 157         kauth_cred_t cred;
 158 #if REV_ENDIAN_FS
 159         int rev_endian=0;
 160 #endif /* REV_ENDIAN_FS */
 161
 162
 163         cred = vfs_context_ucred(context);
 164         bp = NULL;
 165         slotoffset = -1;
 166         *vpp = NULL;
 167         vdp = ap->a_dvp;
 168         dp = VTOI(vdp);
 169
 170         wantparent = flags & (LOCKPARENT|WANTPARENT);
 171
 172 #if REV_ENDIAN_FS
 173         rev_endian=(vdp->v_mount->mnt_flag & MNT_REVEND);
 174 #endif /* REV_ENDIAN_FS */
 175
 176         /*
 177          * Check accessiblity of directory.
 178          */
 179         if ((dp->i_mode & IFMT) != IFDIR)
 180                 return (ENOTDIR);
 181
 182         /*
 183          * We now have a segment name to search for, and a directory to search.
 184          *
 185          * Before tediously performing a linear scan of the directory,
 186          * check the name cache to see if the directory/name pair
 187          * we are looking for is known already.
 188          */
 189         if (error = cache_lookup(vdp, vpp, cnp)) {
 190                 if (error == ENOENT)
 191                         return (error);
 192                 return (0);
 193         }
 194         /*
 195          * Suppress search for slots unless creating
 196          * file and at end of pathname, in which case
 197          * we watch for a place to put the new file in
 198          * case it doesn't already exist.
 199          */
 200         slotstatus = FOUND;
 201         slotfreespace = slotsize = slotneeded = 0;
 202         if ((nameiop == CREATE || nameiop == RENAME) &&
 203             (flags & ISLASTCN)) {
 204                 slotstatus = NONE;
 205                 slotneeded = (sizeof(struct direct) - MAXNAMLEN +
 206                         cnp->cn_namelen + 3) &~ 3;
 207         }
 208         /*
 209          * If there is cached information on a previous search of
 210          * this directory, pick up where we last left off.
 211          * We cache only lookups as these are the most common
 212          * and have the greatest payoff. Caching CREATE has little
 213          * benefit as it usually must search the entire directory
 214          * to determine that the entry does not exist. Caching the
 215          * location of the last DELETE or RENAME has not reduced
 216          * profiling time and hence has been removed in the interest
 217          * of simplicity.
 218          */
 219         bmask = VFSTOUFS(vdp->v_mount)->um_mountp->mnt_vfsstat.f_iosize - 1;
 220         if (nameiop != LOOKUP || dp->i_diroff == 0 ||
 221             dp->i_diroff > dp->i_size) {
 222                 entryoffsetinblock = 0;
 223                 dp->i_offset = 0;
 224                 numdirpasses = 1;
 225         } else {
 226                 dp->i_offset = dp->i_diroff;
 227                 if ((entryoffsetinblock = dp->i_offset & bmask) &&
 228                     (error = ffs_blkatoff(vdp, (off_t)dp->i_offset, NULL, &bp)))
 229                         goto out;
 230                 numdirpasses = 2;
 231                 ufs_nchstats.ncs_2passes++;
 232         }
 233         prevoff = dp->i_offset;
 234         endsearch = roundup(dp->i_size, DIRBLKSIZ);
 235         enduseful = 0;
 236
 237 searchloop:
 238         while (dp->i_offset < endsearch) {
 239                 /*
 240                  * If necessary, get the next directory block.
 241                  */
 242                 if ((dp->i_offset & bmask) == 0) {
 243                         if (bp != NULL)  {
 244 #if REV_ENDIAN_FS
 245                                 if (rev_endian)
 246                                         byte_swap_dir_block_out(bp);
 247 #endif /* REV_ENDIAN_FS */
 248                                 buf_brelse(bp);
 249                         }
 250                         if (error = ffs_blkatoff(vdp, (off_t)dp->i_offset, NULL, &bp))
 251                                 goto out;
 252                         entryoffsetinblock = 0;
 253                 }
 254                 /*
 255                  * If still looking for a slot, and at a DIRBLKSIZE
 256                  * boundary, have to start looking for free space again.
 257                  */
 258                 if (slotstatus == NONE &&
 259                     (entryoffsetinblock & (DIRBLKSIZ - 1)) == 0) {
 260                         slotoffset = -1;
 261                         slotfreespace = 0;
 262                 }
 263                 /*
 264                  * Get pointer to next entry.
 265                  * Full validation checks are slow, so we only check
 266                  * enough to insure forward progress through the
 267                  * directory. Complete checks can be run by patching
 268                  * "dirchk" to be true.
 269                  */
 270                 ep = (struct direct *)((char *)buf_dataptr(bp) + entryoffsetinblock);
 271                 if (ep->d_reclen == 0 ||
 272                     dirchk && ufs_dirbadentry(vdp, ep, entryoffsetinblock)) {
 273                         int i;
 274
 275                         ufs_dirbad(dp, dp->i_offset, "mangled entry");
 276                         i = DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1));
 277                         dp->i_offset += i;
 278                         entryoffsetinblock += i;
 279                         continue;
 280                 }
 281
 282                 /*
 283                  * If an appropriate sized slot has not yet been found,
 284                  * check to see if one is available. Also accumulate space
 285                  * in the current block so that we can determine if
 286                  * compaction is viable.
 287                  */
 288                 if (slotstatus != FOUND) {
 289                         int size = ep->d_reclen;
 290
 291                         if (ep->d_ino != 0)
 292                                 size -= DIRSIZ(FSFMT(vdp), ep);
 293                         if (size > 0) {
 294                                 if (size >= slotneeded) {
 295                                         slotstatus = FOUND;
 296                                         slotoffset = dp->i_offset;
 297                                         slotsize = ep->d_reclen;
 298                                 } else if (slotstatus == NONE) {
 299                                         slotfreespace += size;
 300                                         if (slotoffset == -1)
 301                                                 slotoffset = dp->i_offset;
 302                                         if (slotfreespace >= slotneeded) {
 303                                                 slotstatus = COMPACT;
 304                                                 slotsize = dp->i_offset +
 305                                                       ep->d_reclen - slotoffset;
 306                                         }
 307                                 }
 308                         }
 309                 }
 310
 311                 /*
 312                  * Check for a name match.
 313                  */
 314                 if (ep->d_ino) {
 315 #                       if (BYTE_ORDER == LITTLE_ENDIAN)
 316                                 if (vdp->v_mount->mnt_maxsymlinklen > 0)
 317                                         namlen = ep->d_namlen;
 318                                 else
 319                                         namlen = ep->d_type;
 320 #                       else
 321                                 namlen = ep->d_namlen;
 322 #                       endif
 323                         if (namlen == cnp->cn_namelen &&
 324                             !bcmp(cnp->cn_nameptr, ep->d_name,
 325                                 (unsigned)namlen)) {
 326                                 /*
 327                                  * Save directory entry's inode number and
 328                                  * reclen in ndp->ni_ufs area, and release
 329                                  * directory buffer.
 330                                  */
 331                                 if (vdp->v_mount->mnt_maxsymlinklen > 0 &&
 332                                     ep->d_type == DT_WHT) {
 333                                         slotstatus = FOUND;
 334                                         slotoffset = dp->i_offset;
 335                                         slotsize = ep->d_reclen;
 336                                         dp->i_reclen = slotsize;
 337                                         enduseful = dp->i_size;
 338                                         ap->a_cnp->cn_flags |= ISWHITEOUT;
 339                                         numdirpasses--;
 340                                         goto notfound;
 341                                 }
 342                                 dp->i_ino = ep->d_ino;
 343                                 dp->i_reclen = ep->d_reclen;
 344 #if REV_ENDIAN_FS
 345                                 if (rev_endian)
 346                                         byte_swap_dir_block_out(bp);
 347 #endif /* REV_ENDIAN_FS */
 348                                 buf_brelse(bp);
 349                                 goto found;
 350                         }
 351                 }
 352                 prevoff = dp->i_offset;
 353                 dp->i_offset += ep->d_reclen;
 354                 entryoffsetinblock += ep->d_reclen;
 355                 if (ep->d_ino)
 356                         enduseful = dp->i_offset;
 357         }
 358 notfound:
 359         /*
 360          * If we started in the middle of the directory and failed
 361          * to find our target, we must check the beginning as well.
 362          */
 363         if (numdirpasses == 2) {
 364                 numdirpasses--;
 365                 dp->i_offset = 0;
 366                 endsearch = dp->i_diroff;
 367                 goto searchloop;
 368         }
 369         if (bp != NULL) {
 370 #if REV_ENDIAN_FS
 371                 if (rev_endian)
 372                         byte_swap_dir_block_out(bp);
 373 #endif /* REV_ENDIAN_FS */
 374                 buf_brelse(bp);
 375         }
 376         /*
 377          * If creating, and at end of pathname and current
 378          * directory has not been removed, then can consider
 379          * allowing file to be created.
 380          */
 381         if ((nameiop == CREATE || nameiop == RENAME ||
 382              (nameiop == DELETE &&
 383               (ap->a_cnp->cn_flags & DOWHITEOUT) &&
 384               (ap->a_cnp->cn_flags & ISWHITEOUT))) &&
 385             (flags & ISLASTCN) && dp->i_nlink != 0) {
 386                 /*
 387                  * Return an indication of where the new directory
 388                  * entry should be put.  If we didn't find a slot,
 389                  * then set dp->i_count to 0 indicating
 390                  * that the new slot belongs at the end of the
 391                  * directory. If we found a slot, then the new entry
 392                  * can be put in the range from dp->i_offset to
 393                  * dp->i_offset + dp->i_count.
 394                  */
 395                 if (slotstatus == NONE) {
 396                         dp->i_offset = roundup(dp->i_size, DIRBLKSIZ);
 397                         dp->i_count = 0;
 398                         enduseful = dp->i_offset;
 399                 } else if (nameiop == DELETE) {
 400                         dp->i_offset = slotoffset;
 401                         if ((dp->i_offset & (DIRBLKSIZ - 1)) == 0)
 402                                 dp->i_count = 0;
 403                         else
 404                                 dp->i_count = dp->i_offset - prevoff;
 405                 } else {
 406                         dp->i_offset = slotoffset;
 407                         dp->i_count = slotsize;
 408                         if (enduseful < slotoffset + slotsize)
 409                                 enduseful = slotoffset + slotsize;
 410                 }
 411                 dp->i_endoff = roundup(enduseful, DIRBLKSIZ);
 412                 dp->i_flag |= IN_CHANGE | IN_UPDATE;
 413                 /*
 414                  * We return with the directory locked, so that
 415                  * the parameters we set up above will still be
 416                  * valid if we actually decide to do a direnter().
 417                  * We return ni_vp == NULL to indicate that the entry
 418                  * does not currently exist; we leave a pointer to
 419                  * the (locked) directory inode in ndp->ni_dvp.
 420                  *
 421                  * NB - if the directory is unlocked, then this
 422                  * information cannot be used.
 423                  */
 424                 error = EJUSTRETURN;
 425                 goto out;
 426         }
 427         /*
 428          * Insert name into cache (as non-existent) if appropriate.
 429          */
 430         if ((cnp->cn_flags & MAKEENTRY) && nameiop != CREATE)
 431                 cache_enter(vdp, *vpp, cnp);
 432         error = ENOENT;
 433         goto out;
 434
 435 found:
 436         if (numdirpasses == 2)
 437                 ufs_nchstats.ncs_pass2++;
 438         /*
 439          * Check that directory length properly reflects presence
 440          * of this entry.
 441          */
 442         if (entryoffsetinblock + DIRSIZ(FSFMT(vdp), ep) > dp->i_size) {
 443                 ufs_dirbad(dp, dp->i_offset, "i_size too small");
 444                 dp->i_size = entryoffsetinblock + DIRSIZ(FSFMT(vdp), ep);
 445                 dp->i_flag |= IN_CHANGE | IN_UPDATE;
 446         }
 447
 448         /*
 449          * Found component in pathname.
 450          * If the final component of path name, save information
 451          * in the cache as to where the entry was found.
 452          */
 453         if ((flags & ISLASTCN) && nameiop == LOOKUP)
 454                 dp->i_diroff = dp->i_offset &~ (DIRBLKSIZ - 1);
 455
 456         /*
 457          * If deleting, and at end of pathname, return
 458          * parameters which can be used to remove file.
 459          * If the wantparent flag isn't set, we return only
 460          * the directory (in ndp->ni_dvp), otherwise we go
 461          * on and lock the inode, being careful with ".".
 462          */
 463         if (nameiop == DELETE && (flags & ISLASTCN)) {
 464                 /*
 465                  * Return pointer to current entry in dp->i_offset,
 466                  * and distance past previous entry (if there
 467                  * is a previous entry in this block) in dp->i_count.
 468                  * Save directory inode pointer in ndp->ni_dvp for dirremove().
 469                  */
 470                 if ((dp->i_offset & (DIRBLKSIZ - 1)) == 0)
 471                         dp->i_count = 0;
 472                 else
 473                         dp->i_count = dp->i_offset - prevoff;
 474                 if (dp->i_number == dp->i_ino) {
 475                         vnode_get(vdp);
 476                         *vpp = vdp;
 477                         error = 0;
 478                         goto out;
 479                 }
 480                 if (error = ffs_vget_internal(vdp->v_mount, dp->i_ino, &tdp, vdp, cnp, 0, 0))
 481                         goto out;
 482                 *vpp = tdp;
 483                 goto out;
 484         }
 485
 486         /*
 487          * If rewriting (RENAME), return the inode and the
 488          * information required to rewrite the present directory
 489          * Must get inode of directory entry to verify it's a
 490          * regular file, or empty directory.
 491          */
 492         if (nameiop == RENAME && wantparent && (flags & ISLASTCN)) {
 493                 /*
 494                  * Careful about locking second inode.
 495                  * This can only occur if the target is ".".
 496                  */
 497                 if (dp->i_number == dp->i_ino) {
 498                         error =EISDIR;
 499                         goto out;
 500                 }
 501                 if (error = ffs_vget_internal(vdp->v_mount, dp->i_ino, &tdp, vdp, cnp, 0, 0))
 502                         goto out;
 503                 *vpp = tdp;
 504
 505                 goto out;
 506         }
 507
 508         /*
 509          * Step through the translation in the name.  We do not `vnode_put' the
 510          * directory because we may need it again if a symbolic link
 511          * is relative to the current directory.  Instead we save it
 512          * unlocked as "pdp".  We must get the target inode before unlocking
 513          * the directory to insure that the inode will not be removed
 514          * before we get it.  We prevent deadlock by always fetching
 515          * inodes from the root, moving down the directory tree. Thus
 516          * when following backward pointers ".." we must unlock the
 517          * parent directory before getting the requested directory.
 518          * There is a potential race condition here if both the current
 519          * and parent directories are removed before the VFS_VGET for the
 520          * inode associated with ".." returns.  We hope that this occurs
 521          * infrequently since we cannot avoid this race condition without
 522          * implementing a sophisticated deadlock detection algorithm.
 523          * Note also that this simple deadlock detection scheme will not
 524          * work if the file system has any hard links other than ".."
 525          * that point backwards in the directory structure.
 526          */
 527         pdp = vdp;
 528         if (flags & ISDOTDOT) {
 529                 if (error = ffs_vget_internal(vdp->v_mount, dp->i_ino, &tdp, vdp, cnp, 0, 0)) {
 530                         goto out;
 531                 }
 532                 *vpp = tdp;
 533         } else if (dp->i_number == dp->i_ino) {
 534                 vnode_get(vdp); /* we want ourself, ie "." */
 535                 *vpp = vdp;
 536         } else {
 537                 if (error = ffs_vget_internal(vdp->v_mount, dp->i_ino, &tdp, vdp, cnp, 0, 0))
 538                         goto out;
 539                 *vpp = tdp;
 540         }
 541
 542         error = 0;
 543 out:
 544         return (error);
 545 }
 546
 547 void
 548 ufs_dirbad(ip, offset, how)
 549         struct inode *ip;
 550         doff_t offset;
 551         const char *how;
 552 {
 553         struct mount *mp;
 554
 555         mp = ITOV(ip)->v_mount;
 556         (void)printf("%s: bad dir ino %d at offset %d: %s\n",
 557             mp->mnt_vfsstat.f_mntonname, ip->i_number, offset, how);
 558         if ((mp->mnt_vfsstat.f_flags & MNT_RDONLY) == 0)
 559                 panic("bad dir");
 560 }
 561
 562 /*
 563  * Do consistency checking on a directory entry:
 564  *      record length must be multiple of 4
 565  *      entry must fit in rest of its DIRBLKSIZ block
 566  *      record must be large enough to contain entry
 567  *      name is not longer than MAXNAMLEN
 568  *      name must be as long as advertised, and null terminated
 569  */
 570 int
 571 ufs_dirbadentry(dp, ep, entryoffsetinblock)
 572         struct vnode *dp;
 573         register struct direct *ep;
 574         int entryoffsetinblock;
 575 {
 576         register int i;
 577         int namlen;
 578
 579 #       if (BYTE_ORDER == LITTLE_ENDIAN)
 580                 if (dp->v_mount->mnt_maxsymlinklen > 0)
 581                         namlen = ep->d_namlen;
 582                 else
 583                         namlen = ep->d_type;
 584 #       else
 585                 namlen = ep->d_namlen;
 586 #       endif
 587         if ((ep->d_reclen & 0x3) != 0 ||
 588             ep->d_reclen > DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)) ||
 589             ep->d_reclen < DIRSIZ(FSFMT(dp), ep) || namlen > MAXNAMLEN) {
 590                 /*return (1); */
 591                 printf("First bad\n");
 592                 goto bad;
 593         }
 594         if (ep->d_ino == 0)
 595                 return (0);
 596         for (i = 0; i < namlen; i++)
 597                 if (ep->d_name[i] == '\0') {
 598                         /*return (1); */
 599                         printf("Second bad\n");
 600                         goto bad;
 601         }
 602         if (ep->d_name[i])
 603                 goto bad;
 604         return (0);
 605 bad:
 606         return (1);
 607 }
 608
 609 /*
 610  * Write a directory entry after a call to namei, using the parameters
 611  * that it left in nameidata.  The argument ip is the inode which the new
 612  * directory entry will refer to.  Dvp is a pointer to the directory to
 613  * be written, which was left locked by namei. Remaining parameters
 614  * (dp->i_offset, dp->i_count) indicate how the space for the new
 615  * entry is to be obtained.
 616  */
 617 int
 618 ufs_direnter(ip, dvp, cnp)
 619         struct inode *ip;
 620         struct vnode *dvp;
 621         register struct componentname *cnp;
 622 {
 623         register struct inode *dp;
 624         struct direct newdir;
 625
 626         dp = VTOI(dvp);
 627         newdir.d_ino = ip->i_number;
 628         newdir.d_namlen = cnp->cn_namelen;
 629         bcopy(cnp->cn_nameptr, newdir.d_name, (unsigned)cnp->cn_namelen + 1);
 630         if (dvp->v_mount->mnt_maxsymlinklen > 0)
 631                 newdir.d_type = IFTODT(ip->i_mode);
 632         else {
 633                 newdir.d_type = 0;
 634 #               if (BYTE_ORDER == LITTLE_ENDIAN)
 635                         { u_char tmp = newdir.d_namlen;
 636                         newdir.d_namlen = newdir.d_type;
 637                         newdir.d_type = tmp; }
 638 #               endif
 639         }
 640         return (ufs_direnter2(dvp, &newdir, cnp->cn_context));
 641 }
 642
 /*
 * Common entry point for directory entry insertion, used by ufs_direnter
 * and ufs_whiteout.
 */
 647 int
 648 ufs_direnter2(struct vnode *dvp, struct direct *dirp, vfs_context_t ctx)
 649 {
 650         int newentrysize;
 651         struct inode *dp;
 652         struct buf *bp;
 653         uio_t auio;
 654         u_int dsize;
 655         struct direct *ep, *nep;
 656         int error, loc, spacefree;
 657         char *dirbuf;
 658         char uio_buf[ UIO_SIZEOF(1) ];
 659 #if REV_ENDIAN_FS
 660         struct mount *mp=dvp->v_mount;
 661         int rev_endian=(mp->mnt_flag & MNT_REVEND);
 662 #endif /* REV_ENDIAN_FS */
 663
 664         dp = VTOI(dvp);
 665         newentrysize = DIRSIZ(FSFMT(dvp), dirp);
 666
 667         if (dp->i_count == 0) {
 668                 /*
 669                  * If dp->i_count is 0, then namei could find no
 670                  * space in the directory. Here, dp->i_offset will
 671                  * be on a directory block boundary and we will write the
 672                  * new entry into a fresh block.
 673                  */
 674                 if (dp->i_offset & (DIRBLKSIZ - 1))
 675                         panic("ufs_direnter2: newblk");
 676                 dirp->d_reclen = DIRBLKSIZ;
 677                 auio = uio_createwithbuffer(1, dp->i_offset, UIO_SYSSPACE, UIO_WRITE,
 678                                                                           &uio_buf[0], sizeof(uio_buf));
 679                 uio_addiov(auio, CAST_USER_ADDR_T(dirp), newentrysize);
 680
 681                 error = ffs_write_internal(dvp, auio, IO_SYNC, vfs_context_ucred(ctx));
 682                 if (DIRBLKSIZ >
 683                     VFSTOUFS(dvp->v_mount)->um_mountp->mnt_vfsstat.f_bsize)
 684                         /* XXX should grow with balloc() */
 685                         panic("ufs_direnter2: frag size");
 686                 else if (!error) {
 687                         dp->i_size = roundup(dp->i_size, DIRBLKSIZ);
 688                         dp->i_flag |= IN_CHANGE;
 689                 }
 690                 return (error);
 691         }
 692
 693         /*
 694          * If dp->i_count is non-zero, then namei found space
 695          * for the new entry in the range dp->i_offset to
 696          * dp->i_offset + dp->i_count in the directory.
 697          * To use this space, we may have to compact the entries located
 698          * there, by copying them together towards the beginning of the
 699          * block, leaving the free space in one usable chunk at the end.
 700          */
 701
 702         /*
 703          * Increase size of directory if entry eats into new space.
 704          * This should never push the size past a new multiple of
 705          * DIRBLKSIZE.
 706          *
 707          * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN.
 708          */
 709         if (dp->i_offset + dp->i_count > dp->i_size)
 710                 dp->i_size = dp->i_offset + dp->i_count;
 711         /*
 712          * Get the block containing the space for the new directory entry.
 713          */
 714         if (error = ffs_blkatoff(dvp, (off_t)dp->i_offset, &dirbuf, &bp))
 715                 return (error);
 716         /*
 717          * Find space for the new entry. In the simple case, the entry at
 718          * offset base will have the space. If it does not, then namei
 719          * arranged that compacting the region dp->i_offset to
 720          * dp->i_offset + dp->i_count would yield the
 721          * space.
 722          */
 723         ep = (struct direct *)dirbuf;
 724         dsize = DIRSIZ(FSFMT(dvp), ep);
 725         spacefree = ep->d_reclen - dsize;
 726         for (loc = ep->d_reclen; loc < dp->i_count; ) {
 727                 nep = (struct direct *)(dirbuf + loc);
 728                 if (ep->d_ino) {
 729                         /* trim the existing slot */
 730                         ep->d_reclen = dsize;
 731                         ep = (struct direct *)((char *)ep + dsize);
 732                 } else {
 733                         /* overwrite; nothing there; header is ours */
 734                         spacefree += dsize;
 735                 }
 736                 dsize = DIRSIZ(FSFMT(dvp), nep);
 737                 spacefree += nep->d_reclen - dsize;
 738                 loc += nep->d_reclen;
 739                 bcopy((caddr_t)nep, (caddr_t)ep, dsize);
 740         }
 741         /*
 742          * Update the pointer fields in the previous entry (if any),
 743          * copy in the new entry, and write out the block.
 744          */
 745         if (ep->d_ino == 0 ||
 746             (ep->d_ino == WINO &&
 747              bcmp(ep->d_name, dirp->d_name, dirp->d_namlen) == 0)) {
 748                 if (spacefree + dsize < newentrysize)
 749                         panic("ufs_direnter2: compact1");
 750                 dirp->d_reclen = spacefree + dsize;
 751         } else {
 752                 if (spacefree < newentrysize)
 753                         panic("ufs_direnter2: compact2");
 754                 dirp->d_reclen = spacefree;
 755                 ep->d_reclen = dsize;
 756                 ep = (struct direct *)((char *)ep + dsize);
 757         }
 758         bcopy((caddr_t)dirp, (caddr_t)ep, (u_int)newentrysize);
 759 #if REV_ENDIAN_FS
 760         if (rev_endian)
 761                 byte_swap_dir_block_out(bp);
 762 #endif /* REV_ENDIAN_FS */
 763         if (mp->mnt_flag & MNT_ASYNC) {
 764                 error = 0;
 765                 buf_bdwrite(bp);
 766         } else {
 767                 error = VNOP_BWRITE(bp);
 768         }
 769         dp->i_flag |= IN_CHANGE | IN_UPDATE;
 770         if (!error && dp->i_endoff && dp->i_endoff < dp->i_size)
 771                 error = ffs_truncate_internal(dvp, (off_t)dp->i_endoff, IO_SYNC, vfs_context_ucred(ctx));
 772
 773         return (error);
 774 }
 775
 776 /*
 777  * Remove a directory entry after a call to namei, using
 778  * the parameters which it left in nameidata. The entry
 779  * dp->i_offset contains the offset into the directory of the
 780  * entry to be eliminated.  The dp->i_count field contains the
 781  * size of the previous record in the directory.  If this
 782  * is 0, the first entry is being deleted, so we need only
 783  * zero the inode number to mark the entry as free.  If the
 784  * entry is not the first in the directory, we must reclaim
 785  * the space of the now empty record by adding the record size
 786  * to the size of the previous entry.
 787  */
 788 int
 789 ufs_dirremove(dvp, cnp)
 790         struct vnode *dvp;
 791         struct componentname *cnp;
 792 {
 793         register struct inode *dp;
 794         struct direct *ep;
 795         struct buf *bp;
 796         int error;
 797 #if REV_ENDIAN_FS
 798         struct mount *mp=dvp->v_mount;
 799         int rev_endian=(mp->mnt_flag & MNT_REVEND);
 800 #endif /* REV_ENDIAN_FS */
 801
 802         dp = VTOI(dvp);
 803
 804         if (cnp->cn_flags & DOWHITEOUT) {
 805                 /*
 806                  * Whiteout entry: set d_ino to WINO.
 807                  */
 808                 if (error = ffs_blkatoff(dvp, (off_t)dp->i_offset, (char **)&ep, &bp))
 809                         return (error);
 810                 ep->d_ino = WINO;
 811                 ep->d_type = DT_WHT;
 812 #if REV_ENDIAN_FS
 813                 if (rev_endian)
 814                         byte_swap_dir_block_out(bp);
 815 #endif /* REV_ENDIAN_FS */
 816                 if (mp->mnt_flag & MNT_ASYNC) {
 817                         error = 0;
 818                         buf_bdwrite(bp);
 819                 } else {
 820                         error = VNOP_BWRITE(bp);
 821                 }
 822                 dp->i_flag |= IN_CHANGE | IN_UPDATE;
 823                 return (error);
 824         }
 825
 826         if (dp->i_count == 0) {
 827                 /*
 828                  * First entry in block: set d_ino to zero.
 829                  */
 830                 if (error = ffs_blkatoff(dvp, (off_t)dp->i_offset, (char **)&ep, &bp))
 831                         return (error);
 832                 ep->d_ino = 0;
 833 #if REV_ENDIAN_FS
 834                 if (rev_endian)
 835                         byte_swap_dir_block_out(bp);
 836 #endif /* REV_ENDIAN_FS */
 837                 if (mp->mnt_flag & MNT_ASYNC) {
 838                         error = 0;
 839                         buf_bdwrite(bp);
 840                 } else {
 841                         error = VNOP_BWRITE(bp);
 842                 }
 843                 dp->i_flag |= IN_CHANGE | IN_UPDATE;
 844                 return (error);
 845         }
 846         /*
 847          * Collapse new free space into previous entry.
 848          */
 849         if (error = ffs_blkatoff(dvp, (off_t)(dp->i_offset - dp->i_count),
 850             (char **)&ep, &bp))
 851                 return (error);
 852         ep->d_reclen += dp->i_reclen;
 853 #if REV_ENDIAN_FS
 854         if (rev_endian)
 855                 byte_swap_dir_block_out(bp);
 856 #endif /* REV_ENDIAN_FS */
 857         if (mp->mnt_flag & MNT_ASYNC) {
 858                 error = 0;
 859                 buf_bdwrite(bp);
 860         } else {
 861                 error = VNOP_BWRITE(bp);
 862         }
 863         dp->i_flag |= IN_CHANGE | IN_UPDATE;
 864
 865         return (error);
 866 }
 867
 868 /*
 869  * Rewrite an existing directory entry to point at the inode
 870  * supplied.  The parameters describing the directory entry are
 871  * set up by a call to namei.
 872  */
 873 int
 874 ufs_dirrewrite(dp, ip, cnp)
 875         struct inode *dp, *ip;
 876         struct componentname *cnp;
 877 {
 878         struct buf *bp;
 879         struct direct *ep;
 880         struct vnode *vdp = ITOV(dp);
 881         int error;
 882
 883         if (error = ffs_blkatoff(vdp, (off_t)dp->i_offset, (char **)&ep, &bp))
 884                 return (error);
 885         ep->d_ino = ip->i_number;
 886         if (vdp->v_mount->mnt_maxsymlinklen > 0)
 887                 ep->d_type = IFTODT(ip->i_mode);
 888 #if REV_ENDIAN_FS
 889         if (vdp->v_mount->mnt_flag & MNT_REVEND)
 890                 byte_swap_dir_block_out(bp);
 891 #endif /* REV_ENDIAN_FS */
 892         if (vdp->v_mount->mnt_flag & MNT_ASYNC) {
 893                 error = 0;
 894                 buf_bdwrite(bp);
 895         } else {
 896                 error = VNOP_BWRITE(bp);
 897         }
 898         dp->i_flag |= IN_CHANGE | IN_UPDATE;
 899         return (error);
 900 }
 901
 902 /*
 903  * Check if a directory is empty or not.
 904  * Inode supplied must be locked.
 905  *
 906  * Using a struct dirtemplate here is not precisely
 907  * what we want, but better than using a struct direct.
 908  *
 909  * NB: does not handle corrupted directories.
 910  */
 911 int
 912 ufs_dirempty(struct inode *ip, ino_t parentino, kauth_cred_t cred)
 913 {
 914         register off_t off;
 915         struct dirtemplate dbuf;
 916         register struct direct *dp = (struct direct *)&dbuf;
 917         int error, count, namlen;
 918 #if REV_ENDIAN_FS
 919         struct vnode *vp=ITOV(ip);
 920         struct mount *mp=vp->v_mount;
 921         int rev_endian=(mp->mnt_flag & MNT_REVEND);
 922 #endif /* REV_ENDIAN_FS */
 923
 924 #define MINDIRSIZ (sizeof (struct dirtemplate) / 2)
 925
 926         for (off = 0; off < ip->i_size; off += dp->d_reclen) {
 927                 error = vn_rdwr(UIO_READ, ITOV(ip), (caddr_t)dp, MINDIRSIZ, off,
 928                    UIO_SYSSPACE32, IO_NODELOCKED, cred, &count, (struct proc *)0);
 929                 /*
 930                  * Since we read MINDIRSIZ, residual must
 931                  * be 0 unless we're at end of file.
 932                  */
 933                 if (error || count != 0)
 934                         return (0);
 935 #if 0 /*REV_ENDIAN_FS */
 936                 if (rev_endian)
 937                         byte_swap_minidir_in(dp);
 938 #endif /* REV_ENDIAN_FS */
 939                 /* avoid infinite loops */
 940                 if (dp->d_reclen == 0)
 941                         return (0);
 942                 /* skip empty entries */
 943                 if (dp->d_ino == 0 || dp->d_ino == WINO)
 944                         continue;
 945                 /* accept only "." and ".." */
 946 #               if (BYTE_ORDER == LITTLE_ENDIAN)
 947                         if (ITOV(ip)->v_mount->mnt_maxsymlinklen > 0)
 948                                 namlen = dp->d_namlen;
 949                         else
 950                                 namlen = dp->d_type;
 951 #               else
 952                         namlen = dp->d_namlen;
 953 #               endif
 954                 if (namlen > 2)
 955                         return (0);
 956                 if (dp->d_name[0] != '.')
 957                         return (0);
 958                 /*
 959                  * At this point namlen must be 1 or 2.
 960                  * 1 implies ".", 2 implies ".." if second
 961                  * char is also "."
 962                  */
 963                 if (namlen == 1)
 964                         continue;
 965                 if (dp->d_name[1] == '.' && dp->d_ino == parentino)
 966                         continue;
 967                 return (0);
 968         }
 969         return (1);
 970 }
 971
 972 /*
 973  * Check if source directory is in the path of the target directory.
 974  * Target is supplied locked, source is unlocked.
 975  */
 976 int
 977 ufs_checkpath(source, target, cred)
 978         struct inode *source, *target;
 979         kauth_cred_t cred;
 980 {
 981         struct vnode *vp;
 982         int error, rootino, namlen;
 983         int need_put = 0;
 984         struct dirtemplate dirbuf;
 985
 986         vp = ITOV(target);
 987         if (target->i_number == source->i_number) {
 988                 error = EEXIST;
 989                 goto out;
 990         }
 991         rootino = ROOTINO;
 992         error = 0;
 993         if (target->i_number == rootino)
 994                 goto out;
 995
 996         for (;;) {
 997                 if (vp->v_type != VDIR) {
 998                         error = ENOTDIR;
 999                         break;
1000                 }
1001                 error = vn_rdwr(UIO_READ, vp, (caddr_t)&dirbuf,
1002                         sizeof (struct dirtemplate), (off_t)0, UIO_SYSSPACE32,
1003                         IO_NODELOCKED, cred, (int *)0, (struct proc *)0);
1004                 if (error != 0)
1005                         break;
1006 #               if (BYTE_ORDER == LITTLE_ENDIAN)
1007                         if (vp->v_mount->mnt_maxsymlinklen > 0)
1008                                 namlen = dirbuf.dotdot_namlen;
1009                         else
1010                                 namlen = dirbuf.dotdot_type;
1011 #               else
1012                         namlen = dirbuf.dotdot_namlen;
1013 #               endif
1014                 if (namlen != 2 ||
1015                     dirbuf.dotdot_name[0] != '.' ||
1016                     dirbuf.dotdot_name[1] != '.') {
1017                         error = ENOTDIR;
1018                         break;
1019                 }
1020                 if (dirbuf.dotdot_ino == source->i_number) {
1021                         error = EINVAL;
1022                         break;
1023                 }
1024                 if (dirbuf.dotdot_ino == rootino)
1025                         break;
1026
1027                 if (need_put)
1028                         vnode_put(vp);
1029
1030                 if (error = VFS_VGET(vp->v_mount, (ino64_t)dirbuf.dotdot_ino, &vp, NULL)) { /* XXX need context */
1031                         vp = NULL;
1032                         break;
1033                 }
1034                 need_put = 1;
1035         }
1036
1037 out:
1038         if (error == ENOTDIR)
1039                 printf("checkpath: .. not a directory\n");
1040         if (need_put && vp)
1041                 vnode_put(vp);
1042
1043         return (error);
1044 }