bsd/hfs/hfs_cnode.c

   1 /*
   2  * Copyright (c) 2002-2013 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 #include <sys/param.h>
  29 #include <sys/systm.h>
  30 #include <sys/proc.h>
  31 #include <sys/vnode.h>
  32 #include <sys/mount.h>
  33 #include <sys/kernel.h>
  34 #include <sys/malloc.h>
  35 #include <sys/time.h>
  36 #include <sys/ubc.h>
  37 #include <sys/quota.h>
  38 #include <sys/kdebug.h>
  39 #include <libkern/OSByteOrder.h>
  40 #include <sys/buf_internal.h>
  41
  42 #include <kern/locks.h>
  43
  44 #include <miscfs/specfs/specdev.h>
  45 #include <miscfs/fifofs/fifo.h>
  46
  47 #include <hfs/hfs.h>
  48 #include <hfs/hfs_catalog.h>
  49 #include <hfs/hfs_cnode.h>
  50 #include <hfs/hfs_quota.h>
  51 #include <hfs/hfs_format.h>
  52
  53 extern int prtactive;
  54
  55 extern lck_attr_t *  hfs_lock_attr;
  56 extern lck_grp_t *  hfs_mutex_group;
  57 extern lck_grp_t *  hfs_rwlock_group;
  58
  59 static void  hfs_reclaim_cnode(struct cnode *);
  60 static int hfs_cnode_teardown (struct vnode *vp, vfs_context_t ctx, int reclaim);
  61 static int hfs_isordered(struct cnode *, struct cnode *);
  62
  63 extern int hfs_removefile_callback(struct buf *bp, void *hfsmp);
  64
  65 __inline__ int hfs_checkdeleted (struct cnode *cp) {
  66         return ((cp->c_flag & (C_DELETED | C_NOEXISTS)) ? ENOENT : 0);
  67 }
  68
  69 /*
  70  * Function used by a special fcntl() that decorates a cnode/vnode that
  71  * indicates it is backing another filesystem, like a disk image.
  72  *
  73  * the argument 'val' indicates whether or not to set the bit in the cnode flags
  74  *
  75  * Returns non-zero on failure. 0 on success
  76  */
  77 int hfs_set_backingstore (struct vnode *vp, int val) {
  78         struct cnode *cp = NULL;
  79         int err = 0;
  80
  81         cp = VTOC(vp);
  82         if (!vnode_isreg(vp) && !vnode_isdir(vp)) {
  83                 return EINVAL;
  84         }
  85
  86         /* lock the cnode */
  87         err = hfs_lock (cp, HFS_EXCLUSIVE_LOCK);
  88         if (err) {
  89                 return err;
  90         }
  91
  92         if (val) {
  93                 cp->c_flag |= C_BACKINGSTORE;
  94         }
  95         else {
  96                 cp->c_flag &= ~C_BACKINGSTORE;
  97         }
  98
  99         /* unlock everything */
 100         hfs_unlock (cp);
 101
 102         return err;
 103 }
 104
 105 /*
 106  * Function used by a special fcntl() that check to see if a cnode/vnode
 107  * indicates it is backing another filesystem, like a disk image.
 108  *
 109  * the argument 'val' is an output argument for whether or not the bit is set
 110  *
 111  * Returns non-zero on failure. 0 on success
 112  */
 113
 114 int hfs_is_backingstore (struct vnode *vp, int *val) {
 115         struct cnode *cp = NULL;
 116         int err = 0;
 117
 118         if (!vnode_isreg(vp) && !vnode_isdir(vp)) {
 119                 *val = 0;
 120                 return 0;
 121         }
 122
 123         cp = VTOC(vp);
 124
 125         /* lock the cnode */
 126         err = hfs_lock (cp, HFS_SHARED_LOCK);
 127         if (err) {
 128                 return err;
 129         }
 130
 131         if (cp->c_flag & C_BACKINGSTORE) {
 132                 *val = 1;
 133         }
 134         else {
 135                 *val = 0;
 136         }
 137
 138         /* unlock everything */
 139         hfs_unlock (cp);
 140
 141         return err;
 142 }
 143
 144
 145 /*
 146  * hfs_cnode_teardown
 147  *
 148  * This is an internal function that is invoked from both hfs_vnop_inactive
 149  * and hfs_vnop_reclaim.  As VNOP_INACTIVE is not necessarily called from vnodes
 150  * being recycled and reclaimed, it is important that we do any post-processing
 151  * necessary for the cnode in both places.  Important tasks include things such as
 152  * releasing the blocks from an open-unlinked file when all references to it have dropped,
 153  * and handling resource forks separately from data forks.
 154  *
 155  * Note that we take only the vnode as an argument here (rather than the cnode).
 156  * Recall that each cnode supports two forks (rsrc/data), and we can always get the right
 157  * cnode from either of the vnodes, but the reverse is not true -- we can't determine which
 158  * vnode we need to reclaim if only the cnode is supplied.
 159  *
 160  * This function is idempotent and safe to call from both hfs_vnop_inactive and hfs_vnop_reclaim
 161  * if both are invoked right after the other.  In the second call, most of this function's if()
 162  * conditions will fail, since they apply generally to cnodes still marked with C_DELETED.
 163  * As a quick check to see if this function is necessary, determine if the cnode is already
 164  * marked C_NOEXISTS.  If it is, then it is safe to skip this function.  The only tasks that
 165  * remain for cnodes marked in such a fashion is to teardown their fork references and
 166  * release all directory hints and hardlink origins.  However, both of those are done
 167  * in hfs_vnop_reclaim.  hfs_update, by definition, is not necessary if the cnode's catalog
 168  * entry is no longer there.
 169  *
 170  * 'reclaim' argument specifies whether or not we were called from hfs_vnop_reclaim.  If we are
 171  * invoked from hfs_vnop_reclaim, we can not call functions that cluster_push since the UBC info
 172  * is totally gone by that point.
 173  *
 174  * Assumes that both truncate and cnode locks for 'cp' are held.
 175  */
 176 static
 177 int hfs_cnode_teardown (struct vnode *vp, vfs_context_t ctx, int reclaim) {
 178
 179         int forkcount = 0;
 180         enum vtype v_type;
 181         struct cnode *cp;
 182         int error = 0;
 183         int started_tr = 0;
 184         struct hfsmount *hfsmp = VTOHFS(vp);
 185         struct proc *p = vfs_context_proc(ctx);
 186         int truncated = 0;
 187     cat_cookie_t cookie;
 188     int cat_reserve = 0;
 189     int lockflags;
 190         int ea_error = 0;
 191
 192         v_type = vnode_vtype(vp);
 193         cp = VTOC(vp);
 194
 195         if (cp->c_datafork) {
 196                 ++forkcount;
 197         }
 198         if (cp->c_rsrcfork) {
 199                 ++forkcount;
 200         }
 201
 202
 203         /*
 204          * Skip the call to ubc_setsize if we're being invoked on behalf of reclaim.
 205          * The dirty regions would have already been synced to disk, so informing UBC
 206          * that they can toss the pages doesn't help anyone at this point.
 207          *
 208          * Note that this is a performance problem if the vnode goes straight to reclaim
 209          * (and skips inactive), since there would be no way for anyone to notify the UBC
 210          * that all pages in this file are basically useless.
 211          */
 212         if (reclaim == 0) {
 213                 /*
 214                  * Check whether we are tearing down a cnode with only one remaining fork.
 215                  * If there are blocks in its filefork, then we need to unlock the cnode
 216                  * before calling ubc_setsize.  The cluster layer may re-enter the filesystem
 217                  * (i.e. VNOP_BLOCKMAP), and if we retain the cnode lock, we could double-lock
 218                  * panic.
 219                  */
 220
 221                 if ((v_type == VREG || v_type == VLNK) &&
 222                         (cp->c_flag & C_DELETED) &&
 223                         (VTOF(vp)->ff_blocks != 0) && (forkcount == 1)) {
 224                         hfs_unlock(cp);
 225                         /* ubc_setsize just fails if we were to call this from VNOP_RECLAIM */
 226                         ubc_setsize(vp, 0);
 227                         (void) hfs_lock(cp, HFS_FORCE_LOCK);
 228                 }
 229         }
 230
 231         /*
 232          * Push file data out for normal files that haven't been evicted from
 233          * the namespace.  We only do this if this function was not called from reclaim,
 234          * because by that point the UBC information has been totally torn down.
 235          *
 236          * There should also be no way that a normal file that has NOT been deleted from
 237          * the namespace to skip INACTIVE and go straight to RECLAIM.  That race only happens
 238          * when the file becomes open-unlinked.
 239          */
 240         if ((v_type == VREG) &&
 241                 (!ISSET(cp->c_flag, C_DELETED)) &&
 242                 (!ISSET(cp->c_flag, C_NOEXISTS)) &&
 243                 (VTOF(vp)->ff_blocks) &&
 244                 (reclaim == 0)) {
 245                 /*
 246                  * Note that if content protection is enabled, then this is where we will
 247                  * attempt to issue IOs for all dirty regions of this file.
 248                  *
 249                  * If we're called from hfs_vnop_inactive, all this means is at the time
 250                  * the logic for deciding to call this function, there were not any lingering
 251                  * mmap/fd references for this file.  However, there is nothing preventing the system
 252                  * from creating a new reference in between the time that logic was checked
 253                  * and we entered hfs_vnop_inactive.  As a result, the only time we can guarantee
 254                  * that there aren't any references is during vnop_reclaim.
 255                  */
 256                 hfs_filedone(vp, ctx);
 257         }
 258
 259         /*
 260          * We're holding the cnode lock now.  Stall behind any shadow BPs that may
 261          * be involved with this vnode if it is a symlink.  We don't want to allow
 262          * the blocks that we're about to release to be put back into the pool if there
 263          * is pending I/O to them.
 264          */
 265         if (v_type == VLNK) {
 266                 /*
 267                  * This will block if the asynchronous journal flush is in progress.
 268                  * If this symlink is not being renamed over and doesn't have any open FDs,
 269                  * then we'll remove it from the journal's bufs below in kill_block.
 270                  */
 271                 buf_wait_for_shadow_io (vp, 0);
 272         }
 273
 274         /*
 275          * Remove any directory hints or cached origins
 276          */
 277         if (v_type == VDIR) {
 278                 hfs_reldirhints(cp, 0);
 279         }
 280         if (cp->c_flag & C_HARDLINK) {
 281                 hfs_relorigins(cp);
 282         }
 283
 284         /*
 285          * This check is slightly complicated.  We should only truncate data
 286          * in very specific cases for open-unlinked files.  This is because
 287          * we want to ensure that the resource fork continues to be available
 288          * if the caller has the data fork open.  However, this is not symmetric;
 289          * someone who has the resource fork open need not be able to access the data
 290          * fork once the data fork has gone inactive.
 291          *
 292          * If we're the last fork, then we have cleaning up to do.
 293          *
 294          * A) last fork, and vp == c_vp
 295          *      Truncate away own fork data. If rsrc fork is not in core, truncate it too.
 296          *
 297          * B) last fork, and vp == c_rsrc_vp
 298          *      Truncate ourselves, assume data fork has been cleaned due to C).
 299          *
 300          * If we're not the last fork, then things are a little different:
 301          *
 302          * C) not the last fork, vp == c_vp
 303          *      Truncate ourselves.  Once the file has gone out of the namespace,
 304          *      it cannot be further opened.  Further access to the rsrc fork may
 305          *      continue, however.
 306          *
 307          * D) not the last fork, vp == c_rsrc_vp
 308          *      Don't enter the block below, just clean up vnode and push it out of core.
 309          */
 310
 311         if ((v_type == VREG || v_type == VLNK) &&
 312                 (cp->c_flag & C_DELETED) &&
 313                 ((forkcount == 1) || (!VNODE_IS_RSRC(vp)))) {
 314
 315                 /* Truncate away our own fork data. (Case A, B, C above) */
 316                 if (VTOF(vp)->ff_blocks != 0) {
 317
 318                         /*
 319                          * SYMLINKS only:
 320                          *
 321                          * Encapsulate the entire change (including truncating the link) in
 322                          * nested transactions if we are modifying a symlink, because we know that its
 323                          * file length will be at most 4k, and we can fit both the truncation and
 324                          * any relevant bitmap changes into a single journal transaction.  We also want
 325                          * the kill_block code to execute in the same transaction so that any dirty symlink
 326                          * blocks will not be written. Otherwise, rely on
 327                          * hfs_truncate doing its own transactions to ensure that we don't blow up
 328                          * the journal.
 329                          */
 330                         if ((started_tr == 0) && (v_type == VLNK)) {
 331                                 if (hfs_start_transaction(hfsmp) != 0) {
 332                                         error = EINVAL;
 333                                         goto out;
 334                                 }
 335                                 else {
 336                                         started_tr = 1;
 337                                 }
 338                         }
 339
 340                         /*
 341                          * At this point, we have decided that this cnode is
 342                          * suitable for full removal.  We are about to deallocate
 343                          * its blocks and remove its entry from the catalog.
 344                          * If it was a symlink, then it's possible that the operation
 345                          * which created it is still in the current transaction group
 346                          * due to coalescing.  Take action here to kill the data blocks
 347                          * of the symlink out of the journal before moving to
 348                          * deallocate the blocks.  We need to be in the middle of
 349                          * a transaction before calling buf_iterate like this.
 350                          *
 351                          * Note: we have to kill any potential symlink buffers out of
 352                          * the journal prior to deallocating their blocks.  This is so
 353                          * that we don't race with another thread that may be doing an
 354                          * an allocation concurrently and pick up these blocks. It could
 355                          * generate I/O against them which could go out ahead of our journal
 356                          * transaction.
 357                          */
 358
 359                         if (hfsmp->jnl && vnode_islnk(vp)) {
 360                                 buf_iterate(vp, hfs_removefile_callback, BUF_SKIP_NONLOCKED, (void *)hfsmp);
 361                         }
 362
 363                         /*
 364                          * This truncate call (and the one below) is fine from VNOP_RECLAIM's
 365                          * context because we're only removing blocks, not zero-filling new
 366                          * ones.  The C_DELETED check above makes things much simpler.
 367                          */
 368                         error = hfs_truncate(vp, (off_t)0, IO_NDELAY, 0, 0, ctx);
 369                         if (error) {
 370                                 goto out;
 371                         }
 372                         truncated = 1;
 373
 374                         /* (SYMLINKS ONLY): Close/End our transaction after truncating the file record */
 375                         if (started_tr) {
 376                                 hfs_end_transaction(hfsmp);
 377                                 started_tr = 0;
 378                         }
 379                 }
 380
 381                 /*
 382                  * Truncate away the resource fork, if we represent the data fork and
 383                  * it is the last fork.  That means, by definition, the rsrc fork is not in
 384                  * core.  To avoid bringing a vnode into core for the sole purpose of deleting the
 385                  * data in the resource fork, we call cat_lookup directly, then hfs_release_storage
 386                  * to get rid of the resource fork's data. Note that because we are holding the
 387                  * cnode lock, it is impossible for a competing thread to create the resource fork
 388                  * vnode from underneath us while we do this.
 389                  *
 390                  * This is invoked via case A above only.
 391                  */
 392                 if ((cp->c_blocks > 0) && (forkcount == 1) && (vp != cp->c_rsrc_vp)) {
 393                         struct cat_lookup_buffer *lookup_rsrc = NULL;
 394                         struct cat_desc *desc_ptr = NULL;
 395                         lockflags = 0;
 396
 397                         MALLOC(lookup_rsrc, struct cat_lookup_buffer*, sizeof (struct cat_lookup_buffer), M_TEMP, M_WAITOK);
 398                         if (lookup_rsrc == NULL) {
 399                                 printf("hfs_cnode_teardown: ENOMEM from MALLOC\n");
 400                                 error = ENOMEM;
 401                                 goto out;
 402                         }
 403                         else {
 404                                 bzero (lookup_rsrc, sizeof (struct cat_lookup_buffer));
 405                         }
 406
 407                         if (cp->c_desc.cd_namelen == 0) {
 408                                 /* Initialize the rsrc descriptor for lookup if necessary*/
 409                                 MAKE_DELETED_NAME (lookup_rsrc->lookup_name, HFS_TEMPLOOKUP_NAMELEN, cp->c_fileid);
 410
 411                                 lookup_rsrc->lookup_desc.cd_nameptr = (const uint8_t*) lookup_rsrc->lookup_name;
 412                                 lookup_rsrc->lookup_desc.cd_namelen = strlen (lookup_rsrc->lookup_name);
 413                                 lookup_rsrc->lookup_desc.cd_parentcnid = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid;
 414                                 lookup_rsrc->lookup_desc.cd_cnid = cp->c_cnid;
 415
 416                                 desc_ptr = &lookup_rsrc->lookup_desc;
 417                         }
 418                         else {
 419                                 desc_ptr = &cp->c_desc;
 420                         }
 421
 422                         lockflags = hfs_systemfile_lock (hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
 423
 424                         error = cat_lookup (hfsmp, desc_ptr, 1, (struct cat_desc *) NULL,
 425                                         (struct cat_attr*) NULL, &lookup_rsrc->lookup_fork.ff_data, NULL);
 426
 427                         hfs_systemfile_unlock (hfsmp, lockflags);
 428
 429                         if (error) {
 430                                 FREE (lookup_rsrc, M_TEMP);
 431                                 goto out;
 432                         }
 433
 434                         /*
 435                          * Make the filefork in our temporary struct look like a real
 436                          * filefork.  Fill in the cp, sysfileinfo and rangelist fields..
 437                          */
 438                         rl_init (&lookup_rsrc->lookup_fork.ff_invalidranges);
 439                         lookup_rsrc->lookup_fork.ff_cp = cp;
 440
 441                         /*
 442                          * If there were no errors, then we have the catalog's fork information
 443                          * for the resource fork in question.  Go ahead and delete the data in it now.
 444                          */
 445
 446                         error = hfs_release_storage (hfsmp, NULL, &lookup_rsrc->lookup_fork, cp->c_fileid);
 447                         FREE(lookup_rsrc, M_TEMP);
 448
 449                         if (error) {
 450                                 goto out;
 451                         }
 452
 453                         /*
 454                          * This fileid's resource fork extents have now been fully deleted on-disk
 455                          * and this CNID is no longer valid. At this point, we should be able to
 456                          * zero out cp->c_blocks to indicate there is no data left in this file.
 457                          */
 458                         cp->c_blocks = 0;
 459                 }
 460         }
 461
 462         /*
 463          * If we represent the last fork (or none in the case of a dir),
 464          * and the cnode has become open-unlinked,
 465          * AND it has EA's, then we need to get rid of them.
 466          *
 467          * Note that this must happen outside of any other transactions
 468          * because it starts/ends its own transactions and grabs its
 469          * own locks.  This is to prevent a file with a lot of attributes
 470          * from creating a transaction that is too large (which panics).
 471          */
 472     if ((cp->c_attr.ca_recflags & kHFSHasAttributesMask) != 0 &&
 473                 (cp->c_flag & C_DELETED) &&
 474                 (forkcount <= 1)) {
 475
 476         ea_error = hfs_removeallattr(hfsmp, cp->c_fileid);
 477     }
 478
 479
 480         /*
 481          * If the cnode represented an open-unlinked file, then now
 482          * actually remove the cnode's catalog entry and release all blocks
 483          * it may have been using.
 484          */
 485     if ((cp->c_flag & C_DELETED) && (forkcount <= 1)) {
 486         /*
 487          * Mark cnode in transit so that no one can get this
 488          * cnode from cnode hash.
 489          */
 490                 // hfs_chash_mark_in_transit(hfsmp, cp);
 491                 // XXXdbg - remove the cnode from the hash table since it's deleted
 492                 //          otherwise someone could go to sleep on the cnode and not
 493                 //          be woken up until this vnode gets recycled which could be
 494                 //          a very long time...
 495         hfs_chashremove(hfsmp, cp);
 496
 497         cp->c_flag |= C_NOEXISTS;   // XXXdbg
 498         cp->c_rdev = 0;
 499
 500         if (started_tr == 0) {
 501             if (hfs_start_transaction(hfsmp) != 0) {
 502                                 error = EINVAL;
 503                                 goto out;
 504             }
 505             started_tr = 1;
 506         }
 507
 508         /*
 509          * Reserve some space in the Catalog file.
 510          */
 511         if ((error = cat_preflight(hfsmp, CAT_DELETE, &cookie, p))) {
 512             goto out;
 513         }
 514         cat_reserve = 1;
 515
 516         lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE, HFS_EXCLUSIVE_LOCK);
 517
 518         if (cp->c_blocks > 0) {
 519             printf("hfs_inactive: deleting non-empty%sfile %d, "
 520                    "blks %d\n", VNODE_IS_RSRC(vp) ? " rsrc " : " ",
 521                    (int)cp->c_fileid, (int)cp->c_blocks);
 522         }
 523
 524                 //
 525         // release the name pointer in the descriptor so that
 526         // cat_delete() will use the file-id to do the deletion.
 527         // in the case of hard links this is imperative (in the
 528         // case of regular files the fileid and cnid are the
 529         // same so it doesn't matter).
 530         //
 531         cat_releasedesc(&cp->c_desc);
 532
 533         /*
 534          * The descriptor name may be zero,
 535          * in which case the fileid is used.
 536          */
 537         error = cat_delete(hfsmp, &cp->c_desc, &cp->c_attr);
 538
 539         if (error && truncated && (error != ENXIO))
 540             printf("hfs_inactive: couldn't delete a truncated file!");
 541
 542         /* Update HFS Private Data dir */
 543         if (error == 0) {
 544             hfsmp->hfs_private_attr[FILE_HARDLINKS].ca_entries--;
 545             if (vnode_isdir(vp)) {
 546                 DEC_FOLDERCOUNT(hfsmp, hfsmp->hfs_private_attr[FILE_HARDLINKS]);
 547             }
 548             (void)cat_update(hfsmp, &hfsmp->hfs_private_desc[FILE_HARDLINKS],
 549                                                          &hfsmp->hfs_private_attr[FILE_HARDLINKS], NULL, NULL);
 550         }
 551
 552         hfs_systemfile_unlock(hfsmp, lockflags);
 553
 554         if (error) {
 555                         goto out;
 556                 }
 557
 558 #if QUOTA
 559         if (hfsmp->hfs_flags & HFS_QUOTAS)
 560             (void)hfs_chkiq(cp, -1, NOCRED, 0);
 561 #endif /* QUOTA */
 562
 563         /* Already set C_NOEXISTS at the beginning of this block */
 564         cp->c_flag &= ~C_DELETED;
 565         cp->c_touch_chgtime = TRUE;
 566         cp->c_touch_modtime = TRUE;
 567
 568         if (error == 0)
 569             hfs_volupdate(hfsmp, (v_type == VDIR) ? VOL_RMDIR : VOL_RMFILE, 0);
 570     }
 571
 572         /*
 573      * A file may have had delayed allocations, in which case hfs_update
 574      * would not have updated the catalog record (cat_update).  We need
 575      * to do that now, before we lose our fork data.  We also need to
 576      * force the update, or hfs_update will again skip the cat_update.
 577          *
 578          * If the file has C_NOEXISTS set, then we can skip the hfs_update call
 579          * because the catalog entry has already been removed.  There would be no point
 580      * to looking up the entry in the catalog to modify it when we already know it's gone
 581          */
 582     if ((!ISSET(cp->c_flag, C_NOEXISTS)) &&
 583                 ((cp->c_flag & C_MODIFIED) || cp->c_touch_acctime ||
 584                  cp->c_touch_chgtime || cp->c_touch_modtime)) {
 585
 586                         if ((cp->c_flag & C_MODIFIED) || cp->c_touch_modtime){
 587                                 cp->c_flag |= C_FORCEUPDATE;
 588                         }
 589                         hfs_update(vp, 0);
 590                 }
 591
 592 out:
 593     if (cat_reserve)
 594         cat_postflight(hfsmp, &cookie, p);
 595
 596     // XXXdbg - have to do this because a goto could have come here
 597     if (started_tr) {
 598         hfs_end_transaction(hfsmp);
 599         started_tr = 0;
 600     }
 601
 602 #if 0
 603 #if CONFIG_PROTECT
 604         /*
 605          * cnode truncate lock and cnode lock are both held exclusive here.
 606          *
 607          * Go ahead and flush the keys out if this cnode is the last fork
 608          * and it is not class F.  Class F keys should not be purged because they only
 609          * exist in memory and have no persistent keys.  Only do this
 610          * if we haven't already done it yet (maybe a vnode skipped inactive
 611          * and went straight to reclaim).  This function gets called from both reclaim and
 612          * inactive, so it will happen first in inactive if possible.
 613          *
 614          * We need to be mindful that all pending IO for this file has already been
 615          * issued and completed before we bzero out the key.  This is because
 616          * if it isn't, tossing the key here could result in garbage IO being
 617          * written (by using the bzero'd key) if the writes are happening asynchronously.
 618          *
 619          * In addition, class A files may have already been purged due to the
 620          * lock event occurring.
 621          */
 622         if (forkcount == 1) {
 623                 struct cprotect *entry = cp->c_cpentry;
 624                 if ((entry) && (entry->cp_pclass != PROTECTION_CLASS_F)) {
 625                         if ((cp->c_cpentry->cp_flags & CP_KEY_FLUSHED) == 0) {
 626                                 cp->c_cpentry->cp_flags |= CP_KEY_FLUSHED;
 627                                 bzero (cp->c_cpentry->cp_cache_key, cp->c_cpentry->cp_cache_key_len);
 628                                 bzero (cp->c_cpentry->cp_cache_iv_ctx, sizeof(aes_encrypt_ctx));
 629                         }
 630                 }
 631         }
 632 #endif
 633 #endif
 634
 635         return error;
 636 }
 637
 638
 639 /*
 640  * hfs_vnop_inactive
 641  *
 642  * The last usecount on the vnode has gone away, so we need to tear down
 643  * any remaining data still residing in the cnode.  If necessary, write out
 644  * remaining blocks or delete the cnode's entry in the catalog.
 645  */
 646 int
 647 hfs_vnop_inactive(struct vnop_inactive_args *ap)
 648 {
 649         struct vnode *vp = ap->a_vp;
 650         struct cnode *cp;
 651         struct hfsmount *hfsmp = VTOHFS(vp);
 652         struct proc *p = vfs_context_proc(ap->a_context);
 653         int error = 0;
 654         int took_trunc_lock = 0;
 655         enum vtype v_type;
 656
 657         v_type = vnode_vtype(vp);
 658         cp = VTOC(vp);
 659
 660         if ((hfsmp->hfs_flags & HFS_READ_ONLY) || vnode_issystem(vp) ||
 661             (hfsmp->hfs_freezing_proc == p)) {
 662                 error = 0;
 663                 goto inactive_done;
 664         }
 665
 666         /*
 667          * For safety, do NOT call vnode_recycle from inside this function.  This can cause
 668          * problems in the following scenario:
 669          *
 670          * vnode_create -> vnode_reclaim_internal -> vclean -> VNOP_INACTIVE
 671          *
 672          * If we're being invoked as a result of a reclaim that was already in-flight, then we
 673          * cannot call vnode_recycle again.  Being in reclaim means that there are no usecounts or
 674          * iocounts by definition.  As a result, if we were to call vnode_recycle, it would immediately
 675          * try to re-enter reclaim again and panic.
 676          *
 677          * Currently, there are three things that can cause us (VNOP_INACTIVE) to get called.
 678          * 1) last usecount goes away on the vnode (vnode_rele)
 679          * 2) last iocount goes away on a vnode that previously had usecounts but didn't have
 680          *              vnode_recycle called (vnode_put)
 681          * 3) vclean by way of reclaim
 682          *
 683          * In this function we would generally want to call vnode_recycle to speed things
 684          * along to ensure that we don't leak blocks due to open-unlinked files.  However, by
 685          * virtue of being in this function already, we can call hfs_cnode_teardown, which
 686          * will release blocks held by open-unlinked files, and mark them C_NOEXISTS so that
 687          * there's no entry in the catalog and no backing store anymore.  If that's the case,
 688          * then we really don't care all that much when the vnode actually goes through reclaim.
 689          * Further, the HFS VNOPs that manipulated the namespace in order to create the open-
 690          * unlinked file in the first place should have already called vnode_recycle on the vnode
 691          * to guarantee that it would go through reclaim in a speedy way.
 692          */
 693
 694         if (cp->c_flag & C_NOEXISTS) {
 695                 /*
 696                  * If the cnode has already had its cat entry removed, then
 697                  * just skip to the end. We don't need to do anything here.
 698                  */
 699                 error = 0;
 700                 goto inactive_done;
 701         }
 702
 703         if ((v_type == VREG || v_type == VLNK)) {
 704                 hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK);
 705                 took_trunc_lock = 1;
 706         }
 707
 708         (void) hfs_lock(cp, HFS_FORCE_LOCK);
 709
 710         /*
 711          * Call cnode_teardown to push out dirty blocks to disk, release open-unlinked
 712          * files' blocks from being in use, and move the cnode from C_DELETED to C_NOEXISTS.
 713          */
 714         error = hfs_cnode_teardown (vp, ap->a_context, 0);
 715
 716     /*
 717      * Drop the truncate lock before unlocking the cnode
 718      * (which can potentially perform a vnode_put and
 719      * recycle the vnode which in turn might require the
 720      * truncate lock)
 721      */
 722         if (took_trunc_lock) {
 723             hfs_unlock_truncate(cp, 0);
 724         }
 725
 726         hfs_unlock(cp);
 727
 728 inactive_done:
 729
 730         return error;
 731 }
 732
 733
 734 /*
 735  * File clean-up (zero fill and shrink peof).
 736  */
 737
 738 int
 739 hfs_filedone(struct vnode *vp, vfs_context_t context)
 740 {
 741         struct cnode *cp;
 742         struct filefork *fp;
 743         struct hfsmount *hfsmp;
 744         struct rl_entry *invalid_range;
 745         off_t leof;
 746         u_int32_t blks, blocksize;
 747         /* flags for zero-filling sparse ranges */
 748         int cluster_flags = IO_CLOSE;
 749         int cluster_zero_flags = IO_HEADZEROFILL | IO_NOZERODIRTY | IO_NOCACHE;
 750
 751         cp = VTOC(vp);
 752         fp = VTOF(vp);
 753         hfsmp = VTOHFS(vp);
 754         leof = fp->ff_size;
 755
 756         if ((hfsmp->hfs_flags & HFS_READ_ONLY) || (fp->ff_blocks == 0))
 757                 return (0);
 758
 759 #if CONFIG_PROTECT
 760         /*
 761          * Figure out if we need to do synchronous IO.
 762          *
 763          * If the file represents a content-protected file, we may need
 764          * to issue synchronous IO when we dispatch to the cluster layer.
 765          * If we didn't, then the IO would go out to the disk asynchronously.
 766          * If the vnode hits the end of inactive before getting reclaimed, the
 767          * content protection keys would be wiped/bzeroed out, and we'd end up
 768          * trying to issue the IO with an invalid key.  This will lead to file
 769          * corruption.  IO_SYNC will force the cluster_push to wait until all IOs
 770          * have completed (though they may be in the track cache).
 771          */
 772         if (cp_fs_protected(VTOVFS(vp))) {
 773                 cluster_flags |= IO_SYNC;
 774                 cluster_zero_flags |= IO_SYNC;
 775         }
 776 #endif
 777
 778         /*
 779          * If we are being invoked from F_SWAPDATAEXTENTS, then we
 780          * need to issue synchronous IO; Unless we are sure that all
 781          * of the data has been written to the disk, we won't know
 782          * that all of the blocks have been allocated properly.
 783          */
 784         if (cp->c_flag & C_SWAPINPROGRESS) {
 785                 cluster_flags |= IO_SYNC;
 786         }
 787
 788         hfs_unlock(cp);
 789         (void) cluster_push(vp, cluster_flags);
 790         hfs_lock(cp, HFS_FORCE_LOCK);
 791
 792         /*
 793          * Explicitly zero out the areas of file
 794          * that are currently marked invalid.
 795          */
 796         while ((invalid_range = TAILQ_FIRST(&fp->ff_invalidranges))) {
 797                 off_t start = invalid_range->rl_start;
 798                 off_t end = invalid_range->rl_end;
 799
 800                 /* The range about to be written must be validated
 801                  * first, so that VNOP_BLOCKMAP() will return the
 802                  * appropriate mapping for the cluster code:
 803                  */
 804                 rl_remove(start, end, &fp->ff_invalidranges);
 805
 806                 hfs_unlock(cp);
 807                 (void) cluster_write(vp, (struct uio *) 0,
 808                                      leof, end + 1, start, (off_t)0, cluster_zero_flags);
 809                 hfs_lock(cp, HFS_FORCE_LOCK);
 810                 cp->c_flag |= C_MODIFIED;
 811         }
 812         cp->c_flag &= ~C_ZFWANTSYNC;
 813         cp->c_zftimeout = 0;
 814         blocksize = VTOVCB(vp)->blockSize;
 815         blks = leof / blocksize;
 816         if (((off_t)blks * (off_t)blocksize) != leof)
 817                 blks++;
 818         /*
 819          * Shrink the peof to the smallest size neccessary to contain the leof.
 820          */
 821         if (blks < fp->ff_blocks) {
 822                 (void) hfs_truncate(vp, leof, IO_NDELAY, 0, 0, context);
 823         }
 824
 825         hfs_unlock(cp);
 826         (void) cluster_push(vp, cluster_flags);
 827         hfs_lock(cp, HFS_FORCE_LOCK);
 828
 829         /*
 830          * If the hfs_truncate didn't happen to flush the vnode's
 831          * information out to disk, force it to be updated now that
 832          * all invalid ranges have been zero-filled and validated:
 833          */
 834         if (cp->c_flag & C_MODIFIED) {
 835                 hfs_update(vp, 0);
 836         }
 837         return (0);
 838 }
 839
 840
 841 /*
 842  * Reclaim a cnode so that it can be used for other purposes.
 843  */
 844 int
 845 hfs_vnop_reclaim(struct vnop_reclaim_args *ap)
 846 {
 847         struct vnode *vp = ap->a_vp;
 848         struct cnode *cp;
 849         struct filefork *fp = NULL;
 850         struct filefork *altfp = NULL;
 851         struct hfsmount *hfsmp = VTOHFS(vp);
 852         vfs_context_t ctx = ap->a_context;
 853         int reclaim_cnode = 0;
 854         int err = 0;
 855         enum vtype v_type;
 856
 857         v_type = vnode_vtype(vp);
 858         cp = VTOC(vp);
 859
 860         /*
 861          * We don't take the truncate lock since by the time reclaim comes along,
 862          * all dirty pages have been synced and nobody should be competing
 863          * with us for this thread.
 864          */
 865         (void) hfs_lock (cp, HFS_FORCE_LOCK);
 866
 867         /*
 868          * Sync to disk any remaining data in the cnode/vnode.  This includes
 869          * a call to hfs_update if the cnode has outbound data.
 870          *
 871          * If C_NOEXISTS is set on the cnode, then there's nothing teardown needs to do
 872          * because the catalog entry for this cnode is already gone.
 873          */
 874         if (!ISSET(cp->c_flag, C_NOEXISTS)) {
 875                 err = hfs_cnode_teardown(vp, ctx, 1);
 876         }
 877
 878         /*
 879          * Keep track of an inactive hot file.
 880          */
 881         if (!vnode_isdir(vp) &&
 882             !vnode_issystem(vp) &&
 883             !(cp->c_flag & (C_DELETED | C_NOEXISTS)) ) {
 884                 (void) hfs_addhotfile(vp);
 885         }
 886         vnode_removefsref(vp);
 887
 888         /*
 889          * Find file fork for this vnode (if any)
 890          * Also check if another fork is active
 891          */
 892         if (cp->c_vp == vp) {
 893                 fp = cp->c_datafork;
 894                 altfp = cp->c_rsrcfork;
 895
 896                 cp->c_datafork = NULL;
 897                 cp->c_vp = NULL;
 898         } else if (cp->c_rsrc_vp == vp) {
 899                 fp = cp->c_rsrcfork;
 900                 altfp = cp->c_datafork;
 901
 902                 cp->c_rsrcfork = NULL;
 903                 cp->c_rsrc_vp = NULL;
 904         } else {
 905                 panic("hfs_vnop_reclaim: vp points to wrong cnode (vp=%p cp->c_vp=%p cp->c_rsrc_vp=%p)\n", vp, cp->c_vp, cp->c_rsrc_vp);
 906         }
 907         /*
 908          * On the last fork, remove the cnode from its hash chain.
 909          */
 910         if (altfp == NULL) {
 911                 /* If we can't remove it then the cnode must persist! */
 912                 if (hfs_chashremove(hfsmp, cp) == 0)
 913                         reclaim_cnode = 1;
 914                 /*
 915                  * Remove any directory hints
 916                  */
 917                 if (vnode_isdir(vp)) {
 918                         hfs_reldirhints(cp, 0);
 919                 }
 920
 921                 if(cp->c_flag & C_HARDLINK) {
 922                         hfs_relorigins(cp);
 923                 }
 924         }
 925         /* Release the file fork and related data */
 926         if (fp) {
 927                 /* Dump cached symlink data */
 928                 if (vnode_islnk(vp) && (fp->ff_symlinkptr != NULL)) {
 929                         FREE(fp->ff_symlinkptr, M_TEMP);
 930                 }
 931                 FREE_ZONE(fp, sizeof(struct filefork), M_HFSFORK);
 932         }
 933
 934         /*
 935          * If there was only one active fork then we can release the cnode.
 936          */
 937         if (reclaim_cnode) {
 938                 hfs_chashwakeup(hfsmp, cp, H_ALLOC | H_TRANSIT);
 939                 hfs_reclaim_cnode(cp);
 940         }
 941         else  {
 942                 /*
 943                  * cnode in use.  If it is a directory, it could have
 944                  * no live forks. Just release the lock.
 945                  */
 946                 hfs_unlock(cp);
 947         }
 948
 949         vnode_clearfsnode(vp);
 950         return (0);
 951 }
 952
 953
/*
 * Vnode operation vectors declared here and defined elsewhere.
 * hfs_getnewvnode() picks one of them for each new vnode based on the
 * vnode type and on whether the mount is HFS standard.
 */
extern int (**hfs_vnodeop_p) (void *);
extern int (**hfs_std_vnodeop_p) (void *);
extern int (**hfs_specop_p)  (void *);
#if FIFO
extern int (**hfs_fifoop_p)  (void *);
#endif
 960
 961 /*
 962  * hfs_getnewvnode - get new default vnode
 963  *
 964  * The vnode is returned with an iocount and the cnode locked
 965  */
 966 int
 967 hfs_getnewvnode(
 968         struct hfsmount *hfsmp,
 969         struct vnode *dvp,
 970         struct componentname *cnp,
 971         struct cat_desc *descp,
 972         int flags,
 973         struct cat_attr *attrp,
 974         struct cat_fork *forkp,
 975         struct vnode **vpp,
 976         int *out_flags)
 977 {
 978         struct mount *mp = HFSTOVFS(hfsmp);
 979         struct vnode *vp = NULL;
 980         struct vnode **cvpp;
 981         struct vnode *tvp = NULLVP;
 982         struct cnode *cp = NULL;
 983         struct filefork *fp = NULL;
 984         int hfs_standard = 0;
 985         int retval;
 986         int issystemfile;
 987         int wantrsrc;
 988         int hflags = 0;
 989         struct vnode_fsparam vfsp;
 990         enum vtype vtype;
 991 #if QUOTA
 992         int i;
 993 #endif /* QUOTA */
 994
 995         hfs_standard = (hfsmp->hfs_flags & HFS_STANDARD);
 996
 997         if (attrp->ca_fileid == 0) {
 998                 *vpp = NULL;
 999                 return (ENOENT);
1000         }
1001
1002 #if !FIFO
1003         if (IFTOVT(attrp->ca_mode) == VFIFO) {
1004                 *vpp = NULL;
1005                 return (ENOTSUP);
1006         }
1007 #endif /* !FIFO */
1008         vtype = IFTOVT(attrp->ca_mode);
1009         issystemfile = (descp->cd_flags & CD_ISMETA) && (vtype == VREG);
1010         wantrsrc = flags & GNV_WANTRSRC;
1011
1012         /* Sanity check the vtype and mode */
1013         if (vtype == VBAD) {
1014                 /* Mark the FS as corrupt and bail out */
1015                 hfs_mark_volume_inconsistent(hfsmp);
1016                 return (EINVAL);
1017         }
1018
1019         /* Zero out the out_flags */
1020         *out_flags = 0;
1021
1022 #ifdef HFS_CHECK_LOCK_ORDER
1023         /*
1024          * The only case were its permissible to hold the parent cnode
1025          * lock is during a create operation (hfs_makenode) or when
1026          * we don't need the cnode lock (GNV_SKIPLOCK).
1027          */
1028         if ((dvp != NULL) &&
1029             (flags & (GNV_CREATE | GNV_SKIPLOCK)) == 0 &&
1030             VTOC(dvp)->c_lockowner == current_thread()) {
1031                 panic("hfs_getnewvnode: unexpected hold of parent cnode %p", VTOC(dvp));
1032         }
1033 #endif /* HFS_CHECK_LOCK_ORDER */
1034
1035         /*
1036          * Get a cnode (new or existing)
1037          */
1038         cp = hfs_chash_getcnode(hfsmp, attrp->ca_fileid, vpp, wantrsrc,
1039                                                         (flags & GNV_SKIPLOCK), out_flags, &hflags);
1040
1041         /*
1042          * If the id is no longer valid for lookups we'll get back a NULL cp.
1043          */
1044         if (cp == NULL) {
1045                 return (ENOENT);
1046         }
1047
1048         /*
1049          * If we get a cnode/vnode pair out of hfs_chash_getcnode, then update the
1050          * descriptor in the cnode as needed if the cnode represents a hardlink.
1051          * We want the caller to get the most up-to-date copy of the descriptor
1052          * as possible. However, we only do anything here if there was a valid vnode.
1053          * If there isn't a vnode, then the cnode is brand new and needs to be initialized
1054          * as it doesn't have a descriptor or cat_attr yet.
1055          *
1056          * If we are about to replace the descriptor with the user-supplied one, then validate
1057          * that the descriptor correctly acknowledges this item is a hardlink.  We could be
1058          * subject to a race where the calling thread invoked cat_lookup, got a valid lookup
1059          * result but the file was not yet a hardlink. With sufficient delay between there
1060          * and here, we might accidentally copy in the raw inode ID into the descriptor in the
1061          * call below.  If the descriptor's CNID is the same as the fileID then it must
1062          * not yet have been a hardlink when the lookup occurred.
1063          */
1064
1065         if (!(hfs_checkdeleted(cp))) {
1066                 if ((cp->c_flag & C_HARDLINK) && descp->cd_nameptr && descp->cd_namelen > 0) {
1067                         /* If cnode is uninitialized, its c_attr will be zeroed out; cnids wont match. */
1068                         if ((descp->cd_cnid == cp->c_attr.ca_fileid)  &&
1069                                         (attrp->ca_linkcount != cp->c_attr.ca_linkcount)){
1070                                 if ((flags & GNV_SKIPLOCK) == 0) {
1071                                         /*
1072                                          * Then we took the lock. Drop it before calling
1073                                          * vnode_put, which may invoke hfs_vnop_inactive and need to take
1074                                          * the cnode lock again.
1075                                          */
1076                                         hfs_unlock(cp);
1077                                 }
1078
1079                                 /*
1080                                  * Emit ERECYCLE and GNV_CAT_ATTRCHANGED to
1081                                  * force a re-drive in the lookup routine.
1082                                  * Drop the iocount on the vnode obtained from
1083                                  * chash_getcnode if needed.
1084                                  */
1085                                 if (*vpp != NULL) {
1086                                         vnode_put (*vpp);
1087                                         *vpp = NULL;
1088                                 }
1089
1090                                 /*
1091                                  * If we raced with VNOP_RECLAIM for this vnode, the hash code could
1092                                  * have observed it after the c_vp or c_rsrc_vp fields had been torn down;
1093                                  * the hash code peeks at those fields without holding the cnode lock because
1094                                  * it needs to be fast.  As a result, we may have set H_ATTACH in the chash
1095                                  * call above.  Since we're bailing out, unset whatever flags we just set, and
1096                                  * wake up all waiters for this cnode.
1097                                  */
1098                                 if (hflags) {
1099                                         hfs_chashwakeup(hfsmp, cp, hflags);
1100                                 }
1101
1102                                 *out_flags = GNV_CAT_ATTRCHANGED;
1103                                 return ERECYCLE;
1104                         }
1105                         else {
1106                                 /*
1107                                  * Otherwise, CNID != fileid. Go ahead and copy in the new descriptor.
1108                                  *
1109                                  * Replacing the descriptor here is fine because we looked up the item without
1110                                  * a vnode in hand before.  If a vnode existed, its identity must be attached to this
1111                                  * item.  We are not susceptible to the lookup fastpath issue at this point.
1112                                  */
1113                                 replace_desc(cp, descp);
1114                         }
1115                 }
1116         }
1117
1118         /* Check if we found a matching vnode */
1119         if (*vpp != NULL) {
1120                 return (0);
1121         }
1122
1123         /*
1124          * If this is a new cnode then initialize it.
1125          */
1126         if (ISSET(cp->c_hflag, H_ALLOC)) {
1127                 lck_rw_init(&cp->c_truncatelock, hfs_rwlock_group, hfs_lock_attr);
1128 #if HFS_COMPRESSION
1129                 cp->c_decmp = NULL;
1130 #endif
1131
1132                 /* Make sure its still valid (ie exists on disk). */
1133                 if (!(flags & GNV_CREATE)) {
1134                         int error = 0;
1135                         if (!hfs_valid_cnode (hfsmp, dvp, (wantrsrc ? NULL : cnp), cp->c_fileid, attrp, &error)) {
1136                                 hfs_chash_abort(hfsmp, cp);
1137                                 hfs_reclaim_cnode(cp);
1138                                 *vpp = NULL;
1139                                 /*
1140                                  * If we hit this case, that means that the entry was there in the catalog when
1141                                  * we did a cat_lookup earlier.  Think hfs_lookup.  However, in between the time
1142                                  * that we checked the catalog and the time we went to get a vnode/cnode for it,
1143                                  * it had been removed from the namespace and the vnode totally reclaimed.  As a result,
1144                                  * it's not there in the catalog during the check in hfs_valid_cnode and we bubble out
1145                                  * an ENOENT.  To indicate to the caller that they should really double-check the
1146                                  * entry (it could have been renamed over and gotten a new fileid), we mark a bit
1147                                  * in the output flags.
1148                                  */
1149                                 if (error == ENOENT) {
1150                                         *out_flags = GNV_CAT_DELETED;
1151                                         return ENOENT;
1152                                 }
1153
1154                                 /*
1155                                  * Also, we need to protect the cat_attr acquired during hfs_lookup and passed into
1156                                  * this function as an argument because the catalog may have changed w.r.t hardlink
1157                                  * link counts and the firstlink field.  If that validation check fails, then let
1158                                  * lookup re-drive itself to get valid/consistent data with the same failure condition below.
1159                                  */
1160                                 if (error == ERECYCLE) {
1161                                         *out_flags = GNV_CAT_ATTRCHANGED;
1162                                         return (ERECYCLE);
1163                                 }
1164                         }
1165                 }
1166                 bcopy(attrp, &cp->c_attr, sizeof(struct cat_attr));
1167                 bcopy(descp, &cp->c_desc, sizeof(struct cat_desc));
1168
1169                 /* The name was inherited so clear descriptor state... */
1170                 descp->cd_namelen = 0;
1171                 descp->cd_nameptr = NULL;
1172                 descp->cd_flags &= ~CD_HASBUF;
1173
1174                 /* Tag hardlinks */
1175                 if ((vtype == VREG || vtype == VDIR) &&
1176                     ((descp->cd_cnid != attrp->ca_fileid) ||
1177                      (attrp->ca_recflags & kHFSHasLinkChainMask))) {
1178                         cp->c_flag |= C_HARDLINK;
1179                 }
1180                 /*
1181                  * Fix-up dir link counts.
1182                  *
1183                  * Earlier versions of Leopard used ca_linkcount for posix
1184                  * nlink support (effectively the sub-directory count + 2).
1185                  * That is now accomplished using the ca_dircount field with
1186                  * the corresponding kHFSHasFolderCountMask flag.
1187                  *
1188                  * For directories the ca_linkcount is the true link count,
1189                  * tracking the number of actual hardlinks to a directory.
1190                  *
1191                  * We only do this if the mount has HFS_FOLDERCOUNT set;
1192                  * at the moment, we only set that for HFSX volumes.
1193                  */
1194                 if ((hfsmp->hfs_flags & HFS_FOLDERCOUNT) &&
1195                     (vtype == VDIR) &&
1196                     !(attrp->ca_recflags & kHFSHasFolderCountMask) &&
1197                     (cp->c_attr.ca_linkcount > 1)) {
1198                         if (cp->c_attr.ca_entries == 0)
1199                                 cp->c_attr.ca_dircount = 0;
1200                         else
1201                                 cp->c_attr.ca_dircount = cp->c_attr.ca_linkcount - 2;
1202
1203                         cp->c_attr.ca_linkcount = 1;
1204                         cp->c_attr.ca_recflags |= kHFSHasFolderCountMask;
1205                         if ( !(hfsmp->hfs_flags & HFS_READ_ONLY) )
1206                                 cp->c_flag |= C_MODIFIED;
1207                 }
1208 #if QUOTA
1209                 if (hfsmp->hfs_flags & HFS_QUOTAS) {
1210                         for (i = 0; i < MAXQUOTAS; i++)
1211                                 cp->c_dquot[i] = NODQUOT;
1212                 }
1213 #endif /* QUOTA */
1214                 /* Mark the output flag that we're vending a new cnode */
1215                 *out_flags |= GNV_NEW_CNODE;
1216         }
1217
1218         if (vtype == VDIR) {
1219                 if (cp->c_vp != NULL)
1220                         panic("hfs_getnewvnode: orphaned vnode (data)");
1221                 cvpp = &cp->c_vp;
1222         } else {
1223                 if (forkp && attrp->ca_blocks < forkp->cf_blocks)
1224                         panic("hfs_getnewvnode: bad ca_blocks (too small)");
1225                 /*
1226                  * Allocate and initialize a file fork...
1227                  */
1228                 MALLOC_ZONE(fp, struct filefork *, sizeof(struct filefork),
1229                         M_HFSFORK, M_WAITOK);
1230                 fp->ff_cp = cp;
1231                 if (forkp)
1232                         bcopy(forkp, &fp->ff_data, sizeof(struct cat_fork));
1233                 else
1234                         bzero(&fp->ff_data, sizeof(struct cat_fork));
1235                 rl_init(&fp->ff_invalidranges);
1236                 fp->ff_sysfileinfo = 0;
1237
1238                 if (wantrsrc) {
1239                         if (cp->c_rsrcfork != NULL)
1240                                 panic("hfs_getnewvnode: orphaned rsrc fork");
1241                         if (cp->c_rsrc_vp != NULL)
1242                                 panic("hfs_getnewvnode: orphaned vnode (rsrc)");
1243                         cp->c_rsrcfork = fp;
1244                         cvpp = &cp->c_rsrc_vp;
1245                         if ( (tvp = cp->c_vp) != NULLVP )
1246                                 cp->c_flag |= C_NEED_DVNODE_PUT;
1247                 } else {
1248                         if (cp->c_datafork != NULL)
1249                                 panic("hfs_getnewvnode: orphaned data fork");
1250                         if (cp->c_vp != NULL)
1251                                 panic("hfs_getnewvnode: orphaned vnode (data)");
1252                         cp->c_datafork = fp;
1253                         cvpp = &cp->c_vp;
1254                         if ( (tvp = cp->c_rsrc_vp) != NULLVP)
1255                                 cp->c_flag |= C_NEED_RVNODE_PUT;
1256                 }
1257         }
1258         if (tvp != NULLVP) {
1259                 /*
1260                  * grab an iocount on the vnode we weren't
1261                  * interested in (i.e. we want the resource fork
1262                  * but the cnode already has the data fork)
1263                  * to prevent it from being
1264                  * recycled by us when we call vnode_create
1265                  * which will result in a deadlock when we
1266                  * try to take the cnode lock in hfs_vnop_fsync or
1267                  * hfs_vnop_reclaim... vnode_get can be called here
1268                  * because we already hold the cnode lock which will
1269                  * prevent the vnode from changing identity until
1270                  * we drop it.. vnode_get will not block waiting for
1271                  * a change of state... however, it will return an
1272                  * error if the current iocount == 0 and we've already
1273                  * started to terminate the vnode... we don't need/want to
1274                  * grab an iocount in the case since we can't cause
1275                  * the fileystem to be re-entered on this thread for this vp
1276                  *
1277                  * the matching vnode_put will happen in hfs_unlock
1278                  * after we've dropped the cnode lock
1279                  */
1280                 if ( vnode_get(tvp) != 0)
1281                         cp->c_flag &= ~(C_NEED_RVNODE_PUT | C_NEED_DVNODE_PUT);
1282         }
1283         vfsp.vnfs_mp = mp;
1284         vfsp.vnfs_vtype = vtype;
1285         vfsp.vnfs_str = "hfs";
1286         if ((cp->c_flag & C_HARDLINK) && (vtype == VDIR)) {
1287                 vfsp.vnfs_dvp = NULL;  /* no parent for me! */
1288                 vfsp.vnfs_cnp = NULL;  /* no name for me! */
1289         } else {
1290                 vfsp.vnfs_dvp = dvp;
1291                 vfsp.vnfs_cnp = cnp;
1292         }
1293         vfsp.vnfs_fsnode = cp;
1294
1295         /*
1296          * Special Case HFS Standard VNOPs from HFS+, since
1297          * HFS standard is readonly/deprecated as of 10.6
1298          */
1299
1300 #if FIFO
1301         if (vtype == VFIFO )
1302                 vfsp.vnfs_vops = hfs_fifoop_p;
1303         else
1304 #endif
1305         if (vtype == VBLK || vtype == VCHR)
1306                 vfsp.vnfs_vops = hfs_specop_p;
1307         else if (hfs_standard)
1308                 vfsp.vnfs_vops = hfs_std_vnodeop_p;
1309         else
1310                 vfsp.vnfs_vops = hfs_vnodeop_p;
1311
1312         if (vtype == VBLK || vtype == VCHR)
1313                 vfsp.vnfs_rdev = attrp->ca_rdev;
1314         else
1315                 vfsp.vnfs_rdev = 0;
1316
1317         if (forkp)
1318                 vfsp.vnfs_filesize = forkp->cf_size;
1319         else
1320                 vfsp.vnfs_filesize = 0;
1321
1322         vfsp.vnfs_flags = VNFS_ADDFSREF;
1323         if (dvp == NULLVP || cnp == NULL || !(cnp->cn_flags & MAKEENTRY) || (flags & GNV_NOCACHE))
1324                 vfsp.vnfs_flags |= VNFS_NOCACHE;
1325
1326         /* Tag system files */
1327         vfsp.vnfs_marksystem = issystemfile;
1328
1329         /* Tag root directory */
1330         if (descp->cd_cnid == kHFSRootFolderID)
1331                 vfsp.vnfs_markroot = 1;
1332         else
1333                 vfsp.vnfs_markroot = 0;
1334
1335         if ((retval = vnode_create(VNCREATE_FLAVOR, VCREATESIZE, &vfsp, cvpp))) {
1336                 if (fp) {
1337                         if (fp == cp->c_datafork)
1338                                 cp->c_datafork = NULL;
1339                         else
1340                                 cp->c_rsrcfork = NULL;
1341
1342                         FREE_ZONE(fp, sizeof(struct filefork), M_HFSFORK);
1343                 }
1344                 /*
1345                  * If this is a newly created cnode or a vnode reclaim
1346                  * occurred during the attachment, then cleanup the cnode.
1347                  */
1348                 if ((cp->c_vp == NULL) && (cp->c_rsrc_vp == NULL)) {
1349                         hfs_chash_abort(hfsmp, cp);
1350                         hfs_reclaim_cnode(cp);
1351                 }
1352                 else {
1353                         hfs_chashwakeup(hfsmp, cp, H_ALLOC | H_ATTACH);
1354                         if ((flags & GNV_SKIPLOCK) == 0){
1355                                 hfs_unlock(cp);
1356                         }
1357                 }
1358                 *vpp = NULL;
1359                 return (retval);
1360         }
1361         vp = *cvpp;
1362         vnode_settag(vp, VT_HFS);
1363         if (cp->c_flag & C_HARDLINK) {
1364                 vnode_setmultipath(vp);
1365         }
1366         /*
1367          * Tag resource fork vnodes as needing an VNOP_INACTIVE
1368          * so that any deferred removes (open unlinked files)
1369          * have the chance to process the resource fork.
1370          */
1371         if (VNODE_IS_RSRC(vp)) {
1372                 int err;
1373                 KERNEL_DEBUG_CONSTANT((FSDBG_CODE(DBG_FSRW, 37)), cp->c_vp, cp->c_rsrc_vp, 0, 0, 0);
1374
1375                 /* Force VL_NEEDINACTIVE on this vnode */
1376                 err = vnode_ref(vp);
1377                 if (err == 0) {
1378                         vnode_rele(vp);
1379                 }
1380         }
1381         hfs_chashwakeup(hfsmp, cp, H_ALLOC | H_ATTACH);
1382
1383         /*
1384          * Stop tracking an active hot file.
1385          */
1386         if (!(flags & GNV_CREATE) && (vtype != VDIR) && !issystemfile) {
1387                 (void) hfs_removehotfile(vp);
1388         }
1389
1390 #if CONFIG_PROTECT
1391         /* Initialize the cp data structures. The key should be in place now. */
1392         if (!issystemfile && (*out_flags & GNV_NEW_CNODE)) {
1393                 cp_entry_init(cp, mp);
1394         }
1395 #endif
1396
1397         *vpp = vp;
1398         return (0);
1399 }
1400
1401
1402 static void
1403 hfs_reclaim_cnode(struct cnode *cp)
1404 {
1405 #if QUOTA
1406         int i;
1407
1408         for (i = 0; i < MAXQUOTAS; i++) {
1409                 if (cp->c_dquot[i] != NODQUOT) {
1410                         dqreclaim(cp->c_dquot[i]);
1411                         cp->c_dquot[i] = NODQUOT;
1412                 }
1413         }
1414 #endif /* QUOTA */
1415
1416         /*
1417          * If the descriptor has a name then release it
1418          */
1419         if ((cp->c_desc.cd_flags & CD_HASBUF) && (cp->c_desc.cd_nameptr != 0)) {
1420                 const char *nameptr;
1421
1422                 nameptr = (const char *) cp->c_desc.cd_nameptr;
1423                 cp->c_desc.cd_nameptr = 0;
1424                 cp->c_desc.cd_flags &= ~CD_HASBUF;
1425                 cp->c_desc.cd_namelen = 0;
1426                 vfs_removename(nameptr);
1427         }
1428
1429         /*
1430          * We only call this function if we are in hfs_vnop_reclaim and
1431          * attempting to reclaim a cnode with only one live fork.  Because the vnode
1432          * went through reclaim, any future attempts to use this item will have to
1433          * go through lookup again, which will need to create a new vnode.  Thus,
1434          * destroying the locks below (while they were still held during our parent
1435          * function hfs_vnop_reclaim) is safe.
1436          */
1437
1438         lck_rw_destroy(&cp->c_rwlock, hfs_rwlock_group);
1439         lck_rw_destroy(&cp->c_truncatelock, hfs_rwlock_group);
1440 #if HFS_COMPRESSION
1441         if (cp->c_decmp) {
1442                 decmpfs_cnode_destroy(cp->c_decmp);
1443                 FREE_ZONE(cp->c_decmp, sizeof(*(cp->c_decmp)), M_DECMPFS_CNODE);
1444         }
1445 #endif
1446 #if CONFIG_PROTECT
1447         cp_entry_destroy(&cp->c_cpentry);
1448 #endif
1449
1450
1451         bzero(cp, sizeof(struct cnode));
1452         FREE_ZONE(cp, sizeof(struct cnode), M_HFSNODE);
1453 }
1454
1455
1456 /*
1457  * hfs_valid_cnode
1458  *
1459  * This function is used to validate data that is stored in-core against what is contained
1460  * in the catalog.  Common uses include validating that the parent-child relationship still exist
1461  * for a specific directory entry (guaranteeing it has not been renamed into a different spot) at
1462  * the point of the check.
1463  */
1464 int
1465 hfs_valid_cnode(struct hfsmount *hfsmp, struct vnode *dvp, struct componentname *cnp,
1466                 cnid_t cnid, struct cat_attr *cattr, int *error)
1467 {
1468         struct cat_attr attr;
1469         struct cat_desc cndesc;
1470         int stillvalid = 0;
1471         int lockflags;
1472
1473         /* System files are always valid */
1474         if (cnid < kHFSFirstUserCatalogNodeID) {
1475                 *error = 0;
1476                 return (1);
1477         }
1478
1479         /* XXX optimization:  check write count in dvp */
1480
1481         lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
1482
1483         if (dvp && cnp) {
1484                 int lookup = 0;
1485                 struct cat_fork fork;
1486                 bzero(&cndesc, sizeof(cndesc));
1487                 cndesc.cd_nameptr = (const u_int8_t *)cnp->cn_nameptr;
1488                 cndesc.cd_namelen = cnp->cn_namelen;
1489                 cndesc.cd_parentcnid = VTOC(dvp)->c_fileid;
1490                 cndesc.cd_hint = VTOC(dvp)->c_childhint;
1491
1492                 /*
1493                  * We have to be careful when calling cat_lookup.  The result argument
1494                  * 'attr' may get different results based on whether or not you ask
1495                  * for the filefork to be supplied as output.  This is because cat_lookupbykey
1496                  * will attempt to do basic validation/smoke tests against the resident
1497                  * extents if there are no overflow extent records, but it needs someplace
1498                  * in memory to store the on-disk fork structures.
1499                  *
1500                  * Since hfs_lookup calls cat_lookup with a filefork argument, we should
1501                  * do the same here, to verify that block count differences are not
1502                  * due to calling the function with different styles.  cat_lookupbykey
1503                  * will request the volume be fsck'd if there is true on-disk corruption
1504                  * where the number of blocks does not match the number generated by
1505                  * summing the number of blocks in the resident extents.
1506                  */
1507
1508                 lookup = cat_lookup (hfsmp, &cndesc, 0, NULL, &attr, &fork, NULL);
1509
1510                 if ((lookup == 0) && (cnid == attr.ca_fileid)) {
1511                         stillvalid = 1;
1512                         *error = 0;
1513                 }
1514                 else {
1515                         *error = ENOENT;
1516                 }
1517
1518                 /*
1519                  * In hfs_getnewvnode, we may encounter a time-of-check vs. time-of-vnode creation
1520                  * race.  Specifically, if there is no vnode/cnode pair for the directory entry
1521                  * being looked up, we have to go to the catalog.  But since we don't hold any locks (aside
1522                  * from the dvp in 'shared' mode) there is nothing to protect us against the catalog record
1523                  * changing in between the time we do the cat_lookup there and the time we re-grab the
1524                  * catalog lock above to do another cat_lookup.
1525                  *
1526                  * However, we need to check more than just the CNID and parent-child name relationships above.
1527                  * Hardlinks can suffer the same race in the following scenario:  Suppose we do a
1528                  * cat_lookup, and find a leaf record and a raw inode for a hardlink.  Now, we have
1529                  * the cat_attr in hand (passed in above).  But in between then and now, the vnode was
1530                  * created by a competing hfs_getnewvnode call, and is manipulated and reclaimed before we get
1531                  * a chance to do anything.  This is possible if there are a lot of threads thrashing around
1532                  * with the cnode hash.  In this case, if we don't check/validate the cat_attr in-hand, we will
1533                  * blindly stuff it into the cnode, which will make the in-core data inconsistent with what is
1534                  * on disk.  So validate the cat_attr below, if required.  This race cannot happen if the cnode/vnode
1535                  * already exists, as it does in the case of rename and delete.
1536                  */
1537                 if (stillvalid && cattr != NULL) {
1538                         if (cattr->ca_linkcount != attr.ca_linkcount) {
1539                                 stillvalid = 0;
1540                                 *error = ERECYCLE;
1541                                 goto notvalid;
1542                         }
1543
1544                         if (cattr->ca_union1.cau_linkref != attr.ca_union1.cau_linkref) {
1545                                 stillvalid = 0;
1546                                 *error = ERECYCLE;
1547                                 goto notvalid;
1548                         }
1549
1550                         if (cattr->ca_union3.cau_firstlink != attr.ca_union3.cau_firstlink) {
1551                                 stillvalid = 0;
1552                                 *error = ERECYCLE;
1553                                 goto notvalid;
1554                         }
1555
1556                         if (cattr->ca_union2.cau_blocks != attr.ca_union2.cau_blocks) {
1557                                 stillvalid = 0;
1558                                 *error = ERECYCLE;
1559                                 goto notvalid;
1560                         }
1561                 }
1562         } else {
1563                 if (cat_idlookup(hfsmp, cnid, 0, 0, NULL, NULL, NULL) == 0) {
1564                         stillvalid = 1;
1565                         *error = 0;
1566                 }
1567                 else {
1568                         *error = ENOENT;
1569                 }
1570         }
1571 notvalid:
1572         hfs_systemfile_unlock(hfsmp, lockflags);
1573
1574         return (stillvalid);
1575 }
1576
1577
1578 /*
1579  * Per HI and Finder requirements, HFS should add in the
1580  * date/time that a particular directory entry was added
1581  * to the containing directory.
1582  * This is stored in the extended Finder Info for the
1583  * item in question.
1584  *
1585  * Note that this field is also set explicitly in the hfs_vnop_setxattr code.
1586  * We must ignore user attempts to set this part of the finderinfo, and
1587  * so we need to save a local copy of the date added, write in the user
1588  * finderinfo, then stuff the value back in.
1589  */
1590 void hfs_write_dateadded (struct cat_attr *attrp, u_int32_t dateadded) {
1591         u_int8_t *finfo = NULL;
1592
1593         /* overlay the FinderInfo to the correct pointer, and advance */
1594         finfo = (u_int8_t*)attrp->ca_finderinfo;
1595         finfo = finfo + 16;
1596
1597         /*
1598          * Make sure to write it out as big endian, since that's how
1599          * finder info is defined.
1600          *
1601          * NOTE: This is a Unix-epoch timestamp, not a HFS/Traditional Mac timestamp.
1602          */
1603         if (S_ISREG(attrp->ca_mode)) {
1604                 struct FndrExtendedFileInfo *extinfo = (struct FndrExtendedFileInfo *)finfo;
1605                 extinfo->date_added = OSSwapHostToBigInt32(dateadded);
1606                 attrp->ca_recflags |= kHFSHasDateAddedMask;
1607         }
1608         else if (S_ISDIR(attrp->ca_mode)) {
1609                 struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)finfo;
1610                 extinfo->date_added = OSSwapHostToBigInt32(dateadded);
1611                                 attrp->ca_recflags |= kHFSHasDateAddedMask;
1612         }
1613         /* If it were neither directory/file, then we'd bail out */
1614         return;
1615 }
1616
1617
1618 u_int32_t hfs_get_dateadded (struct cnode *cp) {
1619         u_int8_t *finfo = NULL;
1620         u_int32_t dateadded = 0;
1621
1622         if ((cp->c_attr.ca_recflags & kHFSHasDateAddedMask) == 0) {
1623                 /* Date added was never set.  Return 0. */
1624                 return dateadded;
1625         }
1626
1627
1628         /* overlay the FinderInfo to the correct pointer, and advance */
1629         finfo = (u_int8_t*)cp->c_finderinfo;
1630         finfo = finfo + 16;
1631
1632         /*
1633          * FinderInfo is written out in big endian... make sure to convert it to host
1634          * native before we use it.
1635          */
1636         if (S_ISREG(cp->c_attr.ca_mode)) {
1637                 struct FndrExtendedFileInfo *extinfo = (struct FndrExtendedFileInfo *)finfo;
1638                 dateadded = OSSwapBigToHostInt32 (extinfo->date_added);
1639         }
1640         else if (S_ISDIR(cp->c_attr.ca_mode)) {
1641                 struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)finfo;
1642                 dateadded = OSSwapBigToHostInt32 (extinfo->date_added);
1643         }
1644
1645         return dateadded;
1646 }
1647
1648 /*
1649  * Touch cnode times based on c_touch_xxx flags
1650  *
1651  * cnode must be locked exclusive
1652  *
1653  * This will also update the volume modify time
1654  */
1655 void
1656 hfs_touchtimes(struct hfsmount *hfsmp, struct cnode* cp)
1657 {
1658         vfs_context_t ctx;
1659         /* don't modify times if volume is read-only */
1660         if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1661                 cp->c_touch_acctime = FALSE;
1662                 cp->c_touch_chgtime = FALSE;
1663                 cp->c_touch_modtime = FALSE;
1664                 return;
1665         }
1666         else if (hfsmp->hfs_flags & HFS_STANDARD) {
1667         /* HFS Standard doesn't support access times */
1668                 cp->c_touch_acctime = FALSE;
1669         }
1670
1671         ctx = vfs_context_current();
1672         /*
1673          * Skip access time updates if:
1674          *      . MNT_NOATIME is set
1675          *      . a file system freeze is in progress
1676          *      . a file system resize is in progress
1677          *      . the vnode associated with this cnode is marked for rapid aging
1678          */
1679         if (cp->c_touch_acctime) {
1680                 if ((vfs_flags(hfsmp->hfs_mp) & MNT_NOATIME) ||
1681                     (hfsmp->hfs_freezing_proc != NULL) ||
1682                     (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) ||
1683                     (cp->c_vp && ((vnode_israge(cp->c_vp) || (vfs_ctx_skipatime(ctx)))))) {
1684
1685                         cp->c_touch_acctime = FALSE;
1686                 }
1687         }
1688         if (cp->c_touch_acctime || cp->c_touch_chgtime ||
1689                 cp->c_touch_modtime || (cp->c_flag & C_NEEDS_DATEADDED)) {
1690                 struct timeval tv;
1691                 int touchvol = 0;
1692
1693                 microtime(&tv);
1694
1695                 if (cp->c_touch_acctime) {
1696                         cp->c_atime = tv.tv_sec;
1697                         /*
1698                          * When the access time is the only thing changing
1699                          * then make sure its sufficiently newer before
1700                          * committing it to disk.
1701                          */
1702                         if ((((u_int32_t)cp->c_atime - (u_int32_t)(cp)->c_attr.ca_atimeondisk) >
1703                               ATIME_ONDISK_ACCURACY)) {
1704                                 cp->c_flag |= C_MODIFIED;
1705                         }
1706                         cp->c_touch_acctime = FALSE;
1707                 }
1708                 if (cp->c_touch_modtime) {
1709                         cp->c_mtime = tv.tv_sec;
1710                         cp->c_touch_modtime = FALSE;
1711                         cp->c_flag |= C_MODIFIED;
1712                         touchvol = 1;
1713 #if 1
1714                         /*
1715                          * HFS dates that WE set must be adjusted for DST
1716                          */
1717                         if ((hfsmp->hfs_flags & HFS_STANDARD) && gTimeZone.tz_dsttime) {
1718                                 cp->c_mtime += 3600;
1719                         }
1720 #endif
1721                 }
1722                 if (cp->c_touch_chgtime) {
1723                         cp->c_ctime = tv.tv_sec;
1724                         cp->c_touch_chgtime = FALSE;
1725                         cp->c_flag |= C_MODIFIED;
1726                         touchvol = 1;
1727                 }
1728
1729                 if (cp->c_flag & C_NEEDS_DATEADDED) {
1730                         hfs_write_dateadded (&(cp->c_attr), tv.tv_sec);
1731                         cp->c_flag |= C_MODIFIED;
1732                         /* untwiddle the bit */
1733                         cp->c_flag &= ~C_NEEDS_DATEADDED;
1734                         touchvol = 1;
1735                 }
1736
1737                 /* Touch the volume modtime if needed */
1738                 if (touchvol) {
1739                         MarkVCBDirty(hfsmp);
1740                         HFSTOVCB(hfsmp)->vcbLsMod = tv.tv_sec;
1741                 }
1742         }
1743 }
1744
1745 /*
1746  * Lock a cnode.
1747  */
1748 int
1749 hfs_lock(struct cnode *cp, enum hfslocktype locktype)
1750 {
1751         void * thread = current_thread();
1752
1753         if (cp->c_lockowner == thread) {
1754                 /*
1755                  * Only the extents and bitmap file's support lock recursion.
1756                  */
1757                 if ((cp->c_fileid == kHFSExtentsFileID) ||
1758                     (cp->c_fileid == kHFSAllocationFileID)) {
1759                         cp->c_syslockcount++;
1760                 } else {
1761                         panic("hfs_lock: locking against myself!");
1762                 }
1763         } else if (locktype == HFS_SHARED_LOCK) {
1764                 lck_rw_lock_shared(&cp->c_rwlock);
1765                 cp->c_lockowner = HFS_SHARED_OWNER;
1766
1767         } else /* HFS_EXCLUSIVE_LOCK */ {
1768                 lck_rw_lock_exclusive(&cp->c_rwlock);
1769                 cp->c_lockowner = thread;
1770
1771                 /*
1772                  * Only the extents and bitmap file's support lock recursion.
1773                  */
1774                 if ((cp->c_fileid == kHFSExtentsFileID) ||
1775                     (cp->c_fileid == kHFSAllocationFileID)) {
1776                         cp->c_syslockcount = 1;
1777                 }
1778         }
1779
1780 #ifdef HFS_CHECK_LOCK_ORDER
1781         /*
1782          * Regular cnodes (non-system files) cannot be locked
1783          * while holding the journal lock or a system file lock.
1784          */
1785         if (!(cp->c_desc.cd_flags & CD_ISMETA) &&
1786             ((cp->c_fileid > kHFSFirstUserCatalogNodeID) || (cp->c_fileid == kHFSRootFolderID))) {
1787                 vnode_t vp = NULLVP;
1788
1789                 /* Find corresponding vnode. */
1790                 if (cp->c_vp != NULLVP && VTOC(cp->c_vp) == cp) {
1791                         vp = cp->c_vp;
1792                 } else if (cp->c_rsrc_vp != NULLVP && VTOC(cp->c_rsrc_vp) == cp) {
1793                         vp = cp->c_rsrc_vp;
1794                 }
1795                 if (vp != NULLVP) {
1796                         struct hfsmount *hfsmp = VTOHFS(vp);
1797
1798                         if (hfsmp->jnl && (journal_owner(hfsmp->jnl) == thread)) {
1799                                 /* This will eventually be a panic here. */
1800                                 printf("hfs_lock: bad lock order (cnode after journal)\n");
1801                         }
1802                         if (hfsmp->hfs_catalog_cp && hfsmp->hfs_catalog_cp->c_lockowner == thread) {
1803                                 panic("hfs_lock: bad lock order (cnode after catalog)");
1804                         }
1805                         if (hfsmp->hfs_attribute_cp && hfsmp->hfs_attribute_cp->c_lockowner == thread) {
1806                                 panic("hfs_lock: bad lock order (cnode after attribute)");
1807                         }
1808                         if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == thread) {
1809                                 panic("hfs_lock: bad lock order (cnode after extents)");
1810                         }
1811                 }
1812         }
1813 #endif /* HFS_CHECK_LOCK_ORDER */
1814
1815         /*
1816          * Skip cnodes that no longer exist (were deleted).
1817          */
1818         if ((locktype != HFS_FORCE_LOCK) &&
1819             ((cp->c_desc.cd_flags & CD_ISMETA) == 0) &&
1820             (cp->c_flag & C_NOEXISTS)) {
1821                 hfs_unlock(cp);
1822                 return (ENOENT);
1823         }
1824         return (0);
1825 }
1826
1827 /*
1828  * Lock a pair of cnodes.
1829  */
1830 int
1831 hfs_lockpair(struct cnode *cp1, struct cnode *cp2, enum hfslocktype locktype)
1832 {
1833         struct cnode *first, *last;
1834         int error;
1835
1836         /*
1837          * If cnodes match then just lock one.
1838          */
1839         if (cp1 == cp2) {
1840                 return hfs_lock(cp1, locktype);
1841         }
1842
1843         /*
1844          * Lock in cnode address order.
1845          */
1846         if (cp1 < cp2) {
1847                 first = cp1;
1848                 last = cp2;
1849         } else {
1850                 first = cp2;
1851                 last = cp1;
1852         }
1853
1854         if ( (error = hfs_lock(first, locktype))) {
1855                 return (error);
1856         }
1857         if ( (error = hfs_lock(last, locktype))) {
1858                 hfs_unlock(first);
1859                 return (error);
1860         }
1861         return (0);
1862 }
1863
/*
 * hfs_isordered
 *
 * Check the ordering of two cnodes.  Returns 1 when cp1 must be taken
 * strictly before cp2 and 0 otherwise (including when they are equal).
 *
 * NULL sorts before everything else and the value 0xffffffff, which
 * hfs_lockfour uses as an end-of-list sentinel, sorts after everything
 * else.  All remaining pointers are ordered by address.
 */
static int
hfs_isordered(struct cnode *cp1, struct cnode *cp2)
{
        struct cnode * const sentinel = (struct cnode *)0xffffffff;
        int ordered;

        if (cp1 == cp2) {
                ordered = 0;
        } else if ((cp1 == NULL) || (cp2 == sentinel)) {
                ordered = 1;
        } else if ((cp2 == NULL) || (cp1 == sentinel)) {
                ordered = 0;
        } else {
                /* Locking order is cnode address order. */
                ordered = (cp1 < cp2);
        }
        return (ordered);
}
1881
1882 /*
1883  * Acquire 4 cnode locks.
1884  *   - locked in cnode address order (lesser address first).
1885  *   - all or none of the locks are taken
1886  *   - only one lock taken per cnode (dup cnodes are skipped)
1887  *   - some of the cnode pointers may be null
1888  */
1889 int
1890 hfs_lockfour(struct cnode *cp1, struct cnode *cp2, struct cnode *cp3,
1891              struct cnode *cp4, enum hfslocktype locktype, struct cnode **error_cnode)
1892 {
1893         struct cnode * a[3];
1894         struct cnode * b[3];
1895         struct cnode * list[4];
1896         struct cnode * tmp;
1897         int i, j, k;
1898         int error;
1899         if (error_cnode) {
1900                 *error_cnode = NULL;
1901         }
1902
1903         if (hfs_isordered(cp1, cp2)) {
1904                 a[0] = cp1; a[1] = cp2;
1905         } else {
1906                 a[0] = cp2; a[1] = cp1;
1907         }
1908         if (hfs_isordered(cp3, cp4)) {
1909                 b[0] = cp3; b[1] = cp4;
1910         } else {
1911                 b[0] = cp4; b[1] = cp3;
1912         }
1913         a[2] = (struct cnode *)0xffffffff;  /* sentinel value */
1914         b[2] = (struct cnode *)0xffffffff;  /* sentinel value */
1915
1916         /*
1917          * Build the lock list, skipping over duplicates
1918          */
1919         for (i = 0, j = 0, k = 0; (i < 2 || j < 2); ) {
1920                 tmp = hfs_isordered(a[i], b[j]) ? a[i++] : b[j++];
1921                 if (k == 0 || tmp != list[k-1])
1922                         list[k++] = tmp;
1923         }
1924
1925         /*
1926          * Now we can lock using list[0 - k].
1927          * Skip over NULL entries.
1928          */
1929         for (i = 0; i < k; ++i) {
1930                 if (list[i])
1931                         if ((error = hfs_lock(list[i], locktype))) {
1932                                 /* Only stuff error_cnode if requested */
1933                                 if (error_cnode) {
1934                                         *error_cnode = list[i];
1935                                 }
1936                                 /* Drop any locks we acquired. */
1937                                 while (--i >= 0) {
1938                                         if (list[i])
1939                                                 hfs_unlock(list[i]);
1940                                 }
1941                                 return (error);
1942                         }
1943         }
1944         return (0);
1945 }
1946
1947
1948 /*
1949  * Unlock a cnode.
1950  */
1951 void
1952 hfs_unlock(struct cnode *cp)
1953 {
1954         vnode_t rvp = NULLVP;
1955         vnode_t vp = NULLVP;
1956         u_int32_t c_flag;
1957         void *lockowner;
1958
1959         /*
1960          * Only the extents and bitmap file's support lock recursion.
1961          */
1962         if ((cp->c_fileid == kHFSExtentsFileID) ||
1963             (cp->c_fileid == kHFSAllocationFileID)) {
1964                 if (--cp->c_syslockcount > 0) {
1965                         return;
1966                 }
1967         }
1968         c_flag = cp->c_flag;
1969         cp->c_flag &= ~(C_NEED_DVNODE_PUT | C_NEED_RVNODE_PUT | C_NEED_DATA_SETSIZE | C_NEED_RSRC_SETSIZE);
1970
1971         if (c_flag & (C_NEED_DVNODE_PUT | C_NEED_DATA_SETSIZE)) {
1972                 vp = cp->c_vp;
1973         }
1974         if (c_flag & (C_NEED_RVNODE_PUT | C_NEED_RSRC_SETSIZE)) {
1975                 rvp = cp->c_rsrc_vp;
1976         }
1977
1978         lockowner = cp->c_lockowner;
1979         if (lockowner == current_thread()) {
1980             cp->c_lockowner = NULL;
1981             lck_rw_unlock_exclusive(&cp->c_rwlock);
1982         } else {
1983             lck_rw_unlock_shared(&cp->c_rwlock);
1984         }
1985
1986         /* Perform any vnode post processing after cnode lock is dropped. */
1987         if (vp) {
1988                 if (c_flag & C_NEED_DATA_SETSIZE)
1989                         ubc_setsize(vp, 0);
1990                 if (c_flag & C_NEED_DVNODE_PUT)
1991                         vnode_put(vp);
1992         }
1993         if (rvp) {
1994                 if (c_flag & C_NEED_RSRC_SETSIZE)
1995                         ubc_setsize(rvp, 0);
1996                 if (c_flag & C_NEED_RVNODE_PUT)
1997                         vnode_put(rvp);
1998         }
1999 }
2000
/*
 * hfs_unlockpair
 *
 * Unlock a pair of cnodes.  When both arguments name the same cnode it is
 * unlocked only once.
 */
void
hfs_unlockpair(struct cnode *cp1, struct cnode *cp2)
{
        hfs_unlock(cp1);
        if (cp1 == cp2) {
                return;
        }
        hfs_unlock(cp2);
}
2011
2012 /*
2013  * Unlock a group of cnodes.
2014  */
2015 void
2016 hfs_unlockfour(struct cnode *cp1, struct cnode *cp2, struct cnode *cp3, struct cnode *cp4)
2017 {
2018         struct cnode * list[4];
2019         int i, k = 0;
2020
2021         if (cp1) {
2022                 hfs_unlock(cp1);
2023                 list[k++] = cp1;
2024         }
2025         if (cp2) {
2026                 for (i = 0; i < k; ++i) {
2027                         if (list[i] == cp2)
2028                                 goto skip1;
2029                 }
2030                 hfs_unlock(cp2);
2031                 list[k++] = cp2;
2032         }
2033 skip1:
2034         if (cp3) {
2035                 for (i = 0; i < k; ++i) {
2036                         if (list[i] == cp3)
2037                                 goto skip2;
2038                 }
2039                 hfs_unlock(cp3);
2040                 list[k++] = cp3;
2041         }
2042 skip2:
2043         if (cp4) {
2044                 for (i = 0; i < k; ++i) {
2045                         if (list[i] == cp4)
2046                                 return;
2047                 }
2048                 hfs_unlock(cp4);
2049         }
2050 }
2051
2052
2053 /*
2054  * Protect a cnode against a truncation.
2055  *
2056  * Used mainly by read/write since they don't hold the
2057  * cnode lock across calls to the cluster layer.
2058  *
2059  * The process doing a truncation must take the lock
2060  * exclusive. The read/write processes can take it
2061  * shared.  The locktype argument is the same as supplied to
2062  * hfs_lock.
2063  */
2064 void
2065 hfs_lock_truncate(struct cnode *cp, enum hfslocktype locktype)
2066 {
2067         void * thread = current_thread();
2068
2069         if (cp->c_truncatelockowner == thread) {
2070                 /*
2071                  * Only HFS_RECURSE_TRUNCLOCK is allowed to recurse.
2072                  *
2073                  * This is needed on the hfs_vnop_pagein path where we need to ensure
2074                  * the file does not change sizes while we are paging in.  However,
2075                  * we may already hold the lock exclusive due to another
2076                  * VNOP from earlier in the call stack.  So if we already hold
2077                  * the truncate lock exclusive, allow it to proceed, but ONLY if
2078                  * it's in the recursive case.
2079                  */
2080                 if (locktype != HFS_RECURSE_TRUNCLOCK) {
2081                         panic("hfs_lock_truncate: cnode %p locked!", cp);
2082                 }
2083         }
2084         /* HFS_RECURSE_TRUNCLOCK takes a shared lock if it is not already locked */
2085         else if ((locktype == HFS_SHARED_LOCK) || (locktype == HFS_RECURSE_TRUNCLOCK)) {
2086                 lck_rw_lock_shared(&cp->c_truncatelock);
2087                 cp->c_truncatelockowner = HFS_SHARED_OWNER;
2088         }
2089         else { /* must be an HFS_EXCLUSIVE_LOCK */
2090                 lck_rw_lock_exclusive(&cp->c_truncatelock);
2091                 cp->c_truncatelockowner = thread;
2092         }
2093 }
2094
2095
2096 /*
2097  * Attempt to get the truncate lock.  If it cannot be acquired, error out.
2098  * This function is needed in the degenerate hfs_vnop_pagein during force unmount
2099  * case.  To prevent deadlocks while a VM copy object is moving pages, HFS vnop pagein will
2100  * temporarily need to disable V2 semantics.
2101  */
2102 int hfs_try_trunclock (struct cnode *cp, enum hfslocktype locktype) {
2103         void * thread = current_thread();
2104         boolean_t didlock = false;
2105
2106         if (cp->c_truncatelockowner == thread) {
2107                 /*
2108                  * Only HFS_RECURSE_TRUNCLOCK is allowed to recurse.
2109                  *
2110                  * This is needed on the hfs_vnop_pagein path where we need to ensure
2111                  * the file does not change sizes while we are paging in.  However,
2112                  * we may already hold the lock exclusive due to another
2113                  * VNOP from earlier in the call stack.  So if we already hold
2114                  * the truncate lock exclusive, allow it to proceed, but ONLY if
2115                  * it's in the recursive case.
2116                  */
2117                 if (locktype != HFS_RECURSE_TRUNCLOCK) {
2118                         panic("hfs_lock_truncate: cnode %p locked!", cp);
2119                 }
2120         }
2121         /* HFS_RECURSE_TRUNCLOCK takes a shared lock if it is not already locked */
2122         else if ((locktype == HFS_SHARED_LOCK) || (locktype == HFS_RECURSE_TRUNCLOCK)) {
2123                 didlock = lck_rw_try_lock(&cp->c_truncatelock, LCK_RW_TYPE_SHARED);
2124                 if (didlock) {
2125                         cp->c_truncatelockowner = HFS_SHARED_OWNER;
2126                 }
2127         }
2128         else { /* must be an HFS_EXCLUSIVE_LOCK */
2129                 didlock = lck_rw_try_lock (&cp->c_truncatelock, LCK_RW_TYPE_EXCLUSIVE);
2130                 if (didlock) {
2131                         cp->c_truncatelockowner = thread;
2132                 }
2133         }
2134
2135         return didlock;
2136 }
2137
2138
2139 /*
2140  * Unlock the truncate lock, which protects against size changes.
2141  *
2142  * The been_recursed argument is used when we may need to return
2143  * from this function without actually unlocking the truncate lock.
2144  */
2145 void
2146 hfs_unlock_truncate(struct cnode *cp, int been_recursed)
2147 {
2148         void *thread = current_thread();
2149
2150         /*
2151          * If been_recursed is nonzero AND the current lock owner of the
2152          * truncate lock is our current thread, then we must have recursively
2153          * taken the lock earlier on.  If the lock were unlocked,
2154          * HFS_RECURSE_TRUNCLOCK took a shared lock and it would fall through
2155          * to the SHARED case below.
2156          *
2157          * If been_recursed is zero (most of the time) then we check the
2158          * lockowner field to infer whether the lock was taken exclusively or
2159          * shared in order to know what underlying lock routine to call.
2160          */
2161         if (been_recursed) {
2162                 if (cp->c_truncatelockowner == thread) {
2163                         return;
2164                 }
2165         }
2166
2167         /* HFS_LOCK_EXCLUSIVE */
2168         if (thread == cp->c_truncatelockowner) {
2169                 cp->c_truncatelockowner = NULL;
2170                 lck_rw_unlock_exclusive(&cp->c_truncatelock);
2171         }
2172         /* HFS_LOCK_SHARED */
2173         else {
2174                 lck_rw_unlock_shared(&cp->c_truncatelock);
2175         }
2176 }