bsd/hfs/hfs_cnode.c

   1 /*
   2  * Copyright (c) 2002-2013 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 #include <sys/param.h>
  29 #include <sys/systm.h>
  30 #include <sys/proc.h>
  31 #include <sys/vnode.h>
  32 #include <sys/mount.h>
  33 #include <sys/kernel.h>
  34 #include <sys/malloc.h>
  35 #include <sys/time.h>
  36 #include <sys/ubc.h>
  37 #include <sys/quota.h>
  38 #include <sys/kdebug.h>
  39 #include <libkern/OSByteOrder.h>
  40 #include <sys/buf_internal.h>
  41
  42 #include <kern/locks.h>
  43
  44 #include <miscfs/specfs/specdev.h>
  45 #include <miscfs/fifofs/fifo.h>
  46
  47 #include <hfs/hfs.h>
  48 #include <hfs/hfs_catalog.h>
  49 #include <hfs/hfs_cnode.h>
  50 #include <hfs/hfs_quota.h>
  51 #include <hfs/hfs_format.h>
  52
   53 extern int prtactive; /* defined outside this file; not referenced in the code visible here */
   54
   55 extern lck_attr_t *  hfs_lock_attr; /* lock attribute object, defined elsewhere */
   56 extern lck_grp_t *  hfs_mutex_group; /* lock group, defined elsewhere */
   57 extern lck_grp_t *  hfs_rwlock_group; /* lock group, defined elsewhere */
   58
   59 static void  hfs_reclaim_cnode(struct cnode *); /* file-local helper, forward declaration */
   60 static int hfs_cnode_teardown (struct vnode *vp, vfs_context_t ctx, int reclaim); /* file-local; defined below */
   61 static int hfs_isordered(struct cnode *, struct cnode *); /* file-local helper, forward declaration */
   62
   63 extern int hfs_removefile_callback(struct buf *bp, void *hfsmp); /* handed to buf_iterate() by hfs_cnode_teardown */
  64
  65
  66 __inline__ int hfs_checkdeleted (struct cnode *cp) {
  67         return ((cp->c_flag & (C_DELETED | C_NOEXISTS)) ? ENOENT : 0);
  68 }
  69
  70 /*
  71  * Function used by a special fcntl() that decorates a cnode/vnode that
  72  * indicates it is backing another filesystem, like a disk image.
  73  *
  74  * the argument 'val' indicates whether or not to set the bit in the cnode flags
  75  *
  76  * Returns non-zero on failure. 0 on success
  77  */
  78 int hfs_set_backingstore (struct vnode *vp, int val) {
  79         struct cnode *cp = NULL;
  80         int err = 0;
  81
  82         cp = VTOC(vp);
  83         if (!vnode_isreg(vp) && !vnode_isdir(vp)) {
  84                 return EINVAL;
  85         }
  86
  87         /* lock the cnode */
  88         err = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
  89         if (err) {
  90                 return err;
  91         }
  92
  93         if (val) {
  94                 cp->c_flag |= C_BACKINGSTORE;
  95         }
  96         else {
  97                 cp->c_flag &= ~C_BACKINGSTORE;
  98         }
  99
 100         /* unlock everything */
 101         hfs_unlock (cp);
 102
 103         return err;
 104 }
 105
 106 /*
 107  * Function used by a special fcntl() that check to see if a cnode/vnode
 108  * indicates it is backing another filesystem, like a disk image.
 109  *
 110  * the argument 'val' is an output argument for whether or not the bit is set
 111  *
 112  * Returns non-zero on failure. 0 on success
 113  */
 114
 115 int hfs_is_backingstore (struct vnode *vp, int *val) {
 116         struct cnode *cp = NULL;
 117         int err = 0;
 118
 119         if (!vnode_isreg(vp) && !vnode_isdir(vp)) {
 120                 *val = 0;
 121                 return 0;
 122         }
 123
 124         cp = VTOC(vp);
 125
 126         /* lock the cnode */
 127         err = hfs_lock (cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT);
 128         if (err) {
 129                 return err;
 130         }
 131
 132         if (cp->c_flag & C_BACKINGSTORE) {
 133                 *val = 1;
 134         }
 135         else {
 136                 *val = 0;
 137         }
 138
 139         /* unlock everything */
 140         hfs_unlock (cp);
 141
 142         return err;
 143 }
 144
 145
 146 /*
 147  * hfs_cnode_teardown
 148  *
 149  * This is an internal function that is invoked from both hfs_vnop_inactive
 150  * and hfs_vnop_reclaim.  As VNOP_INACTIVE is not necessarily called from vnodes
 151  * being recycled and reclaimed, it is important that we do any post-processing
 152  * necessary for the cnode in both places.  Important tasks include things such as
 153  * releasing the blocks from an open-unlinked file when all references to it have dropped,
 154  * and handling resource forks separately from data forks.
 155  *
 156  * Note that we take only the vnode as an argument here (rather than the cnode).
 157  * Recall that each cnode supports two forks (rsrc/data), and we can always get the right
 158  * cnode from either of the vnodes, but the reverse is not true -- we can't determine which
 159  * vnode we need to reclaim if only the cnode is supplied.
 160  *
 161  * This function is idempotent and safe to call from both hfs_vnop_inactive and hfs_vnop_reclaim
 162  * if both are invoked right after the other.  In the second call, most of this function's if()
 163  * conditions will fail, since they apply generally to cnodes still marked with C_DELETED.
 164  * As a quick check to see if this function is necessary, determine if the cnode is already
 165  * marked C_NOEXISTS.  If it is, then it is safe to skip this function.  The only tasks that
  166  * remain for cnodes marked in such a fashion are to tear down their fork references and
 167  * release all directory hints and hardlink origins.  However, both of those are done
 168  * in hfs_vnop_reclaim.  hfs_update, by definition, is not necessary if the cnode's catalog
 169  * entry is no longer there.
 170  *
 171  * 'reclaim' argument specifies whether or not we were called from hfs_vnop_reclaim.  If we are
 172  * invoked from hfs_vnop_reclaim, we can not call functions that cluster_push since the UBC info
 173  * is totally gone by that point.
 174  *
 175  * Assumes that both truncate and cnode locks for 'cp' are held.
 176  */
  177 static
  178 int hfs_cnode_teardown (struct vnode *vp, vfs_context_t ctx, int reclaim) {
  179
  180         int forkcount = 0; /* number of non-NULL fork pointers (c_datafork, c_rsrcfork) on the cnode */
  181         enum vtype v_type;
  182         struct cnode *cp;
  183         int error = 0; /* value returned to the caller */
  184         int started_tr = 0; /* non-zero while a transaction opened by this function is still open */
  185         struct hfsmount *hfsmp = VTOHFS(vp);
  186         struct proc *p = vfs_context_proc(ctx);
  187         int truncated = 0; /* set once hfs_truncate on vp has succeeded */
  188     cat_cookie_t cookie;
  189     int cat_reserve = 0; /* set once cat_preflight succeeded; triggers cat_postflight at out: */
  190     int lockflags;
  191         int ea_error = 0; /* NOTE(review): assigned from hfs_removeallattr below but never examined */
  192
  193         v_type = vnode_vtype(vp);
  194         cp = VTOC(vp);
  195
  196         if (cp->c_datafork) {
  197                 ++forkcount;
  198         }
  199         if (cp->c_rsrcfork) {
  200                 ++forkcount;
  201         }
  202
  203
  204         /*
  205          * Skip the call to ubc_setsize if we're being invoked on behalf of reclaim.
  206          * The dirty regions would have already been synced to disk, so informing UBC
  207          * that they can toss the pages doesn't help anyone at this point.
  208          *
  209          * Note that this is a performance problem if the vnode goes straight to reclaim
  210          * (and skips inactive), since there would be no way for anyone to notify the UBC
  211          * that all pages in this file are basically useless.
  212          */
  213         if (reclaim == 0) {
  214                 /*
  215                  * Check whether we are tearing down a cnode with only one remaining fork.
  216                  * If there are blocks in its filefork, then we need to unlock the cnode
  217                  * before calling ubc_setsize.  The cluster layer may re-enter the filesystem
  218                  * (i.e. VNOP_BLOCKMAP), and if we retain the cnode lock, we could double-lock
  219                  * panic.
  220                  */
  221
  222                 if ((v_type == VREG || v_type == VLNK) &&
  223                         (cp->c_flag & C_DELETED) &&
  224                         (VTOF(vp)->ff_blocks != 0) && (forkcount == 1)) {
  225                         hfs_unlock(cp);
  226                         /* ubc_setsize just fails if we were to call this from VNOP_RECLAIM */
  227                         ubc_setsize(vp, 0);
  228                         (void) hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); /* re-take the cnode lock; result deliberately ignored */
  229                 }
  230         }
  231
  232         /*
  233          * Push file data out for normal files that haven't been evicted from
  234          * the namespace.  We only do this if this function was not called from reclaim,
  235          * because by that point the UBC information has been totally torn down.
  236          *
  237          * There should also be no way that a normal file that has NOT been deleted from
  238          * the namespace to skip INACTIVE and go straight to RECLAIM.  That race only happens
  239          * when the file becomes open-unlinked.
  240          */
  241         if ((v_type == VREG) &&
  242                 (!ISSET(cp->c_flag, C_DELETED)) &&
  243                 (!ISSET(cp->c_flag, C_NOEXISTS)) &&
  244                 (VTOF(vp)->ff_blocks) &&
  245                 (reclaim == 0)) {
  246                 /*
  247                  * Note that if content protection is enabled, then this is where we will
  248                  * attempt to issue IOs for all dirty regions of this file.
  249                  *
  250                  * If we're called from hfs_vnop_inactive, all this means is at the time
  251                  * the logic for deciding to call this function, there were not any lingering
  252                  * mmap/fd references for this file.  However, there is nothing preventing the system
  253                  * from creating a new reference in between the time that logic was checked
  254                  * and we entered hfs_vnop_inactive.  As a result, the only time we can guarantee
  255                  * that there aren't any references is during vnop_reclaim.
  256                  */
  257                 hfs_filedone(vp, ctx);
  258         }
  259
  260         /*
  261          * We're holding the cnode lock now.  Stall behind any shadow BPs that may
  262          * be involved with this vnode if it is a symlink.  We don't want to allow
  263          * the blocks that we're about to release to be put back into the pool if there
  264          * is pending I/O to them.
  265          */
  266         if (v_type == VLNK) {
  267                 /*
  268                  * This will block if the asynchronous journal flush is in progress.
  269                  * If this symlink is not being renamed over and doesn't have any open FDs,
  270                  * then we'll remove it from the journal's bufs below in kill_block.
  271                  */
  272                 buf_wait_for_shadow_io (vp, 0);
  273         }
  274
  275         /*
  276          * Remove any directory hints or cached origins
  277          */
  278         if (v_type == VDIR) {
  279                 hfs_reldirhints(cp, 0);
  280         }
  281         if (cp->c_flag & C_HARDLINK) {
  282                 hfs_relorigins(cp);
  283         }
  284
  285         /*
  286          * This check is slightly complicated.  We should only truncate data
  287          * in very specific cases for open-unlinked files.  This is because
  288          * we want to ensure that the resource fork continues to be available
  289          * if the caller has the data fork open.  However, this is not symmetric;
  290          * someone who has the resource fork open need not be able to access the data
  291          * fork once the data fork has gone inactive.
  292          *
  293          * If we're the last fork, then we have cleaning up to do.
  294          *
  295          * A) last fork, and vp == c_vp
  296          *      Truncate away own fork data. If rsrc fork is not in core, truncate it too.
  297          *
  298          * B) last fork, and vp == c_rsrc_vp
  299          *      Truncate ourselves, assume data fork has been cleaned due to C).
  300          *
  301          * If we're not the last fork, then things are a little different:
  302          *
  303          * C) not the last fork, vp == c_vp
  304          *      Truncate ourselves.  Once the file has gone out of the namespace,
  305          *      it cannot be further opened.  Further access to the rsrc fork may
  306          *      continue, however.
  307          *
  308          * D) not the last fork, vp == c_rsrc_vp
  309          *      Don't enter the block below, just clean up vnode and push it out of core.
  310          */
  311
  312         if ((v_type == VREG || v_type == VLNK) &&
  313                         (cp->c_flag & C_DELETED) &&
  314                         ((forkcount == 1) || (!VNODE_IS_RSRC(vp)))) {
  315
  316                 /* Truncate away our own fork data. (Case A, B, C above) */
  317                 if (VTOF(vp)->ff_blocks != 0) {
  318
  319                         /*
  320                          * SYMLINKS only:
  321                          *
  322                          * Encapsulate the entire change (including truncating the link) in
  323                          * nested transactions if we are modifying a symlink, because we know that its
  324                          * file length will be at most 4k, and we can fit both the truncation and
  325                          * any relevant bitmap changes into a single journal transaction.  We also want
  326                          * the kill_block code to execute in the same transaction so that any dirty symlink
  327                          * blocks will not be written. Otherwise, rely on
  328                          * hfs_truncate doing its own transactions to ensure that we don't blow up
  329                          * the journal.
  330                          */
  331                         if ((started_tr == 0) && (v_type == VLNK)) {
  332                                 if (hfs_start_transaction(hfsmp) != 0) {
  333                                         error = EINVAL;
  334                                         goto out;
  335                                 }
  336                                 else {
  337                                         started_tr = 1;
  338                                 }
  339                         }
  340
  341                         /*
  342                          * At this point, we have decided that this cnode is
  343                          * suitable for full removal.  We are about to deallocate
  344                          * its blocks and remove its entry from the catalog.
  345                          * If it was a symlink, then it's possible that the operation
  346                          * which created it is still in the current transaction group
  347                          * due to coalescing.  Take action here to kill the data blocks
  348                          * of the symlink out of the journal before moving to
  349                          * deallocate the blocks.  We need to be in the middle of
  350                          * a transaction before calling buf_iterate like this.
  351                          *
  352                          * Note: we have to kill any potential symlink buffers out of
  353                          * the journal prior to deallocating their blocks.  This is so
  354                          * that we don't race with another thread that may be doing an
  355                          * allocation concurrently and pick up these blocks. It could
  356                          * generate I/O against them which could go out ahead of our journal
  357                          * transaction.
  358                          */
  359
  360                         if (hfsmp->jnl && vnode_islnk(vp)) {
  361                                 buf_iterate(vp, hfs_removefile_callback, BUF_SKIP_NONLOCKED, (void *)hfsmp);
  362                         }
  363
  364
  365                         /*
  366                          * This truncate call (and the one below) is fine from VNOP_RECLAIM's
  367                          * context because we're only removing blocks, not zero-filling new
  368                          * ones.  The C_DELETED check above makes things much simpler.
  369                          */
  370                         error = hfs_truncate(vp, (off_t)0, IO_NDELAY, 0, 0, ctx);
  371                         if (error) {
  372                                 goto out; /* a symlink transaction still open here is closed at out: */
  373                         }
  374                         truncated = 1;
  375
  376                         /* (SYMLINKS ONLY): Close/End our transaction after truncating the file record */
  377                         if (started_tr) {
  378                                 hfs_end_transaction(hfsmp);
  379                                 started_tr = 0;
  380                         }
  381
  382                 }
  383
  384                 /*
  385                  * Truncate away the resource fork, if we represent the data fork and
  386                  * it is the last fork.  That means, by definition, the rsrc fork is not in
  387                  * core.  To avoid bringing a vnode into core for the sole purpose of deleting the
  388                  * data in the resource fork, we call cat_lookup directly, then hfs_release_storage
  389                  * to get rid of the resource fork's data. Note that because we are holding the
  390                  * cnode lock, it is impossible for a competing thread to create the resource fork
  391                  * vnode from underneath us while we do this.
  392                  *
  393                  * This is invoked via case A above only.
  394                  */
  395                 if ((cp->c_blocks > 0) && (forkcount == 1) && (vp != cp->c_rsrc_vp)) {
  396                         struct cat_lookup_buffer *lookup_rsrc = NULL; /* temporary buffer; freed on every path out of this block */
  397                         struct cat_desc *desc_ptr = NULL; /* descriptor handed to cat_lookup: cp's own or the temporary one */
  398                         lockflags = 0;
  399
  400                         MALLOC(lookup_rsrc, struct cat_lookup_buffer*, sizeof (struct cat_lookup_buffer), M_TEMP, M_WAITOK);
  401                         if (lookup_rsrc == NULL) {
  402                                 printf("hfs_cnode_teardown: ENOMEM from MALLOC\n");
  403                                 error = ENOMEM;
  404                                 goto out;
  405                         }
  406                         else {
  407                                 bzero (lookup_rsrc, sizeof (struct cat_lookup_buffer));
  408                         }
  409
  410                         if (cp->c_desc.cd_namelen == 0) {
  411                                 /* Initialize the rsrc descriptor for lookup if necessary*/
  412                                 MAKE_DELETED_NAME (lookup_rsrc->lookup_name, HFS_TEMPLOOKUP_NAMELEN, cp->c_fileid);
  413
  414                                 lookup_rsrc->lookup_desc.cd_nameptr = (const uint8_t*) lookup_rsrc->lookup_name;
  415                                 lookup_rsrc->lookup_desc.cd_namelen = strlen (lookup_rsrc->lookup_name);
  416                                 lookup_rsrc->lookup_desc.cd_parentcnid = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid;
  417                                 lookup_rsrc->lookup_desc.cd_cnid = cp->c_cnid;
  418
  419                                 desc_ptr = &lookup_rsrc->lookup_desc;
  420                         }
  421                         else {
  422                                 desc_ptr = &cp->c_desc;
  423                         }
  424
  425                         lockflags = hfs_systemfile_lock (hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
  426
  427                         error = cat_lookup (hfsmp, desc_ptr, 1, 0, (struct cat_desc *) NULL,
  428                                         (struct cat_attr*) NULL, &lookup_rsrc->lookup_fork.ff_data, NULL);
  429
  430                         hfs_systemfile_unlock (hfsmp, lockflags);
  431
  432                         if (error) {
  433                                 FREE (lookup_rsrc, M_TEMP);
  434                                 goto out;
  435                         }
  436
  437                         /*
  438                          * Make the filefork in our temporary struct look like a real
  439                          * filefork.  Fill in the cp, sysfileinfo and rangelist fields..
  440                          */
  441                         rl_init (&lookup_rsrc->lookup_fork.ff_invalidranges);
  442                         lookup_rsrc->lookup_fork.ff_cp = cp;
  443
  444                         /*
  445                          * If there were no errors, then we have the catalog's fork information
  446                          * for the resource fork in question.  Go ahead and delete the data in it now.
  447                          */
  448
  449                         error = hfs_release_storage (hfsmp, NULL, &lookup_rsrc->lookup_fork, cp->c_fileid);
  450                         FREE(lookup_rsrc, M_TEMP);
  451
  452                         if (error) {
  453                                 goto out;
  454                         }
  455
  456                         /*
  457                          * This fileid's resource fork extents have now been fully deleted on-disk
  458                          * and this CNID is no longer valid. At this point, we should be able to
  459                          * zero out cp->c_blocks to indicate there is no data left in this file.
  460                          */
  461                         cp->c_blocks = 0;
  462                 }
  463         }
  464
  465         /*
  466          * If we represent the last fork (or none in the case of a dir),
  467          * and the cnode has become open-unlinked,
  468          * AND it has EA's, then we need to get rid of them.
  469          *
  470          * Note that this must happen outside of any other transactions
  471          * because it starts/ends its own transactions and grabs its
  472          * own locks.  This is to prevent a file with a lot of attributes
  473          * from creating a transaction that is too large (which panics).
  474          */
  475     if ((cp->c_attr.ca_recflags & kHFSHasAttributesMask) != 0 &&
  476                 (cp->c_flag & C_DELETED) &&
  477                 (forkcount <= 1)) {
  478
  479         ea_error = hfs_removeallattr(hfsmp, cp->c_fileid); /* result is recorded but does not affect 'error' */
  480     }
  481
  482
  483         /*
  484          * If the cnode represented an open-unlinked file, then now
  485          * actually remove the cnode's catalog entry and release all blocks
  486          * it may have been using.
  487          */
  488     if ((cp->c_flag & C_DELETED) && (forkcount <= 1)) {
  489         /*
  490          * Mark cnode in transit so that no one can get this
  491          * cnode from cnode hash.
  492          */
  493                 // hfs_chash_mark_in_transit(hfsmp, cp);
  494                 // XXXdbg - remove the cnode from the hash table since it's deleted
  495                 //          otherwise someone could go to sleep on the cnode and not
  496                 //          be woken up until this vnode gets recycled which could be
  497                 //          a very long time...
  498         hfs_chashremove(hfsmp, cp);
  499
  500         cp->c_flag |= C_NOEXISTS;   // XXXdbg
  501         cp->c_rdev = 0;
  502
  503         if (started_tr == 0) {
  504             if (hfs_start_transaction(hfsmp) != 0) {
  505                                 error = EINVAL; /* NOTE(review): C_NOEXISTS is already set and the cnode already unhashed on this exit */
  506                                 goto out;
  507             }
  508             started_tr = 1;
  509         }
  510
  511         /*
  512          * Reserve some space in the Catalog file.
  513          */
  514         if ((error = cat_preflight(hfsmp, CAT_DELETE, &cookie, p))) {
  515             goto out;
  516         }
  517         cat_reserve = 1;
  518
  519         lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE, HFS_EXCLUSIVE_LOCK);
  520
  521         if (cp->c_blocks > 0) {
  522             printf("hfs_inactive: deleting non-empty%sfile %d, "
  523                    "blks %d\n", VNODE_IS_RSRC(vp) ? " rsrc " : " ",
  524                    (int)cp->c_fileid, (int)cp->c_blocks);
  525         }
  526
  527                 //
  528         // release the name pointer in the descriptor so that
  529         // cat_delete() will use the file-id to do the deletion.
  530         // in the case of hard links this is imperative (in the
  531         // case of regular files the fileid and cnid are the
  532         // same so it doesn't matter).
  533         //
  534         cat_releasedesc(&cp->c_desc);
  535
  536         /*
  537          * The descriptor name may be zero,
  538          * in which case the fileid is used.
  539          */
  540         error = cat_delete(hfsmp, &cp->c_desc, &cp->c_attr);
  541
  542         if (error && truncated && (error != ENXIO)) {
  543             printf("hfs_inactive: couldn't delete a truncated file!");
  544         }
  545
  546         /* Update HFS Private Data dir */
  547         if (error == 0) {
  548             hfsmp->hfs_private_attr[FILE_HARDLINKS].ca_entries--;
  549             if (vnode_isdir(vp)) {
  550                 DEC_FOLDERCOUNT(hfsmp, hfsmp->hfs_private_attr[FILE_HARDLINKS]);
  551             }
  552             (void)cat_update(hfsmp, &hfsmp->hfs_private_desc[FILE_HARDLINKS],
  553                                                          &hfsmp->hfs_private_attr[FILE_HARDLINKS], NULL, NULL);
  554         }
  555
  556         hfs_systemfile_unlock(hfsmp, lockflags);
  557
  558         if (error) {
  559                         goto out; /* cat_delete failed: C_DELETED stays set alongside C_NOEXISTS */
  560                 }
  561
  562 #if QUOTA
  563         if (hfsmp->hfs_flags & HFS_QUOTAS)
  564             (void)hfs_chkiq(cp, -1, NOCRED, 0);
  565 #endif /* QUOTA */
  566
  567         /* Already set C_NOEXISTS at the beginning of this block */
  568         cp->c_flag &= ~C_DELETED;
  569         cp->c_touch_chgtime = TRUE;
  570         cp->c_touch_modtime = TRUE;
  571
  572         if (error == 0) /* NOTE(review): always true here; a non-zero error already took the goto out above */
  573             hfs_volupdate(hfsmp, (v_type == VDIR) ? VOL_RMDIR : VOL_RMFILE, 0);
  574     }
  575
  576         /*
  577      * A file may have had delayed allocations, in which case hfs_update
  578      * would not have updated the catalog record (cat_update).  We need
  579      * to do that now, before we lose our fork data.  We also need to
  580      * force the update, or hfs_update will again skip the cat_update.
  581          *
  582          * If the file has C_NOEXISTS set, then we can skip the hfs_update call
  583          * because the catalog entry has already been removed.  There would be no point
  584      * to looking up the entry in the catalog to modify it when we already know it's gone
  585          */
  586     if ((!ISSET(cp->c_flag, C_NOEXISTS)) &&
  587                 ((cp->c_flag & C_MODIFIED) || cp->c_touch_acctime ||
  588                  cp->c_touch_chgtime || cp->c_touch_modtime)) {
  589
  590                         if ((cp->c_flag & C_MODIFIED) || cp->c_touch_modtime){
  591                                 cp->c_flag |= C_FORCEUPDATE;
  592                         }
  593                         hfs_update(vp, 0); /* return value is not checked */
  594                 }
  595
  596 out: /* common exit: release the catalog reservation and close any transaction still open */
  597     if (cat_reserve)
  598         cat_postflight(hfsmp, &cookie, p);
  599
  600     // XXXdbg - have to do this because a goto could have come here
  601     if (started_tr) {
  602         hfs_end_transaction(hfsmp);
  603         started_tr = 0;
  604     }
  605
  606 #if 0 /* everything down to the matching #endif is compiled out */
  607 #if CONFIG_PROTECT
  608         /*
  609          * cnode truncate lock and cnode lock are both held exclusive here.
  610          *
  611          * Go ahead and flush the keys out if this cnode is the last fork
  612          * and it is not class F.  Class F keys should not be purged because they only
  613          * exist in memory and have no persistent keys.  Only do this
  614          * if we haven't already done it yet (maybe a vnode skipped inactive
  615          * and went straight to reclaim).  This function gets called from both reclaim and
  616          * inactive, so it will happen first in inactive if possible.
  617          *
  618          * We need to be mindful that all pending IO for this file has already been
  619          * issued and completed before we bzero out the key.  This is because
  620          * if it isn't, tossing the key here could result in garbage IO being
  621          * written (by using the bzero'd key) if the writes are happening asynchronously.
  622          *
  623          * In addition, class A files may have already been purged due to the
  624          * lock event occurring.
  625          */
  626         if (forkcount == 1) {
  627                 struct cprotect *entry = cp->c_cpentry;
  628                 if ((entry) && (entry->cp_pclass != PROTECTION_CLASS_F)) {
  629                         if ((cp->c_cpentry->cp_flags & CP_KEY_FLUSHED) == 0) {
  630                                 cp->c_cpentry->cp_flags |= CP_KEY_FLUSHED;
  631                                 bzero (cp->c_cpentry->cp_cache_key, cp->c_cpentry->cp_cache_key_len);
  632                                 bzero (cp->c_cpentry->cp_cache_iv_ctx, sizeof(aes_encrypt_ctx));
  633                         }
  634                 }
  635         }
  636 #endif
  637 #endif
  638
  639         return error;
  640 }
 641
 642
 643 /*
 644  * hfs_vnop_inactive
 645  *
 646  * The last usecount on the vnode has gone away, so we need to tear down
 647  * any remaining data still residing in the cnode.  If necessary, write out
 648  * remaining blocks or delete the cnode's entry in the catalog.
      *
      * Parameters:
      *   ap->a_vp       vnode going inactive
      *   ap->a_context  caller's VFS context; passed on to hfs_cnode_teardown
      *
      * Returns 0 without doing any work when the volume is read-only, the vnode
      * is a system vnode, the calling process is the one recorded in
      * hfs_freezing_proc, or the cnode is already flagged C_NOEXISTS.  Otherwise
      * returns the result of hfs_cnode_teardown.
      *
      * Locking: for VREG and VLNK vnodes the truncate lock is taken exclusively
      * before the cnode lock, and it is released before the cnode lock.
 649  */
 650 int
 651 hfs_vnop_inactive(struct vnop_inactive_args *ap)
 652 {
 653         struct vnode *vp = ap->a_vp;
 654         struct cnode *cp;
 655         struct hfsmount *hfsmp = VTOHFS(vp);
 656         struct proc *p = vfs_context_proc(ap->a_context);
 657         int error = 0;
 658         int took_trunc_lock = 0;       /* non-zero once the truncate lock is held */
 659         enum vtype v_type;
 660
 661         v_type = vnode_vtype(vp);
 662         cp = VTOC(vp);
 663
             /* None of these three cases does any teardown; report success. */
 664         if ((hfsmp->hfs_flags & HFS_READ_ONLY) || vnode_issystem(vp) ||
 665             (hfsmp->hfs_freezing_proc == p)) {
 666                 error = 0;
 667                 goto inactive_done;
 668         }
 669
 670         /*
 671          * For safety, do NOT call vnode_recycle from inside this function.  This can cause
 672          * problems in the following scenario:
 673          *
 674          * vnode_create -> vnode_reclaim_internal -> vclean -> VNOP_INACTIVE
 675          *
 676          * If we're being invoked as a result of a reclaim that was already in-flight, then we
 677          * cannot call vnode_recycle again.  Being in reclaim means that there are no usecounts or
 678          * iocounts by definition.  As a result, if we were to call vnode_recycle, it would immediately
 679          * try to re-enter reclaim again and panic.
 680          *
 681          * Currently, there are three things that can cause us (VNOP_INACTIVE) to get called.
 682          * 1) last usecount goes away on the vnode (vnode_rele)
 683          * 2) last iocount goes away on a vnode that previously had usecounts but didn't have
 684          *              vnode_recycle called (vnode_put)
 685          * 3) vclean by way of reclaim
 686          *
 687          * In this function we would generally want to call vnode_recycle to speed things
 688          * along to ensure that we don't leak blocks due to open-unlinked files.  However, by
 689          * virtue of being in this function already, we can call hfs_cnode_teardown, which
 690          * will release blocks held by open-unlinked files, and mark them C_NOEXISTS so that
 691          * there's no entry in the catalog and no backing store anymore.  If that's the case,
 692          * then we really don't care all that much when the vnode actually goes through reclaim.
 693          * Further, the HFS VNOPs that manipulated the namespace in order to create the open-
 694          * unlinked file in the first place should have already called vnode_recycle on the vnode
 695          * to guarantee that it would go through reclaim in a speedy way.
 696          */
 697
             /* NOTE(review): c_flag is sampled here before the cnode lock is taken below. */
 698         if (cp->c_flag & C_NOEXISTS) {
 699                 /*
 700                  * If the cnode has already had its cat entry removed, then
 701                  * just skip to the end. We don't need to do anything here.
 702                  */
 703                 error = 0;
 704                 goto inactive_done;
 705         }
 706
             /* Only regular files and symlinks take the truncate lock. */
 707         if ((v_type == VREG || v_type == VLNK)) {
 708                 hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
 709                 took_trunc_lock = 1;
 710         }
 711
             /*
              * The return value is deliberately ignored.  HFS_LOCK_ALLOW_NOEXISTS is
              * passed, presumably so the lock is granted even if the cnode has lost
              * its catalog entry in the meantime -- TODO confirm in hfs_lock.
              */
 712         (void) hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
 713
 714         /*
 715          * Call cnode_teardown to push out dirty blocks to disk, release open-unlinked
 716          * files' blocks from being in use, and move the cnode from C_DELETED to C_NOEXISTS.
 717          */
 718         error = hfs_cnode_teardown (vp, ap->a_context, 0);
 719
 720     /*
 721      * Drop the truncate lock before unlocking the cnode
 722      * (which can potentially perform a vnode_put and
 723      * recycle the vnode which in turn might require the
 724      * truncate lock)
 725      */
 726         if (took_trunc_lock) {
 727             hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
 728         }
 729
 730         hfs_unlock(cp);
 731
 732 inactive_done:
 733
 734         return error;
 735 }
 736
 737
 738 /*
 739  * File clean-up (zero fill and shrink peof).
      *
      * Steps, in order: push dirty pages with cluster_push, zero-fill every range
      * on the fork's invalid-range list, truncate the fork when it holds more
      * blocks than the logical EOF needs, push again, and finally call hfs_update
      * if the cnode is flagged C_MODIFIED.
      *
      * Parameters:
      *   vp       vnode of the fork to clean up
      *   context  VFS context, passed to hfs_truncate
      *
      * Returns 0 in all cases; the results of cluster_push, cluster_write,
      * hfs_truncate and hfs_update are not reported.  Returns immediately when
      * the volume is read-only or the fork has no blocks.
      *
      * Locking: the cnode lock is dropped and re-taken (exclusive) around each
      * cluster call, so the function assumes the caller holds it on entry and it
      * is held again on return -- confirm against callers.
 740  */
 741
 742 int
 743 hfs_filedone(struct vnode *vp, vfs_context_t context)
 744 {
 745         struct cnode *cp;
 746         struct filefork *fp;
 747         struct hfsmount *hfsmp;
 748         struct rl_entry *invalid_range;
 749         off_t leof;
 750         u_int32_t blks, blocksize;
 751         /* flags for zero-filling sparse ranges */
 752         int cluster_flags = IO_CLOSE;
 753         int cluster_zero_flags = IO_HEADZEROFILL | IO_NOZERODIRTY | IO_NOCACHE;
 754
 755         cp = VTOC(vp);
 756         fp = VTOF(vp);
 757         hfsmp = VTOHFS(vp);
             /* The logical EOF is sampled once here and used for every step below. */
 758         leof = fp->ff_size;
 759
 760         if ((hfsmp->hfs_flags & HFS_READ_ONLY) || (fp->ff_blocks == 0))
 761                 return (0);
 762
 763 #if CONFIG_PROTECT
 764         /*
 765          * Figure out if we need to do synchronous IO.
 766          *
 767          * If the file represents a content-protected file, we may need
 768          * to issue synchronous IO when we dispatch to the cluster layer.
 769          * If we didn't, then the IO would go out to the disk asynchronously.
 770          * If the vnode hits the end of inactive before getting reclaimed, the
 771          * content protection keys would be wiped/bzeroed out, and we'd end up
 772          * trying to issue the IO with an invalid key.  This will lead to file
 773          * corruption.  IO_SYNC will force the cluster_push to wait until all IOs
 774          * have completed (though they may be in the track cache).
 775          */
 776         if (cp_fs_protected(VTOVFS(vp))) {
 777                 cluster_flags |= IO_SYNC;
 778                 cluster_zero_flags |= IO_SYNC;
 779         }
 780 #endif
 781
 782         /*
 783          * If we are being invoked from F_SWAPDATAEXTENTS, then we
 784          * need to issue synchronous IO; Unless we are sure that all
 785          * of the data has been written to the disk, we won't know
 786          * that all of the blocks have been allocated properly.
 787          */
 788         if (cp->c_flag & C_SWAPINPROGRESS) {
 789                 cluster_flags |= IO_SYNC;
 790         }
 791
             /* The cnode lock is not held across the cluster call. */
 792         hfs_unlock(cp);
 793         (void) cluster_push(vp, cluster_flags);
 794         hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
 795
 796         /*
 797          * Explicitly zero out the areas of file
 798          * that are currently marked invalid.
 799          */
 800         while ((invalid_range = TAILQ_FIRST(&fp->ff_invalidranges))) {
 801                 off_t start = invalid_range->rl_start;
 802                 off_t end = invalid_range->rl_end;
 803
 804                 /* The range about to be written must be validated
 805                  * first, so that VNOP_BLOCKMAP() will return the
 806                  * appropriate mapping for the cluster code:
 807                  */
 808                 rl_remove(start, end, &fp->ff_invalidranges);
 809
                     /*
                      * No uio is supplied; the zero-fill flags do the work.  The
                      * offsets are presumably (oldEOF = leof, newEOF = end + 1,
                      * headOff = start, tailOff = 0) -- confirm against the
                      * cluster_write prototype.
                      */
 810                 hfs_unlock(cp);
 811                 (void) cluster_write(vp, (struct uio *) 0,
 812                                      leof, end + 1, start, (off_t)0, cluster_zero_flags);
 813                 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
 814                 cp->c_flag |= C_MODIFIED;
 815         }
 816         cp->c_flag &= ~C_ZFWANTSYNC;
 817         cp->c_zftimeout = 0;
 818         blocksize = VTOVCB(vp)->blockSize;
             /* Number of blocks needed to hold leof, rounded up. */
 819         blks = leof / blocksize;
 820         if (((off_t)blks * (off_t)blocksize) != leof)
 821                 blks++;
 822         /*
 823          * Shrink the peof to the smallest size necessary to contain the leof.
 824          */
 825         if (blks < fp->ff_blocks) {
 826                 (void) hfs_truncate(vp, leof, IO_NDELAY, 0, 0, context);
 827         }
 828
 829         hfs_unlock(cp);
 830         (void) cluster_push(vp, cluster_flags);
 831         hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
 832
 833         /*
 834          * If the hfs_truncate didn't happen to flush the vnode's
 835          * information out to disk, force it to be updated now that
 836          * all invalid ranges have been zero-filled and validated:
 837          */
 838         if (cp->c_flag & C_MODIFIED) {
 839                 hfs_update(vp, 0);
 840         }
 841         return (0);
 842 }
 843
 844
 845 /*
 846  * Reclaim a cnode so that it can be used for other purposes.
      *
      * Detaches vp from its cnode, frees the file fork that belonged to vp and,
      * when no other fork remains and the cnode could be removed from the hash,
      * releases the cnode itself.  Panics if vp is neither the cnode's data
      * vnode nor its resource vnode.
      *
      * Parameters:
      *   ap->a_vp       vnode being reclaimed
      *   ap->a_context  VFS context, passed to hfs_cnode_teardown
      *
      * Always returns 0; a failure from hfs_cnode_teardown is not reported.
 847  */
 848 int
 849 hfs_vnop_reclaim(struct vnop_reclaim_args *ap)
 850 {
 851         struct vnode *vp = ap->a_vp;
 852         struct cnode *cp;
 853         struct filefork *fp = NULL;      /* fork owned by vp, freed below */
 854         struct filefork *altfp = NULL;   /* the cnode's other fork, if any */
 855         struct hfsmount *hfsmp = VTOHFS(vp);
 856         vfs_context_t ctx = ap->a_context;
 857         int reclaim_cnode = 0;           /* set once the cnode left the hash */
 860
 862         cp = VTOC(vp);
 863
 864         /*
 865          * We don't take the truncate lock since by the time reclaim comes along,
 866          * all dirty pages have been synced and nobody should be competing
 867          * with us for this thread.
 868          */
 869         (void) hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
 870
 871         /*
 872          * Sync to disk any remaining data in the cnode/vnode.  This includes
 873          * a call to hfs_update if the cnode has outbound data.
 874          *
 875          * If C_NOEXISTS is set on the cnode, then there's nothing teardown needs to do
 876          * because the catalog entry for this cnode is already gone.
          *
          * Reclaim cannot be refused, so the teardown result is explicitly
          * discarded rather than stored in a variable nobody reads.
 877          */
 878         if (!ISSET(cp->c_flag, C_NOEXISTS)) {
 879                 (void) hfs_cnode_teardown(vp, ctx, 1);
 880         }
 881
 882         /*
 883          * Keep track of an inactive hot file.
 884          */
 885         if (!vnode_isdir(vp) &&
 886             !vnode_issystem(vp) &&
 887             !(cp->c_flag & (C_DELETED | C_NOEXISTS)) ) {
 888                 (void) hfs_addhotfile(vp);
 889         }
 890         vnode_removefsref(vp);
 891
 892         /*
 893          * Find file fork for this vnode (if any)
 894          * Also check if another fork is active
 895          */
 896         if (cp->c_vp == vp) {
 897                 fp = cp->c_datafork;
 898                 altfp = cp->c_rsrcfork;
 899
 900                 cp->c_datafork = NULL;
 901                 cp->c_vp = NULL;
 902         } else if (cp->c_rsrc_vp == vp) {
 903                 fp = cp->c_rsrcfork;
 904                 altfp = cp->c_datafork;
 905
 906                 cp->c_rsrcfork = NULL;
 907                 cp->c_rsrc_vp = NULL;
 908         } else {
 909                 panic("hfs_vnop_reclaim: vp points to wrong cnode (vp=%p cp->c_vp=%p cp->c_rsrc_vp=%p)\n", vp, cp->c_vp, cp->c_rsrc_vp);
 910         }
 911         /*
 912          * On the last fork, remove the cnode from its hash chain.
 913          */
 914         if (altfp == NULL) {
 915                 /* If we can't remove it then the cnode must persist! */
 916                 if (hfs_chashremove(hfsmp, cp) == 0)
 917                         reclaim_cnode = 1;
 918                 /*
 919                  * Remove any directory hints
 920                  */
 921                 if (vnode_isdir(vp)) {
 922                         hfs_reldirhints(cp, 0);
 923                 }
 924
 925                 if(cp->c_flag & C_HARDLINK) {
 926                         hfs_relorigins(cp);
 927                 }
 928         }
 929         /* Release the file fork and related data */
 930         if (fp) {
 931                 /* Dump cached symlink data */
 932                 if (vnode_islnk(vp) && (fp->ff_symlinkptr != NULL)) {
 933                         FREE(fp->ff_symlinkptr, M_TEMP);
 934                 }
 935                 FREE_ZONE(fp, sizeof(struct filefork), M_HFSFORK);
 936         }
 937
 938         /*
 939          * If there was only one active fork then we can release the cnode.
 940          */
 941         if (reclaim_cnode) {
 942                 hfs_chashwakeup(hfsmp, cp, H_ALLOC | H_TRANSIT);
                     /* Drop the lock first; cp is not touched after the release. */
 943                 hfs_unlock(cp);
 944                 hfs_reclaim_cnode(cp);
 945         }
 946         else  {
 947                 /*
 948                  * cnode in use.  If it is a directory, it could have
 949                  * no live forks. Just release the lock.
 950                  */
 951                 hfs_unlock(cp);
 952         }
 953
 954         vnode_clearfsnode(vp);
 955         return (0);
 956 }
 957
 958
 959 extern int (**hfs_vnodeop_p) (void *);  /* used by hfs_getnewvnode when no other vector applies */
 960 extern int (**hfs_specop_p)  (void *);  /* used for VBLK and VCHR vnodes */
 961 #if FIFO
 962 extern int (**hfs_fifoop_p)  (void *);  /* used for VFIFO vnodes */
 963 #endif
 964
 965 #if CONFIG_HFS_STD
 966 extern int (**hfs_std_vnodeop_p) (void *);  /* used when the mount has HFS_STANDARD set */
 967 #endif
 968
 969 /*
 970  * hfs_getnewvnode - get new default vnode
 971  *
 972  * The vnode is returned with an iocount and the cnode locked
      *
      * Looks up (or allocates) the cnode for attrp->ca_fileid and returns the
      * existing vnode for the requested fork, or creates a new vnode and
      * attaches it to the cnode.
      *
      * Parameters:
      *   hfsmp      mount the vnode belongs to
      *   dvp        parent directory vnode, may be NULL
      *   cnp        name used for the lookup/create, may be NULL
      *   descp      catalog descriptor; when a new cnode is initialized the
      *              descriptor is copied into it and the caller's name fields
      *              are cleared
      *   flags      GNV_* request flags; GNV_WANTRSRC, GNV_SKIPLOCK, GNV_CREATE
      *              and GNV_NOCACHE are examined here
      *   attrp      catalog attributes for the item
      *   forkp      catalog fork data, may be NULL
      *   vpp        receives the vnode on success
      *   out_flags  zeroed on entry; may come back with GNV_NEW_CNODE,
      *              GNV_CAT_DELETED or GNV_CAT_ATTRCHANGED
      *
      * Returns 0 on success, ENOENT, ENOTSUP (FIFO vnode in a kernel built
      * without FIFO), EINVAL (bad vnode type), ERECYCLE (caller should re-drive
      * the lookup), or the error from vnode_create.
 973  */
 974 int
 975 hfs_getnewvnode(
 976         struct hfsmount *hfsmp,
 977         struct vnode *dvp,
 978         struct componentname *cnp,
 979         struct cat_desc *descp,
 980         int flags,
 981         struct cat_attr *attrp,
 982         struct cat_fork *forkp,
 983         struct vnode **vpp,
 984         int *out_flags)
 985 {
 986         struct mount *mp = HFSTOVFS(hfsmp);
 987         struct vnode *vp = NULL;
 988         struct vnode **cvpp;
 989         struct vnode *tvp = NULLVP;
 990         struct cnode *cp = NULL;
 991         struct filefork *fp = NULL;
 992         int hfs_standard = 0;
 993         int retval;
 994         int issystemfile;
 995         int wantrsrc;
 996         int hflags = 0;
 997         struct vnode_fsparam vfsp;
 998         enum vtype vtype;
 999 #if QUOTA
1000         int i;
1001 #endif /* QUOTA */
1002
             /*
              * Zero out the out_flags before anything can return, so that every
              * exit -- including the early error returns below -- leaves them
              * defined for the caller.
              */
             *out_flags = 0;

1003         hfs_standard = (hfsmp->hfs_flags & HFS_STANDARD);
1004
1005         if (attrp->ca_fileid == 0) {
1006                 *vpp = NULL;
1007                 return (ENOENT);
1008         }
1009
1010 #if !FIFO
1011         if (IFTOVT(attrp->ca_mode) == VFIFO) {
1012                 *vpp = NULL;
1013                 return (ENOTSUP);
1014         }
1015 #endif /* !FIFO */
1016         vtype = IFTOVT(attrp->ca_mode);
1017         issystemfile = (descp->cd_flags & CD_ISMETA) && (vtype == VREG);
1018         wantrsrc = flags & GNV_WANTRSRC;
1019
1020         /* Sanity check the vtype and mode */
1021         if (vtype == VBAD) {
1022                 /* Mark the FS as corrupt and bail out */
1023                 hfs_mark_volume_inconsistent(hfsmp);
                     /* Like the other early error exits, hand back no vnode. */
                     *vpp = NULL;
1024                 return EINVAL;
1025         }
1026
1030 #ifdef HFS_CHECK_LOCK_ORDER
1031         /*
1032          * The only case where it is permissible to hold the parent cnode
1033          * lock is during a create operation (hfs_makenode) or when
1034          * we don't need the cnode lock (GNV_SKIPLOCK).
1035          */
1036         if ((dvp != NULL) &&
1037             (flags & (GNV_CREATE | GNV_SKIPLOCK)) == 0 &&
1038             VTOC(dvp)->c_lockowner == current_thread()) {
1039                 panic("hfs_getnewvnode: unexpected hold of parent cnode %p", VTOC(dvp));
1040         }
1041 #endif /* HFS_CHECK_LOCK_ORDER */
1042
1043         /*
1044          * Get a cnode (new or existing)
1045          */
1046         cp = hfs_chash_getcnode(hfsmp, attrp->ca_fileid, vpp, wantrsrc,
1047                                                         (flags & GNV_SKIPLOCK), out_flags, &hflags);
1048
1049         /*
1050          * If the id is no longer valid for lookups we'll get back a NULL cp.
1051          */
1052         if (cp == NULL) {
1053                 return (ENOENT);
1054         }
1055
1056         /*
1057          * If we get a cnode/vnode pair out of hfs_chash_getcnode, then update the
1058          * descriptor in the cnode as needed if the cnode represents a hardlink.
1059          * We want the caller to get the most up-to-date copy of the descriptor
1060          * as possible. However, we only do anything here if there was a valid vnode.
1061          * If there isn't a vnode, then the cnode is brand new and needs to be initialized
1062          * as it doesn't have a descriptor or cat_attr yet.
1063          *
1064          * If we are about to replace the descriptor with the user-supplied one, then validate
1065          * that the descriptor correctly acknowledges this item is a hardlink.  We could be
1066          * subject to a race where the calling thread invoked cat_lookup, got a valid lookup
1067          * result but the file was not yet a hardlink. With sufficient delay between there
1068          * and here, we might accidentally copy in the raw inode ID into the descriptor in the
1069          * call below.  If the descriptor's CNID is the same as the fileID then it must
1070          * not yet have been a hardlink when the lookup occurred.
1071          */
1072
1073         if (!(hfs_checkdeleted(cp))) {
1074                 if ((cp->c_flag & C_HARDLINK) && descp->cd_nameptr && descp->cd_namelen > 0) {
1075                         /* If cnode is uninitialized, its c_attr will be zeroed out; cnids won't match. */
1076                         if ((descp->cd_cnid == cp->c_attr.ca_fileid)  &&
1077                                         (attrp->ca_linkcount != cp->c_attr.ca_linkcount)){
1078                                 if ((flags & GNV_SKIPLOCK) == 0) {
1079                                         /*
1080                                          * Then we took the lock. Drop it before calling
1081                                          * vnode_put, which may invoke hfs_vnop_inactive and need to take
1082                                          * the cnode lock again.
1083                                          */
1084                                         hfs_unlock(cp);
1085                                 }
1086
1087                                 /*
1088                                  * Emit ERECYCLE and GNV_CAT_ATTRCHANGED to
1089                                  * force a re-drive in the lookup routine.
1090                                  * Drop the iocount on the vnode obtained from
1091                                  * chash_getcnode if needed.
1092                                  */
1093                                 if (*vpp != NULL) {
1094                                         vnode_put (*vpp);
1095                                         *vpp = NULL;
1096                                 }
1097
1098                                 /*
1099                                  * If we raced with VNOP_RECLAIM for this vnode, the hash code could
1100                                  * have observed it after the c_vp or c_rsrc_vp fields had been torn down;
1101                                  * the hash code peeks at those fields without holding the cnode lock because
1102                                  * it needs to be fast.  As a result, we may have set H_ATTACH in the chash
1103                                  * call above.  Since we're bailing out, unset whatever flags we just set, and
1104                                  * wake up all waiters for this cnode.
1105                                  */
1106                                 if (hflags) {
1107                                         hfs_chashwakeup(hfsmp, cp, hflags);
1108                                 }
1109
1110                                 *out_flags = GNV_CAT_ATTRCHANGED;
1111                                 return ERECYCLE;
1112                         }
1113                         else {
1114                                 /*
1115                                  * Otherwise, CNID != fileid. Go ahead and copy in the new descriptor.
1116                                  *
1117                                  * Replacing the descriptor here is fine because we looked up the item without
1118                                  * a vnode in hand before.  If a vnode existed, its identity must be attached to this
1119                                  * item.  We are not susceptible to the lookup fastpath issue at this point.
1120                                  */
1121                                 replace_desc(cp, descp);
1122
1123                                 /*
1124                                  * This item was a hardlink, and its name needed to be updated. By replacing the
1125                                  * descriptor above, we've now updated the cnode's internal representation of
1126                                  * its link ID/CNID, parent ID, and its name.  However, VFS must now be alerted
1127                                  * to the fact that this vnode now has a new parent, since we cannot guarantee
1128                                  * that the new link lived in the same directory as the alternative name for
1129                                  * this item.
1130                                  */
1131                                 if ((*vpp != NULL) && (cnp)) {
1132                                         /* we could be requesting the rsrc of a hardlink file... */
1133                                         vnode_update_identity (*vpp, dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash,
1134                                                         (VNODE_UPDATE_PARENT | VNODE_UPDATE_NAME));
1135                                 }
1136                         }
1137                 }
1138         }
1139
1140         /* Check if we found a matching vnode */
1141         if (*vpp != NULL) {
1142                 return (0);
1143         }
1144
1145         /*
1146          * If this is a new cnode then initialize it.
1147          */
1148         if (ISSET(cp->c_hflag, H_ALLOC)) {
1149                 lck_rw_init(&cp->c_truncatelock, hfs_rwlock_group, hfs_lock_attr);
1150 #if HFS_COMPRESSION
1151                 cp->c_decmp = NULL;
1152 #endif
1153
1154                 /* Make sure its still valid (ie exists on disk). */
1155                 if (!(flags & GNV_CREATE)) {
1156                         int error = 0;
1157                         if (!hfs_valid_cnode (hfsmp, dvp, (wantrsrc ? NULL : cnp), cp->c_fileid, attrp, &error)) {
1158                                 hfs_chash_abort(hfsmp, cp);
1159                                 if ((flags & GNV_SKIPLOCK) == 0) {
1160                                         hfs_unlock(cp);
1161                                 }
1162                                 hfs_reclaim_cnode(cp);
1163                                 *vpp = NULL;
1164                                 /*
1165                                  * If we hit this case, that means that the entry was there in the catalog when
1166                                  * we did a cat_lookup earlier.  Think hfs_lookup.  However, in between the time
1167                                  * that we checked the catalog and the time we went to get a vnode/cnode for it,
1168                                  * it had been removed from the namespace and the vnode totally reclaimed.  As a result,
1169                                  * it's not there in the catalog during the check in hfs_valid_cnode and we bubble out
1170                                  * an ENOENT.  To indicate to the caller that they should really double-check the
1171                                  * entry (it could have been renamed over and gotten a new fileid), we mark a bit
1172                                  * in the output flags.
1173                                  */
1174                                 if (error == ENOENT) {
1175                                         *out_flags = GNV_CAT_DELETED;
1176                                         return ENOENT;
1177                                 }
1178
1179                                 /*
1180                                  * Also, we need to protect the cat_attr acquired during hfs_lookup and passed into
1181                                  * this function as an argument because the catalog may have changed w.r.t hardlink
1182                                  * link counts and the firstlink field.  If that validation check fails, then let
1183                                  * lookup re-drive itself to get valid/consistent data with the same failure condition below.
1184                                  */
1185                                 if (error == ERECYCLE) {
1186                                         *out_flags = GNV_CAT_ATTRCHANGED;
1187                                         return (ERECYCLE);
1188                                 }

                                     /*
                                      * The cnode has already been released above, so
                                      * control must never fall through to the
                                      * initialization code below, which would write
                                      * into freed memory.  Report whatever error the
                                      * validation produced, or ENOENT if it gave none.
                                      */
                                     return (error ? error : ENOENT);
1189                         }
1190                 }
1191                 bcopy(attrp, &cp->c_attr, sizeof(struct cat_attr));
1192                 bcopy(descp, &cp->c_desc, sizeof(struct cat_desc));
1193
1194                 /* The name was inherited so clear descriptor state... */
1195                 descp->cd_namelen = 0;
1196                 descp->cd_nameptr = NULL;
1197                 descp->cd_flags &= ~CD_HASBUF;
1198
1199                 /* Tag hardlinks */
1200                 if ((vtype == VREG || vtype == VDIR) &&
1201                     ((descp->cd_cnid != attrp->ca_fileid) ||
1202                      (attrp->ca_recflags & kHFSHasLinkChainMask))) {
1203                         cp->c_flag |= C_HARDLINK;
1204                 }
1205                 /*
1206                  * Fix-up dir link counts.
1207                  *
1208                  * Earlier versions of Leopard used ca_linkcount for posix
1209                  * nlink support (effectively the sub-directory count + 2).
1210                  * That is now accomplished using the ca_dircount field with
1211                  * the corresponding kHFSHasFolderCountMask flag.
1212                  *
1213                  * For directories the ca_linkcount is the true link count,
1214                  * tracking the number of actual hardlinks to a directory.
1215                  *
1216                  * We only do this if the mount has HFS_FOLDERCOUNT set;
1217                  * at the moment, we only set that for HFSX volumes.
1218                  */
1219                 if ((hfsmp->hfs_flags & HFS_FOLDERCOUNT) &&
1220                     (vtype == VDIR) &&
1221                     !(attrp->ca_recflags & kHFSHasFolderCountMask) &&
1222                     (cp->c_attr.ca_linkcount > 1)) {
1223                         if (cp->c_attr.ca_entries == 0)
1224                                 cp->c_attr.ca_dircount = 0;
1225                         else
1226                                 cp->c_attr.ca_dircount = cp->c_attr.ca_linkcount - 2;
1227
1228                         cp->c_attr.ca_linkcount = 1;
1229                         cp->c_attr.ca_recflags |= kHFSHasFolderCountMask;
1230                         if ( !(hfsmp->hfs_flags & HFS_READ_ONLY) )
1231                                 cp->c_flag |= C_MODIFIED;
1232                 }
1233 #if QUOTA
1234                 if (hfsmp->hfs_flags & HFS_QUOTAS) {
1235                         for (i = 0; i < MAXQUOTAS; i++)
1236                                 cp->c_dquot[i] = NODQUOT;
1237                 }
1238 #endif /* QUOTA */
1239                 /* Mark the output flag that we're vending a new cnode */
1240                 *out_flags |= GNV_NEW_CNODE;
1241         }
1242
             /*
              * Pick the cnode slot (cvpp) that will receive the new vnode.
              * Directories have no fork; everything else gets a filefork.
              */
1243         if (vtype == VDIR) {
1244                 if (cp->c_vp != NULL)
1245                         panic("hfs_getnewvnode: orphaned vnode (data)");
1246                 cvpp = &cp->c_vp;
1247         } else {
1248                 if (forkp && attrp->ca_blocks < forkp->cf_blocks)
1249                         panic("hfs_getnewvnode: bad ca_blocks (too small)");
1250                 /*
1251                  * Allocate and initialize a file fork...
1252                  */
1253                 MALLOC_ZONE(fp, struct filefork *, sizeof(struct filefork),
1254                         M_HFSFORK, M_WAITOK);
1255                 fp->ff_cp = cp;
1256                 if (forkp)
1257                         bcopy(forkp, &fp->ff_data, sizeof(struct cat_fork));
1258                 else
1259                         bzero(&fp->ff_data, sizeof(struct cat_fork));
1260                 rl_init(&fp->ff_invalidranges);
1261                 fp->ff_sysfileinfo = 0;
1262
1263                 if (wantrsrc) {
1264                         if (cp->c_rsrcfork != NULL)
1265                                 panic("hfs_getnewvnode: orphaned rsrc fork");
1266                         if (cp->c_rsrc_vp != NULL)
1267                                 panic("hfs_getnewvnode: orphaned vnode (rsrc)");
1268                         cp->c_rsrcfork = fp;
1269                         cvpp = &cp->c_rsrc_vp;
1270                         if ( (tvp = cp->c_vp) != NULLVP )
1271                                 cp->c_flag |= C_NEED_DVNODE_PUT;
1272                 } else {
1273                         if (cp->c_datafork != NULL)
1274                                 panic("hfs_getnewvnode: orphaned data fork");
1275                         if (cp->c_vp != NULL)
1276                                 panic("hfs_getnewvnode: orphaned vnode (data)");
1277                         cp->c_datafork = fp;
1278                         cvpp = &cp->c_vp;
1279                         if ( (tvp = cp->c_rsrc_vp) != NULLVP)
1280                                 cp->c_flag |= C_NEED_RVNODE_PUT;
1281                 }
1282         }
1283         if (tvp != NULLVP) {
1284                 /*
1285                  * grab an iocount on the vnode we weren't
1286                  * interested in (i.e. we want the resource fork
1287                  * but the cnode already has the data fork)
1288                  * to prevent it from being
1289                  * recycled by us when we call vnode_create
1290                  * which will result in a deadlock when we
1291                  * try to take the cnode lock in hfs_vnop_fsync or
1292                  * hfs_vnop_reclaim... vnode_get can be called here
1293                  * because we already hold the cnode lock which will
1294                  * prevent the vnode from changing identity until
1295                  * we drop it.. vnode_get will not block waiting for
1296                  * a change of state... however, it will return an
1297                  * error if the current iocount == 0 and we've already
1298                  * started to terminate the vnode... we don't need/want to
1299                  * grab an iocount in the case since we can't cause
1300                  * the filesystem to be re-entered on this thread for this vp
1301                  *
1302                  * the matching vnode_put will happen in hfs_unlock
1303                  * after we've dropped the cnode lock
1304                  */
1305                 if ( vnode_get(tvp) != 0)
1306                         cp->c_flag &= ~(C_NEED_RVNODE_PUT | C_NEED_DVNODE_PUT);
1307         }
             /* Fill in the creation parameters handed to vnode_create. */
1308         vfsp.vnfs_mp = mp;
1309         vfsp.vnfs_vtype = vtype;
1310         vfsp.vnfs_str = "hfs";
1311         if ((cp->c_flag & C_HARDLINK) && (vtype == VDIR)) {
1312                 vfsp.vnfs_dvp = NULL;  /* no parent for me! */
1313                 vfsp.vnfs_cnp = NULL;  /* no name for me! */
1314         } else {
1315                 vfsp.vnfs_dvp = dvp;
1316                 vfsp.vnfs_cnp = cnp;
1317         }
1318         vfsp.vnfs_fsnode = cp;
1319
1320         /*
1321          * Special Case HFS Standard VNOPs from HFS+, since
1322          * HFS standard is readonly/deprecated as of 10.6
1323          */
1324
1325 #if FIFO
1326         if (vtype == VFIFO )
1327                 vfsp.vnfs_vops = hfs_fifoop_p;
1328         else
1329 #endif
1330         if (vtype == VBLK || vtype == VCHR)
1331                 vfsp.vnfs_vops = hfs_specop_p;
1332 #if CONFIG_HFS_STD
1333         else if (hfs_standard)
1334                 vfsp.vnfs_vops = hfs_std_vnodeop_p;
1335 #endif
1336         else
1337                 vfsp.vnfs_vops = hfs_vnodeop_p;
1338
1339         if (vtype == VBLK || vtype == VCHR)
1340                 vfsp.vnfs_rdev = attrp->ca_rdev;
1341         else
1342                 vfsp.vnfs_rdev = 0;
1343
1344         if (forkp)
1345                 vfsp.vnfs_filesize = forkp->cf_size;
1346         else
1347                 vfsp.vnfs_filesize = 0;
1348
             /*
              * VNFS_NOCACHE is requested when there is no parent or name, the
              * name is not flagged MAKEENTRY, or the caller passed GNV_NOCACHE.
              */
1349         vfsp.vnfs_flags = VNFS_ADDFSREF;
1350         if (dvp == NULLVP || cnp == NULL || !(cnp->cn_flags & MAKEENTRY) || (flags & GNV_NOCACHE))
1351                 vfsp.vnfs_flags |= VNFS_NOCACHE;
1352
1353         /* Tag system files */
1354         vfsp.vnfs_marksystem = issystemfile;
1355
1356         /* Tag root directory */
1357         if (descp->cd_cnid == kHFSRootFolderID)
1358                 vfsp.vnfs_markroot = 1;
1359         else
1360                 vfsp.vnfs_markroot = 0;
1361
1362         if ((retval = vnode_create(VNCREATE_FLAVOR, VCREATESIZE, &vfsp, cvpp))) {
                     /* Undo the fork attachment made above before bailing out. */
1363                 if (fp) {
1364                         if (fp == cp->c_datafork)
1365                                 cp->c_datafork = NULL;
1366                         else
1367                                 cp->c_rsrcfork = NULL;
1368
1369                         FREE_ZONE(fp, sizeof(struct filefork), M_HFSFORK);
1370                 }
1371                 /*
1372                  * If this is a newly created cnode or a vnode reclaim
1373                  * occurred during the attachment, then cleanup the cnode.
                  *
                  * NOTE(review): unlike the hfs_valid_cnode failure path above,
                  * this branch releases the cnode without first dropping the
                  * cnode lock when GNV_SKIPLOCK is clear -- confirm against
                  * hfs_reclaim_cnode whether that is intended.
1374                  */
1375                 if ((cp->c_vp == NULL) && (cp->c_rsrc_vp == NULL)) {
1376                         hfs_chash_abort(hfsmp, cp);
1377                         hfs_reclaim_cnode(cp);
1378                 }
1379                 else {
1380                         hfs_chashwakeup(hfsmp, cp, H_ALLOC | H_ATTACH);
1381                         if ((flags & GNV_SKIPLOCK) == 0){
1382                                 hfs_unlock(cp);
1383                         }
1384                 }
1385                 *vpp = NULL;
1386                 return (retval);
1387         }
1388         vp = *cvpp;
1389         vnode_settag(vp, VT_HFS);
1390         if (cp->c_flag & C_HARDLINK) {
1391                 vnode_setmultipath(vp);
1392         }
1393         /*
1394          * Tag resource fork vnodes as needing an VNOP_INACTIVE
1395          * so that any deferred removes (open unlinked files)
1396          * have the chance to process the resource fork.
1397          */
1398         if (VNODE_IS_RSRC(vp)) {
1399                 int err;
1400                 KERNEL_DEBUG_CONSTANT((FSDBG_CODE(DBG_FSRW, 37)), cp->c_vp, cp->c_rsrc_vp, 0, 0, 0);
1401
1402                 /* Force VL_NEEDINACTIVE on this vnode */
1403                 err = vnode_ref(vp);
1404                 if (err == 0) {
1405                         vnode_rele(vp);
1406                 }
1407         }
1408         hfs_chashwakeup(hfsmp, cp, H_ALLOC | H_ATTACH);
1409
1410         /*
1411          * Stop tracking an active hot file.
1412          */
1413         if (!(flags & GNV_CREATE) && (vtype != VDIR) && !issystemfile) {
1414                 (void) hfs_removehotfile(vp);
1415         }
1416
1417 #if CONFIG_PROTECT
1418         /* Initialize the cp data structures. The key should be in place now. */
1419         if (!issystemfile && (*out_flags & GNV_NEW_CNODE)) {
1420                 cp_entry_init(cp, mp);
1421         }
1422 #endif
1423
1424         *vpp = vp;
1425         return (0);
1426 }
1427
1428
1429 static void
1430 hfs_reclaim_cnode(struct cnode *cp)
1431 {
1432 #if QUOTA
1433         int i;
1434
1435         for (i = 0; i < MAXQUOTAS; i++) {
1436                 if (cp->c_dquot[i] != NODQUOT) {
1437                         dqreclaim(cp->c_dquot[i]);
1438                         cp->c_dquot[i] = NODQUOT;
1439                 }
1440         }
1441 #endif /* QUOTA */
1442
1443         /*
1444          * If the descriptor has a name then release it
1445          */
1446         if ((cp->c_desc.cd_flags & CD_HASBUF) && (cp->c_desc.cd_nameptr != 0)) {
1447                 const char *nameptr;
1448
1449                 nameptr = (const char *) cp->c_desc.cd_nameptr;
1450                 cp->c_desc.cd_nameptr = 0;
1451                 cp->c_desc.cd_flags &= ~CD_HASBUF;
1452                 cp->c_desc.cd_namelen = 0;
1453                 vfs_removename(nameptr);
1454         }
1455
1456         /*
1457          * We only call this function if we are in hfs_vnop_reclaim and
1458          * attempting to reclaim a cnode with only one live fork.  Because the vnode
1459          * went through reclaim, any future attempts to use this item will have to
1460          * go through lookup again, which will need to create a new vnode.  Thus,
1461          * destroying the locks below is safe.
1462          */
1463
1464         lck_rw_destroy(&cp->c_rwlock, hfs_rwlock_group);
1465         lck_rw_destroy(&cp->c_truncatelock, hfs_rwlock_group);
1466 #if HFS_COMPRESSION
1467         if (cp->c_decmp) {
1468                 decmpfs_cnode_destroy(cp->c_decmp);
1469                 FREE_ZONE(cp->c_decmp, sizeof(*(cp->c_decmp)), M_DECMPFS_CNODE);
1470         }
1471 #endif
1472 #if CONFIG_PROTECT
1473         cp_entry_destroy(cp->c_cpentry);
1474         cp->c_cpentry = NULL;
1475 #endif
1476
1477
1478         bzero(cp, sizeof(struct cnode));
1479         FREE_ZONE(cp, sizeof(struct cnode), M_HFSNODE);
1480 }
1481
1482
1483 /*
1484  * hfs_valid_cnode
1485  *
1486  * This function is used to validate data that is stored in-core against what is contained
1487  * in the catalog.  Common uses include validating that the parent-child relationship still exist
1488  * for a specific directory entry (guaranteeing it has not been renamed into a different spot) at
1489  * the point of the check.
1490  */
1491 int
1492 hfs_valid_cnode(struct hfsmount *hfsmp, struct vnode *dvp, struct componentname *cnp,
1493                 cnid_t cnid, struct cat_attr *cattr, int *error)
1494 {
1495         struct cat_attr attr;
1496         struct cat_desc cndesc;
1497         int stillvalid = 0;
1498         int lockflags;
1499
1500         /* System files are always valid */
1501         if (cnid < kHFSFirstUserCatalogNodeID) {
1502                 *error = 0;
1503                 return (1);
1504         }
1505
1506         /* XXX optimization:  check write count in dvp */
1507
1508         lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
1509
1510         if (dvp && cnp) {
1511                 int lookup = 0;
1512                 struct cat_fork fork;
1513                 bzero(&cndesc, sizeof(cndesc));
1514                 cndesc.cd_nameptr = (const u_int8_t *)cnp->cn_nameptr;
1515                 cndesc.cd_namelen = cnp->cn_namelen;
1516                 cndesc.cd_parentcnid = VTOC(dvp)->c_fileid;
1517                 cndesc.cd_hint = VTOC(dvp)->c_childhint;
1518
1519                 /*
1520                  * We have to be careful when calling cat_lookup.  The result argument
1521                  * 'attr' may get different results based on whether or not you ask
1522                  * for the filefork to be supplied as output.  This is because cat_lookupbykey
1523                  * will attempt to do basic validation/smoke tests against the resident
1524                  * extents if there are no overflow extent records, but it needs someplace
1525                  * in memory to store the on-disk fork structures.
1526                  *
1527                  * Since hfs_lookup calls cat_lookup with a filefork argument, we should
1528                  * do the same here, to verify that block count differences are not
1529                  * due to calling the function with different styles.  cat_lookupbykey
1530                  * will request the volume be fsck'd if there is true on-disk corruption
1531                  * where the number of blocks does not match the number generated by
1532                  * summing the number of blocks in the resident extents.
1533                  */
1534
1535                 lookup = cat_lookup (hfsmp, &cndesc, 0, 0, NULL, &attr, &fork, NULL);
1536
1537                 if ((lookup == 0) && (cnid == attr.ca_fileid)) {
1538                         stillvalid = 1;
1539                         *error = 0;
1540                 }
1541                 else {
1542                         *error = ENOENT;
1543                 }
1544
1545                 /*
1546                  * In hfs_getnewvnode, we may encounter a time-of-check vs. time-of-vnode creation
1547                  * race.  Specifically, if there is no vnode/cnode pair for the directory entry
1548                  * being looked up, we have to go to the catalog.  But since we don't hold any locks (aside
1549                  * from the dvp in 'shared' mode) there is nothing to protect us against the catalog record
1550                  * changing in between the time we do the cat_lookup there and the time we re-grab the
1551                  * catalog lock above to do another cat_lookup.
1552                  *
1553                  * However, we need to check more than just the CNID and parent-child name relationships above.
1554                  * Hardlinks can suffer the same race in the following scenario:  Suppose we do a
1555                  * cat_lookup, and find a leaf record and a raw inode for a hardlink.  Now, we have
1556                  * the cat_attr in hand (passed in above).  But in between then and now, the vnode was
1557                  * created by a competing hfs_getnewvnode call, and is manipulated and reclaimed before we get
1558                  * a chance to do anything.  This is possible if there are a lot of threads thrashing around
1559                  * with the cnode hash.  In this case, if we don't check/validate the cat_attr in-hand, we will
1560                  * blindly stuff it into the cnode, which will make the in-core data inconsistent with what is
1561                  * on disk.  So validate the cat_attr below, if required.  This race cannot happen if the cnode/vnode
1562                  * already exists, as it does in the case of rename and delete.
1563                  */
1564                 if (stillvalid && cattr != NULL) {
1565                         if (cattr->ca_linkcount != attr.ca_linkcount) {
1566                                 stillvalid = 0;
1567                                 *error = ERECYCLE;
1568                                 goto notvalid;
1569                         }
1570
1571                         if (cattr->ca_union1.cau_linkref != attr.ca_union1.cau_linkref) {
1572                                 stillvalid = 0;
1573                                 *error = ERECYCLE;
1574                                 goto notvalid;
1575                         }
1576
1577                         if (cattr->ca_union3.cau_firstlink != attr.ca_union3.cau_firstlink) {
1578                                 stillvalid = 0;
1579                                 *error = ERECYCLE;
1580                                 goto notvalid;
1581                         }
1582
1583                         if (cattr->ca_union2.cau_blocks != attr.ca_union2.cau_blocks) {
1584                                 stillvalid = 0;
1585                                 *error = ERECYCLE;
1586                                 goto notvalid;
1587                         }
1588                 }
1589         } else {
1590                 if (cat_idlookup(hfsmp, cnid, 0, 0, NULL, NULL, NULL) == 0) {
1591                         stillvalid = 1;
1592                         *error = 0;
1593                 }
1594                 else {
1595                         *error = ENOENT;
1596                 }
1597         }
1598 notvalid:
1599         hfs_systemfile_unlock(hfsmp, lockflags);
1600
1601         return (stillvalid);
1602 }
1603
1604
1605 /*
1606  * Per HI and Finder requirements, HFS should add in the
1607  * date/time that a particular directory entry was added
1608  * to the containing directory.
1609  * This is stored in the extended Finder Info for the
1610  * item in question.
1611  *
1612  * Note that this field is also set explicitly in the hfs_vnop_setxattr code.
1613  * We must ignore user attempts to set this part of the finderinfo, and
1614  * so we need to save a local copy of the date added, write in the user
1615  * finderinfo, then stuff the value back in.
1616  */
1617 void hfs_write_dateadded (struct cat_attr *attrp, u_int32_t dateadded) {
1618         u_int8_t *finfo = NULL;
1619
1620         /* overlay the FinderInfo to the correct pointer, and advance */
1621         finfo = (u_int8_t*)attrp->ca_finderinfo;
1622         finfo = finfo + 16;
1623
1624         /*
1625          * Make sure to write it out as big endian, since that's how
1626          * finder info is defined.
1627          *
1628          * NOTE: This is a Unix-epoch timestamp, not a HFS/Traditional Mac timestamp.
1629          */
1630         if (S_ISREG(attrp->ca_mode)) {
1631                 struct FndrExtendedFileInfo *extinfo = (struct FndrExtendedFileInfo *)finfo;
1632                 extinfo->date_added = OSSwapHostToBigInt32(dateadded);
1633                 attrp->ca_recflags |= kHFSHasDateAddedMask;
1634         }
1635         else if (S_ISDIR(attrp->ca_mode)) {
1636                 struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)finfo;
1637                 extinfo->date_added = OSSwapHostToBigInt32(dateadded);
1638                                 attrp->ca_recflags |= kHFSHasDateAddedMask;
1639         }
1640         /* If it were neither directory/file, then we'd bail out */
1641         return;
1642 }
1643
1644
1645 u_int32_t hfs_get_dateadded (struct cnode *cp) {
1646         u_int8_t *finfo = NULL;
1647         u_int32_t dateadded = 0;
1648
1649         if ((cp->c_attr.ca_recflags & kHFSHasDateAddedMask) == 0) {
1650                 /* Date added was never set.  Return 0. */
1651                 return dateadded;
1652         }
1653
1654
1655         /* overlay the FinderInfo to the correct pointer, and advance */
1656         finfo = (u_int8_t*)cp->c_finderinfo;
1657         finfo = finfo + 16;
1658
1659         /*
1660          * FinderInfo is written out in big endian... make sure to convert it to host
1661          * native before we use it.
1662          */
1663         if (S_ISREG(cp->c_attr.ca_mode)) {
1664                 struct FndrExtendedFileInfo *extinfo = (struct FndrExtendedFileInfo *)finfo;
1665                 dateadded = OSSwapBigToHostInt32 (extinfo->date_added);
1666         }
1667         else if (S_ISDIR(cp->c_attr.ca_mode)) {
1668                 struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)finfo;
1669                 dateadded = OSSwapBigToHostInt32 (extinfo->date_added);
1670         }
1671
1672         return dateadded;
1673 }
1674
1675 /*
1676  * Per HI and Finder requirements, HFS maintains a "write/generation count"
1677  * for each file that is incremented on any write & pageout.  It should start
1678  * at 1 to reserve "0" as a special value.  If it should ever wrap around,
1679  * it will skip using 0.
1680  *
1681  * Note that this field is also set explicitly in the hfs_vnop_setxattr code.
1682  * We must ignore user attempts to set this part of the finderinfo, and
1683  * so we need to save a local copy of the date added, write in the user
1684  * finderinfo, then stuff the value back in.
1685  */
1686 void hfs_write_gencount (struct cat_attr *attrp, uint32_t gencount) {
1687         u_int8_t *finfo = NULL;
1688
1689         /* overlay the FinderInfo to the correct pointer, and advance */
1690         finfo = (u_int8_t*)attrp->ca_finderinfo;
1691         finfo = finfo + 16;
1692
1693         /*
1694          * Make sure to write it out as big endian, since that's how
1695          * finder info is defined.
1696          *
1697          * Generation count is only supported for files.
1698          */
1699         if (S_ISREG(attrp->ca_mode)) {
1700                 struct FndrExtendedFileInfo *extinfo = (struct FndrExtendedFileInfo *)finfo;
1701                 extinfo->write_gen_counter = OSSwapHostToBigInt32(gencount);
1702         }
1703
1704         /* If it were neither directory/file, then we'd bail out */
1705         return;
1706 }
1707
1708 /* Increase the gen count by 1; if it wraps around to 0, increment by two */
1709 uint32_t hfs_incr_gencount (struct cnode *cp) {
1710         u_int8_t *finfo = NULL;
1711         u_int32_t gcount = 0;
1712
1713         /* overlay the FinderInfo to the correct pointer, and advance */
1714         finfo = (u_int8_t*)cp->c_finderinfo;
1715         finfo = finfo + 16;
1716
1717         /*
1718          * FinderInfo is written out in big endian... make sure to convert it to host
1719          * native before we use it.
1720          */
1721         if (S_ISREG(cp->c_attr.ca_mode)) {
1722                 struct FndrExtendedFileInfo *extinfo = (struct FndrExtendedFileInfo *)finfo;
1723                 gcount = OSSwapBigToHostInt32 (extinfo->write_gen_counter);
1724
1725                 /* Was it zero to begin with (file originated in 10.8 or earlier?) */
1726                 if (gcount == 0) {
1727                         gcount++;
1728                 }
1729
1730                 /* now bump it */
1731                 gcount++;
1732
1733                 /* Did it wrap around ? */
1734                 if (gcount == 0) {
1735                         gcount++;
1736                 }
1737                 extinfo->write_gen_counter = OSSwapHostToBigInt32 (gcount);
1738         }
1739         else {
1740                 gcount = 0;
1741         }
1742
1743         return gcount;
1744 }
1745
1746 /* Getter for the gen count */
1747 u_int32_t hfs_get_gencount (struct cnode *cp) {
1748         u_int8_t *finfo = NULL;
1749         u_int32_t gcount = 0;
1750
1751         /* overlay the FinderInfo to the correct pointer, and advance */
1752         finfo = (u_int8_t*)cp->c_finderinfo;
1753         finfo = finfo + 16;
1754
1755         /*
1756          * FinderInfo is written out in big endian... make sure to convert it to host
1757          * native before we use it.
1758          */
1759         if (S_ISREG(cp->c_attr.ca_mode)) {
1760                 struct FndrExtendedFileInfo *extinfo = (struct FndrExtendedFileInfo *)finfo;
1761                 gcount = OSSwapBigToHostInt32 (extinfo->write_gen_counter);
1762
1763                 /*
1764                  * Is it zero?  File might originate in 10.8 or earlier. We lie and bump it to 1,
1765                  * since the incrementer code is able to handle this case and will double-increment
1766                  * for us.
1767                  */
1768                 if (gcount == 0) {
1769                         gcount++;
1770                 }
1771         }
1772         else {
1773                 gcount = 0;
1774         }
1775
1776         return gcount;
1777 }
1778
1779 /*
1780  * Touch cnode times based on c_touch_xxx flags
1781  *
1782  * cnode must be locked exclusive
1783  *
1784  * This will also update the volume modify time
1785  */
1786 void
1787 hfs_touchtimes(struct hfsmount *hfsmp, struct cnode* cp)
1788 {
1789         vfs_context_t ctx;
1790         /* don't modify times if volume is read-only */
1791         if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1792                 cp->c_touch_acctime = FALSE;
1793                 cp->c_touch_chgtime = FALSE;
1794                 cp->c_touch_modtime = FALSE;
1795                 return;
1796         }
1797 #if CONFIG_HFS_STD
1798         else if (hfsmp->hfs_flags & HFS_STANDARD) {
1799         /* HFS Standard doesn't support access times */
1800                 cp->c_touch_acctime = FALSE;
1801         }
1802 #endif
1803
1804         ctx = vfs_context_current();
1805         /*
1806          * Skip access time updates if:
1807          *      . MNT_NOATIME is set
1808          *      . a file system freeze is in progress
1809          *      . a file system resize is in progress
1810          *      . the vnode associated with this cnode is marked for rapid aging
1811          */
1812         if (cp->c_touch_acctime) {
1813                 if ((vfs_flags(hfsmp->hfs_mp) & MNT_NOATIME) ||
1814                     (hfsmp->hfs_freezing_proc != NULL) ||
1815                     (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) ||
1816                     (cp->c_vp && ((vnode_israge(cp->c_vp) || (vfs_ctx_skipatime(ctx)))))) {
1817
1818                         cp->c_touch_acctime = FALSE;
1819                 }
1820         }
1821         if (cp->c_touch_acctime || cp->c_touch_chgtime ||
1822                 cp->c_touch_modtime || (cp->c_flag & C_NEEDS_DATEADDED)) {
1823                 struct timeval tv;
1824                 int touchvol = 0;
1825
1826                 microtime(&tv);
1827
1828                 if (cp->c_touch_acctime) {
1829                         cp->c_atime = tv.tv_sec;
1830                         /*
1831                          * When the access time is the only thing changing
1832                          * then make sure its sufficiently newer before
1833                          * committing it to disk.
1834                          */
1835                         if ((((u_int32_t)cp->c_atime - (u_int32_t)(cp)->c_attr.ca_atimeondisk) >
1836                               ATIME_ONDISK_ACCURACY)) {
1837                                 cp->c_flag |= C_MODIFIED;
1838                         }
1839                         cp->c_touch_acctime = FALSE;
1840                 }
1841                 if (cp->c_touch_modtime) {
1842                         cp->c_mtime = tv.tv_sec;
1843                         cp->c_touch_modtime = FALSE;
1844                         cp->c_flag |= C_MODIFIED;
1845                         touchvol = 1;
1846 #if CONFIG_HFS_STD
1847                         /*
1848                          * HFS dates that WE set must be adjusted for DST
1849                          */
1850                         if ((hfsmp->hfs_flags & HFS_STANDARD) && gTimeZone.tz_dsttime) {
1851                                 cp->c_mtime += 3600;
1852                         }
1853 #endif
1854                 }
1855                 if (cp->c_touch_chgtime) {
1856                         cp->c_ctime = tv.tv_sec;
1857                         cp->c_touch_chgtime = FALSE;
1858                         cp->c_flag |= C_MODIFIED;
1859                         touchvol = 1;
1860                 }
1861
1862                 if (cp->c_flag & C_NEEDS_DATEADDED) {
1863                         hfs_write_dateadded (&(cp->c_attr), tv.tv_sec);
1864                         cp->c_flag |= C_MODIFIED;
1865                         /* untwiddle the bit */
1866                         cp->c_flag &= ~C_NEEDS_DATEADDED;
1867                         touchvol = 1;
1868                 }
1869
1870                 /* Touch the volume modtime if needed */
1871                 if (touchvol) {
1872                         MarkVCBDirty(hfsmp);
1873                         HFSTOVCB(hfsmp)->vcbLsMod = tv.tv_sec;
1874                 }
1875         }
1876 }
1877
/*
 * Lock a cnode.
 *
 * cp       - cnode to lock
 * locktype - HFS_SHARED_LOCK takes c_rwlock shared; any other value is
 *            treated as HFS_EXCLUSIVE_LOCK
 * flags    - HFS_LOCK_ALLOW_NOEXISTS lets the call succeed on a cnode
 *            that is marked C_NOEXISTS
 *
 * Returns 0 with the lock held, or ENOENT (with the lock already
 * dropped again) when the cnode is a non-metadata cnode marked
 * C_NOEXISTS and HFS_LOCK_ALLOW_NOEXISTS was not passed.
 *
 * Re-locking a cnode the calling thread already owns exclusively
 * panics, except for the extents and allocation (bitmap) files, which
 * keep a recursion count in c_syslockcount.
 */
int
hfs_lock(struct cnode *cp, enum hfs_locktype locktype, enum hfs_lockflags flags)
{
        void * thread = current_thread();

        if (cp->c_lockowner == thread) {
                /* Only the extents and bitmap files support lock recursion. */
                if ((cp->c_fileid == kHFSExtentsFileID) ||
                    (cp->c_fileid == kHFSAllocationFileID)) {
                        cp->c_syslockcount++;
                } else {
                        panic("hfs_lock: locking against myself!");
                }
        } else if (locktype == HFS_SHARED_LOCK) {
                lck_rw_lock_shared(&cp->c_rwlock);
                /* Shared holders are not tracked individually; store the marker value. */
                cp->c_lockowner = HFS_SHARED_OWNER;

        } else { /* HFS_EXCLUSIVE_LOCK */
                lck_rw_lock_exclusive(&cp->c_rwlock);
                /* Record the owner so recursion and unlock can recognise this thread. */
                cp->c_lockowner = thread;

                /* Only the extents and bitmap files support lock recursion. */
                if ((cp->c_fileid == kHFSExtentsFileID) ||
                    (cp->c_fileid == kHFSAllocationFileID)) {
                        /* First (outermost) acquisition by this thread. */
                        cp->c_syslockcount = 1;
                }
        }

#ifdef HFS_CHECK_LOCK_ORDER
        /*
         * Regular cnodes (non-system files) cannot be locked
         * while holding the journal lock or a system file lock.
         *
         * Note that these checks run after the cnode lock has already
         * been taken above; they only report (printf/panic) a bad order.
         */
        if (!(cp->c_desc.cd_flags & CD_ISMETA) &&
            ((cp->c_fileid > kHFSFirstUserCatalogNodeID) || (cp->c_fileid == kHFSRootFolderID))) {
                vnode_t vp = NULLVP;

                /* Find corresponding vnode. */
                if (cp->c_vp != NULLVP && VTOC(cp->c_vp) == cp) {
                        vp = cp->c_vp;
                } else if (cp->c_rsrc_vp != NULLVP && VTOC(cp->c_rsrc_vp) == cp) {
                        vp = cp->c_rsrc_vp;
                }
                /* Without a vnode there is no way to reach the mount, so skip the checks. */
                if (vp != NULLVP) {
                        struct hfsmount *hfsmp = VTOHFS(vp);

                        if (hfsmp->jnl && (journal_owner(hfsmp->jnl) == thread)) {
                                /* This will eventually be a panic here. */
                                printf("hfs_lock: bad lock order (cnode after journal)\n");
                        }
                        if (hfsmp->hfs_catalog_cp && hfsmp->hfs_catalog_cp->c_lockowner == thread) {
                                panic("hfs_lock: bad lock order (cnode after catalog)");
                        }
                        if (hfsmp->hfs_attribute_cp && hfsmp->hfs_attribute_cp->c_lockowner == thread) {
                                panic("hfs_lock: bad lock order (cnode after attribute)");
                        }
                        if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == thread) {
                                panic("hfs_lock: bad lock order (cnode after extents)");
                        }
                }
        }
#endif /* HFS_CHECK_LOCK_ORDER */

        /*
         * Skip cnodes for regular files that no longer exist
         * (marked deleted, catalog entry gone).
         *
         * The lock was acquired above, so it has to be released
         * before reporting the failure.
         */
        if (((flags & HFS_LOCK_ALLOW_NOEXISTS) == 0) &&
            ((cp->c_desc.cd_flags & CD_ISMETA) == 0) &&
            (cp->c_flag & C_NOEXISTS)) {
                hfs_unlock(cp);
                return (ENOENT);
        }
        return (0);
}
1956
1957 /*
1958  * Lock a pair of cnodes.
1959  */
1960 int
1961 hfs_lockpair(struct cnode *cp1, struct cnode *cp2, enum hfs_locktype locktype)
1962 {
1963         struct cnode *first, *last;
1964         int error;
1965
1966         /*
1967          * If cnodes match then just lock one.
1968          */
1969         if (cp1 == cp2) {
1970                 return hfs_lock(cp1, locktype, HFS_LOCK_DEFAULT);
1971         }
1972
1973         /*
1974          * Lock in cnode address order.
1975          */
1976         if (cp1 < cp2) {
1977                 first = cp1;
1978                 last = cp2;
1979         } else {
1980                 first = cp2;
1981                 last = cp1;
1982         }
1983
1984         if ( (error = hfs_lock(first, locktype, HFS_LOCK_DEFAULT))) {
1985                 return (error);
1986         }
1987         if ( (error = hfs_lock(last, locktype, HFS_LOCK_DEFAULT))) {
1988                 hfs_unlock(first);
1989                 return (error);
1990         }
1991         return (0);
1992 }
1993
/*
 * Check the ordering of two cnodes.  Returns 1 if cp1 sorts strictly
 * before cp2, 0 otherwise (including when they are equal).
 *
 * NULL sorts before everything else and the value 0xffffffff is
 * treated as a sentinel that sorts after everything else; all other
 * pointers are ordered by address.
 */
static int
hfs_isordered(struct cnode *cp1, struct cnode *cp2)
{
        struct cnode * const sentinel = (struct cnode *)0xffffffff;
        int ordered;

        if (cp1 == cp2) {
                ordered = 0;
        } else if (cp1 == NULL || cp2 == sentinel) {
                ordered = 1;
        } else if (cp2 == NULL || cp1 == sentinel) {
                ordered = 0;
        } else {
                /*
                 * Locking order is cnode address order.
                 */
                ordered = (cp1 < cp2);
        }
        return (ordered);
}
2011
/*
 * Acquire 4 cnode locks.
 *   - locked in cnode address order (lesser address first).
 *   - all or none of the locks are taken
 *   - only one lock taken per cnode (dup cnodes are skipped)
 *   - some of the cnode pointers may be null
 *
 * locktype    - passed through to hfs_lock for every cnode
 * error_cnode - optional out parameter; set to NULL on entry and, when
 *               a lock attempt fails, to the cnode that failed to lock
 *
 * Returns 0 with every distinct non-NULL cnode locked, or the error
 * from hfs_lock after the locks taken so far have been dropped.
 */
int
hfs_lockfour(struct cnode *cp1, struct cnode *cp2, struct cnode *cp3,
             struct cnode *cp4, enum hfs_locktype locktype, struct cnode **error_cnode)
{
        struct cnode * a[3];    /* cp1/cp2 in order, followed by a sentinel */
        struct cnode * b[3];    /* cp3/cp4 in order, followed by a sentinel */
        struct cnode * list[4]; /* merged, de-duplicated lock list */
        struct cnode * tmp;
        int i, j, k;
        int error;
        if (error_cnode) {
                *error_cnode = NULL;
        }

        /* Order each pair on its own first. */
        if (hfs_isordered(cp1, cp2)) {
                a[0] = cp1; a[1] = cp2;
        } else {
                a[0] = cp2; a[1] = cp1;
        }
        if (hfs_isordered(cp3, cp4)) {
                b[0] = cp3; b[1] = cp4;
        } else {
                b[0] = cp4; b[1] = cp3;
        }
        a[2] = (struct cnode *)0xffffffff;  /* sentinel value */
        b[2] = (struct cnode *)0xffffffff;  /* sentinel value */

        /*
         * Build the lock list, skipping over duplicates
         *
         * This merges the two ordered pairs.  hfs_isordered sorts the
         * sentinel after every other value, so once one pair is used up
         * the remaining entries come from the other pair.  Duplicates
         * end up adjacent in merge order, which is why comparing with
         * the previous list entry is enough to skip them.
         */
        for (i = 0, j = 0, k = 0; (i < 2 || j < 2); ) {
                tmp = hfs_isordered(a[i], b[j]) ? a[i++] : b[j++];
                if (k == 0 || tmp != list[k-1])
                        list[k++] = tmp;
        }

        /*
         * Now we can lock using list[0 - k].
         * Skip over NULL entries.
         */
        for (i = 0; i < k; ++i) {
                if (list[i])
                        if ((error = hfs_lock(list[i], locktype, HFS_LOCK_DEFAULT))) {
                                /* Only stuff error_cnode if requested */
                                if (error_cnode) {
                                        *error_cnode = list[i];
                                }
                                /*
                                 * Drop any locks we acquired.  list[i] itself is
                                 * not unlocked here; only the entries before it.
                                 */
                                while (--i >= 0) {
                                        if (list[i])
                                                hfs_unlock(list[i]);
                                }
                                return (error);
                        }
        }
        return (0);
}
2076
2077
/*
 * Unlock a cnode.
 *
 * Releases c_rwlock (exclusive if the calling thread is the recorded
 * owner, shared otherwise) and then carries out any vnode work that was
 * deferred through the C_NEED_* flags: ubc_setsize(vp, 0) and/or
 * vnode_put() on the data and resource fork vnodes.
 *
 * For the extents and allocation (bitmap) files the call only
 * decrements the recursion count until it drops to zero.
 */
void
hfs_unlock(struct cnode *cp)
{
        vnode_t rvp = NULLVP;
        vnode_t vp = NULLVP;
        u_int32_t c_flag;
        void *lockowner;

        /*
         * Only the extents and bitmap files support lock recursion.
         */
        if ((cp->c_fileid == kHFSExtentsFileID) ||
            (cp->c_fileid == kHFSAllocationFileID)) {
                /* Still held recursively: keep the lock. */
                if (--cp->c_syslockcount > 0) {
                        return;
                }
        }
        /*
         * Snapshot the flags and clear the deferred-work bits while the
         * lock is still held; the snapshot drives the work done below.
         */
        c_flag = cp->c_flag;
        cp->c_flag &= ~(C_NEED_DVNODE_PUT | C_NEED_RVNODE_PUT | C_NEED_DATA_SETSIZE | C_NEED_RSRC_SETSIZE);

        /* Capture the vnodes that need post processing, also while still locked. */
        if (c_flag & (C_NEED_DVNODE_PUT | C_NEED_DATA_SETSIZE)) {
                vp = cp->c_vp;
        }
        if (c_flag & (C_NEED_RVNODE_PUT | C_NEED_RSRC_SETSIZE)) {
                rvp = cp->c_rsrc_vp;
        }

        lockowner = cp->c_lockowner;
        if (lockowner == current_thread()) {
            /* We are the recorded owner, so the lock is held exclusive. */
            cp->c_lockowner = NULL;
            lck_rw_unlock_exclusive(&cp->c_rwlock);
        } else {
            /* Otherwise the lock is treated as held shared. */
            lck_rw_unlock_shared(&cp->c_rwlock);
        }

        /* Perform any vnode post processing after cnode lock is dropped. */
        if (vp) {
                if (c_flag & C_NEED_DATA_SETSIZE)
                        ubc_setsize(vp, 0);
                if (c_flag & C_NEED_DVNODE_PUT)
                        vnode_put(vp);
        }
        if (rvp) {
                if (c_flag & C_NEED_RSRC_SETSIZE)
                        ubc_setsize(rvp, 0);
                if (c_flag & C_NEED_RVNODE_PUT)
                        vnode_put(rvp);
        }
}
2130
/*
 * Unlock a pair of cnodes.
 *
 * cp1 is unlocked first.  When both pointers name the same cnode it is
 * unlocked only once.
 */
void
hfs_unlockpair(struct cnode *cp1, struct cnode *cp2)
{
        hfs_unlock(cp1);

        if (cp2 == cp1) {
                return;
        }
        hfs_unlock(cp2);
}
2141
/*
 * Unlock a group of cnodes.
 *
 * The cnodes are unlocked in argument order.  NULL pointers are
 * skipped, and a cnode that appears more than once is unlocked only
 * the first time it is seen.
 */
void
hfs_unlockfour(struct cnode *cp1, struct cnode *cp2, struct cnode *cp3, struct cnode *cp4)
{
        struct cnode * pending[4];
        struct cnode * released[4];
        int nreleased = 0;
        int n, i;

        pending[0] = cp1;
        pending[1] = cp2;
        pending[2] = cp3;
        pending[3] = cp4;

        for (n = 0; n < 4; ++n) {
                struct cnode *cur = pending[n];
                int seen = 0;

                if (cur == NULL) {
                        continue;
                }

                /* Was this cnode already unlocked for an earlier argument? */
                for (i = 0; i < nreleased; ++i) {
                        if (released[i] == cur) {
                                seen = 1;
                                break;
                        }
                }
                if (seen) {
                        continue;
                }

                hfs_unlock(cur);
                released[nreleased++] = cur;
        }
}
2181
2182
2183 /*
2184  * Protect a cnode against a truncation.
2185  *
2186  * Used mainly by read/write since they don't hold the
2187  * cnode lock across calls to the cluster layer.
2188  *
2189  * The process doing a truncation must take the lock
2190  * exclusive. The read/write processes can take it
2191  * shared.  The locktype argument is the same as supplied to
2192  * hfs_lock.
2193  */
2194 void
2195 hfs_lock_truncate(struct cnode *cp, enum hfs_locktype locktype, enum hfs_lockflags flags)
2196 {
2197         void * thread = current_thread();
2198
2199         if (cp->c_truncatelockowner == thread) {
2200                 /*
2201                  * Ignore grabbing the lock if it the current thread already
2202                  * holds exclusive lock.
2203                  *
2204                  * This is needed on the hfs_vnop_pagein path where we need to ensure
2205                  * the file does not change sizes while we are paging in.  However,
2206                  * we may already hold the lock exclusive due to another
2207                  * VNOP from earlier in the call stack.  So if we already hold
2208                  * the truncate lock exclusive, allow it to proceed, but ONLY if
2209                  * it's in the recursive case.
2210                  */
2211                 if ((flags & HFS_LOCK_SKIP_IF_EXCLUSIVE) == 0) {
2212                         panic("hfs_lock_truncate: cnode %p locked!", cp);
2213                 }
2214         } else if (locktype == HFS_SHARED_LOCK) {
2215                 lck_rw_lock_shared(&cp->c_truncatelock);
2216                 cp->c_truncatelockowner = HFS_SHARED_OWNER;
2217         } else { /* HFS_EXCLUSIVE_LOCK */
2218                 lck_rw_lock_exclusive(&cp->c_truncatelock);
2219                 cp->c_truncatelockowner = thread;
2220         }
2221 }
2222
2223
2224 /*
2225  * Attempt to get the truncate lock.  If it cannot be acquired, error out.
2226  * This function is needed in the degenerate hfs_vnop_pagein during force unmount
2227  * case.  To prevent deadlocks while a VM copy object is moving pages, HFS vnop pagein will
2228  * temporarily need to disable V2 semantics.
2229  */
2230 int hfs_try_trunclock (struct cnode *cp, enum hfs_locktype locktype, enum hfs_lockflags flags)
2231 {
2232         void * thread = current_thread();
2233         boolean_t didlock = false;
2234
2235         if (cp->c_truncatelockowner == thread) {
2236                 /*
2237                  * Ignore grabbing the lock if the current thread already
2238                  * holds exclusive lock.
2239                  *
2240                  * This is needed on the hfs_vnop_pagein path where we need to ensure
2241                  * the file does not change sizes while we are paging in.  However,
2242                  * we may already hold the lock exclusive due to another
2243                  * VNOP from earlier in the call stack.  So if we already hold
2244                  * the truncate lock exclusive, allow it to proceed, but ONLY if
2245                  * it's in the recursive case.
2246                  */
2247                 if ((flags & HFS_LOCK_SKIP_IF_EXCLUSIVE) == 0) {
2248                         panic("hfs_lock_truncate: cnode %p locked!", cp);
2249                 }
2250         } else if (locktype == HFS_SHARED_LOCK) {
2251                 didlock = lck_rw_try_lock(&cp->c_truncatelock, LCK_RW_TYPE_SHARED);
2252                 if (didlock) {
2253                         cp->c_truncatelockowner = HFS_SHARED_OWNER;
2254                 }
2255         } else { /* HFS_EXCLUSIVE_LOCK */
2256                 didlock = lck_rw_try_lock (&cp->c_truncatelock, LCK_RW_TYPE_EXCLUSIVE);
2257                 if (didlock) {
2258                         cp->c_truncatelockowner = thread;
2259                 }
2260         }
2261
2262         return didlock;
2263 }
2264
2265
2266 /*
2267  * Unlock the truncate lock, which protects against size changes.
2268  *
2269  * If HFS_LOCK_SKIP_IF_EXCLUSIVE flag was set, it means that a previous
2270  * hfs_lock_truncate() might have skipped grabbing a lock because
2271  * the current thread was already holding the lock exclusive and
2272  * we may need to return from this function without actually unlocking
2273  * the truncate lock.
2274  */
2275 void
2276 hfs_unlock_truncate(struct cnode *cp, enum hfs_lockflags flags)
2277 {
2278         void *thread = current_thread();
2279
2280         /*
2281          * If HFS_LOCK_SKIP_IF_EXCLUSIVE is set in the flags AND the current
2282          * lock owner of the truncate lock is our current thread, then
2283          * we must have skipped taking the lock earlier by in
2284          * hfs_lock_truncate() by setting HFS_LOCK_SKIP_IF_EXCLUSIVE in the
2285          * flags (as the current thread was current lock owner).
2286          *
2287          * If HFS_LOCK_SKIP_IF_EXCLUSIVE is not set (most of the time) then
2288          * we check the lockowner field to infer whether the lock was taken
2289          * exclusively or shared in order to know what underlying lock
2290          * routine to call.
2291          */
2292         if (flags & HFS_LOCK_SKIP_IF_EXCLUSIVE) {
2293                 if (cp->c_truncatelockowner == thread) {
2294                         return;
2295                 }
2296         }
2297
2298         /* HFS_LOCK_EXCLUSIVE */
2299         if (thread == cp->c_truncatelockowner) {
2300                 cp->c_truncatelockowner = NULL;
2301                 lck_rw_unlock_exclusive(&cp->c_truncatelock);
2302         } else { /* HFS_LOCK_SHARED */
2303                 lck_rw_unlock_shared(&cp->c_truncatelock);
2304         }
2305 }