bsd/hfs/hfs_cnode.c

   1 /*
   2  * Copyright (c) 2002-2015 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 #include <sys/param.h>
  29 #include <sys/systm.h>
  30 #include <sys/proc.h>
  31 #include <sys/vnode.h>
  32 #include <sys/mount.h>
  33 #include <sys/kernel.h>
  34 #include <sys/malloc.h>
  35 #include <sys/time.h>
  36 #include <sys/ubc.h>
  37 #include <sys/quota.h>
  38 #include <sys/kdebug.h>
  39 #include <libkern/OSByteOrder.h>
  40 #include <sys/buf_internal.h>
  41 #include <sys/namei.h>
  42
  43 #include <kern/locks.h>
  44
  45 #include <miscfs/specfs/specdev.h>
  46 #include <miscfs/fifofs/fifo.h>
  47
  48 #include <hfs/hfs.h>
  49 #include <hfs/hfs_catalog.h>
  50 #include <hfs/hfs_cnode.h>
  51 #include <hfs/hfs_quota.h>
  52 #include <hfs/hfs_format.h>
  53 #include <hfs/hfs_kdebug.h>
  54 #include <hfs/hfs_cprotect.h>
  55
   56 extern int prtactive;
   57
   58 extern lck_attr_t *  hfs_lock_attr;
   59 extern lck_grp_t *  hfs_mutex_group;
   60 extern lck_grp_t *  hfs_rwlock_group;
   61
   62 static void  hfs_reclaim_cnode(hfsmount_t *hfsmp, struct cnode *);
   63 static int hfs_cnode_teardown (struct vnode *vp, vfs_context_t ctx, int reclaim); /* worker shared by hfs_vnop_inactive and hfs_vnop_reclaim */
   64 static int hfs_isordered(struct cnode *, struct cnode *);
   65
   66 extern int hfs_removefile_callback(struct buf *bp, void *hfsmp); /* buf_iterate callback run over symlink buffers in hfs_cnode_teardown */
  67
  68
  69 __inline__ int hfs_checkdeleted (struct cnode *cp) {
  70         return ((cp->c_flag & (C_DELETED | C_NOEXISTS)) ? ENOENT : 0);
  71 }
  72
  73 /*
   74  * Function used by a special fcntl() to mark (or unmark) a cnode/vnode
   75  * as backing another filesystem, like a disk image.
  76  *
  77  * the argument 'val' indicates whether or not to set the bit in the cnode flags
  78  *
  79  * Returns non-zero on failure. 0 on success
  80  */
  81 int hfs_set_backingstore (struct vnode *vp, int val) {
  82         struct cnode *cp = NULL;
  83         int err = 0;
  84
  85         cp = VTOC(vp);
  86         if (!vnode_isreg(vp) && !vnode_isdir(vp)) {
  87                 return EINVAL;
  88         }
  89
  90         /* lock the cnode */
  91         err = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
  92         if (err) {
  93                 return err;
  94         }
  95
  96         if (val) {
  97                 cp->c_flag |= C_BACKINGSTORE;
  98         }
  99         else {
 100                 cp->c_flag &= ~C_BACKINGSTORE;
 101         }
 102
 103         /* unlock everything */
 104         hfs_unlock (cp);
 105
 106         return err;
 107 }
 108
 109 /*
  110  * Function used by a special fcntl() that checks whether a cnode/vnode
  111  * is marked as backing another filesystem, like a disk image.
 112  *
 113  * the argument 'val' is an output argument for whether or not the bit is set
 114  *
 115  * Returns non-zero on failure. 0 on success
 116  */
 117
 118 int hfs_is_backingstore (struct vnode *vp, int *val) {
 119         struct cnode *cp = NULL;
 120         int err = 0;
 121
 122         if (!vnode_isreg(vp) && !vnode_isdir(vp)) {
 123                 *val = 0;
 124                 return 0;
 125         }
 126
 127         cp = VTOC(vp);
 128
 129         /* lock the cnode */
 130         err = hfs_lock (cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT);
 131         if (err) {
 132                 return err;
 133         }
 134
 135         if (cp->c_flag & C_BACKINGSTORE) {
 136                 *val = 1;
 137         }
 138         else {
 139                 *val = 0;
 140         }
 141
 142         /* unlock everything */
 143         hfs_unlock (cp);
 144
 145         return err;
 146 }
 147
 148
 149 /*
 150  * hfs_cnode_teardown
 151  *
 152  * This is an internal function that is invoked from both hfs_vnop_inactive
 153  * and hfs_vnop_reclaim.  As VNOP_INACTIVE is not necessarily called from vnodes
 154  * being recycled and reclaimed, it is important that we do any post-processing
 155  * necessary for the cnode in both places.  Important tasks include things such as
 156  * releasing the blocks from an open-unlinked file when all references to it have dropped,
 157  * and handling resource forks separately from data forks.
 158  *
 159  * Note that we take only the vnode as an argument here (rather than the cnode).
 160  * Recall that each cnode supports two forks (rsrc/data), and we can always get the right
 161  * cnode from either of the vnodes, but the reverse is not true -- we can't determine which
 162  * vnode we need to reclaim if only the cnode is supplied.
 163  *
 164  * This function is idempotent and safe to call from both hfs_vnop_inactive and hfs_vnop_reclaim
 165  * if both are invoked right after the other.  In the second call, most of this function's if()
 166  * conditions will fail, since they apply generally to cnodes still marked with C_DELETED.
 167  * As a quick check to see if this function is necessary, determine if the cnode is already
 168  * marked C_NOEXISTS.  If it is, then it is safe to skip this function.  The only tasks that
 169  * remain for cnodes marked in such a fashion is to teardown their fork references and
 170  * release all directory hints and hardlink origins.  However, both of those are done
 171  * in hfs_vnop_reclaim.  hfs_update, by definition, is not necessary if the cnode's catalog
 172  * entry is no longer there.
 173  *
 174  * 'reclaim' argument specifies whether or not we were called from hfs_vnop_reclaim.  If we are
 175  * invoked from hfs_vnop_reclaim, we can not call functions that cluster_push since the UBC info
 176  * is totally gone by that point.
 177  *
 178  * Assumes that both truncate and cnode locks for 'cp' are held.
 179  */
  180 static
  181 int hfs_cnode_teardown (struct vnode *vp, vfs_context_t ctx, int reclaim)
  182 {
  183         int forkcount = 0;      /* forks (data and/or rsrc) attached to cp */
  184         enum vtype v_type;
  185         struct cnode *cp;
  186         int error = 0;
  187         bool started_tr = false;        /* true while a transaction is open and must be ended at 'out' */
  188         struct hfsmount *hfsmp = VTOHFS(vp);
  189         struct proc *p = vfs_context_proc(ctx);
  190         int truncated = 0;      /* set after hfs_truncate succeeds on vp's own fork */
  191     cat_cookie_t cookie;
  192     int cat_reserve = 0;        /* set after cat_preflight succeeds; paired with cat_postflight at 'out' */
  193     int lockflags;
  194         int ea_error = 0;       /* hfs_removeallattr result; recorded but never examined below */
  195
  196         v_type = vnode_vtype(vp);
  197         cp = VTOC(vp);
  198
  199         if (cp->c_datafork) {
  200                 ++forkcount;
  201         }
  202         if (cp->c_rsrcfork) {
  203                 ++forkcount;
  204         }
  205
  206         /*
  207          * Push file data out for normal files that haven't been evicted from
  208          * the namespace.  We only do this if this function was not called from reclaim,
  209          * because by that point the UBC information has been totally torn down.
  210          *
  211          * There should also be no way that a normal file that has NOT been deleted from
  212          * the namespace to skip INACTIVE and go straight to RECLAIM.  That race only happens
  213          * when the file becomes open-unlinked.
  214          */
  215         if ((v_type == VREG) &&
  216                 (!ISSET(cp->c_flag, C_DELETED)) &&
  217                 (!ISSET(cp->c_flag, C_NOEXISTS)) &&
  218                 (VTOF(vp)->ff_blocks) &&
  219                 (reclaim == 0)) {
  220                 /*
  221                  * If we're called from hfs_vnop_inactive, all this means is at the time
  222                  * the logic for deciding to call this function, there were not any lingering
  223                  * mmap/fd references for this file.  However, there is nothing preventing the system
  224                  * from creating a new reference in between the time that logic was checked
  225                  * and we entered hfs_vnop_inactive.  As a result, the only time we can guarantee
  226                  * that there aren't any references is during vnop_reclaim.
  227                  */
  228                 hfs_filedone(vp, ctx, 0);
  229         }
  230
  231         /*
  232          * Remove any directory hints or cached origins
  233          */
  234         if (v_type == VDIR) {
  235                 hfs_reldirhints(cp, 0);
  236         }
  237         if (cp->c_flag & C_HARDLINK) {
  238                 hfs_relorigins(cp);
  239         }
  240
  241         /*
  242          * -- Handle open unlinked files --
  243          *
  244          * If the vnode is in use, it means a force unmount is in progress
  245          * in which case we defer cleaning up until either we come back
  246          * through here via hfs_vnop_reclaim, at which point the UBC
  247          * information will have been torn down and the vnode might no
  248          * longer be in use, or if it's still in use, it will get cleaned
  249          * up when next remounted.
  250          */
  251         if (ISSET(cp->c_flag, C_DELETED) && !vnode_isinuse(vp, 0)) {
  252                 /*
  253                  * This check is slightly complicated.  We should only truncate data
  254                  * in very specific cases for open-unlinked files.  This is because
  255                  * we want to ensure that the resource fork continues to be available
  256                  * if the caller has the data fork open.  However, this is not symmetric;
  257                  * someone who has the resource fork open need not be able to access the data
  258                  * fork once the data fork has gone inactive.
  259                  *
  260                  * If we're the last fork, then we have cleaning up to do.
  261                  *
  262                  * A) last fork, and vp == c_vp
  263                  *      Truncate away own fork data. If rsrc fork is not in core, truncate it too.
  264                  *
  265                  * B) last fork, and vp == c_rsrc_vp
  266                  *      Truncate ourselves, assume data fork has been cleaned due to C).
  267                  *
  268                  * If we're not the last fork, then things are a little different:
  269                  *
  270                  * C) not the last fork, vp == c_vp
  271                  *      Truncate ourselves.  Once the file has gone out of the namespace,
  272                  *      it cannot be further opened.  Further access to the rsrc fork may
  273                  *      continue, however.
  274                  *
  275                  * D) not the last fork, vp == c_rsrc_vp
  276                  *      Don't enter the block below, just clean up vnode and push it out of core.
  277                  */
  278
  279                 if ((v_type == VREG || v_type == VLNK) &&
  280                                 ((forkcount == 1) || (!VNODE_IS_RSRC(vp)))) {
  281
  282                         /* Truncate away our own fork data. (Case A, B, C above) */
  283                         if (VTOF(vp)->ff_blocks != 0) {
  284                                 /*
  285                                  * SYMLINKS only:
  286                                  *
  287                                  * Encapsulate the entire change (including truncating the link) in
  288                                  * nested transactions if we are modifying a symlink, because we know that its
  289                                  * file length will be at most 4k, and we can fit both the truncation and
  290                                  * any relevant bitmap changes into a single journal transaction.  We also want
  291                                  * the kill_block code to execute in the same transaction so that any dirty symlink
  292                                  * blocks will not be written. Otherwise, rely on
  293                                  * hfs_truncate doing its own transactions to ensure that we don't blow up
  294                                  * the journal.
  295                                  */
  296                                 if (!started_tr && (v_type == VLNK)) {
  297                                         if (hfs_start_transaction(hfsmp) != 0) {
  298                                                 error = EINVAL;
  299                                                 goto out;
  300                                         }
  301                                         else {
  302                                                 started_tr = true;
  303                                         }
  304                                 }
  305
  306                                 /*
  307                                  * At this point, we have decided that this cnode is
  308                                  * suitable for full removal.  We are about to deallocate
  309                                  * its blocks and remove its entry from the catalog.
  310                                  * If it was a symlink, then it's possible that the operation
  311                                  * which created it is still in the current transaction group
  312                                  * due to coalescing.  Take action here to kill the data blocks
  313                                  * of the symlink out of the journal before moving to
  314                                  * deallocate the blocks.  We need to be in the middle of
  315                                  * a transaction before calling buf_iterate like this.
  316                                  *
  317                                  * Note: we have to kill any potential symlink buffers out of
  318                                  * the journal prior to deallocating their blocks.  This is so
  319                                  * that we don't race with another thread that may be doing an
  320                                  * allocation concurrently and pick up these blocks. It could
  321                                  * generate I/O against them which could go out ahead of our journal
  322                                  * transaction.
  323                                  */
  324
  325                                 if (hfsmp->jnl && vnode_islnk(vp)) {
  326                                         buf_iterate(vp, hfs_removefile_callback, BUF_SKIP_NONLOCKED, (void *)hfsmp);
  327                                 }
  328
  329
  330                                 /*
  331                                  * This truncate call (and the one below) is fine from VNOP_RECLAIM's
  332                                  * context because we're only removing blocks, not zero-filling new
  333                                  * ones.  The C_DELETED check above makes things much simpler.
  334                                  */
  335                                 error = hfs_truncate(vp, (off_t)0, IO_NDELAY, 0, ctx);
  336                                 if (error) {
  337                                         goto out;
  338                                 }
  339                                 truncated = 1;
  340
  341                                 /* (SYMLINKS ONLY): Close/End our transaction after truncating the file record */
  342                                 if (started_tr) {
  343                                         hfs_end_transaction(hfsmp);
  344                                         started_tr = false;
  345                                 }
  346
  347                         }
  348
  349                         /*
  350                          * Truncate away the resource fork, if we represent the data fork and
  351                          * it is the last fork.  That means, by definition, the rsrc fork is not in
  352                          * core.  To avoid bringing a vnode into core for the sole purpose of deleting the
  353                          * data in the resource fork, we call cat_lookup directly, then hfs_release_storage
  354                          * to get rid of the resource fork's data. Note that because we are holding the
  355                          * cnode lock, it is impossible for a competing thread to create the resource fork
  356                          * vnode from underneath us while we do this.
  357                          *
  358                          * This is invoked via case A above only.
  359                          */
  360                         if ((cp->c_blocks > 0) && (forkcount == 1) && (vp != cp->c_rsrc_vp)) {
  361                                 struct cat_lookup_buffer *lookup_rsrc = NULL;
  362                                 struct cat_desc *desc_ptr = NULL;
  363                                 lockflags = 0;
  364
  365                                 MALLOC(lookup_rsrc, struct cat_lookup_buffer*, sizeof (struct cat_lookup_buffer), M_TEMP, M_WAITOK);
  366                                 if (lookup_rsrc == NULL) {
  367                                         printf("hfs_cnode_teardown: ENOMEM from MALLOC\n");
  368                                         error = ENOMEM;
  369                                         goto out;
  370                                 }
  371                                 else {
  372                                         bzero (lookup_rsrc, sizeof (struct cat_lookup_buffer));
  373                                 }
  374
  375                                 if (cp->c_desc.cd_namelen == 0) {
  376                                         /* Initialize the rsrc descriptor for lookup if necessary */
  377                                         MAKE_DELETED_NAME (lookup_rsrc->lookup_name, HFS_TEMPLOOKUP_NAMELEN, cp->c_fileid);
  378
  379                                         lookup_rsrc->lookup_desc.cd_nameptr = (const uint8_t*) lookup_rsrc->lookup_name;
  380                                         lookup_rsrc->lookup_desc.cd_namelen = strlen (lookup_rsrc->lookup_name);
  381                                         lookup_rsrc->lookup_desc.cd_parentcnid = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid;
  382                                         lookup_rsrc->lookup_desc.cd_cnid = cp->c_cnid;
  383
  384                                         desc_ptr = &lookup_rsrc->lookup_desc;
  385                                 }
  386                                 else {
  387                                         desc_ptr = &cp->c_desc;
  388                                 }
  389
  390                                 lockflags = hfs_systemfile_lock (hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
  391
  392                                 error = cat_lookup (hfsmp, desc_ptr, 1, 0, (struct cat_desc *) NULL,
  393                                                 (struct cat_attr*) NULL, &lookup_rsrc->lookup_fork.ff_data, NULL);
  394
  395                                 hfs_systemfile_unlock (hfsmp, lockflags);
  396
  397                                 if (error) {
  398                                         FREE (lookup_rsrc, M_TEMP);
  399                                         goto out;
  400                                 }
  401
  402                                 /*
  403                                  * Make the filefork in our temporary struct look like a real
  404                                  * filefork.  Fill in the cp and rangelist fields (no sysfileinfo field is set here).
  405                                  */
  406                                 rl_init (&lookup_rsrc->lookup_fork.ff_invalidranges);
  407                                 lookup_rsrc->lookup_fork.ff_cp = cp;
  408
  409                                 /*
  410                                  * If there were no errors, then we have the catalog's fork information
  411                                  * for the resource fork in question.  Go ahead and delete the data in it now.
  412                                  */
  413
  414                                 error = hfs_release_storage (hfsmp, NULL, &lookup_rsrc->lookup_fork, cp->c_fileid);
  415                                 FREE(lookup_rsrc, M_TEMP);
  416
  417                                 if (error) {
  418                                         goto out;
  419                                 }
  420
  421                                 /*
  422                                  * This fileid's resource fork extents have now been fully deleted on-disk
  423                                  * and this CNID is no longer valid. At this point, we should be able to
  424                                  * zero out cp->c_blocks to indicate there is no data left in this file.
  425                                  */
  426                                 cp->c_blocks = 0;
  427                         }
  428                 }
  429
  430                 /*
  431                  * If we represent the last fork (or none in the case of a dir),
  432                  * and the cnode has become open-unlinked...
  433                  *
  434                  * We check c_blocks here because it is possible in the force
  435                  * unmount case for the data fork to be in use but the resource
  436                  * fork to not be in use in which case we will truncate the
  437                  * resource fork, but not the data fork.  It will get cleaned
  438                  * up upon next mount.
  439                  */
  440                 if (forkcount <= 1 && !cp->c_blocks) {
  441                         /*
  442                          * If it has EA's, then we need to get rid of them.
  443                          *
  444                          * Note that this must happen outside of any other transactions
  445                          * because it starts/ends its own transactions and grabs its
  446                          * own locks.  This is to prevent a file with a lot of attributes
  447                          * from creating a transaction that is too large (which panics).
  448                          */
  449                         if (ISSET(cp->c_attr.ca_recflags, kHFSHasAttributesMask))
  450                                 ea_error = hfs_removeallattr(hfsmp, cp->c_fileid, &started_tr);
  451
  452                         /*
  453                          * Remove the cnode's catalog entry and release all blocks it
  454                          * may have been using.
  455                          */
  456
  457                         /*
  458                          * Mark cnode in transit so that no one can get this
  459                          * cnode from cnode hash.
  460                          */
  461                         // hfs_chash_mark_in_transit(hfsmp, cp);
  462                         // XXXdbg - remove the cnode from the hash table since it's deleted
  463                         //          otherwise someone could go to sleep on the cnode and not
  464                         //          be woken up until this vnode gets recycled which could be
  465                         //          a very long time...
  466                         hfs_chashremove(hfsmp, cp);
  467
  468                         cp->c_flag |= C_NOEXISTS;   // XXXdbg
  469                         cp->c_rdev = 0;
  470
  471                         if (!started_tr) {
  472                                 if (hfs_start_transaction(hfsmp) != 0) {
  473                                         error = EINVAL;
  474                                         goto out;
  475                                 }
  476                                 started_tr = true;
  477                         }
  478
  479                         /*
  480                          * Reserve some space in the Catalog file.
  481                          */
  482                         if ((error = cat_preflight(hfsmp, CAT_DELETE, &cookie, p))) {
  483                                 goto out;
  484                         }
  485                         cat_reserve = 1;
  486
  487                         lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE, HFS_EXCLUSIVE_LOCK);
  488
  489                         if (cp->c_blocks > 0) {
  490                                 printf("hfs_inactive: deleting non-empty%sfile %d, "
  491                                            "blks %d\n", VNODE_IS_RSRC(vp) ? " rsrc " : " ",
  492                                            (int)cp->c_fileid, (int)cp->c_blocks);
  493                         }
  494
  495                         //
  496                         // release the name pointer in the descriptor so that
  497                         // cat_delete() will use the file-id to do the deletion.
  498                         // in the case of hard links this is imperative (in the
  499                         // case of regular files the fileid and cnid are the
  500                         // same so it doesn't matter).
  501                         //
  502                         cat_releasedesc(&cp->c_desc);
  503
  504                         /*
  505                          * The descriptor name may be zero,
  506                          * in which case the fileid is used.
  507                          */
  508                         error = cat_delete(hfsmp, &cp->c_desc, &cp->c_attr);
  509
  510                         if (error && truncated && (error != ENXIO)) {
  511                                 printf("hfs_inactive: couldn't delete a truncated file!");
  512                         }
  513
  514                         /* Update HFS Private Data dir */
  515                         if (error == 0) {
  516                                 hfsmp->hfs_private_attr[FILE_HARDLINKS].ca_entries--;
  517                                 if (vnode_isdir(vp)) {
  518                                         DEC_FOLDERCOUNT(hfsmp, hfsmp->hfs_private_attr[FILE_HARDLINKS]);
  519                                 }
  520                                 (void)cat_update(hfsmp, &hfsmp->hfs_private_desc[FILE_HARDLINKS],
  521                                                                  &hfsmp->hfs_private_attr[FILE_HARDLINKS], NULL, NULL);
  522                         }
  523
  524                         hfs_systemfile_unlock(hfsmp, lockflags);
  525
  526                         if (error) {
  527                                 goto out;
  528                         }
  529
  530         #if QUOTA
  531                         if (hfsmp->hfs_flags & HFS_QUOTAS)
  532                                 (void)hfs_chkiq(cp, -1, NOCRED, 0);
  533         #endif /* QUOTA */
  534
  535                         /* Already set C_NOEXISTS at the beginning of this block */
  536                         cp->c_flag &= ~C_DELETED;
  537                         cp->c_touch_chgtime = TRUE;
  538                         cp->c_touch_modtime = TRUE;
  539
  540                         if (error == 0) /* always true here: any cat_delete error already jumped to 'out' */
  541                                 hfs_volupdate(hfsmp, (v_type == VDIR) ? VOL_RMDIR : VOL_RMFILE, 0);
  542                 }
  543         } // if <open unlinked>
  544
  545         hfs_update(vp, reclaim ? HFS_UPDATE_FORCE : 0);
  546
  547         /*
  548          * Since we are about to finish what might be an inactive call, propagate
  549          * any remaining modified or touch bits from the cnode to the vnode.  This
  550          * serves as a hint to vnode recycling that we shouldn't recycle this vnode
  551          * synchronously.
  552          *
  553          * For now, if the node *only* has a dirty atime, we don't mark
  554          * the vnode as dirty.  VFS's asynchronous recycling can actually
  555          * lead to worse performance than having it synchronous.  When VFS
  556          * is fixed to be more performant, we can be more honest about
  557          * marking vnodes as dirty when it's only the atime that's dirty.
  558          */
  559         if (hfs_is_dirty(cp) == HFS_DIRTY || ISSET(cp->c_flag, C_DELETED)) {
  560                 vnode_setdirty(vp);
  561         } else {
  562                 vnode_cleardirty(vp);
  563         }
  564
  565 out:    /* common exit: release the catalog reservation, then end any transaction still open */
  566     if (cat_reserve)
  567         cat_postflight(hfsmp, &cookie, p);
  568
  569     if (started_tr) {
  570         hfs_end_transaction(hfsmp);
  571         started_tr = false;
  572     }
  573
  574         return error;   /* 0 on success, otherwise the error code of the step that failed */
  575 }
 576
 577
 578 /*
 579  * hfs_vnop_inactive
 580  *
 581  * The last usecount on the vnode has gone away, so we need to tear down
 582  * any remaining data still residing in the cnode.  If necessary, write out
 583  * remaining blocks or delete the cnode's entry in the catalog.
 584  */
 585 int
 586 hfs_vnop_inactive(struct vnop_inactive_args *ap)
 587 {
 588         struct vnode *vp = ap->a_vp;
 589         struct cnode *cp;
 590         struct hfsmount *hfsmp = VTOHFS(vp);
 591         struct proc *p = vfs_context_proc(ap->a_context);
 592         int error = 0;
 593         int took_trunc_lock = 0;
 594         enum vtype v_type;
 595
 596         v_type = vnode_vtype(vp);
 597         cp = VTOC(vp);
 598
 599         if ((hfsmp->hfs_flags & HFS_READ_ONLY) || vnode_issystem(vp) ||
 600             (hfsmp->hfs_freezing_proc == p)) {
 601                 error = 0;
 602                 goto inactive_done;
 603         }
 604
 605         /*
 606          * For safety, do NOT call vnode_recycle from inside this function.  This can cause
 607          * problems in the following scenario:
 608          *
 609          * vnode_create -> vnode_reclaim_internal -> vclean -> VNOP_INACTIVE
 610          *
 611          * If we're being invoked as a result of a reclaim that was already in-flight, then we
 612          * cannot call vnode_recycle again.  Being in reclaim means that there are no usecounts or
 613          * iocounts by definition.  As a result, if we were to call vnode_recycle, it would immediately
 614          * try to re-enter reclaim again and panic.
 615          *
 616          * Currently, there are three things that can cause us (VNOP_INACTIVE) to get called.
 617          * 1) last usecount goes away on the vnode (vnode_rele)
 618          * 2) last iocount goes away on a vnode that previously had usecounts but didn't have
 619          *              vnode_recycle called (vnode_put)
 620          * 3) vclean by way of reclaim
 621          *
 622          * In this function we would generally want to call vnode_recycle to speed things
 623          * along to ensure that we don't leak blocks due to open-unlinked files.  However, by
 624          * virtue of being in this function already, we can call hfs_cnode_teardown, which
 625          * will release blocks held by open-unlinked files, and mark them C_NOEXISTS so that
 626          * there's no entry in the catalog and no backing store anymore.  If that's the case,
 627          * then we really don't care all that much when the vnode actually goes through reclaim.
 628          * Further, the HFS VNOPs that manipulated the namespace in order to create the open-
 629          * unlinked file in the first place should have already called vnode_recycle on the vnode
 630          * to guarantee that it would go through reclaim in a speedy way.
 631          */
 632
 633         if (cp->c_flag & C_NOEXISTS) {
 634                 /*
 635                  * If the cnode has already had its cat entry removed, then
 636                  * just skip to the end. We don't need to do anything here.
 637                  */
 638                 error = 0;
 639                 goto inactive_done;
 640         }
 641
 642         if ((v_type == VREG || v_type == VLNK)) {
 643                 hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
 644                 took_trunc_lock = 1;
 645         }
 646
 647         (void) hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
 648
 649         /*
 650          * Call cnode_teardown to push out dirty blocks to disk, release open-unlinked
 651          * files' blocks from being in use, and move the cnode from C_DELETED to C_NOEXISTS.
 652          */
 653         error = hfs_cnode_teardown (vp, ap->a_context, 0);
 654
 655     /*
 656      * Drop the truncate lock before unlocking the cnode
 657      * (which can potentially perform a vnode_put and
 658      * recycle the vnode which in turn might require the
 659      * truncate lock)
 660      */
 661         if (took_trunc_lock) {
 662             hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
 663         }
 664
 665         hfs_unlock(cp);
 666
 667 inactive_done:
 668
 669         return error;
 670 }
 671
 672
 673 /*
 674  * File clean-up (zero fill and shrink peof).
      *
      * Calls hfs_flush_invalid_ranges on the vnode, trims the fork's physical
      * allocation down to the number of allocation blocks needed to hold the
      * logical EOF (ff_size) and, unless HFS_FILE_DONE_NO_SYNC is set in
      * 'opts', calls cluster_push followed by hfs_update on the vnode.
      *
      * vp      - vnode whose file fork is being cleaned up
      * context - passed through to hfs_truncate
      * opts    - HFS_FILE_DONE_NO_SYNC skips the cluster_push/hfs_update step
      *
      * Always returns 0: the results of hfs_truncate, hfs_lock and hfs_update
      * are not propagated.  Nothing is done when the mount is read-only or
      * when the fork has no allocated blocks.
      *
      * NOTE(review): the sync path below drops and then retakes the cnode
      * lock, so the caller presumably holds it on entry -- confirm against
      * the callers.
 675  */
 676
 677 int
 678 hfs_filedone(struct vnode *vp, vfs_context_t context,
 679                          hfs_file_done_opts_t opts)
 680 {
 681         struct cnode *cp;
 682         struct filefork *fp;
 683         struct hfsmount *hfsmp;
 684         off_t leof;
 685         u_int32_t blks, blocksize;
 686
 687         cp = VTOC(vp);
 688         fp = VTOF(vp);
 689         hfsmp = VTOHFS(vp);
 690         leof = fp->ff_size;
 691
             /* Nothing to clean up on a read-only mount or for an empty fork. */
 692         if ((hfsmp->hfs_flags & HFS_READ_ONLY) || (fp->ff_blocks == 0))
 693                 return (0);
 694
 695         hfs_flush_invalid_ranges(vp);
 696
             /* blks = number of allocation blocks needed for leof, rounded up. */
 697         blocksize = VTOVCB(vp)->blockSize;
 698         blks = leof / blocksize;
 699         if (((off_t)blks * (off_t)blocksize) != leof)
 700                 blks++;
 701         /*
 702          * Shrink the peof to the smallest size necessary to contain the leof.
 703          */
 704         if (blks < fp->ff_blocks) {
                     /* Best effort: the truncate result is deliberately ignored. */
 705                 (void) hfs_truncate(vp, leof, IO_NDELAY, HFS_TRUNCATE_SKIPTIMES, context);
 706         }
 707
 708         if (!ISSET(opts, HFS_FILE_DONE_NO_SYNC)) {
                     /*
                      * The cnode lock is released around cluster_push and
                      * retaken (exclusive, tolerating C_NOEXISTS) afterwards.
                      * The hfs_lock return value is not checked here.
                      */
 709                 hfs_unlock(cp);
 710                 cluster_push(vp, IO_CLOSE);
 711                 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
 712
 713                 /*
 714                  * If the hfs_truncate didn't happen to flush the vnode's
 715                  * information out to disk, force it to be updated now that
 716                  * all invalid ranges have been zero-filled and validated:
 717                  */
 718                 hfs_update(vp, 0);
 719         }
 720
 721         return (0);
 722 }
 723
 724
 725 /*
 726  * Reclaim a cnode so that it can be used for other purposes.
      *
      * VNOP_RECLAIM entry point.  With the cnode lock held exclusively it:
      *   1. runs hfs_cnode_teardown (unless the cnode is already C_NOEXISTS),
      *   2. optionally records the file via hfs_addhotfile,
      *   3. detaches this vnode's fork (data or resource) from the cnode,
      *   4. when no other fork remains, removes the cnode from the hash and
      *      releases directory hints / hardlink origins,
      *   5. frees the fork structure and its cached data,
      *   6. drops the cnode lock and, if the hash removal succeeded, hands
      *      the cnode to hfs_reclaim_cnode,
      *   7. clears the vnode's fsnode pointer.
      *
      * ap - reclaim arguments; a_vp is the vnode being reclaimed and
      *      a_context is passed to hfs_cnode_teardown.
      *
      * Always returns 0.  Panics if the vnode is neither the cnode's c_vp
      * nor its c_rsrc_vp.
      *
      * NOTE(review): 'err' receives the hfs_cnode_teardown result but is never
      * read afterwards, and 'v_type' is assigned but never used in this
      * function.
 727  */
 728 int
 729 hfs_vnop_reclaim(struct vnop_reclaim_args *ap)
 730 {
 731         struct vnode *vp = ap->a_vp;
 732         struct cnode *cp;
 733         struct filefork *fp = NULL;
 734         struct filefork *altfp = NULL;
 735         struct hfsmount *hfsmp = VTOHFS(vp);
 736         vfs_context_t ctx = ap->a_context;
 737         int reclaim_cnode = 0;
 738         int err = 0;
 739         enum vtype v_type;
 740
 741         v_type = vnode_vtype(vp);
 742         cp = VTOC(vp);
 743
 744         /*
 745          * We don't take the truncate lock since by the time reclaim comes along,
 746          * all dirty pages have been synced and nobody should be competing
 747          * with us for this thread.
 748          */
 749         (void) hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
 750
 751         /*
 752          * Sync to disk any remaining data in the cnode/vnode.  This includes
 753          * a call to hfs_update if the cnode has outbound data.
 754          *
 755          * If C_NOEXISTS is set on the cnode, then there's nothing teardown needs to do
 756          * because the catalog entry for this cnode is already gone.
 757          */
 758         if (!ISSET(cp->c_flag, C_NOEXISTS)) {
 759                 err = hfs_cnode_teardown(vp, ctx, 1);
 760         }
 761
 762         /*
 763          * Keep track of an inactive hot file.  Don't bother on ssd's since
 764          * the tracking is done differently (it's done at read() time)
 765          */
 766         if (!vnode_isdir(vp) &&
 767             !vnode_issystem(vp) &&
 768             !(cp->c_flag & (C_DELETED | C_NOEXISTS)) &&
 769             !(hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN)) {
 770                 (void) hfs_addhotfile(vp);
 771         }
 772         vnode_removefsref(vp);
 773
 774         /*
 775          * Find file fork for this vnode (if any)
 776          * Also check if another fork is active
              *
              * fp is the fork owned by this vnode (detached from the cnode
              * here); altfp is the cnode's other fork, left in place.
 777          */
 778         if (cp->c_vp == vp) {
 779                 fp = cp->c_datafork;
 780                 altfp = cp->c_rsrcfork;
 781
 782                 cp->c_datafork = NULL;
 783                 cp->c_vp = NULL;
 784         } else if (cp->c_rsrc_vp == vp) {
 785                 fp = cp->c_rsrcfork;
 786                 altfp = cp->c_datafork;
 787
 788                 cp->c_rsrcfork = NULL;
 789                 cp->c_rsrc_vp = NULL;
 790         } else {
 791                 panic("hfs_vnop_reclaim: vp points to wrong cnode (vp=%p cp->c_vp=%p cp->c_rsrc_vp=%p)\n", vp, cp->c_vp, cp->c_rsrc_vp);
 792         }
 793         /*
 794          * On the last fork, remove the cnode from its hash chain.
 795          */
 796         if (altfp == NULL) {
 797                 /* If we can't remove it then the cnode must persist! */
 798                 if (hfs_chashremove(hfsmp, cp) == 0)
 799                         reclaim_cnode = 1;
 800                 /*
 801                  * Remove any directory hints
 802                  */
 803                 if (vnode_isdir(vp)) {
 804                         hfs_reldirhints(cp, 0);
 805                 }
 806
                     /* Hardlinked cnodes also release their origin records. */
 807                 if(cp->c_flag & C_HARDLINK) {
 808                         hfs_relorigins(cp);
 809                 }
 810         }
 811         /* Release the file fork and related data */
 812         if (fp) {
 813                 /* Dump cached symlink data */
 814                 if (vnode_islnk(vp) && (fp->ff_symlinkptr != NULL)) {
 815                         FREE(fp->ff_symlinkptr, M_TEMP);
 816                 }
 817                 rl_remove_all(&fp->ff_invalidranges);
                     /* The fork was allocated from the M_HFSFORK zone in hfs_getnewvnode. */
 818                 FREE_ZONE(fp, sizeof(struct filefork), M_HFSFORK);
 819         }
 820
 821         /*
 822          * If there was only one active fork then we can release the cnode.
 823          */
 824         if (reclaim_cnode) {
                     /*
                      * Clear H_ALLOC/H_TRANSIT via hfs_chashwakeup, then drop
                      * the cnode lock before hfs_reclaim_cnode is given the cnode.
                      */
 825                 hfs_chashwakeup(hfsmp, cp, H_ALLOC | H_TRANSIT);
 826                 hfs_unlock(cp);
 827                 hfs_reclaim_cnode(hfsmp, cp);
 828         }
 829         else  {
 830                 /*
 831                  * cnode in use.  If it is a directory, it could have
 832                  * no live forks. Just release the lock.
 833                  */
 834                 hfs_unlock(cp);
 835         }
 836
             /* Sever the vnode -> cnode association. */
 837         vnode_clearfsnode(vp);
 838         return (0);
 839 }
 840
 841
     /*
      * Vnode operation vectors, defined elsewhere.  hfs_getnewvnode picks one
      * of these for vnfs_vops based on the vnode type: hfs_fifoop_p for VFIFO
      * (only when FIFO is configured), hfs_specop_p for VBLK/VCHR,
      * hfs_std_vnodeop_p for HFS standard mounts (only when CONFIG_HFS_STD is
      * configured), and hfs_vnodeop_p otherwise.
      */
 842 extern int (**hfs_vnodeop_p) (void *);
 843 extern int (**hfs_specop_p)  (void *);
 844 #if FIFO
 845 extern int (**hfs_fifoop_p)  (void *);
 846 #endif
 847
 848 #if CONFIG_HFS_STD
 849 extern int (**hfs_std_vnodeop_p) (void *);
 850 #endif
 851
 852 /*
 853  * hfs_getnewvnode - get new default vnode
 854  *
 855  * The vnode is returned with an iocount and the cnode locked.
 856  * The cnode of the parent vnode 'dvp' may or may not be locked, depending on
 857  * the circumstances.   The cnode in question (if acquiring the resource fork),
 858  * may also already be locked at the time we enter this function.
 859  *
 860  * Note that there are both input and output flag arguments to this function.
 861  * If one of the input flags (specifically, GNV_USE_VP), is set, then
 862  * hfs_getnewvnode will use the parameter *vpp, which is traditionally only
 863  * an output parameter, as both an input and output parameter.  It will use
 864  * the vnode provided in the output, and pass it to vnode_create with the
 865  * proper flavor so that a new vnode is _NOT_ created on our behalf when
 866  * we dispatch to VFS.  This may be important in various HFS vnode creation
 867          * routines, such as create or get-resource-fork, because we risk deadlock if
 868  * jetsam is involved.
 869  *
 870  * Deadlock potential exists if jetsam is synchronously invoked while we are waiting
 871  * for a vnode to be recycled in order to give it the identity we want.  If jetsam
 872  * happens to target a process for termination that is blocked in-kernel, waiting to
 873  * acquire the cnode lock on our parent 'dvp', while our current thread has it locked,
 874  * neither side will make forward progress and the watchdog timer will eventually fire.
 875  * To prevent this, a caller of hfs_getnewvnode may choose to proactively force
 876  * any necessary vnode reclamation/recycling while it is not holding any locks and
 877  * thus not prone to deadlock.  If this is the case, GNV_USE_VP will be set and
 878  * the parameter will be used as described above.
 879  *
 880  *  !!! <NOTE> !!!!
 881  * In circumstances when GNV_USE_VP is set, this function _MUST_ clean up and either consume
 882  * or dispose of the provided vnode. We funnel all errors to a single return value so that
 883  * if provided_vp is still non-NULL, then we will dispose of the vnode. This will occur in
 884  * all error cases of this function --  anywhere we zero/NULL out the *vpp parameter. It may
 885  * also occur if the current thread raced with another to create the same vnode, and we
 886  * find the entry already present in the cnode hash.
 887  * !!! </NOTE> !!!
 888  */
 889 int
 890 hfs_getnewvnode(
 891         struct hfsmount *hfsmp,
 892         struct vnode *dvp,
 893         struct componentname *cnp,
 894         struct cat_desc *descp,
 895         int flags,
 896         struct cat_attr *attrp,
 897         struct cat_fork *forkp,
 898         struct vnode **vpp,
 899         int *out_flags)
 900 {
 901         struct mount *mp = HFSTOVFS(hfsmp);
 902         struct vnode *vp = NULL;
 903         struct vnode **cvpp;
 904         struct vnode *tvp = NULLVP;
 905         struct cnode *cp = NULL;
 906         struct filefork *fp = NULL;
 907         int hfs_standard = 0;
 908         int retval = 0;
 909         int issystemfile;
 910         int wantrsrc;
 911         int hflags = 0;
 912         int need_update_identity = 0;
 913         struct vnode_fsparam vfsp;
 914         enum vtype vtype;
 915
 916         struct vnode *provided_vp = NULL;
 917
 918
 919 #if QUOTA
 920         int i;
 921 #endif /* QUOTA */
 922
 923         hfs_standard = (hfsmp->hfs_flags & HFS_STANDARD);
 924
 925         if (flags & GNV_USE_VP) {
 926                 /* Store the provided VP for later use */
 927                 provided_vp = *vpp;
 928         }
 929
 930         /* Zero out the vpp regardless of provided input */
 931         *vpp = NULL;
 932
 933         /* Zero out the out_flags */
 934         *out_flags = 0;
 935
 936         if (attrp->ca_fileid == 0) {
 937                 retval = ENOENT;
 938                 goto gnv_exit;
 939         }
 940
 941 #if !FIFO
 942         if (IFTOVT(attrp->ca_mode) == VFIFO) {
 943                 retval = ENOTSUP;
 944                 goto gnv_exit;
 945         }
 946 #endif /* !FIFO */
 947         vtype = IFTOVT(attrp->ca_mode);
 948         issystemfile = (descp->cd_flags & CD_ISMETA) && (vtype == VREG);
 949         wantrsrc = flags & GNV_WANTRSRC;
 950
 951         /* Sanity check the vtype and mode */
 952         if (vtype == VBAD) {
 953                 /* Mark the FS as corrupt and bail out */
 954                 hfs_mark_inconsistent(hfsmp, HFS_INCONSISTENCY_DETECTED);
 955                 retval = EINVAL;
 956                 goto gnv_exit;
 957         }
 958
 959 #ifdef HFS_CHECK_LOCK_ORDER
 960         /*
 961          * The only case where it's permissible to hold the parent cnode
 962          * lock is during a create operation (hfs_makenode) or when
 963          * we don't need the cnode lock (GNV_SKIPLOCK).
 964          */
 965         if ((dvp != NULL) &&
 966             (flags & (GNV_CREATE | GNV_SKIPLOCK)) == 0 &&
 967             VTOC(dvp)->c_lockowner == current_thread()) {
 968                 panic("hfs_getnewvnode: unexpected hold of parent cnode %p", VTOC(dvp));
 969         }
 970 #endif /* HFS_CHECK_LOCK_ORDER */
 971
 972         /*
 973          * Get a cnode (new or existing)
 974          */
 975         cp = hfs_chash_getcnode(hfsmp, attrp->ca_fileid, vpp, wantrsrc,
 976                                                         (flags & GNV_SKIPLOCK), out_flags, &hflags);
 977
 978         /*
 979          * If the id is no longer valid for lookups we'll get back a NULL cp.
 980          */
 981         if (cp == NULL) {
 982                 retval = ENOENT;
 983                 goto gnv_exit;
 984         }
 985         /*
 986          * We may have been provided a vnode via
 987          * GNV_USE_VP.  In this case, we have raced with
 988          * a 2nd thread to create the target vnode. The provided
 989          * vnode that was passed in will be dealt with at the
 990          * end of the function, as we don't zero out the field
 991          * until we're ready to pass responsibility to VFS.
 992          */
 993
 994
 995         /*
 996          * If we get a cnode/vnode pair out of hfs_chash_getcnode, then update the
 997          * descriptor in the cnode as needed if the cnode represents a hardlink.
 998          * We want the caller to get the most up-to-date copy of the descriptor
 999          * as possible. However, we only do anything here if there was a valid vnode.
1000          * If there isn't a vnode, then the cnode is brand new and needs to be initialized
1001          * as it doesn't have a descriptor or cat_attr yet.
1002          *
1003          * If we are about to replace the descriptor with the user-supplied one, then validate
1004          * that the descriptor correctly acknowledges this item is a hardlink.  We could be
1005          * subject to a race where the calling thread invoked cat_lookup, got a valid lookup
1006          * result but the file was not yet a hardlink. With sufficient delay between there
1007          * and here, we might accidentally copy in the raw inode ID into the descriptor in the
1008          * call below.  If the descriptor's CNID is the same as the fileID then it must
1009          * not yet have been a hardlink when the lookup occurred.
1010          */
1011
1012         if (!(hfs_checkdeleted(cp))) {
1013                 //
1014                 // If the bytes of the filename in the descp do not match the bytes in the
1015                 // cnp (and we're not looking up the resource fork), then we want to update
1016                 // the vnode identity to contain the bytes that HFS stores so that when an
1017                 // fsevent gets generated, it has the correct filename.  otherwise daemons
1018                 // that match filenames produced by fsevents with filenames they have stored
1019                 // elsewhere (e.g. bladerunner, backupd, mds), the filenames will not match.
1020                 // See: <rdar://problem/8044697> FSEvents doesn't always decompose diacritical unicode chars in the paths of the changed directories
1021                 // for more details.
1022                 //
1023 #ifdef CN_WANTSRSRCFORK
1024                 if (*vpp && cnp && cnp->cn_nameptr && !(cnp->cn_flags & CN_WANTSRSRCFORK) && descp && descp->cd_nameptr && strncmp((const char *)cnp->cn_nameptr, (const char *)descp->cd_nameptr, descp->cd_namelen) != 0) {
1025 #else
1026                 if (*vpp && cnp && cnp->cn_nameptr && descp && descp->cd_nameptr && strncmp((const char *)cnp->cn_nameptr, (const char *)descp->cd_nameptr, descp->cd_namelen) != 0) {
1027 #endif
1028                         vnode_update_identity (*vpp, dvp, (const char *)descp->cd_nameptr, descp->cd_namelen, 0, VNODE_UPDATE_NAME);
1029                 }
1030                 if ((cp->c_flag & C_HARDLINK) && descp->cd_nameptr && descp->cd_namelen > 0) {
1031                         /* If cnode is uninitialized, its c_attr will be zeroed out; cnids wont match. */
1032                         if ((descp->cd_cnid == cp->c_attr.ca_fileid)  &&
1033                                         (attrp->ca_linkcount != cp->c_attr.ca_linkcount)){
1034
1035                                 if ((flags & GNV_SKIPLOCK) == 0) {
1036                                         /*
1037                                          * Then we took the lock. Drop it before calling
1038                                          * vnode_put, which may invoke hfs_vnop_inactive and need to take
1039                                          * the cnode lock again.
1040                                          */
1041                                         hfs_unlock(cp);
1042                                 }
1043
1044                                 /*
1045                                  * Emit ERECYCLE and GNV_CAT_ATTRCHANGED to
1046                                  * force a re-drive in the lookup routine.
1047                                  * Drop the iocount on the vnode obtained from
1048                                  * chash_getcnode if needed.
1049                                  */
1050                                 if (*vpp != NULL) {
1051                                         vnode_put (*vpp);
1052                                         *vpp = NULL;
1053                                 }
1054
1055                                 /*
1056                                  * If we raced with VNOP_RECLAIM for this vnode, the hash code could
1057                                  * have observed it after the c_vp or c_rsrc_vp fields had been torn down;
1058                                  * the hash code peeks at those fields without holding the cnode lock because
1059                                  * it needs to be fast.  As a result, we may have set H_ATTACH in the chash
1060                                  * call above.  Since we're bailing out, unset whatever flags we just set, and
1061                                  * wake up all waiters for this cnode.
1062                                  */
1063                                 if (hflags) {
1064                                         hfs_chashwakeup(hfsmp, cp, hflags);
1065                                 }
1066
1067                                 *out_flags = GNV_CAT_ATTRCHANGED;
1068                                 retval = ERECYCLE;
1069                                 goto gnv_exit;
1070                         }
1071                         else {
1072                                 /*
1073                                  * Otherwise, CNID != fileid. Go ahead and copy in the new descriptor.
1074                                  *
1075                                  * Replacing the descriptor here is fine because we looked up the item without
1076                                  * a vnode in hand before.  If a vnode existed, its identity must be attached to this
1077                                  * item.  We are not susceptible to the lookup fastpath issue at this point.
1078                                  */
1079                                 replace_desc(cp, descp);
1080
1081                                 /*
1082                                  * This item was a hardlink, and its name needed to be updated. By replacing the
1083                                  * descriptor above, we've now updated the cnode's internal representation of
1084                                  * its link ID/CNID, parent ID, and its name.  However, VFS must now be alerted
1085                                  * to the fact that this vnode now has a new parent, since we cannot guarantee
1086                                  * that the new link lived in the same directory as the alternative name for
1087                                  * this item.
1088                                  */
1089                                 if ((*vpp != NULL) && (cnp || cp->c_desc.cd_nameptr)) {
1090                                         /* we could be requesting the rsrc of a hardlink file... */
1091 #ifdef CN_WANTSRSRCFORK
1092                                         if (cp->c_desc.cd_nameptr && (cnp == NULL || !(cnp->cn_flags & CN_WANTSRSRCFORK))) {
1093 #else
1094                                         if (cp->c_desc.cd_nameptr) {
1095 #endif
1096                                                 //
1097                                                 // Update the identity with what we have stored on disk as
1098                                                 // the name of this file.  This is related to:
1099                                                 //    <rdar://problem/8044697> FSEvents doesn't always decompose diacritical unicode chars in the paths of the changed directories
1100                                                 //
1101                                                 vnode_update_identity (*vpp, dvp, (const char *)cp->c_desc.cd_nameptr, cp->c_desc.cd_namelen, 0,
1102                                                                (VNODE_UPDATE_PARENT | VNODE_UPDATE_NAME));
1103                                         } else if (cnp) {
1104                                                 vnode_update_identity (*vpp, dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash,
1105                                                                        (VNODE_UPDATE_PARENT | VNODE_UPDATE_NAME));
1106                                         }
1107                                 }
1108                         }
1109                 }
1110         }
1111
1112         /*
1113          * At this point, we have performed hardlink and open-unlinked checks
1114          * above.  We have now validated the state of the vnode that was given back
1115          * to us from the cnode hash code and find it safe to return.
1116          */
1117         if (*vpp != NULL) {
1118                 retval = 0;
1119                 goto gnv_exit;
1120         }
1121
1122         /*
1123          * If this is a new cnode then initialize it.
1124          */
1125         if (ISSET(cp->c_hflag, H_ALLOC)) {
1126                 lck_rw_init(&cp->c_truncatelock, hfs_rwlock_group, hfs_lock_attr);
1127 #if HFS_COMPRESSION
1128                 cp->c_decmp = NULL;
1129 #endif
1130
1131                 /* Make sure its still valid (ie exists on disk). */
1132                 if (!(flags & GNV_CREATE)) {
1133                         int error = 0;
1134                         if (!hfs_valid_cnode (hfsmp, dvp, (wantrsrc ? NULL : cnp), cp->c_fileid, attrp, &error)) {
1135                                 hfs_chash_abort(hfsmp, cp);
1136                                 if ((flags & GNV_SKIPLOCK) == 0) {
1137                                         hfs_unlock(cp);
1138                                 }
1139                                 hfs_reclaim_cnode(hfsmp, cp);
1140                                 *vpp = NULL;
1141                                 /*
1142                                  * If we hit this case, that means that the entry was there in the catalog when
1143                                  * we did a cat_lookup earlier.  Think hfs_lookup.  However, in between the time
1144                                  * that we checked the catalog and the time we went to get a vnode/cnode for it,
1145                                  * it had been removed from the namespace and the vnode totally reclaimed.  As a result,
1146                                  * it's not there in the catalog during the check in hfs_valid_cnode and we bubble out
1147                                  * an ENOENT.  To indicate to the caller that they should really double-check the
1148                                  * entry (it could have been renamed over and gotten a new fileid), we mark a bit
1149                                  * in the output flags.
1150                                  */
1151                                 if (error == ENOENT) {
1152                                         *out_flags = GNV_CAT_DELETED;
1153                                         retval = ENOENT;
1154                                         goto gnv_exit;
1155                                 }
1156
1157                                 /*
1158                                  * Also, we need to protect the cat_attr acquired during hfs_lookup and passed into
1159                                  * this function as an argument because the catalog may have changed w.r.t hardlink
1160                                  * link counts and the firstlink field.  If that validation check fails, then let
1161                                  * lookup re-drive itself to get valid/consistent data with the same failure condition below.
1162                                  */
1163                                 if (error == ERECYCLE) {
1164                                         *out_flags = GNV_CAT_ATTRCHANGED;
1165                                         retval = ERECYCLE;
1166                                         goto gnv_exit;
1167                                 }
1168                         }
1169                 }
1170                 bcopy(attrp, &cp->c_attr, sizeof(struct cat_attr));
1171                 bcopy(descp, &cp->c_desc, sizeof(struct cat_desc));
1172
1173                 /* The name was inherited so clear descriptor state... */
1174                 descp->cd_namelen = 0;
1175                 descp->cd_nameptr = NULL;
1176                 descp->cd_flags &= ~CD_HASBUF;
1177
1178                 /* Tag hardlinks */
1179                 if ((vtype == VREG || vtype == VDIR
1180                          || vtype == VSOCK || vtype == VFIFO)
1181                         && (descp->cd_cnid != attrp->ca_fileid
1182                                 || ISSET(attrp->ca_recflags, kHFSHasLinkChainMask))) {
1183                         cp->c_flag |= C_HARDLINK;
1184                 }
1185                 /*
1186                  * Fix-up dir link counts.
1187                  *
1188                  * Earlier versions of Leopard used ca_linkcount for posix
1189                  * nlink support (effectively the sub-directory count + 2).
1190                  * That is now accomplished using the ca_dircount field with
1191                  * the corresponding kHFSHasFolderCountMask flag.
1192                  *
1193                  * For directories the ca_linkcount is the true link count,
1194                  * tracking the number of actual hardlinks to a directory.
1195                  *
1196                  * We only do this if the mount has HFS_FOLDERCOUNT set;
1197                  * at the moment, we only set that for HFSX volumes.
1198                  */
1199                 if ((hfsmp->hfs_flags & HFS_FOLDERCOUNT) &&
1200                     (vtype == VDIR) &&
1201                     !(attrp->ca_recflags & kHFSHasFolderCountMask) &&
1202                     (cp->c_attr.ca_linkcount > 1)) {
1203                         if (cp->c_attr.ca_entries == 0)
1204                                 cp->c_attr.ca_dircount = 0;
1205                         else
1206                                 cp->c_attr.ca_dircount = cp->c_attr.ca_linkcount - 2;
1207
1208                         cp->c_attr.ca_linkcount = 1;
1209                         cp->c_attr.ca_recflags |= kHFSHasFolderCountMask;
1210                         if ( !(hfsmp->hfs_flags & HFS_READ_ONLY) )
1211                                 cp->c_flag |= C_MODIFIED;
1212                 }
1213 #if QUOTA
1214                 if (hfsmp->hfs_flags & HFS_QUOTAS) {
1215                         for (i = 0; i < MAXQUOTAS; i++)
1216                                 cp->c_dquot[i] = NODQUOT;
1217                 }
1218 #endif /* QUOTA */
1219                 /* Mark the output flag that we're vending a new cnode */
1220                 *out_flags |= GNV_NEW_CNODE;
1221         }
1222
1223         if (vtype == VDIR) {
1224                 if (cp->c_vp != NULL)
1225                         panic("hfs_getnewvnode: orphaned vnode (data)");
1226                 cvpp = &cp->c_vp;
1227         } else {
1228                 if (forkp && attrp->ca_blocks < forkp->cf_blocks)
1229                         panic("hfs_getnewvnode: bad ca_blocks (too small)");
1230                 /*
1231                  * Allocate and initialize a file fork...
1232                  */
1233                 MALLOC_ZONE(fp, struct filefork *, sizeof(struct filefork),
1234                         M_HFSFORK, M_WAITOK);
1235                 fp->ff_cp = cp;
1236                 if (forkp)
1237                         bcopy(forkp, &fp->ff_data, sizeof(struct cat_fork));
1238                 else
1239                         bzero(&fp->ff_data, sizeof(struct cat_fork));
1240                 rl_init(&fp->ff_invalidranges);
1241                 fp->ff_sysfileinfo = 0;
1242
1243                 if (wantrsrc) {
1244                         if (cp->c_rsrcfork != NULL)
1245                                 panic("hfs_getnewvnode: orphaned rsrc fork");
1246                         if (cp->c_rsrc_vp != NULL)
1247                                 panic("hfs_getnewvnode: orphaned vnode (rsrc)");
1248                         cp->c_rsrcfork = fp;
1249                         cvpp = &cp->c_rsrc_vp;
1250                         if ( (tvp = cp->c_vp) != NULLVP )
1251                                 cp->c_flag |= C_NEED_DVNODE_PUT;
1252                 } else {
1253                         if (cp->c_datafork != NULL)
1254                                 panic("hfs_getnewvnode: orphaned data fork");
1255                         if (cp->c_vp != NULL)
1256                                 panic("hfs_getnewvnode: orphaned vnode (data)");
1257                         cp->c_datafork = fp;
1258                         cvpp = &cp->c_vp;
1259                         if ( (tvp = cp->c_rsrc_vp) != NULLVP)
1260                                 cp->c_flag |= C_NEED_RVNODE_PUT;
1261                 }
1262         }
1263         if (tvp != NULLVP) {
1264                 /*
1265                  * grab an iocount on the vnode we weren't
1266                  * interested in (i.e. we want the resource fork
1267                  * but the cnode already has the data fork)
1268                  * to prevent it from being
1269                  * recycled by us when we call vnode_create
1270                  * which will result in a deadlock when we
1271                  * try to take the cnode lock in hfs_vnop_fsync or
1272                  * hfs_vnop_reclaim... vnode_get can be called here
1273                  * because we already hold the cnode lock which will
1274                  * prevent the vnode from changing identity until
1275                  * we drop it.. vnode_get will not block waiting for
1276                  * a change of state... however, it will return an
1277                  * error if the current iocount == 0 and we've already
1278                  * started to terminate the vnode... we don't need/want to
1279                  * grab an iocount in the case since we can't cause
1280                  * the filesystem to be re-entered on this thread for this vp
1281                  *
1282                  * the matching vnode_put will happen in hfs_unlock
1283                  * after we've dropped the cnode lock
1284                  */
1285                 if ( vnode_get(tvp) != 0)
1286                         cp->c_flag &= ~(C_NEED_RVNODE_PUT | C_NEED_DVNODE_PUT);
1287         }
1288         vfsp.vnfs_mp = mp;
1289         vfsp.vnfs_vtype = vtype;
1290         vfsp.vnfs_str = "hfs";
1291         if ((cp->c_flag & C_HARDLINK) && (vtype == VDIR)) {
1292                 vfsp.vnfs_dvp = NULL;  /* no parent for me! */
1293                 vfsp.vnfs_cnp = NULL;  /* no name for me! */
1294         } else {
1295                 vfsp.vnfs_dvp = dvp;
1296                 vfsp.vnfs_cnp = cnp;
1297         }
1298
1299         vfsp.vnfs_fsnode = cp;
1300
1301         /*
1302          * Special Case HFS Standard VNOPs from HFS+, since
1303          * HFS standard is readonly/deprecated as of 10.6
1304          */
1305
1306 #if FIFO
1307         if (vtype == VFIFO )
1308                 vfsp.vnfs_vops = hfs_fifoop_p;
1309         else
1310 #endif
1311         if (vtype == VBLK || vtype == VCHR)
1312                 vfsp.vnfs_vops = hfs_specop_p;
1313 #if CONFIG_HFS_STD
1314         else if (hfs_standard)
1315                 vfsp.vnfs_vops = hfs_std_vnodeop_p;
1316 #endif
1317         else
1318                 vfsp.vnfs_vops = hfs_vnodeop_p;
1319
1320         if (vtype == VBLK || vtype == VCHR)
1321                 vfsp.vnfs_rdev = attrp->ca_rdev;
1322         else
1323                 vfsp.vnfs_rdev = 0;
1324
1325         if (forkp)
1326                 vfsp.vnfs_filesize = forkp->cf_size;
1327         else
1328                 vfsp.vnfs_filesize = 0;
1329
1330         vfsp.vnfs_flags = VNFS_ADDFSREF;
1331 #ifdef CN_WANTSRSRCFORK
1332         if (cnp && cnp->cn_nameptr && !(cnp->cn_flags & CN_WANTSRSRCFORK) && cp->c_desc.cd_nameptr && strncmp((const char *)cnp->cn_nameptr, (const char *)cp->c_desc.cd_nameptr, cp->c_desc.cd_namelen) != 0) {
1333 #else
1334         if (cnp && cnp->cn_nameptr && cp->c_desc.cd_nameptr && strncmp((const char *)cnp->cn_nameptr, (const char *)cp->c_desc.cd_nameptr, cp->c_desc.cd_namelen) != 0) {
1335 #endif
1336                 //
1337                 // We don't want VFS to add an entry for this vnode because the name in the
1338                 // cnp does not match the bytes stored on disk for this file.  Instead we'll
1339                 // update the identity later after the vnode is created and we'll do so with
1340                 // the correct bytes for this filename.  For more details, see:
1341                 //   <rdar://problem/8044697> FSEvents doesn't always decompose diacritical unicode chars in the paths of the changed directories
1342                 //
1343                 vfsp.vnfs_flags |= VNFS_NOCACHE;
1344                 need_update_identity = 1;
1345         } else if (dvp == NULLVP || cnp == NULL || !(cnp->cn_flags & MAKEENTRY) || (flags & GNV_NOCACHE)) {
1346                 vfsp.vnfs_flags |= VNFS_NOCACHE;
1347         }
1348
1349         /* Tag system files */
1350         vfsp.vnfs_marksystem = issystemfile;
1351
1352         /* Tag root directory */
1353         if (descp->cd_cnid == kHFSRootFolderID)
1354                 vfsp.vnfs_markroot = 1;
1355         else
1356                 vfsp.vnfs_markroot = 0;
1357
1358         /*
1359          * If provided_vp was non-NULL, then it is an already-allocated (but not
1360          * initialized) vnode. We simply need to initialize it to this identity.
1361          * If it was NULL, then assume that we need to call vnode_create with the
1362          * normal arguments/types.
1363          */
1364         if (provided_vp) {
1365                 vp = provided_vp;
1366                 /*
1367                  * After we assign the value of provided_vp into 'vp' (so that it can be
1368                  * mutated safely by vnode_initialize), we can NULL it out.  At this point, the disposal
1369                  * and handling of the provided vnode will be the responsibility of VFS, which will
1370                  * clean it up and vnode_put it properly if vnode_initialize fails.
1371                  */
1372                 provided_vp = NULL;
1373
1374                 retval = vnode_initialize (VNCREATE_FLAVOR, VCREATESIZE, &vfsp, &vp);
1375                 /* See error handling below for resolving provided_vp */
1376         }
1377         else {
1378                 /* Do a standard vnode_create */
1379                 retval = vnode_create (VNCREATE_FLAVOR, VCREATESIZE, &vfsp, &vp);
1380         }
1381
1382         /*
1383          * We used a local variable to hold the result of vnode_create/vnode_initialize so that
1384          * on error cases in vnode_create we won't accidentally harm the cnode's fields
1385          */
1386
1387         if (retval) {
1388                 /* Clean up if we encountered an error */
1389                 if (fp) {
1390                         if (fp == cp->c_datafork)
1391                                 cp->c_datafork = NULL;
1392                         else
1393                                 cp->c_rsrcfork = NULL;
1394
1395                         FREE_ZONE(fp, sizeof(struct filefork), M_HFSFORK);
1396                 }
1397                 /*
1398                  * If this is a newly created cnode or a vnode reclaim
1399                  * occurred during the attachment, then cleanup the cnode.
1400                  */
1401                 if ((cp->c_vp == NULL) && (cp->c_rsrc_vp == NULL)) {
1402                         hfs_chash_abort(hfsmp, cp);
1403                         hfs_reclaim_cnode(hfsmp, cp);
1404                 }
1405                 else {
1406                         hfs_chashwakeup(hfsmp, cp, H_ALLOC | H_ATTACH);
1407                         if ((flags & GNV_SKIPLOCK) == 0){
1408                                 hfs_unlock(cp);
1409                         }
1410                 }
1411                 *vpp = NULL;
1412                 goto gnv_exit;
1413         }
1414
1415         /* If no error, then assign the value into the cnode's fields  */
1416         *cvpp = vp;
1417
1418         vnode_settag(vp, VT_HFS);
1419         if (cp->c_flag & C_HARDLINK) {
1420                 vnode_setmultipath(vp);
1421         }
1422
1423         if (cp->c_attr.ca_recflags & kHFSFastDevCandidateMask) {
1424                 vnode_setfastdevicecandidate(vp);
1425         }
1426
1427         if (cp->c_attr.ca_recflags & kHFSAutoCandidateMask) {
1428                 vnode_setautocandidate(vp);
1429         }
1430
1431
1432
1433
1434         if (vp && need_update_identity) {
1435                 //
1436                 // As above, update the name of the vnode if the bytes stored in hfs do not match
1437                 // the bytes in the cnp.  See this radar:
1438                 //    <rdar://problem/8044697> FSEvents doesn't always decompose diacritical unicode chars in the paths of the changed directories
1439                 // for more details.
1440                 //
1441                 vnode_update_identity (vp, dvp, (const char *)cp->c_desc.cd_nameptr, cp->c_desc.cd_namelen, 0, VNODE_UPDATE_NAME);
1442         }
1443
1444         /*
1445          * Tag resource fork vnodes as needing an VNOP_INACTIVE
1446          * so that any deferred removes (open unlinked files)
1447          * have the chance to process the resource fork.
1448          */
1449         if (VNODE_IS_RSRC(vp)) {
1450                 int err;
1451
1452                 KERNEL_DEBUG_CONSTANT(HFSDBG_GETNEWVNODE, VM_KERNEL_ADDRPERM(cp->c_vp), VM_KERNEL_ADDRPERM(cp->c_rsrc_vp), 0, 0, 0);
1453
1454                 /* Force VL_NEEDINACTIVE on this vnode */
1455                 err = vnode_ref(vp);
1456                 if (err == 0) {
1457                         vnode_rele(vp);
1458                 }
1459         }
1460         hfs_chashwakeup(hfsmp, cp, H_ALLOC | H_ATTACH);
1461
1462         /*
1463          * Stop tracking an active hot file.
1464          */
1465         if (!(flags & GNV_CREATE) && (vtype != VDIR) && !issystemfile && !(hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN)) {
1466                 (void) hfs_removehotfile(vp);
1467         }
1468
1469 #if CONFIG_PROTECT
1470         /* Initialize the cp data structures. The key should be in place now. */
1471         if (!issystemfile && (*out_flags & GNV_NEW_CNODE)) {
1472                 cp_entry_init(cp, mp);
1473         }
1474 #endif
1475
1476         *vpp = vp;
1477         retval = 0;
1478
1479 gnv_exit:
1480         if (provided_vp) {
1481                 /* Release our empty vnode if it was not used */
1482                 vnode_put (provided_vp);
1483         }
1484         return retval;
1485 }
1486
1487
1488 static void
1489 hfs_reclaim_cnode(hfsmount_t *hfsmp, struct cnode *cp)
1490 {
1491 #if QUOTA
1492         int i;
1493
1494         for (i = 0; i < MAXQUOTAS; i++) {
1495                 if (cp->c_dquot[i] != NODQUOT) {
1496                         dqreclaim(cp->c_dquot[i]);
1497                         cp->c_dquot[i] = NODQUOT;
1498                 }
1499         }
1500 #endif /* QUOTA */
1501
1502         /*
1503          * If the descriptor has a name then release it
1504          */
1505         if ((cp->c_desc.cd_flags & CD_HASBUF) && (cp->c_desc.cd_nameptr != 0)) {
1506                 const char *nameptr;
1507
1508                 nameptr = (const char *) cp->c_desc.cd_nameptr;
1509                 cp->c_desc.cd_nameptr = 0;
1510                 cp->c_desc.cd_flags &= ~CD_HASBUF;
1511                 cp->c_desc.cd_namelen = 0;
1512                 vfs_removename(nameptr);
1513         }
1514
1515         /*
1516          * We only call this function if we are in hfs_vnop_reclaim and
1517          * attempting to reclaim a cnode with only one live fork.  Because the vnode
1518          * went through reclaim, any future attempts to use this item will have to
1519          * go through lookup again, which will need to create a new vnode.  Thus,
1520          * destroying the locks below is safe.
1521          */
1522
1523         lck_rw_destroy(&cp->c_rwlock, hfs_rwlock_group);
1524         lck_rw_destroy(&cp->c_truncatelock, hfs_rwlock_group);
1525 #if HFS_COMPRESSION
1526         if (cp->c_decmp) {
1527                 decmpfs_cnode_destroy(cp->c_decmp);
1528                 FREE_ZONE(cp->c_decmp, sizeof(*(cp->c_decmp)), M_DECMPFS_CNODE);
1529         }
1530 #endif
1531 #if CONFIG_PROTECT
1532         cp_entry_destroy(hfsmp, cp->c_cpentry);
1533         cp->c_cpentry = NULL;
1534 #else
1535         (void)hfsmp;    // Prevent compiler warning
1536 #endif
1537
1538         bzero(cp, sizeof(struct cnode));
1539         FREE_ZONE(cp, sizeof(struct cnode), M_HFSNODE);
1540 }
1541
1542
1543 /*
1544  * hfs_valid_cnode
1545  *
1546  * This function is used to validate data that is stored in-core against what is contained
1547  * in the catalog.  Common uses include validating that the parent-child relationship still exist
1548  * for a specific directory entry (guaranteeing it has not been renamed into a different spot) at
1549  * the point of the check.
1550  */
1551 int
1552 hfs_valid_cnode(struct hfsmount *hfsmp, struct vnode *dvp, struct componentname *cnp,
1553                 cnid_t cnid, struct cat_attr *cattr, int *error)
1554 {
1555         struct cat_attr attr;
1556         struct cat_desc cndesc;
1557         int stillvalid = 0;
1558         int lockflags;
1559
1560         /* System files are always valid */
1561         if (cnid < kHFSFirstUserCatalogNodeID) {
1562                 *error = 0;
1563                 return (1);
1564         }
1565
1566         /* XXX optimization:  check write count in dvp */
1567
1568         lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
1569
1570         if (dvp && cnp) {
1571                 int lookup = 0;
1572                 struct cat_fork fork;
1573                 bzero(&cndesc, sizeof(cndesc));
1574                 cndesc.cd_nameptr = (const u_int8_t *)cnp->cn_nameptr;
1575                 cndesc.cd_namelen = cnp->cn_namelen;
1576                 cndesc.cd_parentcnid = VTOC(dvp)->c_fileid;
1577                 cndesc.cd_hint = VTOC(dvp)->c_childhint;
1578
1579                 /*
1580                  * We have to be careful when calling cat_lookup.  The result argument
1581                  * 'attr' may get different results based on whether or not you ask
1582                  * for the filefork to be supplied as output.  This is because cat_lookupbykey
1583                  * will attempt to do basic validation/smoke tests against the resident
1584                  * extents if there are no overflow extent records, but it needs someplace
1585                  * in memory to store the on-disk fork structures.
1586                  *
1587                  * Since hfs_lookup calls cat_lookup with a filefork argument, we should
1588                  * do the same here, to verify that block count differences are not
1589                  * due to calling the function with different styles.  cat_lookupbykey
1590                  * will request the volume be fsck'd if there is true on-disk corruption
1591                  * where the number of blocks does not match the number generated by
1592                  * summing the number of blocks in the resident extents.
1593                  */
1594
1595                 lookup = cat_lookup (hfsmp, &cndesc, 0, 0, NULL, &attr, &fork, NULL);
1596
1597                 if ((lookup == 0) && (cnid == attr.ca_fileid)) {
1598                         stillvalid = 1;
1599                         *error = 0;
1600                 }
1601                 else {
1602                         *error = ENOENT;
1603                 }
1604
1605                 /*
1606                  * In hfs_getnewvnode, we may encounter a time-of-check vs. time-of-vnode creation
1607                  * race.  Specifically, if there is no vnode/cnode pair for the directory entry
1608                  * being looked up, we have to go to the catalog.  But since we don't hold any locks (aside
1609                  * from the dvp in 'shared' mode) there is nothing to protect us against the catalog record
1610                  * changing in between the time we do the cat_lookup there and the time we re-grab the
1611                  * catalog lock above to do another cat_lookup.
1612                  *
1613                  * However, we need to check more than just the CNID and parent-child name relationships above.
1614                  * Hardlinks can suffer the same race in the following scenario:  Suppose we do a
1615                  * cat_lookup, and find a leaf record and a raw inode for a hardlink.  Now, we have
1616                  * the cat_attr in hand (passed in above).  But in between then and now, the vnode was
1617                  * created by a competing hfs_getnewvnode call, and is manipulated and reclaimed before we get
1618                  * a chance to do anything.  This is possible if there are a lot of threads thrashing around
1619                  * with the cnode hash.  In this case, if we don't check/validate the cat_attr in-hand, we will
1620                  * blindly stuff it into the cnode, which will make the in-core data inconsistent with what is
1621                  * on disk.  So validate the cat_attr below, if required.  This race cannot happen if the cnode/vnode
1622                  * already exists, as it does in the case of rename and delete.
1623                  */
1624                 if (stillvalid && cattr != NULL) {
1625                         if (cattr->ca_linkcount != attr.ca_linkcount) {
1626                                 stillvalid = 0;
1627                                 *error = ERECYCLE;
1628                                 goto notvalid;
1629                         }
1630
1631                         if (cattr->ca_union1.cau_linkref != attr.ca_union1.cau_linkref) {
1632                                 stillvalid = 0;
1633                                 *error = ERECYCLE;
1634                                 goto notvalid;
1635                         }
1636
1637                         if (cattr->ca_union3.cau_firstlink != attr.ca_union3.cau_firstlink) {
1638                                 stillvalid = 0;
1639                                 *error = ERECYCLE;
1640                                 goto notvalid;
1641                         }
1642
1643                         if (cattr->ca_union2.cau_blocks != attr.ca_union2.cau_blocks) {
1644                                 stillvalid = 0;
1645                                 *error = ERECYCLE;
1646                                 goto notvalid;
1647                         }
1648                 }
1649         } else {
1650                 if (cat_idlookup(hfsmp, cnid, 0, 0, NULL, NULL, NULL) == 0) {
1651                         stillvalid = 1;
1652                         *error = 0;
1653                 }
1654                 else {
1655                         *error = ENOENT;
1656                 }
1657         }
1658 notvalid:
1659         hfs_systemfile_unlock(hfsmp, lockflags);
1660
1661         return (stillvalid);
1662 }
1663
1664
1665 /*
1666  * Per HI and Finder requirements, HFS should add in the
1667  * date/time that a particular directory entry was added
1668  * to the containing directory.
1669  * This is stored in the extended Finder Info for the
1670  * item in question.
1671  *
1672  * Note that this field is also set explicitly in the hfs_vnop_setxattr code.
1673  * We must ignore user attempts to set this part of the finderinfo, and
1674  * so we need to save a local copy of the date added, write in the user
1675  * finderinfo, then stuff the value back in.
1676  */
1677 void hfs_write_dateadded (struct cat_attr *attrp, u_int32_t dateadded) {
1678         u_int8_t *finfo = NULL;
1679
1680         /* overlay the FinderInfo to the correct pointer, and advance */
1681         finfo = (u_int8_t*)attrp->ca_finderinfo;
1682         finfo = finfo + 16;
1683
1684         /*
1685          * Make sure to write it out as big endian, since that's how
1686          * finder info is defined.
1687          *
1688          * NOTE: This is a Unix-epoch timestamp, not a HFS/Traditional Mac timestamp.
1689          */
1690         if (S_ISREG(attrp->ca_mode)) {
1691                 struct FndrExtendedFileInfo *extinfo = (struct FndrExtendedFileInfo *)finfo;
1692                 extinfo->date_added = OSSwapHostToBigInt32(dateadded);
1693                 attrp->ca_recflags |= kHFSHasDateAddedMask;
1694         }
1695         else if (S_ISDIR(attrp->ca_mode)) {
1696                 struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)finfo;
1697                 extinfo->date_added = OSSwapHostToBigInt32(dateadded);
1698                                 attrp->ca_recflags |= kHFSHasDateAddedMask;
1699         }
1700         /* If it were neither directory/file, then we'd bail out */
1701         return;
1702 }
1703
1704 static u_int32_t
1705 hfs_get_dateadded_internal(const uint8_t *finderinfo, mode_t mode)
1706 {
1707         const uint8_t *finfo = NULL;
1708         u_int32_t dateadded = 0;
1709
1710
1711
1712         /* overlay the FinderInfo to the correct pointer, and advance */
1713         finfo = finderinfo + 16;
1714
1715         /*
1716          * FinderInfo is written out in big endian... make sure to convert it to host
1717          * native before we use it.
1718          */
1719         if (S_ISREG(mode)) {
1720                 const struct FndrExtendedFileInfo *extinfo = (const struct FndrExtendedFileInfo *)finfo;
1721                 dateadded = OSSwapBigToHostInt32 (extinfo->date_added);
1722         }
1723         else if (S_ISDIR(mode)) {
1724                 const struct FndrExtendedDirInfo *extinfo = (const struct FndrExtendedDirInfo *)finfo;
1725                 dateadded = OSSwapBigToHostInt32 (extinfo->date_added);
1726         }
1727
1728         return dateadded;
1729 }
1730
1731 u_int32_t
1732 hfs_get_dateadded(struct cnode *cp)
1733 {
1734         if ((cp->c_attr.ca_recflags & kHFSHasDateAddedMask) == 0) {
1735                 /* Date added was never set.  Return 0. */
1736                 return (0);
1737         }
1738
1739         return (hfs_get_dateadded_internal((u_int8_t*)cp->c_finderinfo,
1740             cp->c_attr.ca_mode));
1741 }
1742
/*
 * Same as hfs_get_dateadded, but reads from a caller-supplied finderinfo
 * buffer and mode instead of a cnode.  The "has date added" record flag
 * is not consulted here.
 */
u_int32_t
hfs_get_dateadded_from_blob(const uint8_t *finderinfo, mode_t mode)
{
        u_int32_t dateadded = hfs_get_dateadded_internal(finderinfo, mode);

        return dateadded;
}
1748
1749 /*
1750  * Per HI and Finder requirements, HFS maintains a "write/generation
1751  * count" for each file that is incremented on any write & pageout.
1752  * It should start at 1 to reserve "0" as a special value.  If it
1753  * should ever wrap around, it will skip using 0.
1754  *
1755  * Note that finderinfo is manipulated in hfs_vnop_setxattr and care
1756  * is and should be taken to ignore user attempts to set the part of
1757  * the finderinfo that records the generation counter.
1758  *
1759  * Any change to the generation counter *must* not be visible before
1760  * the change that caused it (for obvious reasons), and given the
1761  * limitations of our current architecture, the change to the
1762  * generation counter may occur some time afterwards (particularly in
1763  * the case where a file is mapped writable---more on that below).
1764  *
1765  * We make no guarantees about the consistency of a file.  In other
1766  * words, a reader that is operating concurrently with a writer might
1767  * see some, but not all of writer's changes, and the generation
1768  * counter will *not* necessarily tell you this has happened.  To
1769  * enforce consistency, clients must make their own arrangements
1770  * e.g. use file locking.
1771  *
1772  * We treat files that are mapped writable as a special case: when
1773  * that happens, clients requesting the generation count will be told
1774  * it has a generation count of zero and they use that knowledge as a
1775  * hint that the file is changing and it therefore might be prudent to
1776  * wait until it is no longer mapped writable.  Clients should *not*
1777  * rely on this behaviour however; we might decide that it's better
1778  * for us to publish the fact that a file is mapped writable via
1779  * alternate means and return the generation counter when it is mapped
1780  * writable as it still has some, albeit limited, use.  We reserve the
1781  * right to make this change.
1782  *
1783  * Lastly, it's important to realise that because data and metadata
1784  * take different paths through the system, it's possible upon crash
1785  * or sudden power loss and after a restart, that a change may be
1786  * visible to the rest of the system without a corresponding change to
1787  * the generation counter.  The reverse may also be true, but for all
1788  * practical applications this shouldn't be an issue.
1789  */
1790 void hfs_write_gencount (struct cat_attr *attrp, uint32_t gencount) {
1791         u_int8_t *finfo = NULL;
1792
1793         /* overlay the FinderInfo to the correct pointer, and advance */
1794         finfo = (u_int8_t*)attrp->ca_finderinfo;
1795         finfo = finfo + 16;
1796
1797         /*
1798          * Make sure to write it out as big endian, since that's how
1799          * finder info is defined.
1800          *
1801          * Generation count is only supported for files.
1802          */
1803         if (S_ISREG(attrp->ca_mode)) {
1804                 struct FndrExtendedFileInfo *extinfo = (struct FndrExtendedFileInfo *)finfo;
1805                 extinfo->write_gen_counter = OSSwapHostToBigInt32(gencount);
1806         }
1807
1808         /* If it were neither directory/file, then we'd bail out */
1809         return;
1810 }
1811
1812 /*
1813  * Increase the gen count by 1; if it wraps around to 0, increment by
1814  * two.  The cnode *must* be locked exclusively by the caller.
1815  *
1816  * You may think holding the lock is unnecessary because we only need
1817  * to change the counter, but consider this sequence of events: thread
1818  * A calls hfs_incr_gencount and the generation counter is 2 upon
1819  * entry.  A context switch occurs and thread B increments the counter
1820  * to 3, thread C now gets the generation counter (for whatever
1821  * purpose), and then another thread makes another change and the
1822  * generation counter is incremented again---it's now 4.  Now thread A
1823  * continues and it sets the generation counter back to 3.  So you can
1824  * see, thread C would miss the change that caused the generation
1825  * counter to increment to 4 and for this reason the cnode *must*
1826  * always be locked exclusively.
1827  */
1828 uint32_t hfs_incr_gencount (struct cnode *cp) {
1829         u_int8_t *finfo = NULL;
1830         u_int32_t gcount = 0;
1831
1832         /* overlay the FinderInfo to the correct pointer, and advance */
1833         finfo = (u_int8_t*)cp->c_finderinfo;
1834         finfo = finfo + 16;
1835
1836         /*
1837          * FinderInfo is written out in big endian... make sure to convert it to host
1838          * native before we use it.
1839          *
1840          * NOTE: the write_gen_counter is stored in the same location in both the
1841          *       FndrExtendedFileInfo and FndrExtendedDirInfo structs (it's the
1842          *       last 32-bit word) so it is safe to have one code path here.
1843          */
1844         if (S_ISDIR(cp->c_attr.ca_mode) || S_ISREG(cp->c_attr.ca_mode)) {
1845                 struct FndrExtendedFileInfo *extinfo = (struct FndrExtendedFileInfo *)finfo;
1846                 gcount = OSSwapBigToHostInt32 (extinfo->write_gen_counter);
1847
1848                 /* Was it zero to begin with (file originated in 10.8 or earlier?) */
1849                 if (gcount == 0) {
1850                         gcount++;
1851                 }
1852
1853                 /* now bump it */
1854                 gcount++;
1855
1856                 /* Did it wrap around ? */
1857                 if (gcount == 0) {
1858                         gcount++;
1859                 }
1860                 extinfo->write_gen_counter = OSSwapHostToBigInt32 (gcount);
1861
1862                 SET(cp->c_flag, C_MINOR_MOD);
1863         }
1864         else {
1865                 gcount = 0;
1866         }
1867
1868         return gcount;
1869 }
1870
1871 /*
1872  * There is no need for any locks here (other than an iocount on an
1873  * associated vnode) because reading and writing an aligned 32 bit
1874  * integer should be atomic on all platforms we support.
1875  */
1876 static u_int32_t
1877 hfs_get_gencount_internal(const uint8_t *finderinfo, mode_t mode)
1878 {
1879         const uint8_t *finfo = NULL;
1880         u_int32_t gcount = 0;
1881
1882         /* overlay the FinderInfo to the correct pointer, and advance */
1883         finfo = finderinfo;
1884         finfo = finfo + 16;
1885
1886         /*
1887          * FinderInfo is written out in big endian... make sure to convert it to host
1888          * native before we use it.
1889          *
1890          * NOTE: the write_gen_counter is stored in the same location in both the
1891          *       FndrExtendedFileInfo and FndrExtendedDirInfo structs (it's the
1892          *       last 32-bit word) so it is safe to have one code path here.
1893          */
1894         if (S_ISDIR(mode) || S_ISREG(mode)) {
1895                 const struct FndrExtendedFileInfo *extinfo = (const struct FndrExtendedFileInfo *)finfo;
1896                 gcount = OSSwapBigToHostInt32 (extinfo->write_gen_counter);
1897
1898                 /*
1899                  * Is it zero?  File might originate in 10.8 or earlier. We lie and bump it to 1,
1900                  * since the incrementer code is able to handle this case and will double-increment
1901                  * for us.
1902                  */
1903                 if (gcount == 0) {
1904                         gcount++;
1905                 }
1906         }
1907
1908         return gcount;
1909 }
1910
1911 /* Getter for the gen count */
1912 u_int32_t hfs_get_gencount (struct cnode *cp) {
1913         return hfs_get_gencount_internal(cp->c_finderinfo, cp->c_attr.ca_mode);
1914 }
1915
/* Fetch the generation count from a caller-supplied buffer (currently a pointer to finderinfo). */
u_int32_t hfs_get_gencount_from_blob (const uint8_t *finfoblob, mode_t mode) {
        u_int32_t gencount = hfs_get_gencount_internal(finfoblob, mode);

        return gencount;
}
1920
1921 void hfs_clear_might_be_dirty_flag(cnode_t *cp)
1922 {
1923         /*
1924          * If we're about to touch both mtime and ctime, we can clear the
1925          * C_MIGHT_BE_DIRTY_FROM_MAPPING since we can guarantee that
1926          * subsequent page-outs can only be for data made dirty before
1927          * now.
1928          */
1929         CLR(cp->c_flag, C_MIGHT_BE_DIRTY_FROM_MAPPING);
1930 }
1931
1932 /*
1933  * Touch cnode times based on c_touch_xxx flags
1934  *
1935  * cnode must be locked exclusive
1936  *
1937  * This will also update the volume modify time
1938  */
1939 void
1940 hfs_touchtimes(struct hfsmount *hfsmp, struct cnode* cp)
1941 {
1942         vfs_context_t ctx;
1943
1944         if (ISSET(hfsmp->hfs_flags, HFS_READ_ONLY) || ISSET(cp->c_flag, C_NOEXISTS)) {
1945                 cp->c_touch_acctime = FALSE;
1946                 cp->c_touch_chgtime = FALSE;
1947                 cp->c_touch_modtime = FALSE;
1948                 CLR(cp->c_flag, C_NEEDS_DATEADDED);
1949                 return;
1950         }
1951 #if CONFIG_HFS_STD
1952         else if (hfsmp->hfs_flags & HFS_STANDARD) {
1953         /* HFS Standard doesn't support access times */
1954                 cp->c_touch_acctime = FALSE;
1955         }
1956 #endif
1957
1958         ctx = vfs_context_current();
1959         /*
1960          * Skip access time updates if:
1961          *      . MNT_NOATIME is set
1962          *      . a file system freeze is in progress
1963          *      . a file system resize is in progress
1964          *      . the vnode associated with this cnode is marked for rapid aging
1965          */
1966         if (cp->c_touch_acctime) {
1967                 if ((vfs_flags(hfsmp->hfs_mp) & MNT_NOATIME) ||
1968                     hfsmp->hfs_freeze_state != HFS_THAWED ||
1969                     (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) ||
1970                     (cp->c_vp && ((vnode_israge(cp->c_vp) || (vfs_ctx_skipatime(ctx)))))) {
1971
1972                         cp->c_touch_acctime = FALSE;
1973                 }
1974         }
1975         if (cp->c_touch_acctime || cp->c_touch_chgtime ||
1976                 cp->c_touch_modtime || (cp->c_flag & C_NEEDS_DATEADDED)) {
1977                 struct timeval tv;
1978                 int touchvol = 0;
1979
1980                 if (cp->c_touch_modtime && cp->c_touch_chgtime)
1981                         hfs_clear_might_be_dirty_flag(cp);
1982
1983                 microtime(&tv);
1984
1985                 if (cp->c_touch_acctime) {
1986                         /*
1987                          * When the access time is the only thing changing, we
1988                          * won't necessarily write it to disk immediately.  We
1989                          * only do the atime update at vnode recycle time, when
1990                          * fsync is called or when there's another reason to write
1991                          * to the metadata.
1992                          */
1993                         cp->c_atime = tv.tv_sec;
1994                         cp->c_touch_acctime = FALSE;
1995                 }
1996                 if (cp->c_touch_modtime) {
1997                         cp->c_touch_modtime = FALSE;
1998                         time_t new_time = tv.tv_sec;
1999 #if CONFIG_HFS_STD
2000                         /*
2001                          * HFS dates that WE set must be adjusted for DST
2002                          */
2003                         if ((hfsmp->hfs_flags & HFS_STANDARD) && gTimeZone.tz_dsttime) {
2004                                 new_time += 3600;
2005                         }
2006 #endif
2007                         if (cp->c_mtime != new_time) {
2008                                 cp->c_mtime = new_time;
2009                                 cp->c_flag |= C_MINOR_MOD;
2010                                 touchvol = 1;
2011                         }
2012                 }
2013                 if (cp->c_touch_chgtime) {
2014                         cp->c_touch_chgtime = FALSE;
2015                         if (cp->c_ctime != tv.tv_sec) {
2016                                 cp->c_ctime = tv.tv_sec;
2017                                 cp->c_flag |= C_MINOR_MOD;
2018                                 touchvol = 1;
2019                         }
2020                 }
2021
2022                 if (cp->c_flag & C_NEEDS_DATEADDED) {
2023                         hfs_write_dateadded (&(cp->c_attr), tv.tv_sec);
2024                         cp->c_flag |= C_MINOR_MOD;
2025                         /* untwiddle the bit */
2026                         cp->c_flag &= ~C_NEEDS_DATEADDED;
2027                         touchvol = 1;
2028                 }
2029
2030                 /* Touch the volume modtime if needed */
2031                 if (touchvol) {
2032                         hfs_note_header_minor_change(hfsmp);
2033                         HFSTOVCB(hfsmp)->vcbLsMod = tv.tv_sec;
2034                 }
2035         }
2036 }
2037
2038 // Use this if you don't want to check the return code
2039 void hfs_lock_always(cnode_t *cp, enum hfs_locktype locktype)
2040 {
2041         hfs_lock(cp, locktype, HFS_LOCK_ALWAYS);
2042 }
2043
2044 /*
2045  * Lock a cnode.
2046  * N.B. If you add any failure cases, *make* sure hfs_lock_always works
2047  */
2048 int
2049 hfs_lock(struct cnode *cp, enum hfs_locktype locktype, enum hfs_lockflags flags)
2050 {
2051         thread_t thread = current_thread();
2052
2053         if (cp->c_lockowner == thread) {
2054                 /*
2055                  * Only the extents and bitmap files support lock recursion
2056                  * here.  The other system files support lock recursion in
2057                  * hfs_systemfile_lock.  Eventually, we should change to
2058                  * handle recursion solely in hfs_systemfile_lock.
2059                  */
2060                 if ((cp->c_fileid == kHFSExtentsFileID) ||
2061                     (cp->c_fileid == kHFSAllocationFileID)) {
2062                         cp->c_syslockcount++;
2063                 } else {
2064                         panic("hfs_lock: locking against myself!");
2065                 }
2066         } else if (locktype == HFS_SHARED_LOCK) {
2067                 lck_rw_lock_shared(&cp->c_rwlock);
2068                 cp->c_lockowner = HFS_SHARED_OWNER;
2069
2070         } else { /* HFS_EXCLUSIVE_LOCK */
2071                 lck_rw_lock_exclusive(&cp->c_rwlock);
2072                 cp->c_lockowner = thread;
2073
2074                 /* Only the extents and bitmap files support lock recursion. */
2075                 if ((cp->c_fileid == kHFSExtentsFileID) ||
2076                     (cp->c_fileid == kHFSAllocationFileID)) {
2077                         cp->c_syslockcount = 1;
2078                 }
2079         }
2080
2081 #ifdef HFS_CHECK_LOCK_ORDER
2082         /*
2083          * Regular cnodes (non-system files) cannot be locked
2084          * while holding the journal lock or a system file lock.
2085          */
2086         if (!(cp->c_desc.cd_flags & CD_ISMETA) &&
2087             ((cp->c_fileid > kHFSFirstUserCatalogNodeID) || (cp->c_fileid == kHFSRootFolderID))) {
2088                 vnode_t vp = NULLVP;
2089
2090                 /* Find corresponding vnode. */
2091                 if (cp->c_vp != NULLVP && VTOC(cp->c_vp) == cp) {
2092                         vp = cp->c_vp;
2093                 } else if (cp->c_rsrc_vp != NULLVP && VTOC(cp->c_rsrc_vp) == cp) {
2094                         vp = cp->c_rsrc_vp;
2095                 }
2096                 if (vp != NULLVP) {
2097                         struct hfsmount *hfsmp = VTOHFS(vp);
2098
2099                         if (hfsmp->jnl && (journal_owner(hfsmp->jnl) == thread)) {
2100                                 /* This will eventually be a panic here. */
2101                                 printf("hfs_lock: bad lock order (cnode after journal)\n");
2102                         }
2103                         if (hfsmp->hfs_catalog_cp && hfsmp->hfs_catalog_cp->c_lockowner == thread) {
2104                                 panic("hfs_lock: bad lock order (cnode after catalog)");
2105                         }
2106                         if (hfsmp->hfs_attribute_cp && hfsmp->hfs_attribute_cp->c_lockowner == thread) {
2107                                 panic("hfs_lock: bad lock order (cnode after attribute)");
2108                         }
2109                         if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == thread) {
2110                                 panic("hfs_lock: bad lock order (cnode after extents)");
2111                         }
2112                 }
2113         }
2114 #endif /* HFS_CHECK_LOCK_ORDER */
2115
2116         /*
2117          * Skip cnodes for regular files that no longer exist
2118          * (marked deleted, catalog entry gone).
2119          */
2120         if (((flags & HFS_LOCK_ALLOW_NOEXISTS) == 0) &&
2121             ((cp->c_desc.cd_flags & CD_ISMETA) == 0) &&
2122             (cp->c_flag & C_NOEXISTS)) {
2123                 hfs_unlock(cp);
2124                 return (ENOENT);
2125         }
2126         return (0);
2127 }
2128
2129 bool hfs_lock_upgrade(cnode_t *cp)
2130 {
2131         if (lck_rw_lock_shared_to_exclusive(&cp->c_rwlock)) {
2132                 cp->c_lockowner = current_thread();
2133                 return true;
2134         } else
2135                 return false;
2136 }
2137
2138 /*
2139  * Lock a pair of cnodes.
2140  */
2141 int
2142 hfs_lockpair(struct cnode *cp1, struct cnode *cp2, enum hfs_locktype locktype)
2143 {
2144         struct cnode *first, *last;
2145         int error;
2146
2147         /*
2148          * If cnodes match then just lock one.
2149          */
2150         if (cp1 == cp2) {
2151                 return hfs_lock(cp1, locktype, HFS_LOCK_DEFAULT);
2152         }
2153
2154         /*
2155          * Lock in cnode address order.
2156          */
2157         if (cp1 < cp2) {
2158                 first = cp1;
2159                 last = cp2;
2160         } else {
2161                 first = cp2;
2162                 last = cp1;
2163         }
2164
2165         if ( (error = hfs_lock(first, locktype, HFS_LOCK_DEFAULT))) {
2166                 return (error);
2167         }
2168         if ( (error = hfs_lock(last, locktype, HFS_LOCK_DEFAULT))) {
2169                 hfs_unlock(first);
2170                 return (error);
2171         }
2172         return (0);
2173 }
2174
2175 /*
2176  * Check ordering of two cnodes. Return true if they are are in-order.
2177  */
2178 static int
2179 hfs_isordered(struct cnode *cp1, struct cnode *cp2)
2180 {
2181         if (cp1 == cp2)
2182                 return (0);
2183         if (cp1 == NULL || cp2 == (struct cnode *)0xffffffff)
2184                 return (1);
2185         if (cp2 == NULL || cp1 == (struct cnode *)0xffffffff)
2186                 return (0);
2187         /*
2188          * Locking order is cnode address order.
2189          */
2190         return (cp1 < cp2);
2191 }
2192
2193 /*
2194  * Acquire 4 cnode locks.
2195  *   - locked in cnode address order (lesser address first).
2196  *   - all or none of the locks are taken
2197  *   - only one lock taken per cnode (dup cnodes are skipped)
2198  *   - some of the cnode pointers may be null
2199  */
2200 int
2201 hfs_lockfour(struct cnode *cp1, struct cnode *cp2, struct cnode *cp3,
2202              struct cnode *cp4, enum hfs_locktype locktype, struct cnode **error_cnode)
2203 {
2204         struct cnode * a[3];
2205         struct cnode * b[3];
2206         struct cnode * list[4];
2207         struct cnode * tmp;
2208         int i, j, k;
2209         int error;
2210         if (error_cnode) {
2211                 *error_cnode = NULL;
2212         }
2213
2214         if (hfs_isordered(cp1, cp2)) {
2215                 a[0] = cp1; a[1] = cp2;
2216         } else {
2217                 a[0] = cp2; a[1] = cp1;
2218         }
2219         if (hfs_isordered(cp3, cp4)) {
2220                 b[0] = cp3; b[1] = cp4;
2221         } else {
2222                 b[0] = cp4; b[1] = cp3;
2223         }
2224         a[2] = (struct cnode *)0xffffffff;  /* sentinel value */
2225         b[2] = (struct cnode *)0xffffffff;  /* sentinel value */
2226
2227         /*
2228          * Build the lock list, skipping over duplicates
2229          */
2230         for (i = 0, j = 0, k = 0; (i < 2 || j < 2); ) {
2231                 tmp = hfs_isordered(a[i], b[j]) ? a[i++] : b[j++];
2232                 if (k == 0 || tmp != list[k-1])
2233                         list[k++] = tmp;
2234         }
2235
2236         /*
2237          * Now we can lock using list[0 - k].
2238          * Skip over NULL entries.
2239          */
2240         for (i = 0; i < k; ++i) {
2241                 if (list[i])
2242                         if ((error = hfs_lock(list[i], locktype, HFS_LOCK_DEFAULT))) {
2243                                 /* Only stuff error_cnode if requested */
2244                                 if (error_cnode) {
2245                                         *error_cnode = list[i];
2246                                 }
2247                                 /* Drop any locks we acquired. */
2248                                 while (--i >= 0) {
2249                                         if (list[i])
2250                                                 hfs_unlock(list[i]);
2251                                 }
2252                                 return (error);
2253                         }
2254         }
2255         return (0);
2256 }
2257
2258
2259 /*
2260  * Unlock a cnode.
2261  */
2262 void
2263 hfs_unlock(struct cnode *cp)
2264 {
2265         vnode_t rvp = NULLVP;
2266         vnode_t vp = NULLVP;
2267         u_int32_t c_flag;
2268
2269         /*
2270          * Only the extents and bitmap file's support lock recursion.
2271          */
2272         if ((cp->c_fileid == kHFSExtentsFileID) ||
2273             (cp->c_fileid == kHFSAllocationFileID)) {
2274                 if (--cp->c_syslockcount > 0) {
2275                         return;
2276                 }
2277         }
2278
2279         const thread_t thread = current_thread();
2280
2281         if (cp->c_lockowner == thread) {
2282                 c_flag = cp->c_flag;
2283
2284                 // If we have the truncate lock, we must defer the puts
2285                 if (cp->c_truncatelockowner == thread) {
2286                         if (ISSET(c_flag, C_NEED_DVNODE_PUT)
2287                                 && !cp->c_need_dvnode_put_after_truncate_unlock) {
2288                                 CLR(c_flag, C_NEED_DVNODE_PUT);
2289                                 cp->c_need_dvnode_put_after_truncate_unlock = true;
2290                         }
2291                         if (ISSET(c_flag, C_NEED_RVNODE_PUT)
2292                                 && !cp->c_need_rvnode_put_after_truncate_unlock) {
2293                                 CLR(c_flag, C_NEED_RVNODE_PUT);
2294                                 cp->c_need_rvnode_put_after_truncate_unlock = true;
2295                         }
2296                 }
2297
2298                 CLR(cp->c_flag, (C_NEED_DATA_SETSIZE | C_NEED_RSRC_SETSIZE
2299                                                  | C_NEED_DVNODE_PUT | C_NEED_RVNODE_PUT));
2300
2301                 if (c_flag & (C_NEED_DVNODE_PUT | C_NEED_DATA_SETSIZE)) {
2302                 vp = cp->c_vp;
2303                 }
2304                 if (c_flag & (C_NEED_RVNODE_PUT | C_NEED_RSRC_SETSIZE)) {
2305                 rvp = cp->c_rsrc_vp;
2306                 }
2307
2308             cp->c_lockowner = NULL;
2309             lck_rw_unlock_exclusive(&cp->c_rwlock);
2310         } else {
2311             lck_rw_unlock_shared(&cp->c_rwlock);
2312         }
2313
2314         /* Perform any vnode post processing after cnode lock is dropped. */
2315         if (vp) {
2316                 if (c_flag & C_NEED_DATA_SETSIZE) {
2317                         ubc_setsize(vp, VTOF(vp)->ff_size);
2318 #if HFS_COMPRESSION
2319                         /*
2320                          * If this is a compressed file, we need to reset the
2321                          * compression state.  We will have set the size to zero
2322                          * above and it will get fixed up later (in exactly the
2323                          * same way that new vnodes are fixed up).  Note that we
2324                          * should only be able to get here if the truncate lock is
2325                          * held exclusively and so we do the reset when that's
2326                          * unlocked.
2327                          */
2328                         decmpfs_cnode *dp = VTOCMP(vp);
2329                         if (dp && decmpfs_cnode_get_vnode_state(dp) != FILE_TYPE_UNKNOWN)
2330                                 cp->c_need_decmpfs_reset = true;
2331 #endif
2332                 }
2333                 if (c_flag & C_NEED_DVNODE_PUT)
2334                         vnode_put(vp);
2335         }
2336         if (rvp) {
2337                 if (c_flag & C_NEED_RSRC_SETSIZE)
2338                         ubc_setsize(rvp, VTOF(rvp)->ff_size);
2339                 if (c_flag & C_NEED_RVNODE_PUT)
2340                         vnode_put(rvp);
2341         }
2342 }
2343
/*
 * Unlock a pair of cnodes.
 *
 * cp1 is always unlocked; cp2 is unlocked only when it is a different
 * cnode from cp1.
 */
void
hfs_unlockpair(struct cnode *cp1, struct cnode *cp2)
{
	hfs_unlock(cp1);

	if (cp1 == cp2) {
		return;
	}
	hfs_unlock(cp2);
}
2354
2355 /*
2356  * Unlock a group of cnodes.
2357  */
2358 void
2359 hfs_unlockfour(struct cnode *cp1, struct cnode *cp2, struct cnode *cp3, struct cnode *cp4)
2360 {
2361         struct cnode * list[4];
2362         int i, k = 0;
2363
2364         if (cp1) {
2365                 hfs_unlock(cp1);
2366                 list[k++] = cp1;
2367         }
2368         if (cp2) {
2369                 for (i = 0; i < k; ++i) {
2370                         if (list[i] == cp2)
2371                                 goto skip1;
2372                 }
2373                 hfs_unlock(cp2);
2374                 list[k++] = cp2;
2375         }
2376 skip1:
2377         if (cp3) {
2378                 for (i = 0; i < k; ++i) {
2379                         if (list[i] == cp3)
2380                                 goto skip2;
2381                 }
2382                 hfs_unlock(cp3);
2383                 list[k++] = cp3;
2384         }
2385 skip2:
2386         if (cp4) {
2387                 for (i = 0; i < k; ++i) {
2388                         if (list[i] == cp4)
2389                                 return;
2390                 }
2391                 hfs_unlock(cp4);
2392         }
2393 }
2394
2395
2396 /*
2397  * Protect a cnode against a truncation.
2398  *
2399  * Used mainly by read/write since they don't hold the
2400  * cnode lock across calls to the cluster layer.
2401  *
2402  * The process doing a truncation must take the lock
2403  * exclusive. The read/write processes can take it
2404  * shared.  The locktype argument is the same as supplied to
2405  * hfs_lock.
2406  */
2407 void
2408 hfs_lock_truncate(struct cnode *cp, enum hfs_locktype locktype, enum hfs_lockflags flags)
2409 {
2410         thread_t thread = current_thread();
2411
2412         if (cp->c_truncatelockowner == thread) {
2413                 /*
2414                  * Ignore grabbing the lock if it the current thread already
2415                  * holds exclusive lock.
2416                  *
2417                  * This is needed on the hfs_vnop_pagein path where we need to ensure
2418                  * the file does not change sizes while we are paging in.  However,
2419                  * we may already hold the lock exclusive due to another
2420                  * VNOP from earlier in the call stack.  So if we already hold
2421                  * the truncate lock exclusive, allow it to proceed, but ONLY if
2422                  * it's in the recursive case.
2423                  */
2424                 if ((flags & HFS_LOCK_SKIP_IF_EXCLUSIVE) == 0) {
2425                         panic("hfs_lock_truncate: cnode %p locked!", cp);
2426                 }
2427         } else if (locktype == HFS_SHARED_LOCK) {
2428                 lck_rw_lock_shared(&cp->c_truncatelock);
2429                 cp->c_truncatelockowner = HFS_SHARED_OWNER;
2430         } else { /* HFS_EXCLUSIVE_LOCK */
2431                 lck_rw_lock_exclusive(&cp->c_truncatelock);
2432                 cp->c_truncatelockowner = thread;
2433         }
2434 }
2435
2436 bool hfs_truncate_lock_upgrade(struct cnode *cp)
2437 {
2438         assert(cp->c_truncatelockowner == HFS_SHARED_OWNER);
2439         if (!lck_rw_lock_shared_to_exclusive(&cp->c_truncatelock))
2440                 return false;
2441         cp->c_truncatelockowner = current_thread();
2442         return true;
2443 }
2444
2445 void hfs_truncate_lock_downgrade(struct cnode *cp)
2446 {
2447         assert(cp->c_truncatelockowner == current_thread());
2448         lck_rw_lock_exclusive_to_shared(&cp->c_truncatelock);
2449         cp->c_truncatelockowner = HFS_SHARED_OWNER;
2450 }
2451
2452 /*
2453  * Attempt to get the truncate lock.  If it cannot be acquired, error out.
2454  * This function is needed in the degenerate hfs_vnop_pagein during force unmount
2455  * case.  To prevent deadlocks while a VM copy object is moving pages, HFS vnop pagein will
2456  * temporarily need to disable V2 semantics.
2457  */
2458 int hfs_try_trunclock (struct cnode *cp, enum hfs_locktype locktype, enum hfs_lockflags flags)
2459 {
2460         thread_t thread = current_thread();
2461         boolean_t didlock = false;
2462
2463         if (cp->c_truncatelockowner == thread) {
2464                 /*
2465                  * Ignore grabbing the lock if the current thread already
2466                  * holds exclusive lock.
2467                  *
2468                  * This is needed on the hfs_vnop_pagein path where we need to ensure
2469                  * the file does not change sizes while we are paging in.  However,
2470                  * we may already hold the lock exclusive due to another
2471                  * VNOP from earlier in the call stack.  So if we already hold
2472                  * the truncate lock exclusive, allow it to proceed, but ONLY if
2473                  * it's in the recursive case.
2474                  */
2475                 if ((flags & HFS_LOCK_SKIP_IF_EXCLUSIVE) == 0) {
2476                         panic("hfs_lock_truncate: cnode %p locked!", cp);
2477                 }
2478         } else if (locktype == HFS_SHARED_LOCK) {
2479                 didlock = lck_rw_try_lock(&cp->c_truncatelock, LCK_RW_TYPE_SHARED);
2480                 if (didlock) {
2481                         cp->c_truncatelockowner = HFS_SHARED_OWNER;
2482                 }
2483         } else { /* HFS_EXCLUSIVE_LOCK */
2484                 didlock = lck_rw_try_lock (&cp->c_truncatelock, LCK_RW_TYPE_EXCLUSIVE);
2485                 if (didlock) {
2486                         cp->c_truncatelockowner = thread;
2487                 }
2488         }
2489
2490         return didlock;
2491 }
2492
2493
2494 /*
2495  * Unlock the truncate lock, which protects against size changes.
2496  *
2497  * If HFS_LOCK_SKIP_IF_EXCLUSIVE flag was set, it means that a previous
2498  * hfs_lock_truncate() might have skipped grabbing a lock because
2499  * the current thread was already holding the lock exclusive and
2500  * we may need to return from this function without actually unlocking
2501  * the truncate lock.
2502  */
2503 void
2504 hfs_unlock_truncate(struct cnode *cp, enum hfs_lockflags flags)
2505 {
2506         thread_t thread = current_thread();
2507
2508         /*
2509          * If HFS_LOCK_SKIP_IF_EXCLUSIVE is set in the flags AND the current
2510          * lock owner of the truncate lock is our current thread, then
2511          * we must have skipped taking the lock earlier by in
2512          * hfs_lock_truncate() by setting HFS_LOCK_SKIP_IF_EXCLUSIVE in the
2513          * flags (as the current thread was current lock owner).
2514          *
2515          * If HFS_LOCK_SKIP_IF_EXCLUSIVE is not set (most of the time) then
2516          * we check the lockowner field to infer whether the lock was taken
2517          * exclusively or shared in order to know what underlying lock
2518          * routine to call.
2519          */
2520         if (flags & HFS_LOCK_SKIP_IF_EXCLUSIVE) {
2521                 if (cp->c_truncatelockowner == thread) {
2522                         return;
2523                 }
2524         }
2525
2526         /* HFS_LOCK_EXCLUSIVE */
2527         if (thread == cp->c_truncatelockowner) {
2528                 vnode_t vp = NULL, rvp = NULL;
2529
2530                 /*
2531                  * If there are pending set sizes, the cnode lock should be dropped
2532                  * first.
2533                  */
2534 #if DEBUG
2535                 assert(!(cp->c_lockowner == thread
2536                                  && ISSET(cp->c_flag, C_NEED_DATA_SETSIZE | C_NEED_RSRC_SETSIZE)));
2537 #elif DEVELOPMENT
2538                 if (cp->c_lockowner == thread
2539                         && ISSET(cp->c_flag, C_NEED_DATA_SETSIZE | C_NEED_RSRC_SETSIZE)) {
2540                         printf("hfs: hfs_unlock_truncate called with C_NEED_DATA/RSRC_SETSIZE set (caller: 0x%llx)\n",
2541                                    (uint64_t)VM_KERNEL_UNSLIDE(__builtin_return_address(0)));
2542                 }
2543 #endif
2544
2545                 if (cp->c_need_dvnode_put_after_truncate_unlock) {
2546                         vp = cp->c_vp;
2547                         cp->c_need_dvnode_put_after_truncate_unlock = false;
2548                 }
2549                 if (cp->c_need_rvnode_put_after_truncate_unlock) {
2550                         rvp = cp->c_rsrc_vp;
2551                         cp->c_need_rvnode_put_after_truncate_unlock = false;
2552                 }
2553
2554 #if HFS_COMPRESSION
2555                 bool reset_decmpfs = cp->c_need_decmpfs_reset;
2556                 cp->c_need_decmpfs_reset = false;
2557 #endif
2558
2559                 cp->c_truncatelockowner = NULL;
2560                 lck_rw_unlock_exclusive(&cp->c_truncatelock);
2561
2562 #if HFS_COMPRESSION
2563                 if (reset_decmpfs) {
2564                         decmpfs_cnode *dp = cp->c_decmp;
2565                         if (dp && decmpfs_cnode_get_vnode_state(dp) != FILE_TYPE_UNKNOWN)
2566                                 decmpfs_cnode_set_vnode_state(dp, FILE_TYPE_UNKNOWN, 0);
2567                 }
2568 #endif
2569
2570                 // Do the puts now
2571                 if (vp)
2572                         vnode_put(vp);
2573                 if (rvp)
2574                         vnode_put(rvp);
2575         } else { /* HFS_LOCK_SHARED */
2576                 lck_rw_unlock_shared(&cp->c_truncatelock);
2577         }
2578 }