bsd/hfs/hfs_cnode.c

   1 /*
   2  * Copyright (c) 2002-2014 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 #include <sys/param.h>
  29 #include <sys/systm.h>
  30 #include <sys/proc.h>
  31 #include <sys/vnode.h>
  32 #include <sys/mount.h>
  33 #include <sys/kernel.h>
  34 #include <sys/malloc.h>
  35 #include <sys/time.h>
  36 #include <sys/ubc.h>
  37 #include <sys/quota.h>
  38 #include <sys/kdebug.h>
  39 #include <libkern/OSByteOrder.h>
  40 #include <sys/buf_internal.h>
  41
  42 #include <kern/locks.h>
  43
  44 #include <miscfs/specfs/specdev.h>
  45 #include <miscfs/fifofs/fifo.h>
  46
  47 #include <hfs/hfs.h>
  48 #include <hfs/hfs_catalog.h>
  49 #include <hfs/hfs_cnode.h>
  50 #include <hfs/hfs_quota.h>
  51 #include <hfs/hfs_format.h>
  52 #include <hfs/hfs_kdebug.h>
  53
   54 extern int prtactive;
   55
   56 extern lck_attr_t *  hfs_lock_attr;      /* lock attributes; declared extern, definition not in this part of the file */
   57 extern lck_grp_t *  hfs_mutex_group;     /* lock group; presumably for HFS mutexes -- confirm at the definition */
   58 extern lck_grp_t *  hfs_rwlock_group;    /* lock group; presumably for HFS rw-locks -- confirm at the definition */
   59
      /* Forward declarations of helpers private to this file. */
   60 static void  hfs_reclaim_cnode(struct cnode *);
   61 static int hfs_cnode_teardown (struct vnode *vp, vfs_context_t ctx, int reclaim);
   62 static int hfs_isordered(struct cnode *, struct cnode *);
   63
      /* Per-buffer callback handed to buf_iterate() by hfs_cnode_teardown for journaled symlinks. */
   64 extern int hfs_removefile_callback(struct buf *bp, void *hfsmp);
  65
  66
  67 __inline__ int hfs_checkdeleted (struct cnode *cp) {
  68         return ((cp->c_flag & (C_DELETED | C_NOEXISTS)) ? ENOENT : 0);
  69 }
  70
  71 /*
  72  * Function used by a special fcntl() that decorates a cnode/vnode that
  73  * indicates it is backing another filesystem, like a disk image.
  74  *
  75  * the argument 'val' indicates whether or not to set the bit in the cnode flags
  76  *
  77  * Returns non-zero on failure. 0 on success
  78  */
  79 int hfs_set_backingstore (struct vnode *vp, int val) {
  80         struct cnode *cp = NULL;
  81         int err = 0;
  82
  83         cp = VTOC(vp);
  84         if (!vnode_isreg(vp) && !vnode_isdir(vp)) {
  85                 return EINVAL;
  86         }
  87
  88         /* lock the cnode */
  89         err = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
  90         if (err) {
  91                 return err;
  92         }
  93
  94         if (val) {
  95                 cp->c_flag |= C_BACKINGSTORE;
  96         }
  97         else {
  98                 cp->c_flag &= ~C_BACKINGSTORE;
  99         }
 100
 101         /* unlock everything */
 102         hfs_unlock (cp);
 103
 104         return err;
 105 }
 106
 107 /*
 108  * Function used by a special fcntl() that check to see if a cnode/vnode
 109  * indicates it is backing another filesystem, like a disk image.
 110  *
 111  * the argument 'val' is an output argument for whether or not the bit is set
 112  *
 113  * Returns non-zero on failure. 0 on success
 114  */
 115
 116 int hfs_is_backingstore (struct vnode *vp, int *val) {
 117         struct cnode *cp = NULL;
 118         int err = 0;
 119
 120         if (!vnode_isreg(vp) && !vnode_isdir(vp)) {
 121                 *val = 0;
 122                 return 0;
 123         }
 124
 125         cp = VTOC(vp);
 126
 127         /* lock the cnode */
 128         err = hfs_lock (cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT);
 129         if (err) {
 130                 return err;
 131         }
 132
 133         if (cp->c_flag & C_BACKINGSTORE) {
 134                 *val = 1;
 135         }
 136         else {
 137                 *val = 0;
 138         }
 139
 140         /* unlock everything */
 141         hfs_unlock (cp);
 142
 143         return err;
 144 }
 145
 146
 147 /*
 148  * hfs_cnode_teardown
 149  *
 150  * This is an internal function that is invoked from both hfs_vnop_inactive
 151  * and hfs_vnop_reclaim.  As VNOP_INACTIVE is not necessarily called from vnodes
 152  * being recycled and reclaimed, it is important that we do any post-processing
 153  * necessary for the cnode in both places.  Important tasks include things such as
 154  * releasing the blocks from an open-unlinked file when all references to it have dropped,
 155  * and handling resource forks separately from data forks.
 156  *
 157  * Note that we take only the vnode as an argument here (rather than the cnode).
 158  * Recall that each cnode supports two forks (rsrc/data), and we can always get the right
 159  * cnode from either of the vnodes, but the reverse is not true -- we can't determine which
 160  * vnode we need to reclaim if only the cnode is supplied.
 161  *
 162  * This function is idempotent and safe to call from both hfs_vnop_inactive and hfs_vnop_reclaim
 163  * if both are invoked right after the other.  In the second call, most of this function's if()
 164  * conditions will fail, since they apply generally to cnodes still marked with C_DELETED.
 165  * As a quick check to see if this function is necessary, determine if the cnode is already
 166  * marked C_NOEXISTS.  If it is, then it is safe to skip this function.  The only tasks that
 167  * remain for cnodes marked in such a fashion is to teardown their fork references and
 168  * release all directory hints and hardlink origins.  However, both of those are done
 169  * in hfs_vnop_reclaim.  hfs_update, by definition, is not necessary if the cnode's catalog
 170  * entry is no longer there.
 171  *
 172  * 'reclaim' argument specifies whether or not we were called from hfs_vnop_reclaim.  If we are
 173  * invoked from hfs_vnop_reclaim, we can not call functions that cluster_push since the UBC info
 174  * is totally gone by that point.
 175  *
 176  * Assumes that both truncate and cnode locks for 'cp' are held.
 177  */
  178 static
  179 int hfs_cnode_teardown (struct vnode *vp, vfs_context_t ctx, int reclaim)
  180 {
              /*
               * Return value: 0, or the first error that sent control to 'out' --
               * EINVAL when hfs_start_transaction() fails, ENOMEM from the lookup
               * buffer allocation, or whatever hfs_truncate(), cat_lookup(),
               * hfs_release_storage(), cat_preflight() or cat_delete() returned.
               * The results of hfs_removeallattr() and hfs_update() do not affect it.
               */
  181         int forkcount = 0;      /* how many of cp->c_datafork / cp->c_rsrcfork are non-NULL (0..2) */
  182         enum vtype v_type;
  183         struct cnode *cp;
  184         int error = 0;
  185         int started_tr = 0;     /* non-zero while a transaction opened by this function is still pending */
  186         struct hfsmount *hfsmp = VTOHFS(vp);
  187         struct proc *p = vfs_context_proc(ctx);
  188         int truncated = 0;      /* set after our own fork was truncated; only consulted when logging a cat_delete failure */
  189     cat_cookie_t cookie;
  190     int cat_reserve = 0;        /* non-zero once cat_preflight() succeeded, so 'out' knows to call cat_postflight() */
  191     int lockflags;
  192         int ea_error = 0;       /* receives the hfs_removeallattr() result; never read afterwards in this function */
  193
  194         v_type = vnode_vtype(vp);
  195         cp = VTOC(vp);
  196
  197         if (cp->c_datafork) {
  198                 ++forkcount;
  199         }
  200         if (cp->c_rsrcfork) {
  201                 ++forkcount;
  202         }
  203
  204
  205         /*
  206          * Skip the call to ubc_setsize if we're being invoked on behalf of reclaim.
  207          * The dirty regions would have already been synced to disk, so informing UBC
  208          * that they can toss the pages doesn't help anyone at this point.
  209          *
  210          * Note that this is a performance problem if the vnode goes straight to reclaim
  211          * (and skips inactive), since there would be no way for anyone to notify the UBC
  212          * that all pages in this file are basically useless.
  213          */
  214         if (reclaim == 0) {
  215                 /*
  216                  * Check whether we are tearing down a cnode with only one remaining fork.
  217                  * If there are blocks in its filefork, then we need to unlock the cnode
  218                  * before calling ubc_setsize.  The cluster layer may re-enter the filesystem
  219                  * (i.e. VNOP_BLOCKMAP), and if we retain the cnode lock, we could double-lock
  220                  * panic.
  221                  */
  222
  223                 if ((v_type == VREG || v_type == VLNK) &&
  224                         (cp->c_flag & C_DELETED) &&
  225                         (VTOF(vp)->ff_blocks != 0) && (forkcount == 1)) {
  226                         hfs_unlock(cp);
  227                         /* ubc_setsize just fails if we were to call this from VNOP_RECLAIM */
  228                         ubc_setsize(vp, 0);
  229                         (void) hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
  230                 }
  231         }
  232
  233         /*
  234          * Push file data out for normal files that haven't been evicted from
  235          * the namespace.  We only do this if this function was not called from reclaim,
  236          * because by that point the UBC information has been totally torn down.
  237          *
  238          * There should also be no way that a normal file that has NOT been deleted from
  239          * the namespace to skip INACTIVE and go straight to RECLAIM.  That race only happens
  240          * when the file becomes open-unlinked.
  241          */
  242         if ((v_type == VREG) &&
  243                 (!ISSET(cp->c_flag, C_DELETED)) &&
  244                 (!ISSET(cp->c_flag, C_NOEXISTS)) &&
  245                 (VTOF(vp)->ff_blocks) &&
  246                 (reclaim == 0)) {
  247                 /*
  248                  * Note that if content protection is enabled, then this is where we will
  249                  * attempt to issue IOs for all dirty regions of this file.
  250                  *
  251                  * If we're called from hfs_vnop_inactive, all this means is at the time
  252                  * the logic for deciding to call this function, there were not any lingering
  253                  * mmap/fd references for this file.  However, there is nothing preventing the system
  254                  * from creating a new reference in between the time that logic was checked
  255                  * and we entered hfs_vnop_inactive.  As a result, the only time we can guarantee
  256                  * that there aren't any references is during vnop_reclaim.
  257                  */
  258                 hfs_filedone(vp, ctx, 0);
  259         }
  260
  261         /*
  262          * We're holding the cnode lock now.  Stall behind any shadow BPs that may
  263          * be involved with this vnode if it is a symlink.  We don't want to allow
  264          * the blocks that we're about to release to be put back into the pool if there
  265          * is pending I/O to them.
  266          */
  267         if (v_type == VLNK) {
  268                 /*
  269                  * This will block if the asynchronous journal flush is in progress.
  270                  * If this symlink is not being renamed over and doesn't have any open FDs,
  271                  * then we'll remove it from the journal's bufs below in kill_block.
  272                  */
  273                 buf_wait_for_shadow_io (vp, 0);
  274         }
  275
  276         /*
  277          * Remove any directory hints or cached origins
  278          */
  279         if (v_type == VDIR) {
  280                 hfs_reldirhints(cp, 0);
  281         }
  282         if (cp->c_flag & C_HARDLINK) {
  283                 hfs_relorigins(cp);
  284         }
  285
  286         /*
  287          * This check is slightly complicated.  We should only truncate data
  288          * in very specific cases for open-unlinked files.  This is because
  289          * we want to ensure that the resource fork continues to be available
  290          * if the caller has the data fork open.  However, this is not symmetric;
  291          * someone who has the resource fork open need not be able to access the data
  292          * fork once the data fork has gone inactive.
  293          *
  294          * If we're the last fork, then we have cleaning up to do.
  295          *
  296          * A) last fork, and vp == c_vp
  297          *      Truncate away own fork data. If rsrc fork is not in core, truncate it too.
  298          *
  299          * B) last fork, and vp == c_rsrc_vp
  300          *      Truncate ourselves, assume data fork has been cleaned due to C).
  301          *
  302          * If we're not the last fork, then things are a little different:
  303          *
  304          * C) not the last fork, vp == c_vp
  305          *      Truncate ourselves.  Once the file has gone out of the namespace,
  306          *      it cannot be further opened.  Further access to the rsrc fork may
  307          *      continue, however.
  308          *
  309          * D) not the last fork, vp == c_rsrc_vp
  310          *      Don't enter the block below, just clean up vnode and push it out of core.
  311          */
  312
  313         if ((v_type == VREG || v_type == VLNK) &&
  314                         (cp->c_flag & C_DELETED) &&
  315                         ((forkcount == 1) || (!VNODE_IS_RSRC(vp)))) {
  316
  317                 /* Truncate away our own fork data. (Case A, B, C above) */
  318                 if (VTOF(vp)->ff_blocks != 0) {
  319
  320                         /*
  321                          * SYMLINKS only:
  322                          *
  323                          * Encapsulate the entire change (including truncating the link) in
  324                          * nested transactions if we are modifying a symlink, because we know that its
  325                          * file length will be at most 4k, and we can fit both the truncation and
  326                          * any relevant bitmap changes into a single journal transaction.  We also want
  327                          * the kill_block code to execute in the same transaction so that any dirty symlink
  328                          * blocks will not be written. Otherwise, rely on
  329                          * hfs_truncate doing its own transactions to ensure that we don't blow up
  330                          * the journal.
  331                          */
  332                         if ((started_tr == 0) && (v_type == VLNK)) {
  333                                 if (hfs_start_transaction(hfsmp) != 0) {
  334                                         error = EINVAL;
  335                                         goto out;
  336                                 }
  337                                 else {
  338                                         started_tr = 1;
  339                                 }
  340                         }
  341
  342                         /*
  343                          * At this point, we have decided that this cnode is
  344                          * suitable for full removal.  We are about to deallocate
  345                          * its blocks and remove its entry from the catalog.
  346                          * If it was a symlink, then it's possible that the operation
  347                          * which created it is still in the current transaction group
  348                          * due to coalescing.  Take action here to kill the data blocks
  349                          * of the symlink out of the journal before moving to
  350                          * deallocate the blocks.  We need to be in the middle of
  351                          * a transaction before calling buf_iterate like this.
  352                          *
  353                          * Note: we have to kill any potential symlink buffers out of
  354                          * the journal prior to deallocating their blocks.  This is so
  355                          * that we don't race with another thread that may be doing an
  356                          * allocation concurrently and pick up these blocks. It could
  357                          * generate I/O against them which could go out ahead of our journal
  358                          * transaction.
  359                          */
  360
  361                         if (hfsmp->jnl && vnode_islnk(vp)) {
  362                                 buf_iterate(vp, hfs_removefile_callback, BUF_SKIP_NONLOCKED, (void *)hfsmp);
  363                         }
  364
  365
  366                         /*
  367                          * This truncate call (and the one below) is fine from VNOP_RECLAIM's
  368                          * context because we're only removing blocks, not zero-filling new
  369                          * ones.  The C_DELETED check above makes things much simpler.
  370                          */
  371                         error = hfs_truncate(vp, (off_t)0, IO_NDELAY, 0, ctx);
  372                         if (error) {
  373                                 goto out;
  374                         }
  375                         truncated = 1;
  376
  377                         /* (SYMLINKS ONLY): Close/End our transaction after truncating the file record */
  378                         if (started_tr) {
  379                                 hfs_end_transaction(hfsmp);
  380                                 started_tr = 0;
  381                         }
  382
  383                 }
  384
  385                 /*
  386                  * Truncate away the resource fork, if we represent the data fork and
  387                  * it is the last fork.  That means, by definition, the rsrc fork is not in
  388                  * core.  To avoid bringing a vnode into core for the sole purpose of deleting the
  389                  * data in the resource fork, we call cat_lookup directly, then hfs_release_storage
  390                  * to get rid of the resource fork's data. Note that because we are holding the
  391                  * cnode lock, it is impossible for a competing thread to create the resource fork
  392                  * vnode from underneath us while we do this.
  393                  *
  394                  * This is invoked via case A above only.
  395                  */
  396                 if ((cp->c_blocks > 0) && (forkcount == 1) && (vp != cp->c_rsrc_vp)) {
  397                         struct cat_lookup_buffer *lookup_rsrc = NULL;
  398                         struct cat_desc *desc_ptr = NULL;
  399                         lockflags = 0;
  400
  401                         MALLOC(lookup_rsrc, struct cat_lookup_buffer*, sizeof (struct cat_lookup_buffer), M_TEMP, M_WAITOK);
  402                         if (lookup_rsrc == NULL) {
  403                                 printf("hfs_cnode_teardown: ENOMEM from MALLOC\n");
  404                                 error = ENOMEM;
  405                                 goto out;
  406                         }
  407                         else {
  408                                 bzero (lookup_rsrc, sizeof (struct cat_lookup_buffer));
  409                         }
  410
  411                         if (cp->c_desc.cd_namelen == 0) {
  412                                 /* Initialize the rsrc descriptor for lookup if necessary*/
  413                                 MAKE_DELETED_NAME (lookup_rsrc->lookup_name, HFS_TEMPLOOKUP_NAMELEN, cp->c_fileid);
  414
  415                                 lookup_rsrc->lookup_desc.cd_nameptr = (const uint8_t*) lookup_rsrc->lookup_name;
  416                                 lookup_rsrc->lookup_desc.cd_namelen = strlen (lookup_rsrc->lookup_name);
  417                                 lookup_rsrc->lookup_desc.cd_parentcnid = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid;
  418                                 lookup_rsrc->lookup_desc.cd_cnid = cp->c_cnid;
  419
  420                                 desc_ptr = &lookup_rsrc->lookup_desc;
  421                         }
  422                         else {
  423                                 desc_ptr = &cp->c_desc;
  424                         }
  425
  426                         lockflags = hfs_systemfile_lock (hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
  427
  428                         error = cat_lookup (hfsmp, desc_ptr, 1, 0, (struct cat_desc *) NULL,
  429                                         (struct cat_attr*) NULL, &lookup_rsrc->lookup_fork.ff_data, NULL);
  430
  431                         hfs_systemfile_unlock (hfsmp, lockflags);
  432
  433                         if (error) {
  434                                 FREE (lookup_rsrc, M_TEMP);
  435                                 goto out;
  436                         }
  437
  438                         /*
  439                          * Make the filefork in our temporary struct look like a real
  440                          * filefork.  Fill in the cp, sysfileinfo and rangelist fields..
  441                          */
  442                         rl_init (&lookup_rsrc->lookup_fork.ff_invalidranges);
  443                         lookup_rsrc->lookup_fork.ff_cp = cp;
  444
  445                         /*
  446                          * If there were no errors, then we have the catalog's fork information
  447                          * for the resource fork in question.  Go ahead and delete the data in it now.
  448                          */
  449
  450                         error = hfs_release_storage (hfsmp, NULL, &lookup_rsrc->lookup_fork, cp->c_fileid);
  451                         FREE(lookup_rsrc, M_TEMP);
  452
  453                         if (error) {
  454                                 goto out;
  455                         }
  456
  457                         /*
  458                          * This fileid's resource fork extents have now been fully deleted on-disk
  459                          * and this CNID is no longer valid. At this point, we should be able to
  460                          * zero out cp->c_blocks to indicate there is no data left in this file.
  461                          */
  462                         cp->c_blocks = 0;
  463                 }
  464         }
  465
  466         /*
  467          * If we represent the last fork (or none in the case of a dir),
  468          * and the cnode has become open-unlinked,
  469          * AND it has EA's, then we need to get rid of them.
  470          *
  471          * Note that this must happen outside of any other transactions
  472          * because it starts/ends its own transactions and grabs its
  473          * own locks.  This is to prevent a file with a lot of attributes
  474          * from creating a transaction that is too large (which panics).
  475          */
  476     if ((cp->c_attr.ca_recflags & kHFSHasAttributesMask) != 0 &&
  477                 (cp->c_flag & C_DELETED) &&
  478                 (forkcount <= 1)) {
  479
              /* The result is only stored; a failure here does not change 'error' or stop the teardown. */
  480         ea_error = hfs_removeallattr(hfsmp, cp->c_fileid);
  481     }
  482
  483
  484         /*
  485          * If the cnode represented an open-unlinked file, then now
  486          * actually remove the cnode's catalog entry and release all blocks
  487          * it may have been using.
  488          */
  489     if ((cp->c_flag & C_DELETED) && (forkcount <= 1)) {
  490         /*
  491          * Mark cnode in transit so that no one can get this
  492          * cnode from cnode hash.
  493          */
  494                 // hfs_chash_mark_in_transit(hfsmp, cp);
  495                 // XXXdbg - remove the cnode from the hash table since it's deleted
  496                 //          otherwise someone could go to sleep on the cnode and not
  497                 //          be woken up until this vnode gets recycled which could be
  498                 //          a very long time...
  499         hfs_chashremove(hfsmp, cp);
  500
  501         cp->c_flag |= C_NOEXISTS;   // XXXdbg
  502         cp->c_rdev = 0;
  503
  504         if (started_tr == 0) {
  505             if (hfs_start_transaction(hfsmp) != 0) {
  506                                 error = EINVAL;
  507                                 goto out;
  508             }
  509             started_tr = 1;
  510         }
  511
  512         /*
  513          * Reserve some space in the Catalog file.
  514          */
  515         if ((error = cat_preflight(hfsmp, CAT_DELETE, &cookie, p))) {
  516             goto out;
  517         }
  518         cat_reserve = 1;
  519
  520         lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE, HFS_EXCLUSIVE_LOCK);
  521
  522         if (cp->c_blocks > 0) {
  523             printf("hfs_inactive: deleting non-empty%sfile %d, "
  524                    "blks %d\n", VNODE_IS_RSRC(vp) ? " rsrc " : " ",
  525                    (int)cp->c_fileid, (int)cp->c_blocks);
  526         }
  527
  528                 //
  529         // release the name pointer in the descriptor so that
  530         // cat_delete() will use the file-id to do the deletion.
  531         // in the case of hard links this is imperative (in the
  532         // case of regular files the fileid and cnid are the
  533         // same so it doesn't matter).
  534         //
  535         cat_releasedesc(&cp->c_desc);
  536
  537         /*
  538          * The descriptor name may be zero,
  539          * in which case the fileid is used.
  540          */
  541         error = cat_delete(hfsmp, &cp->c_desc, &cp->c_attr);
  542
              /* NOTE(review): the message below is printed without a trailing newline. */
  543         if (error && truncated && (error != ENXIO)) {
  544             printf("hfs_inactive: couldn't delete a truncated file!");
  545         }
  546
  547         /* Update HFS Private Data dir */
  548         if (error == 0) {
  549             hfsmp->hfs_private_attr[FILE_HARDLINKS].ca_entries--;
  550             if (vnode_isdir(vp)) {
  551                 DEC_FOLDERCOUNT(hfsmp, hfsmp->hfs_private_attr[FILE_HARDLINKS]);
  552             }
  553             (void)cat_update(hfsmp, &hfsmp->hfs_private_desc[FILE_HARDLINKS],
  554                                                          &hfsmp->hfs_private_attr[FILE_HARDLINKS], NULL, NULL);
  555         }
  556
  557         hfs_systemfile_unlock(hfsmp, lockflags);
  558
              /* A cat_delete failure leaves C_DELETED set (C_NOEXISTS was already set above) and exits via 'out'. */
  559         if (error) {
  560                         goto out;
  561                 }
  562
  563 #if QUOTA
  564         if (hfsmp->hfs_flags & HFS_QUOTAS)
  565             (void)hfs_chkiq(cp, -1, NOCRED, 0);
  566 #endif /* QUOTA */
  567
  568         /* Already set C_NOEXISTS at the beginning of this block */
  569         cp->c_flag &= ~C_DELETED;
  570         cp->c_touch_chgtime = TRUE;
  571         cp->c_touch_modtime = TRUE;
  572
  573         if (error == 0)         /* always true here: a non-zero error already jumped to 'out' above */
  574             hfs_volupdate(hfsmp, (v_type == VDIR) ? VOL_RMDIR : VOL_RMFILE, 0);
  575     }
  576
  577         /*
  578      * A file may have had delayed allocations, in which case hfs_update
  579      * would not have updated the catalog record (cat_update).  We need
  580      * to do that now, before we lose our fork data.  We also need to
  581      * force the update, or hfs_update will again skip the cat_update.
  582          *
  583          * If the file has C_NOEXISTS set, then we can skip the hfs_update call
  584          * because the catalog entry has already been removed.  There would be no point
  585      * to looking up the entry in the catalog to modify it when we already know it's gone
  586          */
  587     if ((!ISSET(cp->c_flag, C_NOEXISTS)) &&
  588                 ((cp->c_flag & C_MODIFIED) || cp->c_touch_acctime ||
  589                  cp->c_touch_chgtime || cp->c_touch_modtime)) {
  590
  591                         if ((cp->c_flag & C_MODIFIED) || cp->c_touch_modtime){
  592                                 cp->c_flag |= C_FORCEUPDATE;
  593                         }
  594                         hfs_update(vp, 0);      /* return value is not checked */
  595                 }
  596
  597         /*
  598          * Since we are about to finish what might be an inactive call, propagate
  599          * any remaining modified or touch bits from the cnode to the vnode.  This
  600          * serves as a hint to vnode recycling that we shouldn't recycle this vnode
  601          * synchronously.
  602          */
  603         if (ISSET(cp->c_flag, C_MODIFIED) || ISSET(cp->c_flag, C_FORCEUPDATE) ||
  604                 cp->c_touch_acctime || cp->c_touch_chgtime ||
  605                 cp->c_touch_modtime || ISSET(cp->c_flag, C_NEEDS_DATEADDED) ||
  606                 ISSET(cp->c_flag, C_DELETED)) {
  607                 vnode_setdirty(vp);
  608         } else {
  609                 vnode_cleardirty(vp);
  610         }
  611
              /*
               * Common exit path (reached by fall-through and by every 'goto out'):
               * give back the catalog reservation if cat_preflight() succeeded, and
               * end any transaction this function still has open.
               */
  612 out:
  613     if (cat_reserve)
  614         cat_postflight(hfsmp, &cookie, p);
  615
  616     // XXXdbg - have to do this because a goto could have come here
  617     if (started_tr) {
  618         hfs_end_transaction(hfsmp);
  619         started_tr = 0;
  620     }
  621
              /* The CONFIG_PROTECT key-flush block below is compiled out by the enclosing #if 0. */
  622 #if 0
  623 #if CONFIG_PROTECT
  624         /*
  625          * cnode truncate lock and cnode lock are both held exclusive here.
  626          *
  627          * Go ahead and flush the keys out if this cnode is the last fork
  628          * and it is not class F.  Class F keys should not be purged because they only
  629          * exist in memory and have no persistent keys.  Only do this
  630          * if we haven't already done it yet (maybe a vnode skipped inactive
  631          * and went straight to reclaim).  This function gets called from both reclaim and
  632          * inactive, so it will happen first in inactive if possible.
  633          *
  634          * We need to be mindful that all pending IO for this file has already been
  635          * issued and completed before we bzero out the key.  This is because
  636          * if it isn't, tossing the key here could result in garbage IO being
  637          * written (by using the bzero'd key) if the writes are happening asynchronously.
  638          *
  639          * In addition, class A files may have already been purged due to the
  640          * lock event occurring.
  641          */
  642         if (forkcount == 1) {
  643                 struct cprotect *entry = cp->c_cpentry;
  644                 if ((entry) && ( CP_CLASS(entry->cp_pclass) != PROTECTION_CLASS_F)) {
  645                         if ((cp->c_cpentry->cp_flags & CP_KEY_FLUSHED) == 0) {
  646                                 cp->c_cpentry->cp_flags |= CP_KEY_FLUSHED;
  647                                 bzero (cp->c_cpentry->cp_cache_key, cp->c_cpentry->cp_cache_key_len);
  648                                 bzero (cp->c_cpentry->cp_cache_iv_ctx, sizeof(aes_encrypt_ctx));
  649                         }
  650                 }
  651         }
  652 #endif
  653 #endif
  654
  655         return error;
  656 }
 657
 658
 659 /*
 660  * hfs_vnop_inactive
 661  *
 662  * The last usecount on the vnode has gone away, so we need to tear down
 663  * any remaining data still residing in the cnode.  If necessary, write out
 664  * remaining blocks or delete the cnode's entry in the catalog.
      *
      * ap->a_vp is the vnode going inactive; ap->a_context is handed on to
      * hfs_cnode_teardown().
      *
      * Returns 0 without doing any work when the volume is read-only, when
      * the vnode is a system vnode, when the calling process is the one
      * recorded in hfs_freezing_proc, or when the cnode is already marked
      * C_NOEXISTS.  Otherwise returns whatever hfs_cnode_teardown() returns.
 665  */
 666 int
 667 hfs_vnop_inactive(struct vnop_inactive_args *ap)
 668 {
 669         struct vnode *vp = ap->a_vp;
 670         struct cnode *cp;
 671         struct hfsmount *hfsmp = VTOHFS(vp);
 672         struct proc *p = vfs_context_proc(ap->a_context);
 673         int error = 0;
 674         int took_trunc_lock = 0;	/* set to 1 once the truncate lock is held */
 675         enum vtype v_type;
 676
 677         v_type = vnode_vtype(vp);
 678         cp = VTOC(vp);
 679
              /*
               * Skip all work for read-only volumes, for system vnodes, and when
               * the caller is the process recorded in hfs_freezing_proc.
               */
 680         if ((hfsmp->hfs_flags & HFS_READ_ONLY) || vnode_issystem(vp) ||
 681             (hfsmp->hfs_freezing_proc == p)) {
 682                 error = 0;
 683                 goto inactive_done;
 684         }
 685
 686         /*
 687          * For safety, do NOT call vnode_recycle from inside this function.  This can cause
 688          * problems in the following scenario:
 689          *
 690          * vnode_create -> vnode_reclaim_internal -> vclean -> VNOP_INACTIVE
 691          *
 692          * If we're being invoked as a result of a reclaim that was already in-flight, then we
 693          * cannot call vnode_recycle again.  Being in reclaim means that there are no usecounts or
 694          * iocounts by definition.  As a result, if we were to call vnode_recycle, it would immediately
 695          * try to re-enter reclaim again and panic.
 696          *
 697          * Currently, there are three things that can cause us (VNOP_INACTIVE) to get called.
 698          * 1) last usecount goes away on the vnode (vnode_rele)
 699          * 2) last iocount goes away on a vnode that previously had usecounts but didn't have
 700          *              vnode_recycle called (vnode_put)
 701          * 3) vclean by way of reclaim
 702          *
 703          * In this function we would generally want to call vnode_recycle to speed things
 704          * along to ensure that we don't leak blocks due to open-unlinked files.  However, by
 705          * virtue of being in this function already, we can call hfs_cnode_teardown, which
 706          * will release blocks held by open-unlinked files, and mark them C_NOEXISTS so that
 707          * there's no entry in the catalog and no backing store anymore.  If that's the case,
 708          * then we really don't care all that much when the vnode actually goes through reclaim.
 709          * Further, the HFS VNOPs that manipulated the namespace in order to create the open-
 710          * unlinked file in the first place should have already called vnode_recycle on the vnode
 711          * to guarantee that it would go through reclaim in a speedy way.
 712          */
 713
 714         if (cp->c_flag & C_NOEXISTS) {
 715                 /*
 716                  * If the cnode has already had its cat entry removed, then
 717                  * just skip to the end. We don't need to do anything here.
 718                  */
 719                 error = 0;
 720                 goto inactive_done;
 721         }
 722
              /*
               * Only regular files and symlinks take the truncate lock.  It is
               * acquired before the cnode lock, and took_trunc_lock records that
               * it must be released below.
               */
 723         if ((v_type == VREG || v_type == VLNK)) {
 724                 hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
 725                 took_trunc_lock = 1;
 726         }
 727
              /*
               * The result of hfs_lock() is discarded by the (void) cast.
               * NOTE(review): presumably the lock cannot fail when
               * HFS_LOCK_ALLOW_NOEXISTS is passed -- confirm in hfs_lock().
               */
 728         (void) hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
 729
 730         /*
 731          * Call cnode_teardown to push out dirty blocks to disk, release open-unlinked
 732          * files' blocks from being in use, and move the cnode from C_DELETED to C_NOEXISTS.
 733          */
 734         error = hfs_cnode_teardown (vp, ap->a_context, 0);
 735
 736     /*
 737      * Drop the truncate lock before unlocking the cnode
 738      * (which can potentially perform a vnode_put and
 739      * recycle the vnode which in turn might require the
 740      * truncate lock)
 741      */
 742         if (took_trunc_lock) {
 743             hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
 744         }
 745
 746         hfs_unlock(cp);
 747
 748 inactive_done:
 749
 750         return error;
 751 }
 752
 753
 754 /*
 755  * File clean-up (zero fill and shrink peof).
      *
      * Returns 0 immediately if the volume is read-only or the fork has no
      * blocks (ff_blocks == 0).  Otherwise, in order:
      *   - unless HFS_FILE_DONE_NO_SYNC is set in opts, calls cluster_push()
      *     on the vnode;
      *   - for every entry on the fork's ff_invalidranges list, removes the
      *     range from the list and zero-fills it with cluster_write();
      *   - clears C_ZFWANTSYNC and c_zftimeout on the cnode;
      *   - calls hfs_truncate() down to the logical EOF when ff_blocks exceeds
      *     the number of allocation blocks needed to hold it;
      *   - unless HFS_FILE_DONE_NO_SYNC is set, calls cluster_push() again and
      *     then hfs_update() if the cnode is marked C_MODIFIED.
      *
      * The cnode lock is dropped around every cluster_push()/cluster_write()
      * call and re-taken exclusively afterwards.
      * NOTE(review): this implies the caller holds the cnode lock on entry --
      * confirm against callers.
      *
      * The results of the cluster, truncate and update calls are discarded;
      * past the early exit the function always returns 0.
 756  */
 757
 758 int
 759 hfs_filedone(struct vnode *vp, vfs_context_t context,
 760                          hfs_file_done_opts_t opts)
 761 {
 762         struct cnode *cp;
 763         struct filefork *fp;
 764         struct hfsmount *hfsmp;
 765         struct rl_entry *invalid_range;
 766         off_t leof;
 767         u_int32_t blks, blocksize;
 768         /* cluster_flags is passed to cluster_push(); cluster_zero_flags is passed to cluster_write() when zero-filling invalid ranges */
 769         int cluster_flags = IO_CLOSE;
 770         int cluster_zero_flags = IO_HEADZEROFILL | IO_NOZERODIRTY | IO_NOCACHE;
 771
 772         cp = VTOC(vp);
 773         fp = VTOF(vp);
 774         hfsmp = VTOHFS(vp);
 775         leof = fp->ff_size;	/* sampled once here; not re-read after the lock is dropped below */
 776
 777         if ((hfsmp->hfs_flags & HFS_READ_ONLY) || (fp->ff_blocks == 0))
 778                 return (0);
 779
 780         if (!ISSET(opts, HFS_FILE_DONE_NO_SYNC)) {
 781 #if CONFIG_PROTECT
 782                 /*
 783                  * Figure out if we need to do synchronous IO.
 784                  *
 785                  * If the file represents a content-protected file, we may need
 786                  * to issue synchronous IO when we dispatch to the cluster layer.
 787                  * If we didn't, then the IO would go out to the disk asynchronously.
 788                  * If the vnode hits the end of inactive before getting reclaimed, the
 789                  * content protection keys would be wiped/bzeroed out, and we'd end up
 790                  * trying to issue the IO with an invalid key.  This will lead to file
 791                  * corruption.  IO_SYNC will force the cluster_push to wait until all IOs
 792                  * have completed (though they may be in the track cache).
 793                  */
 794                 if (cp_fs_protected(VTOVFS(vp))) {
 795                         cluster_flags |= IO_SYNC;
 796                         cluster_zero_flags |= IO_SYNC;
 797                 }
 798 #endif
 799
                      /* The cnode lock is not held across cluster_push(). */
 800                 hfs_unlock(cp);
 801                 (void) cluster_push(vp, cluster_flags);
 802                 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
 803         }
 804
 805         /*
 806          * Explicitly zero out the areas of file
 807          * that are currently marked invalid.
 808          */
 809         while ((invalid_range = TAILQ_FIRST(&fp->ff_invalidranges))) {
 810                 off_t start = invalid_range->rl_start;
 811                 off_t end = invalid_range->rl_end;
 812
 813                 /* The range about to be written must be validated
 814                  * first, so that VNOP_BLOCKMAP() will return the
 815                  * appropriate mapping for the cluster code:
 816                  */
 817                 rl_remove(start, end, &fp->ff_invalidranges);
 818
                      /*
                       * No uio is supplied (NULL); the cnode lock is dropped for the
                       * duration of the write and the cnode is marked modified after.
                       */
 819                 hfs_unlock(cp);
 820                 (void) cluster_write(vp, (struct uio *) 0,
 821                                      leof, end + 1, start, (off_t)0, cluster_zero_flags);
 822                 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
 823                 cp->c_flag |= C_MODIFIED;
 824         }
 825         cp->c_flag &= ~C_ZFWANTSYNC;
 826         cp->c_zftimeout = 0;
              /* Round leof up to a whole number of allocation blocks. */
 827         blocksize = VTOVCB(vp)->blockSize;
 828         blks = leof / blocksize;
 829         if (((off_t)blks * (off_t)blocksize) != leof)
 830                 blks++;
 831         /*
 832          * Shrink the peof to the smallest size necessary to contain the leof.
 833          */
 834         if (blks < fp->ff_blocks) {
 835                 (void) hfs_truncate(vp, leof, IO_NDELAY, HFS_TRUNCATE_SKIPTIMES, context);
 836         }
 837
 838         if (!ISSET(opts, HFS_FILE_DONE_NO_SYNC)) {
 839                 hfs_unlock(cp);
 840                 (void) cluster_push(vp, cluster_flags);
 841                 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
 842
 843                 /*
 844                  * If the hfs_truncate didn't happen to flush the vnode's
 845                  * information out to disk, force it to be updated now that
 846                  * all invalid ranges have been zero-filled and validated:
 847                  */
 848                 if (cp->c_flag & C_MODIFIED) {
 849                         hfs_update(vp, 0);
 850                 }
 851         }
 852
 853         return (0);
 854 }
 855
 856
 857 /*
 858  * Reclaim a cnode so that it can be used for other purposes.
      *
      * ap->a_vp is the vnode being reclaimed; ap->a_context is handed on to
      * hfs_cnode_teardown().
      *
      * With the cnode lock held exclusively, this routine:
      *   - calls hfs_cnode_teardown() (last argument 1) unless the cnode is
      *     already marked C_NOEXISTS;
      *   - calls hfs_addhotfile() for non-directory, non-system vnodes whose
      *     cnode is neither C_DELETED nor C_NOEXISTS;
      *   - calls vnode_removefsref() on the vnode;
      *   - detaches the vnode and its fork (data or resource) from the cnode,
      *     panicking if vp is neither c_vp nor c_rsrc_vp;
      *   - when no other fork remains, tries to remove the cnode from the hash
      *     and releases directory hints / hardlink origins as applicable;
      *   - frees the detached filefork (and any cached symlink data);
      *   - unlocks the cnode and, if the hash removal succeeded, hands it to
      *     hfs_reclaim_cnode();
      *   - calls vnode_clearfsnode() on the vnode.
      *
      * Always returns 0; the result of hfs_cnode_teardown() is stored in a
      * local but not propagated.
 859  */
 860 int
 861 hfs_vnop_reclaim(struct vnop_reclaim_args *ap)
 862 {
 863         struct vnode *vp = ap->a_vp;
 864         struct cnode *cp;
 865         struct filefork *fp = NULL;	/* fork belonging to vp, if any */
 866         struct filefork *altfp = NULL;	/* the cnode's other fork, if any */
 867         struct hfsmount *hfsmp = VTOHFS(vp);
 868         vfs_context_t ctx = ap->a_context;
 869         int reclaim_cnode = 0;	/* set when the cnode was removed from the hash */
 870         int err = 0;	/* NOTE(review): set from hfs_cnode_teardown() below but never read */
 871         enum vtype v_type;	/* NOTE(review): assigned below but never read in this function */
 872
 873         v_type = vnode_vtype(vp);
 874         cp = VTOC(vp);
 875
 876         /*
 877          * We don't take the truncate lock since by the time reclaim comes along,
 878          * all dirty pages have been synced and nobody should be competing
 879          * with us for this thread.
 880          */
 881         (void) hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
 882
 883         /*
 884          * Sync to disk any remaining data in the cnode/vnode.  This includes
 885          * a call to hfs_update if the cnode has outbound data.
 886          *
 887          * If C_NOEXISTS is set on the cnode, then there's nothing teardown needs to do
 888          * because the catalog entry for this cnode is already gone.
 889          */
 890         if (!ISSET(cp->c_flag, C_NOEXISTS)) {
 891                 err = hfs_cnode_teardown(vp, ctx, 1);
 892         }
 893
 894         /*
 895          * Keep track of an inactive hot file.
 896          */
 897         if (!vnode_isdir(vp) &&
 898             !vnode_issystem(vp) &&
 899             !(cp->c_flag & (C_DELETED | C_NOEXISTS)) ) {
 900                 (void) hfs_addhotfile(vp);
 901         }
 902         vnode_removefsref(vp);
 903
 904         /*
 905          * Find file fork for this vnode (if any)
 906          * Also check if another fork is active
 907          */
 908         if (cp->c_vp == vp) {
 909                 fp = cp->c_datafork;
 910                 altfp = cp->c_rsrcfork;
 911
 912                 cp->c_datafork = NULL;
 913                 cp->c_vp = NULL;
 914         } else if (cp->c_rsrc_vp == vp) {
 915                 fp = cp->c_rsrcfork;
 916                 altfp = cp->c_datafork;
 917
 918                 cp->c_rsrcfork = NULL;
 919                 cp->c_rsrc_vp = NULL;
 920         } else {
 921                 panic("hfs_vnop_reclaim: vp points to wrong cnode (vp=%p cp->c_vp=%p cp->c_rsrc_vp=%p)\n", vp, cp->c_vp, cp->c_rsrc_vp);
 922         }
 923         /*
 924          * On the last fork, remove the cnode from its hash chain.
 925          */
 926         if (altfp == NULL) {
 927                 /* If we can't remove it then the cnode must persist! */
 928                 if (hfs_chashremove(hfsmp, cp) == 0)
 929                         reclaim_cnode = 1;
 930                 /*
 931                  * Remove any directory hints
 932                  */
 933                 if (vnode_isdir(vp)) {
 934                         hfs_reldirhints(cp, 0);
 935                 }
 936
 937                 if(cp->c_flag & C_HARDLINK) {
 938                         hfs_relorigins(cp);
 939                 }
 940         }
 941         /* Release the file fork and related data */
 942         if (fp) {
 943                 /* Dump cached symlink data */
 944                 if (vnode_islnk(vp) && (fp->ff_symlinkptr != NULL)) {
 945                         FREE(fp->ff_symlinkptr, M_TEMP);
 946                 }
 947                 FREE_ZONE(fp, sizeof(struct filefork), M_HFSFORK);
 948         }
 949
 950         /*
 951          * If there was only one active fork then we can release the cnode.
 952          */
 953         if (reclaim_cnode) {
                      /* Wake hash waiters and drop the lock before handing the cnode to hfs_reclaim_cnode(). */
 954                 hfs_chashwakeup(hfsmp, cp, H_ALLOC | H_TRANSIT);
 955                 hfs_unlock(cp);
 956                 hfs_reclaim_cnode(cp);
 957         }
 958         else  {
 959                 /*
 960                  * cnode in use.  If it is a directory, it could have
 961                  * no live forks. Just release the lock.
 962                  */
 963                 hfs_unlock(cp);
 964         }
 965
 966         vnode_clearfsnode(vp);
 967         return (0);
 968 }
 969
 970
 971 extern int (**hfs_vnodeop_p) (void *);	/* fallback vector chosen by hfs_getnewvnode() when no other case applies */
 972 extern int (**hfs_specop_p)  (void *);	/* chosen by hfs_getnewvnode() for VBLK and VCHR vnodes */
 973 #if FIFO
 974 extern int (**hfs_fifoop_p)  (void *);	/* chosen by hfs_getnewvnode() for VFIFO vnodes */
 975 #endif
 976
 977 #if CONFIG_HFS_STD
 978 extern int (**hfs_std_vnodeop_p) (void *);	/* chosen by hfs_getnewvnode() when the mount has HFS_STANDARD set */
 979 #endif
 980
 981 /*
 982  * hfs_getnewvnode - get new default vnode
 983  *
 984  * The vnode is returned with an iocount and the cnode locked
 985  */
 986 int
 987 hfs_getnewvnode(
 988         struct hfsmount *hfsmp,
 989         struct vnode *dvp,
 990         struct componentname *cnp,
 991         struct cat_desc *descp,
 992         int flags,
 993         struct cat_attr *attrp,
 994         struct cat_fork *forkp,
 995         struct vnode **vpp,
 996         int *out_flags)
 997 {
 998         struct mount *mp = HFSTOVFS(hfsmp);
 999         struct vnode *vp = NULL;
1000         struct vnode **cvpp;
1001         struct vnode *tvp = NULLVP;
1002         struct cnode *cp = NULL;
1003         struct filefork *fp = NULL;
1004         int hfs_standard = 0;
1005         int retval;
1006         int issystemfile;
1007         int wantrsrc;
1008         int hflags = 0;
1009         struct vnode_fsparam vfsp;
1010         enum vtype vtype;
1011 #if QUOTA
1012         int i;
1013 #endif /* QUOTA */
1014
1015         hfs_standard = (hfsmp->hfs_flags & HFS_STANDARD);
1016
1017         if (attrp->ca_fileid == 0) {
1018                 *vpp = NULL;
1019                 return (ENOENT);
1020         }
1021
1022 #if !FIFO
1023         if (IFTOVT(attrp->ca_mode) == VFIFO) {
1024                 *vpp = NULL;
1025                 return (ENOTSUP);
1026         }
1027 #endif /* !FIFO */
1028         vtype = IFTOVT(attrp->ca_mode);
1029         issystemfile = (descp->cd_flags & CD_ISMETA) && (vtype == VREG);
1030         wantrsrc = flags & GNV_WANTRSRC;
1031
1032         /* Sanity check the vtype and mode */
1033         if (vtype == VBAD) {
1034                 /* Mark the FS as corrupt and bail out */
1035                 hfs_mark_inconsistent(hfsmp, HFS_INCONSISTENCY_DETECTED);
1036                 return EINVAL;
1037         }
1038
1039         /* Zero out the out_flags */
1040         *out_flags = 0;
1041
1042 #ifdef HFS_CHECK_LOCK_ORDER
1043         /*
1044          * The only case were its permissible to hold the parent cnode
1045          * lock is during a create operation (hfs_makenode) or when
1046          * we don't need the cnode lock (GNV_SKIPLOCK).
1047          */
1048         if ((dvp != NULL) &&
1049             (flags & (GNV_CREATE | GNV_SKIPLOCK)) == 0 &&
1050             VTOC(dvp)->c_lockowner == current_thread()) {
1051                 panic("hfs_getnewvnode: unexpected hold of parent cnode %p", VTOC(dvp));
1052         }
1053 #endif /* HFS_CHECK_LOCK_ORDER */
1054
1055         /*
1056          * Get a cnode (new or existing)
1057          */
1058         cp = hfs_chash_getcnode(hfsmp, attrp->ca_fileid, vpp, wantrsrc,
1059                                                         (flags & GNV_SKIPLOCK), out_flags, &hflags);
1060
1061         /*
1062          * If the id is no longer valid for lookups we'll get back a NULL cp.
1063          */
1064         if (cp == NULL) {
1065                 return (ENOENT);
1066         }
1067
1068         /*
1069          * If we get a cnode/vnode pair out of hfs_chash_getcnode, then update the
1070          * descriptor in the cnode as needed if the cnode represents a hardlink.
1071          * We want the caller to get the most up-to-date copy of the descriptor
1072          * as possible. However, we only do anything here if there was a valid vnode.
1073          * If there isn't a vnode, then the cnode is brand new and needs to be initialized
1074          * as it doesn't have a descriptor or cat_attr yet.
1075          *
1076          * If we are about to replace the descriptor with the user-supplied one, then validate
1077          * that the descriptor correctly acknowledges this item is a hardlink.  We could be
1078          * subject to a race where the calling thread invoked cat_lookup, got a valid lookup
1079          * result but the file was not yet a hardlink. With sufficient delay between there
1080          * and here, we might accidentally copy in the raw inode ID into the descriptor in the
1081          * call below.  If the descriptor's CNID is the same as the fileID then it must
1082          * not yet have been a hardlink when the lookup occurred.
1083          */
1084
1085         if (!(hfs_checkdeleted(cp))) {
1086                 if ((cp->c_flag & C_HARDLINK) && descp->cd_nameptr && descp->cd_namelen > 0) {
1087                         /* If cnode is uninitialized, its c_attr will be zeroed out; cnids wont match. */
1088                         if ((descp->cd_cnid == cp->c_attr.ca_fileid)  &&
1089                                         (attrp->ca_linkcount != cp->c_attr.ca_linkcount)){
1090                                 if ((flags & GNV_SKIPLOCK) == 0) {
1091                                         /*
1092                                          * Then we took the lock. Drop it before calling
1093                                          * vnode_put, which may invoke hfs_vnop_inactive and need to take
1094                                          * the cnode lock again.
1095                                          */
1096                                         hfs_unlock(cp);
1097                                 }
1098
1099                                 /*
1100                                  * Emit ERECYCLE and GNV_CAT_ATTRCHANGED to
1101                                  * force a re-drive in the lookup routine.
1102                                  * Drop the iocount on the vnode obtained from
1103                                  * chash_getcnode if needed.
1104                                  */
1105                                 if (*vpp != NULL) {
1106                                         vnode_put (*vpp);
1107                                         *vpp = NULL;
1108                                 }
1109
1110                                 /*
1111                                  * If we raced with VNOP_RECLAIM for this vnode, the hash code could
1112                                  * have observed it after the c_vp or c_rsrc_vp fields had been torn down;
1113                                  * the hash code peeks at those fields without holding the cnode lock because
1114                                  * it needs to be fast.  As a result, we may have set H_ATTACH in the chash
1115                                  * call above.  Since we're bailing out, unset whatever flags we just set, and
1116                                  * wake up all waiters for this cnode.
1117                                  */
1118                                 if (hflags) {
1119                                         hfs_chashwakeup(hfsmp, cp, hflags);
1120                                 }
1121
1122                                 *out_flags = GNV_CAT_ATTRCHANGED;
1123                                 return ERECYCLE;
1124                         }
1125                         else {
1126                                 /*
1127                                  * Otherwise, CNID != fileid. Go ahead and copy in the new descriptor.
1128                                  *
1129                                  * Replacing the descriptor here is fine because we looked up the item without
1130                                  * a vnode in hand before.  If a vnode existed, its identity must be attached to this
1131                                  * item.  We are not susceptible to the lookup fastpath issue at this point.
1132                                  */
1133                                 replace_desc(cp, descp);
1134
1135                                 /*
1136                                  * This item was a hardlink, and its name needed to be updated. By replacing the
1137                                  * descriptor above, we've now updated the cnode's internal representation of
1138                                  * its link ID/CNID, parent ID, and its name.  However, VFS must now be alerted
1139                                  * to the fact that this vnode now has a new parent, since we cannot guarantee
1140                                  * that the new link lived in the same directory as the alternative name for
1141                                  * this item.
1142                                  */
1143                                 if ((*vpp != NULL) && (cnp)) {
1144                                         /* we could be requesting the rsrc of a hardlink file... */
1145                                         vnode_update_identity (*vpp, dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash,
1146                                                         (VNODE_UPDATE_PARENT | VNODE_UPDATE_NAME));
1147                                 }
1148                         }
1149                 }
1150         }
1151
1152         /* Check if we found a matching vnode */
1153         if (*vpp != NULL) {
1154                 return (0);
1155         }
1156
1157         /*
1158          * If this is a new cnode then initialize it.
1159          */
1160         if (ISSET(cp->c_hflag, H_ALLOC)) {
1161                 lck_rw_init(&cp->c_truncatelock, hfs_rwlock_group, hfs_lock_attr);
1162 #if HFS_COMPRESSION
1163                 cp->c_decmp = NULL;
1164 #endif
1165
1166                 /* Make sure its still valid (ie exists on disk). */
1167                 if (!(flags & GNV_CREATE)) {
1168                         int error = 0;
1169                         if (!hfs_valid_cnode (hfsmp, dvp, (wantrsrc ? NULL : cnp), cp->c_fileid, attrp, &error)) {
1170                                 hfs_chash_abort(hfsmp, cp);
1171                                 if ((flags & GNV_SKIPLOCK) == 0) {
1172                                         hfs_unlock(cp);
1173                                 }
1174                                 hfs_reclaim_cnode(cp);
1175                                 *vpp = NULL;
1176                                 /*
1177                                  * If we hit this case, that means that the entry was there in the catalog when
1178                                  * we did a cat_lookup earlier.  Think hfs_lookup.  However, in between the time
1179                                  * that we checked the catalog and the time we went to get a vnode/cnode for it,
1180                                  * it had been removed from the namespace and the vnode totally reclaimed.  As a result,
1181                                  * it's not there in the catalog during the check in hfs_valid_cnode and we bubble out
1182                                  * an ENOENT.  To indicate to the caller that they should really double-check the
1183                                  * entry (it could have been renamed over and gotten a new fileid), we mark a bit
1184                                  * in the output flags.
1185                                  */
1186                                 if (error == ENOENT) {
1187                                         *out_flags = GNV_CAT_DELETED;
1188                                         return ENOENT;
1189                                 }
1190
1191                                 /*
1192                                  * Also, we need to protect the cat_attr acquired during hfs_lookup and passed into
1193                                  * this function as an argument because the catalog may have changed w.r.t hardlink
1194                                  * link counts and the firstlink field.  If that validation check fails, then let
1195                                  * lookup re-drive itself to get valid/consistent data with the same failure condition below.
1196                                  */
1197                                 if (error == ERECYCLE) {
1198                                         *out_flags = GNV_CAT_ATTRCHANGED;
1199                                         return (ERECYCLE);
1200                                 }
1201                         }
1202                 }
1203                 bcopy(attrp, &cp->c_attr, sizeof(struct cat_attr));
1204                 bcopy(descp, &cp->c_desc, sizeof(struct cat_desc));
1205
1206                 /* The name was inherited so clear descriptor state... */
1207                 descp->cd_namelen = 0;
1208                 descp->cd_nameptr = NULL;
1209                 descp->cd_flags &= ~CD_HASBUF;
1210
1211                 /* Tag hardlinks */
1212                 if ((vtype == VREG || vtype == VDIR) &&
1213                     ((descp->cd_cnid != attrp->ca_fileid) ||
1214                      (attrp->ca_recflags & kHFSHasLinkChainMask))) {
1215                         cp->c_flag |= C_HARDLINK;
1216                 }
1217                 /*
1218                  * Fix-up dir link counts.
1219                  *
1220                  * Earlier versions of Leopard used ca_linkcount for posix
1221                  * nlink support (effectively the sub-directory count + 2).
1222                  * That is now accomplished using the ca_dircount field with
1223                  * the corresponding kHFSHasFolderCountMask flag.
1224                  *
1225                  * For directories the ca_linkcount is the true link count,
1226                  * tracking the number of actual hardlinks to a directory.
1227                  *
1228                  * We only do this if the mount has HFS_FOLDERCOUNT set;
1229                  * at the moment, we only set that for HFSX volumes.
1230                  */
1231                 if ((hfsmp->hfs_flags & HFS_FOLDERCOUNT) &&
1232                     (vtype == VDIR) &&
1233                     !(attrp->ca_recflags & kHFSHasFolderCountMask) &&
1234                     (cp->c_attr.ca_linkcount > 1)) {
1235                         if (cp->c_attr.ca_entries == 0)
1236                                 cp->c_attr.ca_dircount = 0;
1237                         else
1238                                 cp->c_attr.ca_dircount = cp->c_attr.ca_linkcount - 2;
1239
1240                         cp->c_attr.ca_linkcount = 1;
1241                         cp->c_attr.ca_recflags |= kHFSHasFolderCountMask;
1242                         if ( !(hfsmp->hfs_flags & HFS_READ_ONLY) )
1243                                 cp->c_flag |= C_MODIFIED;
1244                 }
1245 #if QUOTA
1246                 if (hfsmp->hfs_flags & HFS_QUOTAS) {
1247                         for (i = 0; i < MAXQUOTAS; i++)
1248                                 cp->c_dquot[i] = NODQUOT;
1249                 }
1250 #endif /* QUOTA */
1251                 /* Mark the output flag that we're vending a new cnode */
1252                 *out_flags |= GNV_NEW_CNODE;
1253         }
1254
1255         if (vtype == VDIR) {
1256                 if (cp->c_vp != NULL)
1257                         panic("hfs_getnewvnode: orphaned vnode (data)");
1258                 cvpp = &cp->c_vp;
1259         } else {
1260                 if (forkp && attrp->ca_blocks < forkp->cf_blocks)
1261                         panic("hfs_getnewvnode: bad ca_blocks (too small)");
1262                 /*
1263                  * Allocate and initialize a file fork...
1264                  */
1265                 MALLOC_ZONE(fp, struct filefork *, sizeof(struct filefork),
1266                         M_HFSFORK, M_WAITOK);
1267                 fp->ff_cp = cp;
1268                 if (forkp)
1269                         bcopy(forkp, &fp->ff_data, sizeof(struct cat_fork));
1270                 else
1271                         bzero(&fp->ff_data, sizeof(struct cat_fork));
1272                 rl_init(&fp->ff_invalidranges);
1273                 fp->ff_sysfileinfo = 0;
1274
1275                 if (wantrsrc) {
1276                         if (cp->c_rsrcfork != NULL)
1277                                 panic("hfs_getnewvnode: orphaned rsrc fork");
1278                         if (cp->c_rsrc_vp != NULL)
1279                                 panic("hfs_getnewvnode: orphaned vnode (rsrc)");
1280                         cp->c_rsrcfork = fp;
1281                         cvpp = &cp->c_rsrc_vp;
1282                         if ( (tvp = cp->c_vp) != NULLVP )
1283                                 cp->c_flag |= C_NEED_DVNODE_PUT;
1284                 } else {
1285                         if (cp->c_datafork != NULL)
1286                                 panic("hfs_getnewvnode: orphaned data fork");
1287                         if (cp->c_vp != NULL)
1288                                 panic("hfs_getnewvnode: orphaned vnode (data)");
1289                         cp->c_datafork = fp;
1290                         cvpp = &cp->c_vp;
1291                         if ( (tvp = cp->c_rsrc_vp) != NULLVP)
1292                                 cp->c_flag |= C_NEED_RVNODE_PUT;
1293                 }
1294         }
1295         if (tvp != NULLVP) {
1296                 /*
1297                  * grab an iocount on the vnode we weren't
1298                  * interested in (i.e. we want the resource fork
1299                  * but the cnode already has the data fork)
1300                  * to prevent it from being
1301                  * recycled by us when we call vnode_create
1302                  * which will result in a deadlock when we
1303                  * try to take the cnode lock in hfs_vnop_fsync or
1304                  * hfs_vnop_reclaim... vnode_get can be called here
1305                  * because we already hold the cnode lock which will
1306                  * prevent the vnode from changing identity until
1307                  * we drop it.. vnode_get will not block waiting for
1308                  * a change of state... however, it will return an
1309                  * error if the current iocount == 0 and we've already
1310                  * started to terminate the vnode... we don't need/want to
1311                  * grab an iocount in the case since we can't cause
1312                  * the fileystem to be re-entered on this thread for this vp
1313                  *
1314                  * the matching vnode_put will happen in hfs_unlock
1315                  * after we've dropped the cnode lock
1316                  */
1317                 if ( vnode_get(tvp) != 0)
1318                         cp->c_flag &= ~(C_NEED_RVNODE_PUT | C_NEED_DVNODE_PUT);
1319         }
1320         vfsp.vnfs_mp = mp;
1321         vfsp.vnfs_vtype = vtype;
1322         vfsp.vnfs_str = "hfs";
1323         if ((cp->c_flag & C_HARDLINK) && (vtype == VDIR)) {
1324                 vfsp.vnfs_dvp = NULL;  /* no parent for me! */
1325                 vfsp.vnfs_cnp = NULL;  /* no name for me! */
1326         } else {
1327                 vfsp.vnfs_dvp = dvp;
1328                 vfsp.vnfs_cnp = cnp;
1329         }
1330         vfsp.vnfs_fsnode = cp;
1331
1332         /*
1333          * Special Case HFS Standard VNOPs from HFS+, since
1334          * HFS standard is readonly/deprecated as of 10.6
1335          */
1336
1337 #if FIFO
1338         if (vtype == VFIFO )
1339                 vfsp.vnfs_vops = hfs_fifoop_p;
1340         else
1341 #endif
1342         if (vtype == VBLK || vtype == VCHR)
1343                 vfsp.vnfs_vops = hfs_specop_p;
1344 #if CONFIG_HFS_STD
1345         else if (hfs_standard)
1346                 vfsp.vnfs_vops = hfs_std_vnodeop_p;
1347 #endif
1348         else
1349                 vfsp.vnfs_vops = hfs_vnodeop_p;
1350
1351         if (vtype == VBLK || vtype == VCHR)
1352                 vfsp.vnfs_rdev = attrp->ca_rdev;
1353         else
1354                 vfsp.vnfs_rdev = 0;
1355
1356         if (forkp)
1357                 vfsp.vnfs_filesize = forkp->cf_size;
1358         else
1359                 vfsp.vnfs_filesize = 0;
1360
1361         vfsp.vnfs_flags = VNFS_ADDFSREF;
1362         if (dvp == NULLVP || cnp == NULL || !(cnp->cn_flags & MAKEENTRY) || (flags & GNV_NOCACHE))
1363                 vfsp.vnfs_flags |= VNFS_NOCACHE;
1364
1365         /* Tag system files */
1366         vfsp.vnfs_marksystem = issystemfile;
1367
1368         /* Tag root directory */
1369         if (descp->cd_cnid == kHFSRootFolderID)
1370                 vfsp.vnfs_markroot = 1;
1371         else
1372                 vfsp.vnfs_markroot = 0;
1373
1374         if ((retval = vnode_create(VNCREATE_FLAVOR, VCREATESIZE, &vfsp, cvpp))) {
1375                 if (fp) {
1376                         if (fp == cp->c_datafork)
1377                                 cp->c_datafork = NULL;
1378                         else
1379                                 cp->c_rsrcfork = NULL;
1380
1381                         FREE_ZONE(fp, sizeof(struct filefork), M_HFSFORK);
1382                 }
1383                 /*
1384                  * If this is a newly created cnode or a vnode reclaim
1385                  * occurred during the attachment, then cleanup the cnode.
1386                  */
1387                 if ((cp->c_vp == NULL) && (cp->c_rsrc_vp == NULL)) {
1388                         hfs_chash_abort(hfsmp, cp);
1389                         hfs_reclaim_cnode(cp);
1390                 }
1391                 else {
1392                         hfs_chashwakeup(hfsmp, cp, H_ALLOC | H_ATTACH);
1393                         if ((flags & GNV_SKIPLOCK) == 0){
1394                                 hfs_unlock(cp);
1395                         }
1396                 }
1397                 *vpp = NULL;
1398                 return (retval);
1399         }
1400         vp = *cvpp;
1401         vnode_settag(vp, VT_HFS);
1402         if (cp->c_flag & C_HARDLINK) {
1403                 vnode_setmultipath(vp);
1404         }
1405         /*
1406          * Tag resource fork vnodes as needing an VNOP_INACTIVE
1407          * so that any deferred removes (open unlinked files)
1408          * have the chance to process the resource fork.
1409          */
1410         if (VNODE_IS_RSRC(vp)) {
1411                 int err;
1412
1413                 KERNEL_DEBUG_CONSTANT(HFSDBG_GETNEWVNODE, VM_KERNEL_ADDRPERM(cp->c_vp), VM_KERNEL_ADDRPERM(cp->c_rsrc_vp), 0, 0, 0);
1414
1415                 /* Force VL_NEEDINACTIVE on this vnode */
1416                 err = vnode_ref(vp);
1417                 if (err == 0) {
1418                         vnode_rele(vp);
1419                 }
1420         }
1421         hfs_chashwakeup(hfsmp, cp, H_ALLOC | H_ATTACH);
1422
1423         /*
1424          * Stop tracking an active hot file.
1425          */
1426         if (!(flags & GNV_CREATE) && (vtype != VDIR) && !issystemfile) {
1427                 (void) hfs_removehotfile(vp);
1428         }
1429
1430 #if CONFIG_PROTECT
1431         /* Initialize the cp data structures. The key should be in place now. */
1432         if (!issystemfile && (*out_flags & GNV_NEW_CNODE)) {
1433                 cp_entry_init(cp, mp);
1434         }
1435 #endif
1436
1437         *vpp = vp;
1438         return (0);
1439 }
1440
1441
1442 static void
1443 hfs_reclaim_cnode(struct cnode *cp)
1444 {
1445 #if QUOTA
1446         int i;
1447
1448         for (i = 0; i < MAXQUOTAS; i++) {
1449                 if (cp->c_dquot[i] != NODQUOT) {
1450                         dqreclaim(cp->c_dquot[i]);
1451                         cp->c_dquot[i] = NODQUOT;
1452                 }
1453         }
1454 #endif /* QUOTA */
1455
1456         /*
1457          * If the descriptor has a name then release it
1458          */
1459         if ((cp->c_desc.cd_flags & CD_HASBUF) && (cp->c_desc.cd_nameptr != 0)) {
1460                 const char *nameptr;
1461
1462                 nameptr = (const char *) cp->c_desc.cd_nameptr;
1463                 cp->c_desc.cd_nameptr = 0;
1464                 cp->c_desc.cd_flags &= ~CD_HASBUF;
1465                 cp->c_desc.cd_namelen = 0;
1466                 vfs_removename(nameptr);
1467         }
1468
1469         /*
1470          * We only call this function if we are in hfs_vnop_reclaim and
1471          * attempting to reclaim a cnode with only one live fork.  Because the vnode
1472          * went through reclaim, any future attempts to use this item will have to
1473          * go through lookup again, which will need to create a new vnode.  Thus,
1474          * destroying the locks below is safe.
1475          */
1476
1477         lck_rw_destroy(&cp->c_rwlock, hfs_rwlock_group);
1478         lck_rw_destroy(&cp->c_truncatelock, hfs_rwlock_group);
1479 #if HFS_COMPRESSION
1480         if (cp->c_decmp) {
1481                 decmpfs_cnode_destroy(cp->c_decmp);
1482                 FREE_ZONE(cp->c_decmp, sizeof(*(cp->c_decmp)), M_DECMPFS_CNODE);
1483         }
1484 #endif
1485 #if CONFIG_PROTECT
1486         cp_entry_destroy(cp->c_cpentry);
1487         cp->c_cpentry = NULL;
1488 #endif
1489
1490
1491         bzero(cp, sizeof(struct cnode));
1492         FREE_ZONE(cp, sizeof(struct cnode), M_HFSNODE);
1493 }
1494
1495
1496 /*
1497  * hfs_valid_cnode
1498  *
1499  * This function is used to validate data that is stored in-core against what is contained
1500  * in the catalog.  Common uses include validating that the parent-child relationship still exist
1501  * for a specific directory entry (guaranteeing it has not been renamed into a different spot) at
1502  * the point of the check.
1503  */
1504 int
1505 hfs_valid_cnode(struct hfsmount *hfsmp, struct vnode *dvp, struct componentname *cnp,
1506                 cnid_t cnid, struct cat_attr *cattr, int *error)
1507 {
1508         struct cat_attr attr;
1509         struct cat_desc cndesc;
1510         int stillvalid = 0;
1511         int lockflags;
1512
1513         /* System files are always valid */
1514         if (cnid < kHFSFirstUserCatalogNodeID) {
1515                 *error = 0;
1516                 return (1);
1517         }
1518
1519         /* XXX optimization:  check write count in dvp */
1520
1521         lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
1522
1523         if (dvp && cnp) {
1524                 int lookup = 0;
1525                 struct cat_fork fork;
1526                 bzero(&cndesc, sizeof(cndesc));
1527                 cndesc.cd_nameptr = (const u_int8_t *)cnp->cn_nameptr;
1528                 cndesc.cd_namelen = cnp->cn_namelen;
1529                 cndesc.cd_parentcnid = VTOC(dvp)->c_fileid;
1530                 cndesc.cd_hint = VTOC(dvp)->c_childhint;
1531
1532                 /*
1533                  * We have to be careful when calling cat_lookup.  The result argument
1534                  * 'attr' may get different results based on whether or not you ask
1535                  * for the filefork to be supplied as output.  This is because cat_lookupbykey
1536                  * will attempt to do basic validation/smoke tests against the resident
1537                  * extents if there are no overflow extent records, but it needs someplace
1538                  * in memory to store the on-disk fork structures.
1539                  *
1540                  * Since hfs_lookup calls cat_lookup with a filefork argument, we should
1541                  * do the same here, to verify that block count differences are not
1542                  * due to calling the function with different styles.  cat_lookupbykey
1543                  * will request the volume be fsck'd if there is true on-disk corruption
1544                  * where the number of blocks does not match the number generated by
1545                  * summing the number of blocks in the resident extents.
1546                  */
1547
1548                 lookup = cat_lookup (hfsmp, &cndesc, 0, 0, NULL, &attr, &fork, NULL);
1549
1550                 if ((lookup == 0) && (cnid == attr.ca_fileid)) {
1551                         stillvalid = 1;
1552                         *error = 0;
1553                 }
1554                 else {
1555                         *error = ENOENT;
1556                 }
1557
1558                 /*
1559                  * In hfs_getnewvnode, we may encounter a time-of-check vs. time-of-vnode creation
1560                  * race.  Specifically, if there is no vnode/cnode pair for the directory entry
1561                  * being looked up, we have to go to the catalog.  But since we don't hold any locks (aside
1562                  * from the dvp in 'shared' mode) there is nothing to protect us against the catalog record
1563                  * changing in between the time we do the cat_lookup there and the time we re-grab the
1564                  * catalog lock above to do another cat_lookup.
1565                  *
1566                  * However, we need to check more than just the CNID and parent-child name relationships above.
1567                  * Hardlinks can suffer the same race in the following scenario:  Suppose we do a
1568                  * cat_lookup, and find a leaf record and a raw inode for a hardlink.  Now, we have
1569                  * the cat_attr in hand (passed in above).  But in between then and now, the vnode was
1570                  * created by a competing hfs_getnewvnode call, and is manipulated and reclaimed before we get
1571                  * a chance to do anything.  This is possible if there are a lot of threads thrashing around
1572                  * with the cnode hash.  In this case, if we don't check/validate the cat_attr in-hand, we will
1573                  * blindly stuff it into the cnode, which will make the in-core data inconsistent with what is
1574                  * on disk.  So validate the cat_attr below, if required.  This race cannot happen if the cnode/vnode
1575                  * already exists, as it does in the case of rename and delete.
1576                  */
1577                 if (stillvalid && cattr != NULL) {
1578                         if (cattr->ca_linkcount != attr.ca_linkcount) {
1579                                 stillvalid = 0;
1580                                 *error = ERECYCLE;
1581                                 goto notvalid;
1582                         }
1583
1584                         if (cattr->ca_union1.cau_linkref != attr.ca_union1.cau_linkref) {
1585                                 stillvalid = 0;
1586                                 *error = ERECYCLE;
1587                                 goto notvalid;
1588                         }
1589
1590                         if (cattr->ca_union3.cau_firstlink != attr.ca_union3.cau_firstlink) {
1591                                 stillvalid = 0;
1592                                 *error = ERECYCLE;
1593                                 goto notvalid;
1594                         }
1595
1596                         if (cattr->ca_union2.cau_blocks != attr.ca_union2.cau_blocks) {
1597                                 stillvalid = 0;
1598                                 *error = ERECYCLE;
1599                                 goto notvalid;
1600                         }
1601                 }
1602         } else {
1603                 if (cat_idlookup(hfsmp, cnid, 0, 0, NULL, NULL, NULL) == 0) {
1604                         stillvalid = 1;
1605                         *error = 0;
1606                 }
1607                 else {
1608                         *error = ENOENT;
1609                 }
1610         }
1611 notvalid:
1612         hfs_systemfile_unlock(hfsmp, lockflags);
1613
1614         return (stillvalid);
1615 }
1616
1617
1618 /*
1619  * Per HI and Finder requirements, HFS should add in the
1620  * date/time that a particular directory entry was added
1621  * to the containing directory.
1622  * This is stored in the extended Finder Info for the
1623  * item in question.
1624  *
1625  * Note that this field is also set explicitly in the hfs_vnop_setxattr code.
1626  * We must ignore user attempts to set this part of the finderinfo, and
1627  * so we need to save a local copy of the date added, write in the user
1628  * finderinfo, then stuff the value back in.
1629  */
1630 void hfs_write_dateadded (struct cat_attr *attrp, u_int32_t dateadded) {
1631         u_int8_t *finfo = NULL;
1632
1633         /* overlay the FinderInfo to the correct pointer, and advance */
1634         finfo = (u_int8_t*)attrp->ca_finderinfo;
1635         finfo = finfo + 16;
1636
1637         /*
1638          * Make sure to write it out as big endian, since that's how
1639          * finder info is defined.
1640          *
1641          * NOTE: This is a Unix-epoch timestamp, not a HFS/Traditional Mac timestamp.
1642          */
1643         if (S_ISREG(attrp->ca_mode)) {
1644                 struct FndrExtendedFileInfo *extinfo = (struct FndrExtendedFileInfo *)finfo;
1645                 extinfo->date_added = OSSwapHostToBigInt32(dateadded);
1646                 attrp->ca_recflags |= kHFSHasDateAddedMask;
1647         }
1648         else if (S_ISDIR(attrp->ca_mode)) {
1649                 struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)finfo;
1650                 extinfo->date_added = OSSwapHostToBigInt32(dateadded);
1651                                 attrp->ca_recflags |= kHFSHasDateAddedMask;
1652         }
1653         /* If it were neither directory/file, then we'd bail out */
1654         return;
1655 }
1656
1657 static u_int32_t
1658 hfs_get_dateadded_internal(const uint8_t *finderinfo, mode_t mode)
1659 {
1660         u_int8_t *finfo = NULL;
1661         u_int32_t dateadded = 0;
1662
1663
1664
1665         /* overlay the FinderInfo to the correct pointer, and advance */
1666         finfo = (u_int8_t*)finderinfo + 16;
1667
1668         /*
1669          * FinderInfo is written out in big endian... make sure to convert it to host
1670          * native before we use it.
1671          */
1672         if (S_ISREG(mode)) {
1673                 struct FndrExtendedFileInfo *extinfo = (struct FndrExtendedFileInfo *)finfo;
1674                 dateadded = OSSwapBigToHostInt32 (extinfo->date_added);
1675         }
1676         else if (S_ISDIR(mode)) {
1677                 struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)finfo;
1678                 dateadded = OSSwapBigToHostInt32 (extinfo->date_added);
1679         }
1680
1681         return dateadded;
1682 }
1683
1684 u_int32_t
1685 hfs_get_dateadded(struct cnode *cp)
1686 {
1687         if ((cp->c_attr.ca_recflags & kHFSHasDateAddedMask) == 0) {
1688                 /* Date added was never set.  Return 0. */
1689                 return (0);
1690         }
1691
1692         return (hfs_get_dateadded_internal((u_int8_t*)cp->c_finderinfo,
1693             cp->c_attr.ca_mode));
1694 }
1695
1696 u_int32_t
1697 hfs_get_dateadded_from_blob(const uint8_t *finderinfo, mode_t mode)
1698 {
1699         return (hfs_get_dateadded_internal(finderinfo, mode));
1700 }
1701
1702 /*
1703  * Per HI and Finder requirements, HFS maintains a "write/generation
1704  * count" for each file that is incremented on any write & pageout.
1705  * It should start at 1 to reserve "0" as a special value.  If it
1706  * should ever wrap around, it will skip using 0.
1707  *
1708  * Note that finderinfo is manipulated in hfs_vnop_setxattr and care
1709  * is and should be taken to ignore user attempts to set the part of
1710  * the finderinfo that records the generation counter.
1711  *
1712  * Any change to the generation counter *must* not be visible before
1713  * the change that caused it (for obvious reasons), and given the
1714  * limitations of our current architecture, the change to the
1715  * generation counter may occur some time afterwards (particularly in
1716  * the case where a file is mapped writable---more on that below).
1717  *
1718  * We make no guarantees about the consistency of a file.  In other
1719  * words, a reader that is operating concurrently with a writer might
1720  * see some, but not all of writer's changes, and the generation
1721  * counter will *not* necessarily tell you this has happened.  To
1722  * enforce consistency, clients must make their own arrangements
1723  * e.g. use file locking.
1724  *
1725  * We treat files that are mapped writable as a special case: when
1726  * that happens, clients requesting the generation count will be told
1727  * it has a generation count of zero and they use that knowledge as a
1728  * hint that the file is changing and it therefore might be prudent to
1729  * wait until it is no longer mapped writable.  Clients should *not*
1730  * rely on this behaviour however; we might decide that it's better
1731  * for us to publish the fact that a file is mapped writable via
1732  * alternate means and return the generation counter when it is mapped
1733  * writable as it still has some, albeit limited, use.  We reserve the
1734  * right to make this change.
1735  *
1736  * Lastly, it's important to realise that because data and metadata
1737  * take different paths through the system, it's possible upon crash
1738  * or sudden power loss and after a restart, that a change may be
1739  * visible to the rest of the system without a corresponding change to
1740  * the generation counter.  The reverse may also be true, but for all
1741  * practical applications this shouldn't be an issue.
1742  */
1743 void hfs_write_gencount (struct cat_attr *attrp, uint32_t gencount) {
1744         u_int8_t *finfo = NULL;
1745
1746         /* overlay the FinderInfo to the correct pointer, and advance */
1747         finfo = (u_int8_t*)attrp->ca_finderinfo;
1748         finfo = finfo + 16;
1749
1750         /*
1751          * Make sure to write it out as big endian, since that's how
1752          * finder info is defined.
1753          *
1754          * Generation count is only supported for files.
1755          */
1756         if (S_ISREG(attrp->ca_mode)) {
1757                 struct FndrExtendedFileInfo *extinfo = (struct FndrExtendedFileInfo *)finfo;
1758                 extinfo->write_gen_counter = OSSwapHostToBigInt32(gencount);
1759         }
1760
1761         /* If it were neither directory/file, then we'd bail out */
1762         return;
1763 }
1764
1765 /*
1766  * Increase the gen count by 1; if it wraps around to 0, increment by
1767  * two.  The cnode *must* be locked exclusively by the caller.
1768  *
1769  * You may think holding the lock is unnecessary because we only need
1770  * to change the counter, but consider this sequence of events: thread
1771  * A calls hfs_incr_gencount and the generation counter is 2 upon
1772  * entry.  A context switch occurs and thread B increments the counter
1773  * to 3, thread C now gets the generation counter (for whatever
1774  * purpose), and then another thread makes another change and the
1775  * generation counter is incremented again---it's now 4.  Now thread A
1776  * continues and it sets the generation counter back to 3.  So you can
1777  * see, thread C would miss the change that caused the generation
1778  * counter to increment to 4 and for this reason the cnode *must*
1779  * always be locked exclusively.
1780  */
1781 uint32_t hfs_incr_gencount (struct cnode *cp) {
1782         u_int8_t *finfo = NULL;
1783         u_int32_t gcount = 0;
1784
1785         /* overlay the FinderInfo to the correct pointer, and advance */
1786         finfo = (u_int8_t*)cp->c_finderinfo;
1787         finfo = finfo + 16;
1788
1789         /*
1790          * FinderInfo is written out in big endian... make sure to convert it to host
1791          * native before we use it.
1792          *
1793          * NOTE: the write_gen_counter is stored in the same location in both the
1794          *       FndrExtendedFileInfo and FndrExtendedDirInfo structs (it's the
1795          *       last 32-bit word) so it is safe to have one code path here.
1796          */
1797         if (S_ISDIR(cp->c_attr.ca_mode) || S_ISREG(cp->c_attr.ca_mode)) {
1798                 struct FndrExtendedFileInfo *extinfo = (struct FndrExtendedFileInfo *)finfo;
1799                 gcount = OSSwapBigToHostInt32 (extinfo->write_gen_counter);
1800
1801                 /* Was it zero to begin with (file originated in 10.8 or earlier?) */
1802                 if (gcount == 0) {
1803                         gcount++;
1804                 }
1805
1806                 /* now bump it */
1807                 gcount++;
1808
1809                 /* Did it wrap around ? */
1810                 if (gcount == 0) {
1811                         gcount++;
1812                 }
1813                 extinfo->write_gen_counter = OSSwapHostToBigInt32 (gcount);
1814
1815                 SET(cp->c_flag, C_MODIFIED);
1816         }
1817         else {
1818                 gcount = 0;
1819         }
1820
1821         return gcount;
1822 }
1823
1824 /*
1825  * There is no need for any locks here (other than an iocount on an
1826  * associated vnode) because reading and writing an aligned 32 bit
1827  * integer should be atomic on all platforms we support.
1828  */
1829 static u_int32_t
1830 hfs_get_gencount_internal(const uint8_t *finderinfo, mode_t mode)
1831 {
1832         u_int8_t *finfo = NULL;
1833         u_int32_t gcount = 0;
1834
1835         /* overlay the FinderInfo to the correct pointer, and advance */
1836         finfo = (u_int8_t*)finderinfo;
1837         finfo = finfo + 16;
1838
1839         /*
1840          * FinderInfo is written out in big endian... make sure to convert it to host
1841          * native before we use it.
1842          *
1843          * NOTE: the write_gen_counter is stored in the same location in both the
1844          *       FndrExtendedFileInfo and FndrExtendedDirInfo structs (it's the
1845          *       last 32-bit word) so it is safe to have one code path here.
1846          */
1847         if (S_ISDIR(mode) || S_ISREG(mode)) {
1848                 struct FndrExtendedFileInfo *extinfo = (struct FndrExtendedFileInfo *)finfo;
1849                 gcount = OSSwapBigToHostInt32 (extinfo->write_gen_counter);
1850
1851                 /*
1852                  * Is it zero?  File might originate in 10.8 or earlier. We lie and bump it to 1,
1853                  * since the incrementer code is able to handle this case and will double-increment
1854                  * for us.
1855                  */
1856                 if (gcount == 0) {
1857                         gcount++;
1858                 }
1859         }
1860
1861         return gcount;
1862 }
1863
1864 /* Getter for the gen count */
1865 u_int32_t hfs_get_gencount (struct cnode *cp) {
1866         return hfs_get_gencount_internal(cp->c_finderinfo, cp->c_attr.ca_mode);
1867 }
1868
1869 /* Getter for the gen count from a buffer (currently pointer to finderinfo)*/
1870 u_int32_t hfs_get_gencount_from_blob (const uint8_t *finfoblob, mode_t mode) {
1871         return hfs_get_gencount_internal(finfoblob, mode);
1872 }
1873
1874 void hfs_clear_might_be_dirty_flag(cnode_t *cp)
1875 {
1876         /*
1877          * If we're about to touch both mtime and ctime, we can clear the
1878          * C_MIGHT_BE_DIRTY_FROM_MAPPING since we can guarantee that
1879          * subsequent page-outs can only be for data made dirty before
1880          * now.
1881          */
1882         CLR(cp->c_flag, C_MIGHT_BE_DIRTY_FROM_MAPPING);
1883 }
1884
1885 /*
1886  * Touch cnode times based on c_touch_xxx flags
1887  *
1888  * cnode must be locked exclusive
1889  *
1890  * This will also update the volume modify time
1891  */
1892 void
1893 hfs_touchtimes(struct hfsmount *hfsmp, struct cnode* cp)
1894 {
1895         vfs_context_t ctx;
1896         /* don't modify times if volume is read-only */
1897         if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1898                 cp->c_touch_acctime = FALSE;
1899                 cp->c_touch_chgtime = FALSE;
1900                 cp->c_touch_modtime = FALSE;
1901                 return;
1902         }
1903 #if CONFIG_HFS_STD
1904         else if (hfsmp->hfs_flags & HFS_STANDARD) {
1905         /* HFS Standard doesn't support access times */
1906                 cp->c_touch_acctime = FALSE;
1907         }
1908 #endif
1909
1910         ctx = vfs_context_current();
1911         /*
1912          * Skip access time updates if:
1913          *      . MNT_NOATIME is set
1914          *      . a file system freeze is in progress
1915          *      . a file system resize is in progress
1916          *      . the vnode associated with this cnode is marked for rapid aging
1917          */
1918         if (cp->c_touch_acctime) {
1919                 if ((vfs_flags(hfsmp->hfs_mp) & MNT_NOATIME) ||
1920                     hfsmp->hfs_freeze_state != HFS_THAWED ||
1921                     (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) ||
1922                     (cp->c_vp && ((vnode_israge(cp->c_vp) || (vfs_ctx_skipatime(ctx)))))) {
1923
1924                         cp->c_touch_acctime = FALSE;
1925                 }
1926         }
1927         if (cp->c_touch_acctime || cp->c_touch_chgtime ||
1928                 cp->c_touch_modtime || (cp->c_flag & C_NEEDS_DATEADDED)) {
1929                 struct timeval tv;
1930                 int touchvol = 0;
1931
1932                 if (cp->c_touch_modtime && cp->c_touch_chgtime)
1933                         hfs_clear_might_be_dirty_flag(cp);
1934
1935                 microtime(&tv);
1936
1937                 if (cp->c_touch_acctime) {
1938                         cp->c_atime = tv.tv_sec;
1939                         /*
1940                          * When the access time is the only thing changing
1941                          * then make sure its sufficiently newer before
1942                          * committing it to disk.
1943                          */
1944                         if ((((u_int32_t)cp->c_atime - (u_int32_t)(cp)->c_attr.ca_atimeondisk) >
1945                               ATIME_ONDISK_ACCURACY)) {
1946                                 cp->c_flag |= C_MODIFIED;
1947                         }
1948                         cp->c_touch_acctime = FALSE;
1949                 }
1950                 if (cp->c_touch_modtime) {
1951                         cp->c_mtime = tv.tv_sec;
1952                         cp->c_touch_modtime = FALSE;
1953                         cp->c_flag |= C_MODIFIED;
1954                         touchvol = 1;
1955 #if CONFIG_HFS_STD
1956                         /*
1957                          * HFS dates that WE set must be adjusted for DST
1958                          */
1959                         if ((hfsmp->hfs_flags & HFS_STANDARD) && gTimeZone.tz_dsttime) {
1960                                 cp->c_mtime += 3600;
1961                         }
1962 #endif
1963                 }
1964                 if (cp->c_touch_chgtime) {
1965                         cp->c_ctime = tv.tv_sec;
1966                         cp->c_touch_chgtime = FALSE;
1967                         cp->c_flag |= C_MODIFIED;
1968                         touchvol = 1;
1969                 }
1970
1971                 if (cp->c_flag & C_NEEDS_DATEADDED) {
1972                         hfs_write_dateadded (&(cp->c_attr), tv.tv_sec);
1973                         cp->c_flag |= C_MODIFIED;
1974                         /* untwiddle the bit */
1975                         cp->c_flag &= ~C_NEEDS_DATEADDED;
1976                         touchvol = 1;
1977                 }
1978
1979                 /* Touch the volume modtime if needed */
1980                 if (touchvol) {
1981                         MarkVCBDirty(hfsmp);
1982                         HFSTOVCB(hfsmp)->vcbLsMod = tv.tv_sec;
1983                 }
1984         }
1985 }
1986
1987 // Use this if you don't want to check the return code
1988 void hfs_lock_always(cnode_t *cp, enum hfs_locktype locktype)
1989 {
1990         hfs_lock(cp, locktype, HFS_LOCK_ALWAYS);
1991 }
1992
1993 /*
1994  * Lock a cnode.
1995  * N.B. If you add any failure cases, *make* sure hfs_lock_always works
1996  */
1997 int
1998 hfs_lock(struct cnode *cp, enum hfs_locktype locktype, enum hfs_lockflags flags)
1999 {
2000         thread_t thread = current_thread();
2001
2002         if (cp->c_lockowner == thread) {
2003                 /* Only the extents and bitmap files support lock recursion. */
2004                 if ((cp->c_fileid == kHFSExtentsFileID) ||
2005                     (cp->c_fileid == kHFSAllocationFileID)) {
2006                         cp->c_syslockcount++;
2007                 } else {
2008                         panic("hfs_lock: locking against myself!");
2009                 }
2010         } else if (locktype == HFS_SHARED_LOCK) {
2011                 lck_rw_lock_shared(&cp->c_rwlock);
2012                 cp->c_lockowner = HFS_SHARED_OWNER;
2013
2014         } else { /* HFS_EXCLUSIVE_LOCK */
2015                 lck_rw_lock_exclusive(&cp->c_rwlock);
2016                 cp->c_lockowner = thread;
2017
2018                 /* Only the extents and bitmap files support lock recursion. */
2019                 if ((cp->c_fileid == kHFSExtentsFileID) ||
2020                     (cp->c_fileid == kHFSAllocationFileID)) {
2021                         cp->c_syslockcount = 1;
2022                 }
2023         }
2024
2025 #ifdef HFS_CHECK_LOCK_ORDER
2026         /*
2027          * Regular cnodes (non-system files) cannot be locked
2028          * while holding the journal lock or a system file lock.
2029          */
2030         if (!(cp->c_desc.cd_flags & CD_ISMETA) &&
2031             ((cp->c_fileid > kHFSFirstUserCatalogNodeID) || (cp->c_fileid == kHFSRootFolderID))) {
2032                 vnode_t vp = NULLVP;
2033
2034                 /* Find corresponding vnode. */
2035                 if (cp->c_vp != NULLVP && VTOC(cp->c_vp) == cp) {
2036                         vp = cp->c_vp;
2037                 } else if (cp->c_rsrc_vp != NULLVP && VTOC(cp->c_rsrc_vp) == cp) {
2038                         vp = cp->c_rsrc_vp;
2039                 }
2040                 if (vp != NULLVP) {
2041                         struct hfsmount *hfsmp = VTOHFS(vp);
2042
2043                         if (hfsmp->jnl && (journal_owner(hfsmp->jnl) == thread)) {
2044                                 /* This will eventually be a panic here. */
2045                                 printf("hfs_lock: bad lock order (cnode after journal)\n");
2046                         }
2047                         if (hfsmp->hfs_catalog_cp && hfsmp->hfs_catalog_cp->c_lockowner == thread) {
2048                                 panic("hfs_lock: bad lock order (cnode after catalog)");
2049                         }
2050                         if (hfsmp->hfs_attribute_cp && hfsmp->hfs_attribute_cp->c_lockowner == thread) {
2051                                 panic("hfs_lock: bad lock order (cnode after attribute)");
2052                         }
2053                         if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == thread) {
2054                                 panic("hfs_lock: bad lock order (cnode after extents)");
2055                         }
2056                 }
2057         }
2058 #endif /* HFS_CHECK_LOCK_ORDER */
2059
2060         /*
2061          * Skip cnodes for regular files that no longer exist
2062          * (marked deleted, catalog entry gone).
2063          */
2064         if (((flags & HFS_LOCK_ALLOW_NOEXISTS) == 0) &&
2065             ((cp->c_desc.cd_flags & CD_ISMETA) == 0) &&
2066             (cp->c_flag & C_NOEXISTS)) {
2067                 hfs_unlock(cp);
2068                 return (ENOENT);
2069         }
2070         return (0);
2071 }
2072
2073 /*
2074  * Lock a pair of cnodes.
2075  */
2076 int
2077 hfs_lockpair(struct cnode *cp1, struct cnode *cp2, enum hfs_locktype locktype)
2078 {
2079         struct cnode *first, *last;
2080         int error;
2081
2082         /*
2083          * If cnodes match then just lock one.
2084          */
2085         if (cp1 == cp2) {
2086                 return hfs_lock(cp1, locktype, HFS_LOCK_DEFAULT);
2087         }
2088
2089         /*
2090          * Lock in cnode address order.
2091          */
2092         if (cp1 < cp2) {
2093                 first = cp1;
2094                 last = cp2;
2095         } else {
2096                 first = cp2;
2097                 last = cp1;
2098         }
2099
2100         if ( (error = hfs_lock(first, locktype, HFS_LOCK_DEFAULT))) {
2101                 return (error);
2102         }
2103         if ( (error = hfs_lock(last, locktype, HFS_LOCK_DEFAULT))) {
2104                 hfs_unlock(first);
2105                 return (error);
2106         }
2107         return (0);
2108 }
2109
/*
 * Report whether cp1 sorts strictly before cp2 in cnode lock order.
 *
 * Lock order is ascending cnode address, with two special values:
 * NULL sorts before everything else, and the 0xffffffff sentinel sorts
 * after everything else.  Identical pointers are never "in order".
 *
 * Returns 1 if cp1 comes before cp2, otherwise 0.
 */
static int
hfs_isordered(struct cnode *cp1, struct cnode *cp2)
{
	struct cnode * const sentinel = (struct cnode *)0xffffffff;

	if (cp1 == cp2) {
		return (0);
	}
	if (cp1 == NULL || cp2 == sentinel) {
		return (1);
	}
	if (cp2 == NULL || cp1 == sentinel) {
		return (0);
	}

	/* Two ordinary cnodes: the lower address goes first. */
	return ((cp1 < cp2) ? 1 : 0);
}
2127
2128 /*
2129  * Acquire 4 cnode locks.
2130  *   - locked in cnode address order (lesser address first).
2131  *   - all or none of the locks are taken
2132  *   - only one lock taken per cnode (dup cnodes are skipped)
2133  *   - some of the cnode pointers may be null
2134  */
2135 int
2136 hfs_lockfour(struct cnode *cp1, struct cnode *cp2, struct cnode *cp3,
2137              struct cnode *cp4, enum hfs_locktype locktype, struct cnode **error_cnode)
2138 {
2139         struct cnode * a[3];
2140         struct cnode * b[3];
2141         struct cnode * list[4];
2142         struct cnode * tmp;
2143         int i, j, k;
2144         int error;
2145         if (error_cnode) {
2146                 *error_cnode = NULL;
2147         }
2148
2149         if (hfs_isordered(cp1, cp2)) {
2150                 a[0] = cp1; a[1] = cp2;
2151         } else {
2152                 a[0] = cp2; a[1] = cp1;
2153         }
2154         if (hfs_isordered(cp3, cp4)) {
2155                 b[0] = cp3; b[1] = cp4;
2156         } else {
2157                 b[0] = cp4; b[1] = cp3;
2158         }
2159         a[2] = (struct cnode *)0xffffffff;  /* sentinel value */
2160         b[2] = (struct cnode *)0xffffffff;  /* sentinel value */
2161
2162         /*
2163          * Build the lock list, skipping over duplicates
2164          */
2165         for (i = 0, j = 0, k = 0; (i < 2 || j < 2); ) {
2166                 tmp = hfs_isordered(a[i], b[j]) ? a[i++] : b[j++];
2167                 if (k == 0 || tmp != list[k-1])
2168                         list[k++] = tmp;
2169         }
2170
2171         /*
2172          * Now we can lock using list[0 - k].
2173          * Skip over NULL entries.
2174          */
2175         for (i = 0; i < k; ++i) {
2176                 if (list[i])
2177                         if ((error = hfs_lock(list[i], locktype, HFS_LOCK_DEFAULT))) {
2178                                 /* Only stuff error_cnode if requested */
2179                                 if (error_cnode) {
2180                                         *error_cnode = list[i];
2181                                 }
2182                                 /* Drop any locks we acquired. */
2183                                 while (--i >= 0) {
2184                                         if (list[i])
2185                                                 hfs_unlock(list[i]);
2186                                 }
2187                                 return (error);
2188                         }
2189         }
2190         return (0);
2191 }
2192
2193
2194 /*
2195  * Unlock a cnode.
2196  */
2197 void
2198 hfs_unlock(struct cnode *cp)
2199 {
2200         vnode_t rvp = NULLVP;
2201         vnode_t vp = NULLVP;
2202         u_int32_t c_flag;
2203
2204         /*
2205          * Only the extents and bitmap file's support lock recursion.
2206          */
2207         if ((cp->c_fileid == kHFSExtentsFileID) ||
2208             (cp->c_fileid == kHFSAllocationFileID)) {
2209                 if (--cp->c_syslockcount > 0) {
2210                         return;
2211                 }
2212         }
2213
2214         const thread_t thread = current_thread();
2215
2216         if (cp->c_lockowner == thread) {
2217                 c_flag = cp->c_flag;
2218
2219                 // If we have the truncate lock, we must defer the puts
2220                 if (cp->c_truncatelockowner == thread) {
2221                         if (ISSET(c_flag, C_NEED_DVNODE_PUT)
2222                                 && !cp->c_need_dvnode_put_after_truncate_unlock) {
2223                                 CLR(c_flag, C_NEED_DVNODE_PUT);
2224                                 cp->c_need_dvnode_put_after_truncate_unlock = true;
2225                         }
2226                         if (ISSET(c_flag, C_NEED_RVNODE_PUT)
2227                                 && !cp->c_need_rvnode_put_after_truncate_unlock) {
2228                                 CLR(c_flag, C_NEED_RVNODE_PUT);
2229                                 cp->c_need_rvnode_put_after_truncate_unlock = true;
2230                         }
2231                 }
2232
2233                 CLR(cp->c_flag, (C_NEED_DATA_SETSIZE | C_NEED_RSRC_SETSIZE
2234                                                  | C_NEED_DVNODE_PUT | C_NEED_RVNODE_PUT));
2235
2236                 if (c_flag & (C_NEED_DVNODE_PUT | C_NEED_DATA_SETSIZE)) {
2237                 vp = cp->c_vp;
2238                 }
2239                 if (c_flag & (C_NEED_RVNODE_PUT | C_NEED_RSRC_SETSIZE)) {
2240                 rvp = cp->c_rsrc_vp;
2241                 }
2242
2243             cp->c_lockowner = NULL;
2244             lck_rw_unlock_exclusive(&cp->c_rwlock);
2245         } else {
2246             lck_rw_unlock_shared(&cp->c_rwlock);
2247         }
2248
2249         /* Perform any vnode post processing after cnode lock is dropped. */
2250         if (vp) {
2251                 if (c_flag & C_NEED_DATA_SETSIZE) {
2252                         ubc_setsize(vp, VTOF(vp)->ff_size);
2253 #if HFS_COMPRESSION
2254                         /*
2255                          * If this is a compressed file, we need to reset the
2256                          * compression state.  We will have set the size to zero
2257                          * above and it will get fixed up later (in exactly the
2258                          * same way that new vnodes are fixed up).  Note that we
2259                          * should only be able to get here if the truncate lock is
2260                          * held exclusively and so we do the reset when that's
2261                          * unlocked.
2262                          */
2263                         decmpfs_cnode *dp = VTOCMP(vp);
2264                         if (dp && decmpfs_cnode_get_vnode_state(dp) != FILE_TYPE_UNKNOWN)
2265                                 cp->c_need_decmpfs_reset = true;
2266 #endif
2267                 }
2268                 if (c_flag & C_NEED_DVNODE_PUT)
2269                         vnode_put(vp);
2270         }
2271         if (rvp) {
2272                 if (c_flag & C_NEED_RSRC_SETSIZE)
2273                         ubc_setsize(rvp, VTOF(rvp)->ff_size);
2274                 if (c_flag & C_NEED_RVNODE_PUT)
2275                         vnode_put(rvp);
2276         }
2277 }
2278
/*
 * Release the cnode locks on two cnodes.
 *
 * If both arguments refer to the same cnode it is unlocked only once.
 */
void
hfs_unlockpair(struct cnode *cp1, struct cnode *cp2)
{
	hfs_unlock(cp1);

	/* Same cnode passed twice: it has already been unlocked. */
	if (cp1 == cp2) {
		return;
	}

	hfs_unlock(cp2);
}
2289
2290 /*
2291  * Unlock a group of cnodes.
2292  */
2293 void
2294 hfs_unlockfour(struct cnode *cp1, struct cnode *cp2, struct cnode *cp3, struct cnode *cp4)
2295 {
2296         struct cnode * list[4];
2297         int i, k = 0;
2298
2299         if (cp1) {
2300                 hfs_unlock(cp1);
2301                 list[k++] = cp1;
2302         }
2303         if (cp2) {
2304                 for (i = 0; i < k; ++i) {
2305                         if (list[i] == cp2)
2306                                 goto skip1;
2307                 }
2308                 hfs_unlock(cp2);
2309                 list[k++] = cp2;
2310         }
2311 skip1:
2312         if (cp3) {
2313                 for (i = 0; i < k; ++i) {
2314                         if (list[i] == cp3)
2315                                 goto skip2;
2316                 }
2317                 hfs_unlock(cp3);
2318                 list[k++] = cp3;
2319         }
2320 skip2:
2321         if (cp4) {
2322                 for (i = 0; i < k; ++i) {
2323                         if (list[i] == cp4)
2324                                 return;
2325                 }
2326                 hfs_unlock(cp4);
2327         }
2328 }
2329
2330
2331 /*
2332  * Protect a cnode against a truncation.
2333  *
2334  * Used mainly by read/write since they don't hold the
2335  * cnode lock across calls to the cluster layer.
2336  *
2337  * The process doing a truncation must take the lock
2338  * exclusive. The read/write processes can take it
2339  * shared.  The locktype argument is the same as supplied to
2340  * hfs_lock.
2341  */
2342 void
2343 hfs_lock_truncate(struct cnode *cp, enum hfs_locktype locktype, enum hfs_lockflags flags)
2344 {
2345         thread_t thread = current_thread();
2346
2347         if (cp->c_truncatelockowner == thread) {
2348                 /*
2349                  * Ignore grabbing the lock if it the current thread already
2350                  * holds exclusive lock.
2351                  *
2352                  * This is needed on the hfs_vnop_pagein path where we need to ensure
2353                  * the file does not change sizes while we are paging in.  However,
2354                  * we may already hold the lock exclusive due to another
2355                  * VNOP from earlier in the call stack.  So if we already hold
2356                  * the truncate lock exclusive, allow it to proceed, but ONLY if
2357                  * it's in the recursive case.
2358                  */
2359                 if ((flags & HFS_LOCK_SKIP_IF_EXCLUSIVE) == 0) {
2360                         panic("hfs_lock_truncate: cnode %p locked!", cp);
2361                 }
2362         } else if (locktype == HFS_SHARED_LOCK) {
2363                 lck_rw_lock_shared(&cp->c_truncatelock);
2364                 cp->c_truncatelockowner = HFS_SHARED_OWNER;
2365         } else { /* HFS_EXCLUSIVE_LOCK */
2366                 lck_rw_lock_exclusive(&cp->c_truncatelock);
2367                 cp->c_truncatelockowner = thread;
2368         }
2369 }
2370
2371
2372 /*
2373  * Attempt to get the truncate lock.  If it cannot be acquired, error out.
2374  * This function is needed in the degenerate hfs_vnop_pagein during force unmount
2375  * case.  To prevent deadlocks while a VM copy object is moving pages, HFS vnop pagein will
2376  * temporarily need to disable V2 semantics.
2377  */
2378 int hfs_try_trunclock (struct cnode *cp, enum hfs_locktype locktype, enum hfs_lockflags flags)
2379 {
2380         thread_t thread = current_thread();
2381         boolean_t didlock = false;
2382
2383         if (cp->c_truncatelockowner == thread) {
2384                 /*
2385                  * Ignore grabbing the lock if the current thread already
2386                  * holds exclusive lock.
2387                  *
2388                  * This is needed on the hfs_vnop_pagein path where we need to ensure
2389                  * the file does not change sizes while we are paging in.  However,
2390                  * we may already hold the lock exclusive due to another
2391                  * VNOP from earlier in the call stack.  So if we already hold
2392                  * the truncate lock exclusive, allow it to proceed, but ONLY if
2393                  * it's in the recursive case.
2394                  */
2395                 if ((flags & HFS_LOCK_SKIP_IF_EXCLUSIVE) == 0) {
2396                         panic("hfs_lock_truncate: cnode %p locked!", cp);
2397                 }
2398         } else if (locktype == HFS_SHARED_LOCK) {
2399                 didlock = lck_rw_try_lock(&cp->c_truncatelock, LCK_RW_TYPE_SHARED);
2400                 if (didlock) {
2401                         cp->c_truncatelockowner = HFS_SHARED_OWNER;
2402                 }
2403         } else { /* HFS_EXCLUSIVE_LOCK */
2404                 didlock = lck_rw_try_lock (&cp->c_truncatelock, LCK_RW_TYPE_EXCLUSIVE);
2405                 if (didlock) {
2406                         cp->c_truncatelockowner = thread;
2407                 }
2408         }
2409
2410         return didlock;
2411 }
2412
2413
2414 /*
2415  * Unlock the truncate lock, which protects against size changes.
2416  *
2417  * If HFS_LOCK_SKIP_IF_EXCLUSIVE flag was set, it means that a previous
2418  * hfs_lock_truncate() might have skipped grabbing a lock because
2419  * the current thread was already holding the lock exclusive and
2420  * we may need to return from this function without actually unlocking
2421  * the truncate lock.
2422  */
2423 void
2424 hfs_unlock_truncate(struct cnode *cp, enum hfs_lockflags flags)
2425 {
2426         thread_t thread = current_thread();
2427
2428         /*
2429          * If HFS_LOCK_SKIP_IF_EXCLUSIVE is set in the flags AND the current
2430          * lock owner of the truncate lock is our current thread, then
2431          * we must have skipped taking the lock earlier by in
2432          * hfs_lock_truncate() by setting HFS_LOCK_SKIP_IF_EXCLUSIVE in the
2433          * flags (as the current thread was current lock owner).
2434          *
2435          * If HFS_LOCK_SKIP_IF_EXCLUSIVE is not set (most of the time) then
2436          * we check the lockowner field to infer whether the lock was taken
2437          * exclusively or shared in order to know what underlying lock
2438          * routine to call.
2439          */
2440         if (flags & HFS_LOCK_SKIP_IF_EXCLUSIVE) {
2441                 if (cp->c_truncatelockowner == thread) {
2442                         return;
2443                 }
2444         }
2445
2446         /* HFS_LOCK_EXCLUSIVE */
2447         if (thread == cp->c_truncatelockowner) {
2448                 vnode_t vp = NULL, rvp = NULL;
2449
2450                 /*
2451                  * Deal with any pending set sizes.  We need to call
2452                  * ubc_setsize before we drop the exclusive lock.  Ideally,
2453                  * hfs_unlock should be called before hfs_unlock_truncate but
2454                  * that's a lot to ask people to remember :-)
2455                  */
2456                 if (cp->c_lockowner == thread
2457                         && ISSET(cp->c_flag, C_NEED_DATA_SETSIZE | C_NEED_RSRC_SETSIZE)) {
2458                         // hfs_unlock will do the setsize calls for us
2459                         hfs_unlock(cp);
2460                         hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);
2461                 }
2462
2463                 if (cp->c_need_dvnode_put_after_truncate_unlock) {
2464                         vp = cp->c_vp;
2465                         cp->c_need_dvnode_put_after_truncate_unlock = false;
2466                 }
2467                 if (cp->c_need_rvnode_put_after_truncate_unlock) {
2468                         rvp = cp->c_rsrc_vp;
2469                         cp->c_need_rvnode_put_after_truncate_unlock = false;
2470                 }
2471
2472 #if HFS_COMPRESSION
2473                 bool reset_decmpfs = cp->c_need_decmpfs_reset;
2474                 cp->c_need_decmpfs_reset = false;
2475 #endif
2476
2477                 cp->c_truncatelockowner = NULL;
2478                 lck_rw_unlock_exclusive(&cp->c_truncatelock);
2479
2480 #if HFS_COMPRESSION
2481                 if (reset_decmpfs) {
2482                         decmpfs_cnode *dp = cp->c_decmp;
2483                         if (dp && decmpfs_cnode_get_vnode_state(dp) != FILE_TYPE_UNKNOWN)
2484                                 decmpfs_cnode_set_vnode_state(dp, FILE_TYPE_UNKNOWN, 0);
2485                 }
2486 #endif
2487
2488                 // Do the puts now
2489                 if (vp)
2490                         vnode_put(vp);
2491                 if (rvp)
2492                         vnode_put(rvp);
2493         } else { /* HFS_LOCK_SHARED */
2494                 lck_rw_unlock_shared(&cp->c_truncatelock);
2495         }
2496 }