/*
 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* @(#)hfs_vfsutils.c 4.0
 *
 * (c) 1997-2002 Apple Inc. All Rights Reserved
 *
 * hfs_vfsutils.c -- Routines that go between the HFS layer and the VFS.
 *
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/stat.h>
#include <sys/mount.h>
#include <sys/vm.h>
#include <sys/buf.h>
#include <sys/ubc.h>
#include <sys/unistd.h>
#include <sys/utfconv.h>
#include <sys/kauth.h>
#include <sys/fcntl.h>
#include <sys/fsctl.h>
#include <sys/mount.h>
#include <sys/sysctl.h>
#include <kern/clock.h>
#include <stdbool.h>
#include <miscfs/specfs/specdev.h>
#include <libkern/OSAtomic.h>
#include <IOKit/IOLib.h>

/* for parsing boot-args */
#include <pexpert/pexpert.h>
#include <kern/kalloc.h>
#include <kern/zalloc.h>

#include "hfs_iokit.h"
#include "hfs.h"
#include "hfs_catalog.h"
#include "hfs_dbg.h"
#include "hfs_mount.h"
#include "hfs_endian.h"
#include "hfs_cnode.h"
#include "hfs_fsctl.h"
#include "hfs_cprotect.h"

#include "FileMgrInternal.h"
#include "BTreesInternal.h"
#include "HFSUnicodeWrappers.h"

/* Enable/disable debugging code for live volume resizing, defined in hfs_resize.c */
extern int hfs_resize_debug;

static void ReleaseMetaFileVNode(struct vnode *vp);
static int hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_args);

static u_int32_t hfs_hotfile_freeblocks(struct hfsmount *);
static void hfs_thaw_locked(struct hfsmount *hfsmp);

#define HFS_MOUNT_DEBUG 1

//*******************************************************************************
// Note: Finder information in the HFS/HFS+ metadata is considered opaque and
// hence is not in the right byte order on little endian machines. It is
// the responsibility of the Finder and other clients to swap the data.
//*******************************************************************************

//*******************************************************************************
// Routine: hfs_MountHFSVolume
//
//
//*******************************************************************************
unsigned char hfs_catname[] = "Catalog B-tree";
unsigned char hfs_extname[] = "Extents B-tree";
unsigned char hfs_vbmname[] = "Volume Bitmap";
unsigned char hfs_attrname[] = "Attribute B-tree";
unsigned char hfs_startupname[] = "Startup File";

#if CONFIG_HFS_STD
OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb,
        __unused struct proc *p)
{
    ExtendedVCB *vcb = HFSTOVCB(hfsmp);
    int error;
    ByteCount utf8chars;
    struct cat_desc cndesc;
    struct cat_attr cnattr;
    struct cat_fork fork;
    int newvnode_flags = 0;

    /* Block size must be a multiple of 512 */
    if (SWAP_BE32(mdb->drAlBlkSiz) == 0 ||
        (SWAP_BE32(mdb->drAlBlkSiz) & 0x01FF) != 0)
        return (EINVAL);

    /* don't mount a writable volume if it's dirty; it must be cleaned by fsck_hfs */
    if (((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) &&
        ((SWAP_BE16(mdb->drAtrb) & kHFSVolumeUnmountedMask) == 0)) {
        return (EINVAL);
    }
    hfsmp->hfs_flags |= HFS_STANDARD;
    /*
     * The MDB seems OK: transfer info from it into VCB
     * Note - the VCB starts out clear (all zeros)
     *
     */
    vcb->vcbSigWord = SWAP_BE16 (mdb->drSigWord);
    vcb->hfs_itime = to_bsd_time(LocalToUTC(SWAP_BE32(mdb->drCrDate)));
    vcb->localCreateDate = SWAP_BE32 (mdb->drCrDate);
    vcb->vcbLsMod = to_bsd_time(LocalToUTC(SWAP_BE32(mdb->drLsMod)));
    vcb->vcbAtrb = SWAP_BE16 (mdb->drAtrb);
    vcb->vcbNmFls = SWAP_BE16 (mdb->drNmFls);
    vcb->vcbVBMSt = SWAP_BE16 (mdb->drVBMSt);
    vcb->nextAllocation = SWAP_BE16 (mdb->drAllocPtr);
    vcb->totalBlocks = SWAP_BE16 (mdb->drNmAlBlks);
    vcb->allocLimit = vcb->totalBlocks;
    vcb->blockSize = SWAP_BE32 (mdb->drAlBlkSiz);
    vcb->vcbClpSiz = SWAP_BE32 (mdb->drClpSiz);
    vcb->vcbAlBlSt = SWAP_BE16 (mdb->drAlBlSt);
    vcb->vcbNxtCNID = SWAP_BE32 (mdb->drNxtCNID);
    vcb->freeBlocks = SWAP_BE16 (mdb->drFreeBks);
    vcb->vcbVolBkUp = to_bsd_time(LocalToUTC(SWAP_BE32(mdb->drVolBkUp)));
    vcb->vcbWrCnt = SWAP_BE32 (mdb->drWrCnt);
    vcb->vcbNmRtDirs = SWAP_BE16 (mdb->drNmRtDirs);
    vcb->vcbFilCnt = SWAP_BE32 (mdb->drFilCnt);
    vcb->vcbDirCnt = SWAP_BE32 (mdb->drDirCnt);
    bcopy(mdb->drFndrInfo, vcb->vcbFndrInfo, sizeof(vcb->vcbFndrInfo));
    if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
        vcb->vcbWrCnt++;    /* Compensate for write of MDB on last flush */
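
    /*
     * Note that the allocation-block counters above (drNmAlBlks, drFreeBks,
     * drAllocPtr) are 16-bit MDB fields, which is why they are swapped with
     * SWAP_BE16: a classic HFS volume can address at most 65,535 allocation
     * blocks, and larger disks scale only through a bigger drAlBlkSiz.
     */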

    /* convert hfs encoded name into UTF-8 string */
    error = hfs_to_utf8(vcb, mdb->drVN, NAME_MAX, &utf8chars, vcb->vcbVN);
    /*
     * When an HFS name cannot be encoded with the current
     * volume encoding we use MacRoman as a fallback.
     */
    if (error || (utf8chars == 0)) {
        error = mac_roman_to_utf8(mdb->drVN, NAME_MAX, &utf8chars, vcb->vcbVN);
        /* If we fail to encode to UTF8 from Mac Roman, the name is bad. Deny the mount */
        if (error) {
            goto MtVolErr;
        }
    }

    hfsmp->hfs_logBlockSize = BestBlockSizeFit(vcb->blockSize, MAXBSIZE, hfsmp->hfs_logical_block_size);
    vcb->vcbVBMIOSize = kHFSBlockSize;

    /* Generate the partition-based AVH location */
    hfsmp->hfs_partition_avh_sector = HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size,
                                                     hfsmp->hfs_logical_block_count);

    /* HFS standard is read-only, so just stuff the FS location in here, too */
    hfsmp->hfs_fs_avh_sector = hfsmp->hfs_partition_avh_sector;
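
    /*
     * As with HFS+ below, the alternate volume header (AVH) lives 1024 bytes
     * before the end of the partition, so for 512-byte logical blocks
     * HFS_ALT_SECTOR() resolves to (hfs_logical_block_count - 2). A worked
     * example: a 1 GiB partition has 2,097,152 512-byte sectors, putting the
     * AVH at sector 2,097,150.
     */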

    bzero(&cndesc, sizeof(cndesc));
    cndesc.cd_parentcnid = kHFSRootParentID;
    cndesc.cd_flags |= CD_ISMETA;
    bzero(&cnattr, sizeof(cnattr));
    cnattr.ca_linkcount = 1;
    cnattr.ca_mode = S_IFREG;
    bzero(&fork, sizeof(fork));

    /*
     * Set up Extents B-tree vnode
     */
    cndesc.cd_nameptr = hfs_extname;
    cndesc.cd_namelen = strlen((char *)hfs_extname);
    cndesc.cd_cnid = cnattr.ca_fileid = kHFSExtentsFileID;
    fork.cf_size = SWAP_BE32(mdb->drXTFlSize);
    fork.cf_blocks = fork.cf_size / vcb->blockSize;
    fork.cf_clump = SWAP_BE32(mdb->drXTClpSiz);
    fork.cf_vblocks = 0;
    fork.cf_extents[0].startBlock = SWAP_BE16(mdb->drXTExtRec[0].startBlock);
    fork.cf_extents[0].blockCount = SWAP_BE16(mdb->drXTExtRec[0].blockCount);
    fork.cf_extents[1].startBlock = SWAP_BE16(mdb->drXTExtRec[1].startBlock);
    fork.cf_extents[1].blockCount = SWAP_BE16(mdb->drXTExtRec[1].blockCount);
    fork.cf_extents[2].startBlock = SWAP_BE16(mdb->drXTExtRec[2].startBlock);
    fork.cf_extents[2].blockCount = SWAP_BE16(mdb->drXTExtRec[2].blockCount);
    cnattr.ca_blocks = fork.cf_blocks;

    error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork,
                            &hfsmp->hfs_extents_vp, &newvnode_flags);
    if (error) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfs (std): error creating Ext Vnode (%d) \n", error);
        }
        goto MtVolErr;
    }
    error = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_extents_vp),
                                     (KeyCompareProcPtr)CompareExtentKeys));
    if (error) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfs (std): error opening Ext Vnode (%d) \n", error);
        }
        hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
        goto MtVolErr;
    }
    hfsmp->hfs_extents_cp = VTOC(hfsmp->hfs_extents_vp);

    /*
     * Set up Catalog B-tree vnode...
     */
    cndesc.cd_nameptr = hfs_catname;
    cndesc.cd_namelen = strlen((char *)hfs_catname);
    cndesc.cd_cnid = cnattr.ca_fileid = kHFSCatalogFileID;
    fork.cf_size = SWAP_BE32(mdb->drCTFlSize);
    fork.cf_blocks = fork.cf_size / vcb->blockSize;
    fork.cf_clump = SWAP_BE32(mdb->drCTClpSiz);
    fork.cf_vblocks = 0;
    fork.cf_extents[0].startBlock = SWAP_BE16(mdb->drCTExtRec[0].startBlock);
    fork.cf_extents[0].blockCount = SWAP_BE16(mdb->drCTExtRec[0].blockCount);
    fork.cf_extents[1].startBlock = SWAP_BE16(mdb->drCTExtRec[1].startBlock);
    fork.cf_extents[1].blockCount = SWAP_BE16(mdb->drCTExtRec[1].blockCount);
    fork.cf_extents[2].startBlock = SWAP_BE16(mdb->drCTExtRec[2].startBlock);
    fork.cf_extents[2].blockCount = SWAP_BE16(mdb->drCTExtRec[2].blockCount);
    cnattr.ca_blocks = fork.cf_blocks;

    error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork,
                            &hfsmp->hfs_catalog_vp, &newvnode_flags);
    if (error) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfs (std): error creating catalog Vnode (%d) \n", error);
        }
        hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
        goto MtVolErr;
    }
    error = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_catalog_vp),
                                     (KeyCompareProcPtr)CompareCatalogKeys));
    if (error) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfs (std): error opening catalog Vnode (%d) \n", error);
        }
        hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
        hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
        goto MtVolErr;
    }
    hfsmp->hfs_catalog_cp = VTOC(hfsmp->hfs_catalog_vp);

    /*
     * Set up dummy Allocation file vnode (used only for locking bitmap)
     */
    cndesc.cd_nameptr = hfs_vbmname;
    cndesc.cd_namelen = strlen((char *)hfs_vbmname);
    cndesc.cd_cnid = cnattr.ca_fileid = kHFSAllocationFileID;
    bzero(&fork, sizeof(fork));
    cnattr.ca_blocks = 0;

    error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork,
                            &hfsmp->hfs_allocation_vp, &newvnode_flags);
    if (error) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfs (std): error creating bitmap Vnode (%d) \n", error);
        }
        hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
        hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
        goto MtVolErr;
    }
    hfsmp->hfs_allocation_cp = VTOC(hfsmp->hfs_allocation_vp);

    /* mark the volume dirty (clear clean unmount bit) */
    vcb->vcbAtrb &= ~kHFSVolumeUnmountedMask;

    if (error == noErr) {
        error = cat_idlookup(hfsmp, kHFSRootFolderID, 0, 0, NULL, NULL, NULL);
        if (error && HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfs (std): error looking up root folder (%d) \n", error);
        }
    }

    if (error == noErr) {
        /* If the disk isn't write protected.. */
        if ( !(vcb->vcbAtrb & kHFSVolumeHardwareLockMask)) {
            MarkVCBDirty (vcb);     // mark VCB dirty so it will be written
        }
    }

    /*
     * all done with system files so we can unlock now...
     */
    hfs_unlock(VTOC(hfsmp->hfs_allocation_vp));
    hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
    hfs_unlock(VTOC(hfsmp->hfs_extents_vp));

    if (error == noErr) {
        /* If successful, then we can just return once we've unlocked the cnodes */
        return error;
    }

    //-- Release any resources allocated so far before exiting with an error:
MtVolErr:
    hfsUnmount(hfsmp, NULL);

    return (error);
}

#endif

//*******************************************************************************
//
// Sanity check Volume Header Block:
//   Input argument *vhp is a pointer to a HFSPlusVolumeHeader block that has
//   not been endian-swapped and represents the on-disk contents of this sector.
//   This routine will not change the endianness of the vhp block.
//
//*******************************************************************************
OSErr hfs_ValidateHFSPlusVolumeHeader(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp)
{
    u_int16_t signature;
    u_int16_t hfs_version;
    u_int32_t blockSize;

    signature = SWAP_BE16(vhp->signature);
    hfs_version = SWAP_BE16(vhp->version);

    if (signature == kHFSPlusSigWord) {
        if (hfs_version != kHFSPlusVersion) {
            printf("hfs_ValidateHFSPlusVolumeHeader: invalid HFS+ version: %x\n", hfs_version);
            return (EINVAL);
        }
    } else if (signature == kHFSXSigWord) {
        if (hfs_version != kHFSXVersion) {
            printf("hfs_ValidateHFSPlusVolumeHeader: invalid HFSX version: %x\n", hfs_version);
            return (EINVAL);
        }
    } else {
        /* Removed the printf for an invalid HFS+ signature because it gives
         * a false error for a UFS root volume
         */
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_ValidateHFSPlusVolumeHeader: unknown Volume Signature : %x\n", signature);
        }
        return (EINVAL);
    }

    /* Block size must be at least 512 and a power of 2 */
    blockSize = SWAP_BE32(vhp->blockSize);
    if (blockSize < 512 || !powerof2(blockSize)) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_ValidateHFSPlusVolumeHeader: invalid blocksize (%d) \n", blockSize);
        }
        return (EINVAL);
    }
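
    /*
     * For example, 512, 4096, and 65536 all pass the check above, while 0,
     * 256 (smaller than a sector), and 24576 (not a power of two) are
     * rejected.
     */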

    if (blockSize < hfsmp->hfs_logical_block_size) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_ValidateHFSPlusVolumeHeader: invalid physical blocksize (%d), hfs_logical_blocksize (%d) \n",
                   blockSize, hfsmp->hfs_logical_block_size);
        }
        return (EINVAL);
    }
    return 0;
}

//*******************************************************************************
// Routine: hfs_MountHFSPlusVolume
//
//
//*******************************************************************************

OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
        off_t embeddedOffset, u_int64_t disksize, __unused struct proc *p, void *args, kauth_cred_t cred)
{
    register ExtendedVCB *vcb;
    struct cat_desc cndesc;
    struct cat_attr cnattr;
    struct cat_fork cfork;
    u_int32_t blockSize;
    daddr64_t spare_sectors;
    struct BTreeInfoRec btinfo;
    u_int16_t signature;
    u_int16_t hfs_version;
    int newvnode_flags = 0;
    int i;
    OSErr retval;
    char converted_volname[256];
    size_t volname_length = 0;
    size_t conv_volname_length = 0;
    bool async_bitmap_scan;

    signature = SWAP_BE16(vhp->signature);
    hfs_version = SWAP_BE16(vhp->version);

    retval = hfs_ValidateHFSPlusVolumeHeader(hfsmp, vhp);
    if (retval)
        return retval;

    if (signature == kHFSXSigWord) {
        /* The in-memory signature is always 'H+'. */
        signature = kHFSPlusSigWord;
        hfsmp->hfs_flags |= HFS_X;
    }

    blockSize = SWAP_BE32(vhp->blockSize);
    /* don't mount a writable volume if it's dirty; it must be cleaned by fsck_hfs */
    if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0 && hfsmp->jnl == NULL &&
        (SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) == 0) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfsplus: cannot mount dirty non-journaled volumes\n");
        }
        return (EINVAL);
    }

    /* Make sure we can live with the physical block size. */
    if ((disksize & (hfsmp->hfs_logical_block_size - 1)) ||
        (embeddedOffset & (hfsmp->hfs_logical_block_size - 1))) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfsplus: hfs_logical_blocksize (%d) \n",
                   hfsmp->hfs_logical_block_size);
        }
        return (ENXIO);
    }

    /*
     * If the allocation block size is less than the physical block size,
     * the same data could be cached in two places, which leads to corruption.
     *
     * HFS Plus reserves one allocation block for the Volume Header.
     * If the physical size is larger, then when we read the volume header,
     * we will also end up reading in the next allocation block(s).
     * If those other allocation block(s) are modified, and then the volume
     * header is modified, the write of the volume header's buffer will write
     * out the old contents of the other allocation blocks.
     *
     * We assume that the physical block size is the same as the logical block
     * size. The physical block size value is used to round down the offsets
     * for reading and writing the primary and alternate volume headers.
     *
     * The same logic to ensure a good hfs_physical_block_size is also in
     * hfs_mountfs so that hfs_mountfs, hfs_MountHFSPlusVolume and
     * later are doing the I/Os using the same block size.
     */
    if (blockSize < hfsmp->hfs_physical_block_size) {
        hfsmp->hfs_physical_block_size = hfsmp->hfs_logical_block_size;
        hfsmp->hfs_log_per_phys = 1;
    }

    /*
     * The VolumeHeader seems OK: transfer info from it into VCB
     * Note - the VCB starts out clear (all zeros)
     */
    vcb = HFSTOVCB(hfsmp);

    vcb->vcbSigWord = signature;
    vcb->vcbJinfoBlock = SWAP_BE32(vhp->journalInfoBlock);
    vcb->vcbLsMod = to_bsd_time(SWAP_BE32(vhp->modifyDate));
    vcb->vcbAtrb = SWAP_BE32(vhp->attributes);
    vcb->vcbClpSiz = SWAP_BE32(vhp->rsrcClumpSize);
    vcb->vcbNxtCNID = SWAP_BE32(vhp->nextCatalogID);
    vcb->vcbVolBkUp = to_bsd_time(SWAP_BE32(vhp->backupDate));
    vcb->vcbWrCnt = SWAP_BE32(vhp->writeCount);
    vcb->vcbFilCnt = SWAP_BE32(vhp->fileCount);
    vcb->vcbDirCnt = SWAP_BE32(vhp->folderCount);

    /* copy 32 bytes of Finder info */
    bcopy(vhp->finderInfo, vcb->vcbFndrInfo, sizeof(vhp->finderInfo));

    vcb->vcbAlBlSt = 0;     /* hfs+ allocation blocks start at first block of volume */
    if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
        vcb->vcbWrCnt++;    /* compensate for write of Volume Header on last flush */

    /* Now fill in the Extended VCB info */
    vcb->nextAllocation = SWAP_BE32(vhp->nextAllocation);
    vcb->totalBlocks = SWAP_BE32(vhp->totalBlocks);
    vcb->allocLimit = vcb->totalBlocks;
    vcb->freeBlocks = SWAP_BE32(vhp->freeBlocks);
    vcb->blockSize = blockSize;
    vcb->encodingsBitmap = SWAP_BE64(vhp->encodingsBitmap);
    vcb->localCreateDate = SWAP_BE32(vhp->createDate);

    vcb->hfsPlusIOPosOffset = embeddedOffset;

    /* Default to no free block reserve */
    vcb->reserveBlocks = 0;

    /*
     * Update the logical block size in the mount struct
     * (currently set up from the wrapper MDB) using the
     * new blocksize value:
     */
    hfsmp->hfs_logBlockSize = BestBlockSizeFit(vcb->blockSize, MAXBSIZE, hfsmp->hfs_logical_block_size);
    vcb->vcbVBMIOSize = min(vcb->blockSize, MAXPHYSIO);

    /*
     * Validate and initialize the location of the alternate volume header.
     *
     * Note that there may be spare sectors beyond the end of the filesystem that still
     * belong to our partition.
     */

    spare_sectors = hfsmp->hfs_logical_block_count -
                    (((daddr64_t)vcb->totalBlocks * blockSize) /
                     hfsmp->hfs_logical_block_size);

    /*
     * Differentiate between "innocuous" spare sectors and the more unusual
     * degenerate case:
     *
     * *** Innocuous spare sectors exist if:
     *
     * A) the number of bytes assigned to the partition (by multiplying logical
     * block size * logical block count) is greater than the filesystem size
     * (by multiplying allocation block count and allocation block size)
     *
     * and
     *
     * B) the remainder is less than the size of a full allocation block's worth of bytes.
     *
     * This handles the normal case where there may be a few extra sectors, but the two
     * are fundamentally in sync.
     *
     * *** Degenerate spare sectors exist if:
     * A) The number of bytes assigned to the partition (by multiplying logical
     * block size * logical block count) is greater than the filesystem size
     * (by multiplying allocation block count and block size).
     *
     * and
     *
     * B) the remainder is greater than a full allocation block's worth of bytes.
     * In this case, a smaller file system exists in a larger partition.
     * This can happen in various ways, including when the volume is resized but the
     * partition is yet to be resized. Under this condition, we have to assume that
     * partition management software may resize the partition to match
     * the file system size in the future. Therefore we should update the
     * alternate volume header at two locations on the disk:
     *   a. 1024 bytes before the end of the partition
     *   b. 1024 bytes before the end of the file system
     */

    if (spare_sectors > (daddr64_t)(blockSize / hfsmp->hfs_logical_block_size)) {
        /*
         * Handle the degenerate case above. FS < partition size.
         * AVH located at 1024 bytes from the end of the partition
         */
        hfsmp->hfs_partition_avh_sector = (hfsmp->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
                HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, hfsmp->hfs_logical_block_count);

        /* AVH located at 1024 bytes from the end of the filesystem */
        hfsmp->hfs_fs_avh_sector = (hfsmp->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
                HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size,
                               (((daddr64_t)vcb->totalBlocks * blockSize) / hfsmp->hfs_logical_block_size));
    }
    else {
        /* Innocuous spare sectors; Partition & FS notion are in sync */
        hfsmp->hfs_partition_avh_sector = (hfsmp->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
                HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, hfsmp->hfs_logical_block_count);

        hfsmp->hfs_fs_avh_sector = hfsmp->hfs_partition_avh_sector;
    }
    if (hfs_resize_debug) {
        printf ("hfs_MountHFSPlusVolume: partition_avh_sector=%qu, fs_avh_sector=%qu\n",
                hfsmp->hfs_partition_avh_sector, hfsmp->hfs_fs_avh_sector);
    }
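
    /*
     * A worked example of the test above: with 512-byte logical blocks and
     * 4096-byte allocation blocks, a partition of 1,000,000 sectors holding
     * a 100,000-block filesystem yields
     *     spare_sectors = 1,000,000 - (100,000 * 4096) / 512 = 200,000,
     * which exceeds 4096/512 = 8 sectors, so the degenerate path is taken
     * and the two AVH locations diverge. Had the filesystem spanned 124,999
     * allocation blocks, spare_sectors would be exactly 8 and both AVH
     * locations would coincide.
     */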

    bzero(&cndesc, sizeof(cndesc));
    cndesc.cd_parentcnid = kHFSRootParentID;
    cndesc.cd_flags |= CD_ISMETA;
    bzero(&cnattr, sizeof(cnattr));
    cnattr.ca_linkcount = 1;
    cnattr.ca_mode = S_IFREG;

    /*
     * Set up Extents B-tree vnode
     */
    cndesc.cd_nameptr = hfs_extname;
    cndesc.cd_namelen = strlen((char *)hfs_extname);
    cndesc.cd_cnid = cnattr.ca_fileid = kHFSExtentsFileID;

    cfork.cf_size    = SWAP_BE64 (vhp->extentsFile.logicalSize);
    cfork.cf_new_size= 0;
    cfork.cf_clump   = SWAP_BE32 (vhp->extentsFile.clumpSize);
    cfork.cf_blocks  = SWAP_BE32 (vhp->extentsFile.totalBlocks);
    cfork.cf_vblocks = 0;
    cnattr.ca_blocks = cfork.cf_blocks;
    for (i = 0; i < kHFSPlusExtentDensity; i++) {
        cfork.cf_extents[i].startBlock =
                SWAP_BE32 (vhp->extentsFile.extents[i].startBlock);
        cfork.cf_extents[i].blockCount =
                SWAP_BE32 (vhp->extentsFile.extents[i].blockCount);
    }
    retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
                             &hfsmp->hfs_extents_vp, &newvnode_flags);
    if (retval)
    {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting extentoverflow BT\n", retval);
        }
        goto ErrorExit;
    }

    hfsmp->hfs_extents_cp = VTOC(hfsmp->hfs_extents_vp);

    retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_extents_vp),
                                      (KeyCompareProcPtr) CompareExtentKeysPlus));

    hfs_unlock(hfsmp->hfs_extents_cp);

    if (retval)
    {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfsplus: BTOpenPath returned (%d) getting extentoverflow BT\n", retval);
        }
        goto ErrorExit;
    }
    /*
     * Set up Catalog B-tree vnode
     */
    cndesc.cd_nameptr = hfs_catname;
    cndesc.cd_namelen = strlen((char *)hfs_catname);
    cndesc.cd_cnid = cnattr.ca_fileid = kHFSCatalogFileID;

    cfork.cf_size    = SWAP_BE64 (vhp->catalogFile.logicalSize);
    cfork.cf_clump   = SWAP_BE32 (vhp->catalogFile.clumpSize);
    cfork.cf_blocks  = SWAP_BE32 (vhp->catalogFile.totalBlocks);
    cfork.cf_vblocks = 0;
    cnattr.ca_blocks = cfork.cf_blocks;
    for (i = 0; i < kHFSPlusExtentDensity; i++) {
        cfork.cf_extents[i].startBlock =
                SWAP_BE32 (vhp->catalogFile.extents[i].startBlock);
        cfork.cf_extents[i].blockCount =
                SWAP_BE32 (vhp->catalogFile.extents[i].blockCount);
    }
    retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
                             &hfsmp->hfs_catalog_vp, &newvnode_flags);
    if (retval) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting catalog BT\n", retval);
        }
        goto ErrorExit;
    }
    hfsmp->hfs_catalog_cp = VTOC(hfsmp->hfs_catalog_vp);

    retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_catalog_vp),
                                      (KeyCompareProcPtr) CompareExtendedCatalogKeys));

    if (retval) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfsplus: BTOpenPath returned (%d) getting catalog BT\n", retval);
        }
        hfs_unlock(hfsmp->hfs_catalog_cp);
        goto ErrorExit;
    }
    if ((hfsmp->hfs_flags & HFS_X) &&
        BTGetInformation(VTOF(hfsmp->hfs_catalog_vp), 0, &btinfo) == 0) {
        if (btinfo.keyCompareType == kHFSBinaryCompare) {
            hfsmp->hfs_flags |= HFS_CASE_SENSITIVE;
            /* Install a case-sensitive key compare */
            (void) BTOpenPath(VTOF(hfsmp->hfs_catalog_vp),
                              (KeyCompareProcPtr)cat_binarykeycompare);
        }
    }

    hfs_unlock(hfsmp->hfs_catalog_cp);

    /*
     * Set up Allocation file vnode
     */
    cndesc.cd_nameptr = hfs_vbmname;
    cndesc.cd_namelen = strlen((char *)hfs_vbmname);
    cndesc.cd_cnid = cnattr.ca_fileid = kHFSAllocationFileID;

    cfork.cf_size    = SWAP_BE64 (vhp->allocationFile.logicalSize);
    cfork.cf_clump   = SWAP_BE32 (vhp->allocationFile.clumpSize);
    cfork.cf_blocks  = SWAP_BE32 (vhp->allocationFile.totalBlocks);
    cfork.cf_vblocks = 0;
    cnattr.ca_blocks = cfork.cf_blocks;
    for (i = 0; i < kHFSPlusExtentDensity; i++) {
        cfork.cf_extents[i].startBlock =
                SWAP_BE32 (vhp->allocationFile.extents[i].startBlock);
        cfork.cf_extents[i].blockCount =
                SWAP_BE32 (vhp->allocationFile.extents[i].blockCount);
    }
    retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
                             &hfsmp->hfs_allocation_vp, &newvnode_flags);
    if (retval) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting bitmap\n", retval);
        }
        goto ErrorExit;
    }
    hfsmp->hfs_allocation_cp = VTOC(hfsmp->hfs_allocation_vp);
    hfs_unlock(hfsmp->hfs_allocation_cp);

    /*
     * Set up Attribute B-tree vnode
     */
    if (vhp->attributesFile.totalBlocks != 0) {
        cndesc.cd_nameptr = hfs_attrname;
        cndesc.cd_namelen = strlen((char *)hfs_attrname);
        cndesc.cd_cnid = cnattr.ca_fileid = kHFSAttributesFileID;

        cfork.cf_size    = SWAP_BE64 (vhp->attributesFile.logicalSize);
        cfork.cf_clump   = SWAP_BE32 (vhp->attributesFile.clumpSize);
        cfork.cf_blocks  = SWAP_BE32 (vhp->attributesFile.totalBlocks);
        cfork.cf_vblocks = 0;
        cnattr.ca_blocks = cfork.cf_blocks;
        for (i = 0; i < kHFSPlusExtentDensity; i++) {
            cfork.cf_extents[i].startBlock =
                    SWAP_BE32 (vhp->attributesFile.extents[i].startBlock);
            cfork.cf_extents[i].blockCount =
                    SWAP_BE32 (vhp->attributesFile.extents[i].blockCount);
        }
        retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
                                 &hfsmp->hfs_attribute_vp, &newvnode_flags);
        if (retval) {
            if (HFS_MOUNT_DEBUG) {
                printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting EA BT\n", retval);
            }
            goto ErrorExit;
        }
        hfsmp->hfs_attribute_cp = VTOC(hfsmp->hfs_attribute_vp);
        retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_attribute_vp),
                                          (KeyCompareProcPtr) hfs_attrkeycompare));
        hfs_unlock(hfsmp->hfs_attribute_cp);
        if (retval) {
            if (HFS_MOUNT_DEBUG) {
                printf("hfs_mounthfsplus: BTOpenPath returned (%d) getting EA BT\n", retval);
            }
            goto ErrorExit;
        }

        /* Initialize the vnode for the virtual attribute data file that spans
         * the entire file system space, for performing I/O to the attribute
         * b-tree. We hold an iocount on the attrdata vnode for the entire
         * duration of mount (similar to the btree vnodes).
         */
        retval = init_attrdata_vnode(hfsmp);
        if (retval) {
            if (HFS_MOUNT_DEBUG) {
                printf("hfs_mounthfsplus: init_attrdata_vnode returned (%d) for virtual EA file\n", retval);
            }
            goto ErrorExit;
        }
    }

    /*
     * Set up Startup file vnode
     */
    if (vhp->startupFile.totalBlocks != 0) {
        cndesc.cd_nameptr = hfs_startupname;
        cndesc.cd_namelen = strlen((char *)hfs_startupname);
        cndesc.cd_cnid = cnattr.ca_fileid = kHFSStartupFileID;

        cfork.cf_size    = SWAP_BE64 (vhp->startupFile.logicalSize);
        cfork.cf_clump   = SWAP_BE32 (vhp->startupFile.clumpSize);
        cfork.cf_blocks  = SWAP_BE32 (vhp->startupFile.totalBlocks);
        cfork.cf_vblocks = 0;
        cnattr.ca_blocks = cfork.cf_blocks;
        for (i = 0; i < kHFSPlusExtentDensity; i++) {
            cfork.cf_extents[i].startBlock =
                    SWAP_BE32 (vhp->startupFile.extents[i].startBlock);
            cfork.cf_extents[i].blockCount =
                    SWAP_BE32 (vhp->startupFile.extents[i].blockCount);
        }
        retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
                                 &hfsmp->hfs_startup_vp, &newvnode_flags);
        if (retval) {
            if (HFS_MOUNT_DEBUG) {
                printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting startup file\n", retval);
            }
            goto ErrorExit;
        }
        hfsmp->hfs_startup_cp = VTOC(hfsmp->hfs_startup_vp);
        hfs_unlock(hfsmp->hfs_startup_cp);
    }

    /*
     * Pick up volume name and create date
     *
     * Acquiring the volume name should not manipulate the bitmap, only the catalog
     * btree and possibly the extents overflow b-tree.
     */
    retval = cat_idlookup(hfsmp, kHFSRootFolderID, 0, 0, &cndesc, &cnattr, NULL);
    if (retval) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfsplus: cat_idlookup returned (%d) getting rootfolder \n", retval);
        }
        goto ErrorExit;
    }
    vcb->hfs_itime = cnattr.ca_itime;
    vcb->volumeNameEncodingHint = cndesc.cd_encoding;
    bcopy(cndesc.cd_nameptr, vcb->vcbVN, min(255, cndesc.cd_namelen));
    volname_length = strlen ((const char*)vcb->vcbVN);
    cat_releasedesc(&cndesc);

    /* Send the volume name down to CoreStorage if necessary */
    retval = utf8_normalizestr(vcb->vcbVN, volname_length, (u_int8_t*)converted_volname, &conv_volname_length, 256, UTF_PRECOMPOSED);
    if (retval == 0) {
        (void) VNOP_IOCTL (hfsmp->hfs_devvp, _DKIOCCSSETLVNAME, converted_volname, 0, vfs_context_current());
    }

    /* Reset retval to 0. We don't care about errors in volname conversion */
    retval = 0;

    /*
     * Pull in the volume UUID while we are still single-threaded.
     * This brings the volume UUID into the cached copy dangling off of the HFSMP.
     * Otherwise it would have to be computed on first access.
     */
    uuid_t throwaway;
    hfs_getvoluuid (hfsmp, throwaway);

    /*
     * We now always initiate a full bitmap scan even if the volume is read-only because this is
     * our only shot to do I/Os of dramatically different sizes than what the buffer cache ordinarily
     * expects. TRIMs will not be delivered to the underlying media if the volume is not
     * read-write though.
     */
    hfsmp->scan_var = 0;

    /*
     * We have to determine whether we can scan the bitmap allocation
     * file asynchronously. If the catalog file is fragmented such that it
     * has overflow extents and the volume needs a journal transaction, we
     * cannot scan the bitmap asynchronously. Doing so would cause the mount
     * thread to block on the bitmap lock inside a journal transaction, while
     * the scan thread, which holds the bitmap lock exclusively, performs disk
     * I/O to issue TRIMs to unallocated ranges and build the summary table.
     * The amount of time the mount thread is blocked depends on the size of
     * the volume, type of disk, etc. This blocking can cause the watchdog
     * timer to time out, resulting in panic. Thus, to ensure we don't time
     * out the watchdog in such cases, we scan the bitmap synchronously.
     *
     * Please NOTE: Currently this timeout only seems to happen for non-SSD
     * drives. Possibly reading a big fragmented allocation file to
     * construct the summary table takes enough time to time out the watchdog.
     * Thus we check whether we need to scan the bitmap synchronously only if
     * the disk is not an SSD.
     */
    async_bitmap_scan = true;
    if (!ISSET(hfsmp->hfs_flags, HFS_SSD) && hfsmp->hfs_catalog_cp) {
        bool catalog_has_overflow_extents;
        bool journal_transaction_needed;

        catalog_has_overflow_extents = false;
        if ((hfsmp->hfs_catalog_vp != NULL) &&
            (overflow_extents(VTOF(hfsmp->hfs_catalog_vp)))) {
            catalog_has_overflow_extents = true;
        }

        journal_transaction_needed = false;
        if (hfsmp->jnl || ((vcb->vcbAtrb & kHFSVolumeJournaledMask) &&
                           (hfsmp->hfs_flags & HFS_READ_ONLY))) {
            journal_transaction_needed = true;
        }

        if (catalog_has_overflow_extents && journal_transaction_needed)
            async_bitmap_scan = false;
    }
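
    /*
     * To summarize the decision above: on SSDs, or when the catalog has no
     * overflow extents, or when no journal transaction will be needed, the
     * bitmap is scanned asynchronously; only the combination of rotational
     * media, a fragmented catalog, and a journaled volume forces the
     * synchronous scan.
     */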

    if (async_bitmap_scan) {
        thread_t allocator_scanner;

        /* Take the HFS mount mutex and wait on scan_var */
        hfs_lock_mount (hfsmp);


        /*
         * Scan the bitmap asynchronously.
         */
        kernel_thread_start ((thread_continue_t)hfs_scan_blocks, hfsmp,
                             &allocator_scanner);

        /*
         * Wait until it registers that it's got the appropriate locks
         * (or that it is finished).
         */
        while ((hfsmp->scan_var & (HFS_ALLOCATOR_SCAN_INFLIGHT|
                                   HFS_ALLOCATOR_SCAN_COMPLETED)) == 0) {
            msleep (&hfsmp->scan_var, &hfsmp->hfs_mutex, PINOD,
                    "hfs_scan_blocks", 0);
        }

        hfs_unlock_mount(hfsmp);

        thread_deallocate (allocator_scanner);
    } else {

        /*
         * Initialize the summary table and then scan the bitmap
         * synchronously. Since we are scanning the bitmap
         * synchronously we don't need to hold the bitmap lock.
         */
        if (hfs_init_summary (hfsmp)) {
            printf ("hfs: could not initialize summary table for "
                    "%s\n", hfsmp->vcbVN);
        }

        (void)ScanUnmapBlocks (hfsmp);

        /*
         * We need to mark the allocator scan as completed because
         * hot file clustering waits for this condition later.
         */
        hfsmp->scan_var |= HFS_ALLOCATOR_SCAN_COMPLETED;
        buf_invalidateblks (hfsmp->hfs_allocation_vp, 0, 0, 0);
    }

    /* mark the volume dirty (clear clean unmount bit) */
    vcb->vcbAtrb &= ~kHFSVolumeUnmountedMask;
    if (hfsmp->jnl && (hfsmp->hfs_flags & HFS_READ_ONLY) == 0) {
        hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT);
    }

    /* kHFSHasFolderCount is only supported/updated on HFSX volumes */
    if ((hfsmp->hfs_flags & HFS_X) != 0) {
        hfsmp->hfs_flags |= HFS_FOLDERCOUNT;
    }

    //
    // Check if we need to do late journal initialization. This only
    // happens if a previous version of Mac OS X (or 9) touched the disk.
    // In that case hfs_late_journal_init() will go re-locate the journal
    // and journal_info_block files and validate that they're still kosher.
    //
    if ( (vcb->vcbAtrb & kHFSVolumeJournaledMask)
         && (SWAP_BE32(vhp->lastMountedVersion) != kHFSJMountVersion)
         && (hfsmp->jnl == NULL)) {

        retval = hfs_late_journal_init(hfsmp, vhp, args);
        if (retval != 0) {
            if (retval == EROFS) {
                // EROFS is a special error code that means the volume has an external
                // journal which we couldn't find. In that case we do not want to
                // rewrite the volume header - we'll just refuse to mount the volume.
                if (HFS_MOUNT_DEBUG) {
                    printf("hfs_mounthfsplus: hfs_late_journal_init returned (%d), maybe an external jnl?\n", retval);
                }
                retval = EINVAL;
                goto ErrorExit;
            }

            hfsmp->jnl = NULL;

            // if the journal failed to open, then set the lastMountedVersion
            // to be "FSK!" which fsck_hfs will see and force the fsck instead
            // of just bailing out because the volume is journaled.
            if (!(hfsmp->hfs_flags & HFS_READ_ONLY)) {
                HFSPlusVolumeHeader *jvhp;
                daddr64_t mdb_offset;
                struct buf *bp = NULL;

                hfsmp->hfs_flags |= HFS_NEED_JNL_RESET;

                mdb_offset = (daddr64_t)((embeddedOffset / blockSize) + HFS_PRI_SECTOR(blockSize));

                bp = NULL;
                retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
                        HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
                        hfsmp->hfs_physical_block_size, cred, &bp);
                if (retval == 0) {
                    jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));

                    if (SWAP_BE16(jvhp->signature) == kHFSPlusSigWord || SWAP_BE16(jvhp->signature) == kHFSXSigWord) {
                        printf ("hfs(3): Journal replay fail. Writing lastMountVersion as FSK!\n");
                        jvhp->lastMountedVersion = SWAP_BE32(kFSKMountVersion);
                        buf_bwrite(bp);
                    } else {
                        buf_brelse(bp);
                    }
                    bp = NULL;
                } else if (bp) {
                    buf_brelse(bp);
                    // clear this so the error exit path won't try to use it
                    bp = NULL;
                }
            }

            if (HFS_MOUNT_DEBUG) {
                printf("hfs_mounthfsplus: hfs_late_journal_init returned (%d)\n", retval);
            }
            retval = EINVAL;
            goto ErrorExit;
        } else if (hfsmp->jnl) {
            vfs_setflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
        }
    } else if (hfsmp->jnl || ((vcb->vcbAtrb & kHFSVolumeJournaledMask) && (hfsmp->hfs_flags & HFS_READ_ONLY))) {
        struct cat_attr jinfo_attr, jnl_attr;

        if (hfsmp->hfs_flags & HFS_READ_ONLY) {
            vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
        }

        // if we're here we need to fill in the fileid's for the
        // journal and journal_info_block.
        hfsmp->hfs_jnlinfoblkid = GetFileInfo(vcb, kRootDirID, ".journal_info_block", &jinfo_attr, NULL);
        hfsmp->hfs_jnlfileid    = GetFileInfo(vcb, kRootDirID, ".journal", &jnl_attr, NULL);
        if (hfsmp->hfs_jnlinfoblkid == 0 || hfsmp->hfs_jnlfileid == 0) {
            printf("hfs: danger! couldn't find the file-id's for the journal or journal_info_block\n");
            printf("hfs: jnlfileid %d, jnlinfoblkid %d\n", hfsmp->hfs_jnlfileid, hfsmp->hfs_jnlinfoblkid);
        }

        if (hfsmp->hfs_flags & HFS_READ_ONLY) {
            vcb->vcbAtrb |= kHFSVolumeJournaledMask;
        }

        if (hfsmp->jnl == NULL) {
            vfs_clearflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
        }
    }

    if ( !(vcb->vcbAtrb & kHFSVolumeHardwareLockMask) )     // if the disk is not write protected
    {
        MarkVCBDirty( vcb );    // mark VCB dirty so it will be written
    }

    if (hfsmp->hfs_flags & HFS_CS_METADATA_PIN) {
        hfs_pin_fs_metadata(hfsmp);
    }
    /*
     * Distinguish 3 potential cases involving content protection:
     * 1. mount point bit set; vcbAtrb does not support it. Fail.
     * 2. mount point bit set; vcbAtrb supports it. We're good.
     * 3. mount point bit not set; vcbAtrb supports it, turn bit on, then good.
     */
    if (vfs_flags(hfsmp->hfs_mp) & MNT_CPROTECT) {
        /* Does the mount point support it ? */
        if ((vcb->vcbAtrb & kHFSContentProtectionMask) == 0) {
            /* Case 1 above */
            retval = EINVAL;
            goto ErrorExit;
        }
    }
    else {
        /* not requested in the mount point. Is it in FS? */
        if (vcb->vcbAtrb & kHFSContentProtectionMask) {
            /* Case 3 above */
            vfs_setflags (hfsmp->hfs_mp, MNT_CPROTECT);
        }
    }

    /* At this point, if the mount point flag is set, we can enable it. */
    if (vfs_flags(hfsmp->hfs_mp) & MNT_CPROTECT) {
        /* Cases 2+3 above */
#if CONFIG_PROTECT
        /* Get the EAs as needed. */
        int cperr = 0;
        struct cp_root_xattr *xattr = NULL;
        xattr = hfs_malloc(sizeof(*xattr));

        /* go get the EA to get the version information */
        cperr = cp_getrootxattr (hfsmp, xattr);
        /*
         * If there was no EA there, then write one out.
         * The EA being absent from the root means
         * this is an erase install or a very old FS.
         */

        if (cperr == 0) {
            /* Have to run a valid CP version. */
            if (!cp_is_supported_version(xattr->major_version)) {
                cperr = EINVAL;
            }
        }
        else if (cperr == ENOATTR) {
            printf("No root EA set, creating new EA with new version: %d\n", CP_CURRENT_VERS);
            bzero(xattr, sizeof(struct cp_root_xattr));
            xattr->major_version = CP_CURRENT_VERS;
            xattr->minor_version = CP_MINOR_VERS;
            cperr = cp_setrootxattr (hfsmp, xattr);
        }

        if (cperr) {
            hfs_free(xattr, sizeof(*xattr));
            retval = EPERM;
            goto ErrorExit;
        }

        /* If we got here, then the CP version is valid. Set it in the mount point */
        hfsmp->hfs_running_cp_major_vers = xattr->major_version;
        printf("Running with CP root xattr: %d.%d\n", xattr->major_version, xattr->minor_version);
        hfsmp->cproot_flags = xattr->flags;
        hfsmp->cp_crypto_generation = ISSET(xattr->flags, CP_ROOT_CRYPTOG1) ? 1 : 0;
#if HFS_CONFIG_KEY_ROLL
        hfsmp->hfs_auto_roll_min_key_os_version = xattr->auto_roll_min_version;
        hfsmp->hfs_auto_roll_max_key_os_version = xattr->auto_roll_max_version;
#endif

        hfs_free(xattr, sizeof(*xattr));

        /*
         * Acquire the boot-arg for the AKS default key; if invalid, obtain from the device tree.
         * Ensure that the boot-arg's value is valid for FILES (not directories),
         * since only files are actually protected for now.
         */

        PE_parse_boot_argn("aks_default_class", &hfsmp->default_cp_class, sizeof(hfsmp->default_cp_class));

        if (cp_is_valid_class(0, hfsmp->default_cp_class) == 0) {
            PE_get_default("kern.default_cp_class", &hfsmp->default_cp_class, sizeof(hfsmp->default_cp_class));
        }

#if HFS_TMPDBG
#if !SECURE_KERNEL
        PE_parse_boot_argn("aks_verbose", &hfsmp->hfs_cp_verbose, sizeof(hfsmp->hfs_cp_verbose));
#endif
#endif

        if (cp_is_valid_class(0, hfsmp->default_cp_class) == 0) {
            hfsmp->default_cp_class = PROTECTION_CLASS_C;
        }

#else
        /* If CONFIG_PROTECT not built, ignore CP */
        vfs_clearflags(hfsmp->hfs_mp, MNT_CPROTECT);
#endif
    }

    /*
     * Establish a metadata allocation zone.
     */
    hfs_metadatazone_init(hfsmp, false);

    /*
     * Make any metadata zone adjustments.
     */
    if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
        /* Keep the roving allocator out of the metadata zone. */
        if (vcb->nextAllocation >= hfsmp->hfs_metazone_start &&
            vcb->nextAllocation <= hfsmp->hfs_metazone_end) {
            HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_end + 1);
        }
    } else {
        if (vcb->nextAllocation <= 1) {
            vcb->nextAllocation = hfsmp->hfs_min_alloc_start;
        }
    }
    vcb->sparseAllocation = hfsmp->hfs_min_alloc_start;

    /* Setup private/hidden directories for hardlinks. */
    hfs_privatedir_init(hfsmp, FILE_HARDLINKS);
    hfs_privatedir_init(hfsmp, DIR_HARDLINKS);

    if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
        hfs_remove_orphans(hfsmp);

    /* See if we need to erase unused Catalog nodes due to <rdar://problem/6947811>. */
    if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
    {
        retval = hfs_erase_unused_nodes(hfsmp);
        if (retval) {
            if (HFS_MOUNT_DEBUG) {
                printf("hfs_mounthfsplus: hfs_erase_unused_nodes returned (%d) for %s \n", retval, hfsmp->vcbVN);
            }

            goto ErrorExit;
        }
    }

    /*
     * Allow hot file clustering if conditions allow.
     */
    if ((hfsmp->hfs_flags & HFS_METADATA_ZONE) && !(hfsmp->hfs_flags & HFS_READ_ONLY) &&
        ((hfsmp->hfs_flags & HFS_SSD) == 0 || (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN))) {
        //
        // Wait until the bitmap scan completes before we initialize the
        // hotfile area so that we do not run into any issues with the
        // bitmap being read while hotfiles is initializing itself. On
        // some older/slower machines, without this interlock, the bitmap
        // would sometimes get corrupted at boot time.
        //
        hfs_lock_mount(hfsmp);
        while(!(hfsmp->scan_var & HFS_ALLOCATOR_SCAN_COMPLETED)) {
            (void) msleep (&hfsmp->scan_var, &hfsmp->hfs_mutex, PINOD, "hfs_hotfile_bitmap_interlock", 0);
        }
        hfs_unlock_mount(hfsmp);

        /*
         * Note: at this point we are not allowed to fail the
         *       mount operation because the HotFile init code
         *       in hfs_recording_init() will lookup vnodes with
         *       VNOP_LOOKUP() which hangs vnodes off the mount
         *       (and if we were to fail, VFS is not prepared to
         *       clean that up at this point). Since HotFiles are
         *       optional, this is not a big deal.
         */
        (void) hfs_recording_init(hfsmp);
    }

    /* Force ACLs on HFS+ file systems. */
    vfs_setextendedsecurity(HFSTOVFS(hfsmp));

    /* Enable extent-based extended attributes by default */
    hfsmp->hfs_flags |= HFS_XATTR_EXTENTS;

    return (0);

ErrorExit:
    /*
     * A fatal error occurred and the volume cannot be mounted, so
     * release any resources that we acquired...
     */
    hfsUnmount(hfsmp, NULL);

    if (HFS_MOUNT_DEBUG) {
        printf("hfs_mounthfsplus: encountered error (%d)\n", retval);
    }
    return (retval);
}

static int
_pin_metafile(struct hfsmount *hfsmp, vnode_t vp)
{
    int err;

    err = hfs_lock(VTOC(vp), HFS_SHARED_LOCK, HFS_LOCK_DEFAULT);
    if (err == 0) {
        err = hfs_pin_vnode(hfsmp, vp, HFS_PIN_IT, NULL);
        hfs_unlock(VTOC(vp));
    }

    return err;
}

void
hfs_pin_fs_metadata(struct hfsmount *hfsmp)
{
    ExtendedVCB *vcb;
    int err;

    vcb = HFSTOVCB(hfsmp);

    err = _pin_metafile(hfsmp, hfsmp->hfs_extents_vp);
    if (err != 0) {
        printf("hfs: failed to pin extents overflow file %d\n", err);
    }
    err = _pin_metafile(hfsmp, hfsmp->hfs_catalog_vp);
    if (err != 0) {
        printf("hfs: failed to pin catalog file %d\n", err);
    }
    err = _pin_metafile(hfsmp, hfsmp->hfs_allocation_vp);
    if (err != 0) {
        printf("hfs: failed to pin bitmap file %d\n", err);
    }
    err = _pin_metafile(hfsmp, hfsmp->hfs_attribute_vp);
    if (err != 0) {
        printf("hfs: failed to pin extended attr file %d\n", err);
    }

    hfs_pin_block_range(hfsmp, HFS_PIN_IT, 0, 1);
    hfs_pin_block_range(hfsmp, HFS_PIN_IT, vcb->totalBlocks-1, 1);

    if (vfs_flags(hfsmp->hfs_mp) & MNT_JOURNALED) {
        // and hey, if we've got a journal, let's pin that too!
        hfs_pin_block_range(hfsmp, HFS_PIN_IT, hfsmp->jnl_start, howmany(hfsmp->jnl_size, vcb->blockSize));
    }
}

/*
 * ReleaseMetaFileVNode
 *
 * vp    L - -
 */
static void ReleaseMetaFileVNode(struct vnode *vp)
{
    struct filefork *fp;

    if (vp && (fp = VTOF(vp))) {
        if (fp->fcbBTCBPtr != NULL) {
            (void)hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
            (void) BTClosePath(fp);
            hfs_unlock(VTOC(vp));
        }

        /* release the node even if BTClosePath fails */
        vnode_recycle(vp);
        vnode_put(vp);
    }
}


/*************************************************************
 *
 * Unmounts a hfs volume.
 * At this point vflush() has been called (to dump all non-metadata files)
 *
 *************************************************************/

int
hfsUnmount( register struct hfsmount *hfsmp, __unused struct proc *p)
{
    /* Get rid of our attribute data vnode (if any). This is done
     * after the vflush() during mount, so we don't need to worry
     * about any locks.
     */
    if (hfsmp->hfs_attrdata_vp) {
        ReleaseMetaFileVNode(hfsmp->hfs_attrdata_vp);
        hfsmp->hfs_attrdata_vp = NULLVP;
    }

    if (hfsmp->hfs_startup_vp) {
        ReleaseMetaFileVNode(hfsmp->hfs_startup_vp);
        hfsmp->hfs_startup_cp = NULL;
        hfsmp->hfs_startup_vp = NULL;
    }

    if (hfsmp->hfs_attribute_vp) {
        ReleaseMetaFileVNode(hfsmp->hfs_attribute_vp);
        hfsmp->hfs_attribute_cp = NULL;
        hfsmp->hfs_attribute_vp = NULL;
    }

    if (hfsmp->hfs_catalog_vp) {
        ReleaseMetaFileVNode(hfsmp->hfs_catalog_vp);
        hfsmp->hfs_catalog_cp = NULL;
        hfsmp->hfs_catalog_vp = NULL;
    }

    if (hfsmp->hfs_extents_vp) {
        ReleaseMetaFileVNode(hfsmp->hfs_extents_vp);
        hfsmp->hfs_extents_cp = NULL;
        hfsmp->hfs_extents_vp = NULL;
    }

    if (hfsmp->hfs_allocation_vp) {
        ReleaseMetaFileVNode(hfsmp->hfs_allocation_vp);
        hfsmp->hfs_allocation_cp = NULL;
        hfsmp->hfs_allocation_vp = NULL;
    }

    return (0);
}


/*
 * Test if fork has overflow extents.
 *
 * Returns:
 *    non-zero - overflow extents exist
 *    zero     - overflow extents do not exist
 */
bool overflow_extents(struct filefork *fp)
{
    u_int32_t blocks;

    //
    // If the vnode pointer is NULL then we're being called
    // from hfs_remove_orphans() with a faked-up filefork
    // and therefore it has to be an HFS+ volume. Otherwise
    // we check through the volume header to see what type
    // of volume we're on.
    //

#if CONFIG_HFS_STD
    if (FTOV(fp) && VTOVCB(FTOV(fp))->vcbSigWord == kHFSSigWord) {
        if (fp->ff_extents[2].blockCount == 0)
            return false;

        blocks = fp->ff_extents[0].blockCount +
                 fp->ff_extents[1].blockCount +
                 fp->ff_extents[2].blockCount;

        return fp->ff_blocks > blocks;
    }
#endif

    if (fp->ff_extents[7].blockCount == 0)
        return false;

    blocks = fp->ff_extents[0].blockCount +
             fp->ff_extents[1].blockCount +
             fp->ff_extents[2].blockCount +
             fp->ff_extents[3].blockCount +
             fp->ff_extents[4].blockCount +
             fp->ff_extents[5].blockCount +
             fp->ff_extents[6].blockCount +
             fp->ff_extents[7].blockCount;

    return fp->ff_blocks > blocks;
}
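
/*
 * For instance, an HFS+ fork whose catalog record holds eight inline extents
 * totaling 80 blocks while ff_blocks reports 100 must have the remaining 20
 * blocks described by extents-overflow b-tree records, so the test above
 * returns true.
 */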

static __attribute__((pure))
boolean_t hfs_is_frozen(struct hfsmount *hfsmp)
{
    return (hfsmp->hfs_freeze_state == HFS_FROZEN
            || (hfsmp->hfs_freeze_state == HFS_FREEZING
                && current_thread() != hfsmp->hfs_freezing_thread));
}

/*
 * Lock the HFS global journal lock
 */
int
hfs_lock_global (struct hfsmount *hfsmp, enum hfs_locktype locktype)
{
    thread_t thread = current_thread();

    if (hfsmp->hfs_global_lockowner == thread) {
        panic ("hfs_lock_global: locking against myself!");
    }

    /*
     * This check isn't really necessary but this stops us taking
     * the mount lock in most cases. The essential check is below.
     */
    if (hfs_is_frozen(hfsmp)) {
        /*
         * Unfortunately, there is no easy way of getting a notification
         * for when a process is exiting and it's possible for the exiting
         * process to get blocked somewhere else. To catch this, we
         * periodically monitor the frozen process here and thaw if
         * we spot that it's exiting.
         */
frozen:
        hfs_lock_mount(hfsmp);

        struct timespec ts = { 0, 500 * NSEC_PER_MSEC };

        while (hfs_is_frozen(hfsmp)) {
            if (hfsmp->hfs_freeze_state == HFS_FROZEN
                && proc_exiting(hfsmp->hfs_freezing_proc)) {
                hfs_thaw_locked(hfsmp);
                break;
            }

            msleep(&hfsmp->hfs_freeze_state, &hfsmp->hfs_mutex,
                   PWAIT, "hfs_lock_global (frozen)", &ts);
        }
        hfs_unlock_mount(hfsmp);
    }

    /* HFS_SHARED_LOCK */
    if (locktype == HFS_SHARED_LOCK) {
        lck_rw_lock_shared (&hfsmp->hfs_global_lock);
        hfsmp->hfs_global_lockowner = HFS_SHARED_OWNER;
    }
    /* HFS_EXCLUSIVE_LOCK */
    else {
        lck_rw_lock_exclusive (&hfsmp->hfs_global_lock);
        hfsmp->hfs_global_lockowner = thread;
    }

    /*
     * We have to check if we're frozen again because of the time
     * between when we checked and when we took the global lock.
     */
    if (hfs_is_frozen(hfsmp)) {
        hfs_unlock_global(hfsmp);
        goto frozen;
    }

    return 0;
}
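
/*
 * An illustrative pairing (sketch only, not called from anywhere in this
 * file): every successful hfs_lock_global() must be balanced by
 * hfs_unlock_global() on the same thread, e.g.
 *
 *     hfs_lock_global(hfsmp, HFS_SHARED_LOCK);
 *     ... work covered by the global journal lock ...
 *     hfs_unlock_global(hfsmp);
 */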


/*
 * Unlock the HFS global journal lock
 */
void
hfs_unlock_global (struct hfsmount *hfsmp)
{
    thread_t thread = current_thread();

    /* HFS_LOCK_EXCLUSIVE */
    if (hfsmp->hfs_global_lockowner == thread) {
        hfsmp->hfs_global_lockowner = NULL;
        lck_rw_unlock_exclusive (&hfsmp->hfs_global_lock);
    }
    /* HFS_LOCK_SHARED */
    else {
        lck_rw_unlock_shared (&hfsmp->hfs_global_lock);
    }
}

/*
 * Lock the HFS mount lock
 *
 * Note: this is a mutex, not a rw lock!
 */
inline
void hfs_lock_mount (struct hfsmount *hfsmp) {
    lck_mtx_lock (&(hfsmp->hfs_mutex));
}

/*
 * Unlock the HFS mount lock
 *
 * Note: this is a mutex, not a rw lock!
 */
inline
void hfs_unlock_mount (struct hfsmount *hfsmp) {
    lck_mtx_unlock (&(hfsmp->hfs_mutex));
}

/*
 * Lock HFS system file(s).
 *
 * This function accepts a @flags parameter which indicates which
 * system file locks are required. The value it returns should be
 * used in a subsequent call to hfs_systemfile_unlock. The caller
 * should treat this value as opaque; it may or may not have a
 * relation to the @flags field that is passed in. The *only*
 * guarantee that we make is that a value of zero means that no locks
 * were taken and that there is no need to call hfs_systemfile_unlock
 * (although it is harmless to do so). Recursion is supported but
 * care must still be taken to ensure correct lock ordering. Note
 * that requests for certain locks may cause other locks to also be
 * taken, including locks that are not possible to ask for via the
 * @flags parameter.
 */
1526 int
1527 hfs_systemfile_lock(struct hfsmount *hfsmp, int flags, enum hfs_locktype locktype)
1528 {
1529 /*
1530 * Locking order is Catalog file, Attributes file, Startup file, Bitmap file, Extents file
1531 */
1532 if (flags & SFL_CATALOG) {
1533 if (hfsmp->hfs_catalog_cp
1534 && hfsmp->hfs_catalog_cp->c_lockowner != current_thread()) {
1535 #ifdef HFS_CHECK_LOCK_ORDER
1536 if (hfsmp->hfs_attribute_cp && hfsmp->hfs_attribute_cp->c_lockowner == current_thread()) {
1537 panic("hfs_systemfile_lock: bad lock order (Attributes before Catalog)");
1538 }
1539 if (hfsmp->hfs_startup_cp && hfsmp->hfs_startup_cp->c_lockowner == current_thread()) {
1540 panic("hfs_systemfile_lock: bad lock order (Startup before Catalog)");
1541 }
1542 if (hfsmp-> hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) {
1543 panic("hfs_systemfile_lock: bad lock order (Extents before Catalog)");
1544 }
1545 #endif /* HFS_CHECK_LOCK_ORDER */
1546
1547 (void) hfs_lock(hfsmp->hfs_catalog_cp, locktype, HFS_LOCK_DEFAULT);
1548 /*
1549 * When the catalog file has overflow extents then
1550 * also acquire the extents b-tree lock if its not
1551 * already requested.
1552 */
1553 if (((flags & SFL_EXTENTS) == 0) &&
1554 (hfsmp->hfs_catalog_vp != NULL) &&
1555 (overflow_extents(VTOF(hfsmp->hfs_catalog_vp)))) {
1556 flags |= SFL_EXTENTS;
1557 }
1558 } else {
1559 flags &= ~SFL_CATALOG;
1560 }
1561 }
1562
1563 if (flags & SFL_ATTRIBUTE) {
1564 if (hfsmp->hfs_attribute_cp
1565 && hfsmp->hfs_attribute_cp->c_lockowner != current_thread()) {
1566 #ifdef HFS_CHECK_LOCK_ORDER
1567 if (hfsmp->hfs_startup_cp && hfsmp->hfs_startup_cp->c_lockowner == current_thread()) {
1568 panic("hfs_systemfile_lock: bad lock order (Startup before Attributes)");
1569 }
1570 if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) {
1571 panic("hfs_systemfile_lock: bad lock order (Extents before Attributes)");
1572 }
1573 #endif /* HFS_CHECK_LOCK_ORDER */
1574
1575 (void) hfs_lock(hfsmp->hfs_attribute_cp, locktype, HFS_LOCK_DEFAULT);
1576 /*
1577 * When the attribute file has overflow extents then
1578 * also acquire the extents b-tree lock if its not
1579 * already requested.
1580 */
1581 if (((flags & SFL_EXTENTS) == 0) &&
1582 (hfsmp->hfs_attribute_vp != NULL) &&
1583 (overflow_extents(VTOF(hfsmp->hfs_attribute_vp)))) {
1584 flags |= SFL_EXTENTS;
1585 }
1586 } else {
1587 flags &= ~SFL_ATTRIBUTE;
1588 }
1589 }
1590
1591 if (flags & SFL_STARTUP) {
1592 if (hfsmp->hfs_startup_cp
1593 && hfsmp->hfs_startup_cp->c_lockowner != current_thread()) {
1594 #ifdef HFS_CHECK_LOCK_ORDER
1595 if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) {
1596 panic("hfs_systemfile_lock: bad lock order (Extents before Startup)");
1597 }
1598 #endif /* HFS_CHECK_LOCK_ORDER */
1599
1600 (void) hfs_lock(hfsmp->hfs_startup_cp, locktype, HFS_LOCK_DEFAULT);
1601 /*
1602 * When the startup file has overflow extents, also
1603 * acquire the extents b-tree lock if it has not
1604 * already been requested.
1605 */
1606 if (((flags & SFL_EXTENTS) == 0) &&
1607 (hfsmp->hfs_startup_vp != NULL) &&
1608 (overflow_extents(VTOF(hfsmp->hfs_startup_vp)))) {
1609 flags |= SFL_EXTENTS;
1610 }
1611 } else {
1612 flags &= ~SFL_STARTUP;
1613 }
1614 }
1615
1616 /*
1617 * To preserve lock ordering, a request for the extents lock
1618 * also takes the bitmap lock.
1619 */
1620 if (flags & (SFL_BITMAP | SFL_EXTENTS)) {
1621 if (hfsmp->hfs_allocation_cp) {
1622 (void) hfs_lock(hfsmp->hfs_allocation_cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
1623 /*
1624 * The bitmap lock is grabbed even when only the extents
1625 * lock was requested. Set the bitmap bit in the lock
1626 * flags, which callers will pass back during unlock.
1627 */
1628 flags |= SFL_BITMAP;
1629 } else {
1630 flags &= ~SFL_BITMAP;
1631 }
1632 }
1633
1634 if (flags & SFL_EXTENTS) {
1635 /*
1636 * Since the extents btree lock is recursive we always
1637 * need exclusive access.
1638 */
1639 if (hfsmp->hfs_extents_cp) {
1640 (void) hfs_lock(hfsmp->hfs_extents_cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
1641
1642 if (vfs_isswapmount(hfsmp->hfs_mp)) {
1643 /*
1644 * Because we may need this lock on the pageout path (if a swapfile
1645 * allocation spills into the extents overflow tree), we grant the holder
1646 * of this lock the privilege of dipping into the reserve free pool. This
1647 * prevents a deadlock that could otherwise occur if this thread had to
1648 * allocate memory while holding the lock, yet the pageouts needed to
1649 * replenish the free list could not complete without it.
1650 */
1651 if (set_vm_privilege(TRUE) == FALSE) {
1652 /*
1653 * indicate that we need to drop vm_privilege
1654 * when we unlock
1655 */
1656 flags |= SFL_VM_PRIV;
1657 }
1658 }
1659 } else {
1660 flags &= ~SFL_EXTENTS;
1661 }
1662 }
1663
1664 return (flags);
1665 }
1666
1667 /*
1668 * Unlock HFS system file(s).
1669 */
1670 void
1671 hfs_systemfile_unlock(struct hfsmount *hfsmp, int flags)
1672 {
1673 if (!flags)
1674 return;
1675
1676 struct timeval tv;
1677 u_int32_t lastfsync;
1678 int numOfLockedBuffs;
1679
1680 if (hfsmp->jnl == NULL) {
1681 microuptime(&tv);
1682 lastfsync = tv.tv_sec;
1683 }
1684 if (flags & SFL_STARTUP && hfsmp->hfs_startup_cp) {
1685 hfs_unlock(hfsmp->hfs_startup_cp);
1686 }
1687 if (flags & SFL_ATTRIBUTE && hfsmp->hfs_attribute_cp) {
1688 if (hfsmp->jnl == NULL) {
1689 BTGetLastSync((FCB*)VTOF(hfsmp->hfs_attribute_vp), &lastfsync);
1690 numOfLockedBuffs = count_lock_queue();
1691 if ((numOfLockedBuffs > kMaxLockedMetaBuffers) ||
1692 ((numOfLockedBuffs > 1) && ((tv.tv_sec - lastfsync) >
1693 kMaxSecsForFsync))) {
1694 hfs_btsync(hfsmp->hfs_attribute_vp, HFS_SYNCTRANS);
1695 }
1696 }
1697 hfs_unlock(hfsmp->hfs_attribute_cp);
1698 }
1699 if (flags & SFL_CATALOG && hfsmp->hfs_catalog_cp) {
1700 if (hfsmp->jnl == NULL) {
1701 BTGetLastSync((FCB*)VTOF(hfsmp->hfs_catalog_vp), &lastfsync);
1702 numOfLockedBuffs = count_lock_queue();
1703 if ((numOfLockedBuffs > kMaxLockedMetaBuffers) ||
1704 ((numOfLockedBuffs > 1) && ((tv.tv_sec - lastfsync) >
1705 kMaxSecsForFsync))) {
1706 hfs_btsync(hfsmp->hfs_catalog_vp, HFS_SYNCTRANS);
1707 }
1708 }
1709 hfs_unlock(hfsmp->hfs_catalog_cp);
1710 }
1711 if (flags & SFL_BITMAP && hfsmp->hfs_allocation_cp) {
1712 hfs_unlock(hfsmp->hfs_allocation_cp);
1713 }
1714 if (flags & SFL_EXTENTS && hfsmp->hfs_extents_cp) {
1715 if (hfsmp->jnl == NULL) {
1716 BTGetLastSync((FCB*)VTOF(hfsmp->hfs_extents_vp), &lastfsync);
1717 numOfLockedBuffs = count_lock_queue();
1718 if ((numOfLockedBuffs > kMaxLockedMetaBuffers) ||
1719 ((numOfLockedBuffs > 1) && ((tv.tv_sec - lastfsync) >
1720 kMaxSecsForFsync))) {
1721 hfs_btsync(hfsmp->hfs_extents_vp, HFS_SYNCTRANS);
1722 }
1723 }
1724 hfs_unlock(hfsmp->hfs_extents_cp);
1725
1726 if (flags & SFL_VM_PRIV) {
1727 /*
1728 * revoke the vm_privilege we granted this thread
1729 * now that we have unlocked the overflow extents
1730 */
1731 set_vm_privilege(FALSE);
1732 }
1733 }
1734 }
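
/*
 * Illustrative sketch (editor's example, not original source): the
 * canonical pairing of hfs_systemfile_lock/hfs_systemfile_unlock.
 * The return value is treated as opaque and handed back verbatim to
 * the unlock call; extra locks (extents, bitmap) may have been taken
 * on the caller's behalf, so the original request flags must never
 * be reused for unlock.
 */
#if 0 /* example only */
static int
example_catalog_lookup(struct hfsmount *hfsmp, struct cat_desc *descp,
		       struct cat_attr *attrp)
{
	int lockflags, error;

	lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
	error = cat_lookup(hfsmp, descp, 0, 0, NULL, attrp, NULL, NULL);
	hfs_systemfile_unlock(hfsmp, lockflags);	/* the opaque value, not SFL_CATALOG */

	return error;
}
#endif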
1735
1736
1737 /*
1738 * RequireFileLock
1739 *
1740 * Check to see if a vnode is locked in the current context
1741 * This is to be used for debugging purposes only!!
1742 */
1743 #if DEBUG
1744 void RequireFileLock(FileReference vp, int shareable)
1745 {
1746 int locked;
1747
1748 /* The extents btree and allocation bitmap are always exclusive. */
1749 if (VTOC(vp)->c_fileid == kHFSExtentsFileID ||
1750 VTOC(vp)->c_fileid == kHFSAllocationFileID) {
1751 shareable = 0;
1752 }
1753
1754 locked = VTOC(vp)->c_lockowner == current_thread();
1755
1756 if (!locked && !shareable) {
1757 switch (VTOC(vp)->c_fileid) {
1758 case kHFSExtentsFileID:
1759 panic("hfs: extents btree not locked! v: 0x%08X\n #\n", (u_int)vp);
1760 break;
1761 case kHFSCatalogFileID:
1762 panic("hfs: catalog btree not locked! v: 0x%08X\n #\n", (u_int)vp);
1763 break;
1764 case kHFSAllocationFileID:
1765 /* The allocation file can hide behind the journal lock. */
1766 if (VTOHFS(vp)->jnl == NULL)
1767 panic("hfs: allocation file not locked! v: 0x%08X\n #\n", (u_int)vp);
1768 break;
1769 case kHFSStartupFileID:
1770 panic("hfs: startup file not locked! v: 0x%08X\n #\n", (u_int)vp);
1771 case kHFSAttributesFileID:
1772 panic("hfs: attributes btree not locked! v: 0x%08X\n #\n", (u_int)vp);
1773 break;
1774 }
1775 }
1776 }
1777 #endif // DEBUG
1778
1779
1780 /*
1781 * There are three ways to qualify for ownership rights on an object:
1782 *
1783 * 1. (a) Your UID matches the cnode's UID.
1784 * (b) The object in question is owned by "unknown"
1785 * 2. (a) Permissions on the filesystem are being ignored and
1786 * your UID matches the replacement UID.
1787 * (b) Permissions on the filesystem are being ignored and
1788 * the replacement UID is "unknown".
1789 * 3. You are root.
1790 *
1791 */
1792 int
1793 hfs_owner_rights(struct hfsmount *hfsmp, uid_t cnode_uid, kauth_cred_t cred,
1794 __unused struct proc *p, int invokesuperuserstatus)
1795 {
1796 if ((kauth_cred_getuid(cred) == cnode_uid) || /* [1a] */
1797 (cnode_uid == UNKNOWNUID) || /* [1b] */
1798 ((((unsigned int)vfs_flags(HFSTOVFS(hfsmp))) & MNT_UNKNOWNPERMISSIONS) && /* [2] */
1799 ((kauth_cred_getuid(cred) == hfsmp->hfs_uid) || /* [2a] */
1800 (hfsmp->hfs_uid == UNKNOWNUID))) || /* [2b] */
1801 (invokesuperuserstatus && (suser(cred, 0) == 0))) { /* [3] */
1802 return (0);
1803 } else {
1804 return (EPERM);
1805 }
1806 }
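
/*
 * Illustrative sketch (editor's example): a typical permission gate
 * built on hfs_owner_rights(). Passing a non-zero final argument lets
 * root qualify via rule 3 above. The vnop context "ctx" and the cnode
 * c_uid field access are assumptions for illustration.
 */
#if 0 /* example only */
	int error;

	error = hfs_owner_rights(hfsmp, cp->c_uid, vfs_context_ucred(ctx),
				 vfs_context_proc(ctx), 1);
	if (error)
		return error;	/* EPERM: not the owner and not root */
#endif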
1807
1808
1809 u_int32_t BestBlockSizeFit(u_int32_t allocationBlockSize,
1810 u_int32_t blockSizeLimit,
1811 u_int32_t baseMultiple) {
1812 /*
1813 Compute the optimal (largest) block size that evenly divides allocationBlockSize,
1814 is no larger than the specified limit, and is a multiple of baseMultiple.
1815 */
1816 int baseBlockCount, blockCount;
1817 u_int32_t trialBlockSize;
1818
1819 if (allocationBlockSize % baseMultiple != 0) {
1820 /*
1821 Whoops: the allocation blocks aren't even multiples of the specified base;
1822 no amount of dividing them into even parts will yield a multiple, either!
1823 */
1824 return 512; /* Hope for the best */
1825 }
1826
1827 /* Try the obvious winner first, to prevent 12K allocation blocks, for instance,
1828 from being handled as two 6K logical blocks instead of 3 4K logical blocks.
1829 Even though the former (the result of the loop below) is the larger allocation
1830 block size, the latter is more efficient: */
1831 if (allocationBlockSize % PAGE_SIZE == 0) return PAGE_SIZE;
1832
1833 /* No clear winner exists: pick the largest even fraction <= blockSizeLimit: */
1834 baseBlockCount = allocationBlockSize / baseMultiple; /* Now guaranteed to be an even multiple */
1835
1836 for (blockCount = baseBlockCount; blockCount > 0; --blockCount) {
1837 trialBlockSize = blockCount * baseMultiple;
1838 if (allocationBlockSize % trialBlockSize == 0) { /* An even multiple? */
1839 if ((trialBlockSize <= blockSizeLimit) &&
1840 (trialBlockSize % baseMultiple == 0)) {
1841 return trialBlockSize;
1842 }
1843 }
1844 }
1845
1846 /* Note: we should never get here, since blockCount = 1 should always work,
1847 but this is nice and safe and makes the compiler happy, too ... */
1848 return 512;
1849 }
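
/*
 * Worked example (illustrative): on a system with 4KB pages, a 12KB
 * allocation block takes the PAGE_SIZE short-circuit above, so
 * BestBlockSizeFit(12288, MAXBSIZE, 512) == 4096. A 3KB allocation
 * block is not page-aligned, so the loop runs with baseBlockCount = 6
 * and returns the first even divisor within the limit: 3072 itself.
 */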
1850
1851
1852 u_int32_t
1853 GetFileInfo(ExtendedVCB *vcb, __unused u_int32_t dirid, const char *name,
1854 struct cat_attr *fattr, struct cat_fork *forkinfo)
1855 {
1856 struct hfsmount * hfsmp;
1857 struct cat_desc jdesc;
1858 int lockflags;
1859 int error;
1860
1861 if (vcb->vcbSigWord != kHFSPlusSigWord)
1862 return (0);
1863
1864 hfsmp = VCBTOHFS(vcb);
1865
1866 memset(&jdesc, 0, sizeof(struct cat_desc));
1867 jdesc.cd_parentcnid = kRootDirID;
1868 jdesc.cd_nameptr = (const u_int8_t *)name;
1869 jdesc.cd_namelen = strlen(name);
1870
1871 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
1872 error = cat_lookup(hfsmp, &jdesc, 0, 0, NULL, fattr, forkinfo, NULL);
1873 hfs_systemfile_unlock(hfsmp, lockflags);
1874
1875 if (error == 0) {
1876 return (fattr->ca_fileid);
1877 }
1878
1879 return (0); /* XXX what callers expect on an error */
1882 }
1883
1884
1885 /*
1886 * On HFS Plus volumes, there can be orphaned files and directories:
1887 * these are files or directories that were unlinked while busy.
1888 * If the volume was not cleanly unmounted then some of these may
1889 * have persisted and need to be removed.
1890 */
1891 void
1892 hfs_remove_orphans(struct hfsmount * hfsmp)
1893 {
1894 struct BTreeIterator * iterator = NULL;
1895 struct FSBufferDescriptor btdata;
1896 struct HFSPlusCatalogFile filerec;
1897 struct HFSPlusCatalogKey * keyp;
1898 struct proc *p = current_proc();
1899 FCB *fcb;
1900 ExtendedVCB *vcb;
1901 char filename[32];
1902 char tempname[32];
1903 size_t namelen;
1904 cat_cookie_t cookie;
1905 int catlock = 0;
1906 int catreserve = 0;
1907 bool started_tr = false;
1908 int lockflags;
1909 int result;
1910 int orphaned_files = 0;
1911 int orphaned_dirs = 0;
1912
1913 bzero(&cookie, sizeof(cookie));
1914
1915 if (hfsmp->hfs_flags & HFS_CLEANED_ORPHANS)
1916 return;
1917
1918 vcb = HFSTOVCB(hfsmp);
1919 fcb = VTOF(hfsmp->hfs_catalog_vp);
1920
1921 btdata.bufferAddress = &filerec;
1922 btdata.itemSize = sizeof(filerec);
1923 btdata.itemCount = 1;
1924
1925 iterator = hfs_mallocz(sizeof(*iterator));
1926
1927 /* Build a key to "temp" */
1928 keyp = (HFSPlusCatalogKey*)&iterator->key;
1929 keyp->parentID = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid;
1930 keyp->nodeName.length = 4; /* "temp" */
1931 keyp->keyLength = kHFSPlusCatalogKeyMinimumLength + keyp->nodeName.length * 2;
1932 keyp->nodeName.unicode[0] = 't';
1933 keyp->nodeName.unicode[1] = 'e';
1934 keyp->nodeName.unicode[2] = 'm';
1935 keyp->nodeName.unicode[3] = 'p';
1936
1937 /*
1938 * Position the iterator just before the first real temp file/dir.
1939 */
1940 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
1941 (void) BTSearchRecord(fcb, iterator, NULL, NULL, iterator);
1942 hfs_systemfile_unlock(hfsmp, lockflags);
1943
1944 /* Visit all the temp files/dirs in the HFS+ private directory. */
1945 for (;;) {
1946 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
1947 result = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL);
1948 hfs_systemfile_unlock(hfsmp, lockflags);
1949 if (result)
1950 break;
1951 if (keyp->parentID != hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid)
1952 break;
1953
1954 (void) utf8_encodestr(keyp->nodeName.unicode, keyp->nodeName.length * 2,
1955 (u_int8_t *)filename, &namelen, sizeof(filename), 0, 0);
1956
1957 (void) snprintf(tempname, sizeof(tempname), "%s%d",
1958 HFS_DELETE_PREFIX, filerec.fileID);
1959
1960 /*
1961 * Delete all files (and directories) named "tempxxx",
1962 * where xxx is the file's cnid in decimal.
1963 *
1964 */
1965 if (bcmp(tempname, filename, namelen + 1) != 0)
1966 continue;
1967
1968 struct filefork dfork;
1969 struct filefork rfork;
1970 struct cnode cnode;
1971 int mode = 0;
1972
1973 bzero(&dfork, sizeof(dfork));
1974 bzero(&rfork, sizeof(rfork));
1975 bzero(&cnode, sizeof(cnode));
1976
1977 if (hfs_start_transaction(hfsmp) != 0) {
1978 printf("hfs_remove_orphans: failed to start transaction\n");
1979 goto exit;
1980 }
1981 started_tr = true;
1982
1983 /*
1984 * Reserve some space in the Catalog file.
1985 */
1986 if (cat_preflight(hfsmp, CAT_DELETE, &cookie, p) != 0) {
1987 printf("hfs_remove_orphans: cat_preflight failed\n");
1988 goto exit;
1989 }
1990 catreserve = 1;
1991
1992 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
1993 catlock = 1;
1994
1995 /* Build a fake cnode */
1996 cat_convertattr(hfsmp, (CatalogRecord *)&filerec, &cnode.c_attr,
1997 &dfork.ff_data, &rfork.ff_data);
1998 cnode.c_desc.cd_parentcnid = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid;
1999 cnode.c_desc.cd_nameptr = (const u_int8_t *)filename;
2000 cnode.c_desc.cd_namelen = namelen;
2001 cnode.c_desc.cd_cnid = cnode.c_attr.ca_fileid;
2002 cnode.c_blocks = dfork.ff_blocks + rfork.ff_blocks;
2003
2004 /* Position iterator at previous entry */
2005 if (BTIterateRecord(fcb, kBTreePrevRecord, iterator,
2006 NULL, NULL) != 0) {
2007 break;
2008 }
2009
2010 /* Truncate the file to zero (both forks) */
2011 if (dfork.ff_blocks > 0) {
2012 u_int64_t fsize;
2013
2014 dfork.ff_cp = &cnode;
2015 cnode.c_datafork = &dfork;
2016 cnode.c_rsrcfork = NULL;
2017 fsize = (u_int64_t)dfork.ff_blocks * (u_int64_t)HFSTOVCB(hfsmp)->blockSize;
2018 while (fsize > 0) {
2019 if (fsize > HFS_BIGFILE_SIZE) {
2020 fsize -= HFS_BIGFILE_SIZE;
2021 } else {
2022 fsize = 0;
2023 }
2024
2025 if (TruncateFileC(vcb, (FCB*)&dfork, fsize, 1, 0,
2026 cnode.c_attr.ca_fileid, false) != 0) {
2027 printf("hfs: error truncating data fork!\n");
2028 break;
2029 }
2030
2031 //
2032 // if we're iteratively truncating this file down,
2033 // then end the transaction and start a new one so
2034 // that no one transaction gets too big.
2035 //
2036 if (fsize > 0) {
2037 /* Drop system file locks before starting
2038 * another transaction to preserve lock order.
2039 */
2040 hfs_systemfile_unlock(hfsmp, lockflags);
2041 catlock = 0;
2042 hfs_end_transaction(hfsmp);
2043
2044 if (hfs_start_transaction(hfsmp) != 0) {
2045 started_tr = false;
2046 goto exit;
2047 }
2048 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
2049 catlock = 1;
2050 }
2051 }
2052 }
2053
2054 if (rfork.ff_blocks > 0) {
2055 rfork.ff_cp = &cnode;
2056 cnode.c_datafork = NULL;
2057 cnode.c_rsrcfork = &rfork;
2058 if (TruncateFileC(vcb, (FCB*)&rfork, 0, 1, 1, cnode.c_attr.ca_fileid, false) != 0) {
2059 printf("hfs: error truncating rsrc fork!\n");
2060 break;
2061 }
2062 }
2063
2064 // Deal with extended attributes
2065 if (ISSET(cnode.c_attr.ca_recflags, kHFSHasAttributesMask)) {
2066 // hfs_removeallattr uses its own transactions
2067 hfs_systemfile_unlock(hfsmp, lockflags);
2068 catlock = false;
2069 hfs_end_transaction(hfsmp);
2070
2071 hfs_removeallattr(hfsmp, cnode.c_attr.ca_fileid, &started_tr);
2072
2073 if (!started_tr) {
2074 if (hfs_start_transaction(hfsmp) != 0) {
2075 printf("hfs_remove_orphans: failed to start transaction\n");
2076 goto exit;
2077 }
2078 started_tr = true;
2079 }
2080
2081 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
2082 catlock = 1;
2083 }
2084
2085 /* Remove the file or folder record from the Catalog */
2086 if (cat_delete(hfsmp, &cnode.c_desc, &cnode.c_attr) != 0) {
2087 printf("hfs_remove_orphans: error deleting cat rec for id %d!\n", cnode.c_desc.cd_cnid);
2088 hfs_systemfile_unlock(hfsmp, lockflags);
2089 catlock = 0;
2090 hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2091 break;
2092 }
2093
2094 mode = cnode.c_attr.ca_mode & S_IFMT;
2095
2096 if (mode == S_IFDIR) {
2097 orphaned_dirs++;
2098 }
2099 else {
2100 orphaned_files++;
2101 }
2102
2103 /* Update parent and volume counts */
2104 hfsmp->hfs_private_attr[FILE_HARDLINKS].ca_entries--;
2105 if (mode == S_IFDIR) {
2106 DEC_FOLDERCOUNT(hfsmp, hfsmp->hfs_private_attr[FILE_HARDLINKS]);
2107 }
2108
2109 (void)cat_update(hfsmp, &hfsmp->hfs_private_desc[FILE_HARDLINKS],
2110 &hfsmp->hfs_private_attr[FILE_HARDLINKS], NULL, NULL);
2111
2112 /* Drop locks and end the transaction */
2113 hfs_systemfile_unlock(hfsmp, lockflags);
2114 cat_postflight(hfsmp, &cookie, p);
2115 catlock = catreserve = 0;
2116
2117 /*
2118 Now that Catalog is unlocked, update the volume info, making
2119 sure to differentiate between files and directories
2120 */
2121 if (mode == S_IFDIR) {
2122 hfs_volupdate(hfsmp, VOL_RMDIR, 0);
2123 }
2124 else {
2125 hfs_volupdate(hfsmp, VOL_RMFILE, 0);
2126 }
2127
2128 hfs_end_transaction(hfsmp);
2129 started_tr = false;
2130 } /* end for */
2131
2132 exit:
2133
2134 if (orphaned_files > 0 || orphaned_dirs > 0)
2135 printf("hfs: Removed %d orphaned / unlinked files and %d directories \n", orphaned_files, orphaned_dirs);
2136 if (catlock) {
2137 hfs_systemfile_unlock(hfsmp, lockflags);
2138 }
2139 if (catreserve) {
2140 cat_postflight(hfsmp, &cookie, p);
2141 }
2142 if (started_tr) {
2143 hfs_end_transaction(hfsmp);
2144 }
2145
2146 hfs_free(iterator, sizeof(*iterator));
2147 hfsmp->hfs_flags |= HFS_CLEANED_ORPHANS;
2148 }
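
/*
 * Illustrative note: open-unlinked objects live in the private
 * metadata directory under names of the form HFS_DELETE_PREFIX
 * followed by the cnid in decimal (e.g. "temp123" for file ID 123,
 * matching the snprintf above), which is why the scan above starts
 * from a synthesized "temp" key.
 */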
2149
2150
2151 /*
2152 * This will return the correct logical block size for a given vnode.
2153 * For most files it is the allocation block size; for metadata files
2154 * like the B-trees, it is kept as part of the B-tree's private nodeSize.
2155 */
2156 u_int32_t
2157 GetLogicalBlockSize(struct vnode *vp)
2158 {
2159 u_int32_t logBlockSize;
2160
2161 hfs_assert(vp != NULL);
2162
2163 /* start with default */
2164 logBlockSize = VTOHFS(vp)->hfs_logBlockSize;
2165
2166 if (vnode_issystem(vp)) {
2167 if (VTOF(vp)->fcbBTCBPtr != NULL) {
2168 BTreeInfoRec bTreeInfo;
2169
2170 /*
2171 * We do not lock the B-trees: if we are fetching a block, the tree
2172 * should already be locked by the caller. All we want is the
2173 * nodeSize, which never changes once the tree is created, so there
2174 * is no need to take the lock just to read it.
2176 */
2177
2178 (void) BTGetInformation (VTOF(vp), kBTreeInfoVersion, &bTreeInfo);
2179
2180 logBlockSize = bTreeInfo.nodeSize;
2181
2182 } else if (VTOC(vp)->c_fileid == kHFSAllocationFileID) {
2183 logBlockSize = VTOVCB(vp)->vcbVBMIOSize;
2184 }
2185 }
2186
2187 hfs_assert(logBlockSize > 0);
2188
2189 return logBlockSize;
2190 }
2191
2192 #if HFS_SPARSE_DEV
2193 static bool hfs_get_backing_free_blks(hfsmount_t *hfsmp, uint64_t *pfree_blks)
2194 {
2195 struct vfsstatfs *vfsp; /* 272 bytes */
2196 uint64_t vfreeblks;
2197 struct timeval now;
2198
2199 hfs_lock_mount(hfsmp);
2200
2201 vnode_t backing_vp = hfsmp->hfs_backingvp;
2202 if (!backing_vp) {
2203 hfs_unlock_mount(hfsmp);
2204 return false;
2205 }
2206
2207 // usecount is not enough; we need iocount
2208 if (vnode_get(backing_vp)) {
2209 hfs_unlock_mount(hfsmp);
2210 *pfree_blks = 0;
2211 return true;
2212 }
2213
2214 uint32_t loanedblks = hfsmp->loanedBlocks + hfsmp->lockedBlocks;
2215 uint32_t bandblks = hfsmp->hfs_sparsebandblks;
2216 uint64_t maxblks = hfsmp->hfs_backingfs_maxblocks;
2217
2218 hfs_unlock_mount(hfsmp);
2219
2220 mount_t backingfs_mp = vnode_mount(backing_vp);
2221
2222 microtime(&now);
2223 if ((now.tv_sec - hfsmp->hfs_last_backingstatfs) >= 1) {
2224 vfs_update_vfsstat(backingfs_mp, vfs_context_kernel(), VFS_KERNEL_EVENT);
2225 hfsmp->hfs_last_backingstatfs = now.tv_sec;
2226 }
2227
2228 if (!(vfsp = vfs_statfs(backingfs_mp))) {
2229 vnode_put(backing_vp);
2230 return false;
2231 }
2232
2233 vfreeblks = vfsp->f_bavail;
2234 /* Normalize block count if needed. */
2235 if (vfsp->f_bsize != hfsmp->blockSize)
2236 vfreeblks = vfreeblks * vfsp->f_bsize / hfsmp->blockSize;
2237 if (vfreeblks > bandblks)
2238 vfreeblks -= bandblks;
2239 else
2240 vfreeblks = 0;
2241
2242 /*
2243 * Take into account any delayed allocations. It is not
2244 * certain what the original reason for the "2 *" is. Most
2245 * likely it is to allow for additional requirements in the
2246 * host file system and metadata required by disk images. The
2247 * number of loaned blocks is likely to be small and we will
2248 * stop using them as we get close to the limit.
2249 */
2250 loanedblks = 2 * loanedblks;
2251 if (vfreeblks > loanedblks)
2252 vfreeblks -= loanedblks;
2253 else
2254 vfreeblks = 0;
2255
2256 if (maxblks)
2257 vfreeblks = MIN(vfreeblks, maxblks);
2258
2259 vnode_put(backing_vp);
2260
2261 *pfree_blks = vfreeblks;
2262
2263 return true;
2264 }
2265 #endif
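
/*
 * Worked example (illustrative): normalizing the backing store's free
 * space to this volume's allocation block size. If the host file
 * system reports f_bavail = 1000 blocks of f_bsize = 512 bytes and the
 * sparse image uses 4096-byte allocation blocks, then
 *
 *	vfreeblks = 1000 * 512 / 4096 = 125 allocation blocks
 *
 * before the band and loaned-block deductions are applied.
 */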
2266
2267 u_int32_t
2268 hfs_free_cnids(struct hfsmount * hfsmp)
2269 {
2270 return HFS_MAX_FILES - hfsmp->hfs_filecount - hfsmp->hfs_dircount;
2271 }
2272
2273 u_int32_t
2274 hfs_freeblks(struct hfsmount * hfsmp, int wantreserve)
2275 {
2276 u_int32_t freeblks;
2277 u_int32_t rsrvblks;
2278 u_int32_t loanblks;
2279
2280 /*
2281 * We don't bother taking the mount lock
2282 * to look at these values since the values
2283 * themselves are each updated atomically
2284 * on aligned addresses.
2285 */
2286 freeblks = hfsmp->freeBlocks;
2287 rsrvblks = hfsmp->reserveBlocks;
2288 loanblks = hfsmp->loanedBlocks + hfsmp->lockedBlocks;
2289 if (wantreserve) {
2290 if (freeblks > rsrvblks)
2291 freeblks -= rsrvblks;
2292 else
2293 freeblks = 0;
2294 }
2295 if (freeblks > loanblks)
2296 freeblks -= loanblks;
2297 else
2298 freeblks = 0;
2299
2300 #if HFS_SPARSE_DEV
2301 /*
2302 * When the underlying device is sparse, check the
2303 * available space on the backing store volume.
2304 */
2305 uint64_t vfreeblks;
2306 if (hfs_get_backing_free_blks(hfsmp, &vfreeblks))
2307 freeblks = MIN(freeblks, vfreeblks);
2308 #endif /* HFS_SPARSE_DEV */
2309
2310 return (freeblks);
2311 }
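
/*
 * Worked example (illustrative): with freeBlocks = 1000,
 * reserveBlocks = 100 and loanedBlocks + lockedBlocks = 50,
 * hfs_freeblks(hfsmp, 1) reports 1000 - 100 - 50 = 850 blocks while
 * hfs_freeblks(hfsmp, 0) reports 950, since the reserve is skipped.
 * Both paths clamp at zero instead of underflowing the unsigned
 * counters.
 */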
2312
2313 /*
2314 * Map HFS Common errors (negative) to BSD error codes (positive).
2315 * Positive errors (i.e., BSD errors) are passed through unchanged.
2316 */
2317 short MacToVFSError(OSErr err)
2318 {
2319 if (err >= 0)
2320 return err;
2321
2322 /* BSD/VFS internal errnos */
2323 switch (err) {
2324 case HFS_ERESERVEDNAME: /* -8 */
2325 return err;
2326 }
2327
2328 switch (err) {
2329 case dskFulErr: /* -34 */
2330 case btNoSpaceAvail: /* -32733 */
2331 return ENOSPC;
2332 case fxOvFlErr: /* -32750 */
2333 return EOVERFLOW;
2334
2335 case btBadNode: /* -32731 */
2336 return EIO;
2337
2338 case memFullErr: /* -108 */
2339 return ENOMEM; /* +12 */
2340
2341 case cmExists: /* -32718 */
2342 case btExists: /* -32734 */
2343 return EEXIST; /* +17 */
2344
2345 case cmNotFound: /* -32719 */
2346 case btNotFound: /* -32735 */
2347 return ENOENT; /* +2 */
2348
2349 case cmNotEmpty: /* -32717 */
2350 return ENOTEMPTY; /* 66 */
2351
2352 case cmFThdDirErr: /* -32714 */
2353 return EISDIR; /* 21 */
2354
2355 case fxRangeErr: /* -32751 */
2356 return ERANGE;
2357
2358 case bdNamErr: /* -37 */
2359 return ENAMETOOLONG; /* 63 */
2360
2361 case paramErr: /* -50 */
2362 case fileBoundsErr: /* -1309 */
2363 return EINVAL; /* +22 */
2364
2365 case fsBTBadNodeSize:
2366 return ENXIO;
2367
2368 default:
2369 return EIO; /* +5 */
2370 }
2371 }
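
/*
 * Illustrative sketch (editor's example): B-tree and Catalog Manager
 * routines return classic Mac OS (negative) result codes, which the
 * VFS-facing code converts at the boundary, e.g.:
 */
#if 0 /* example only */
	result = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL);
	if (result)
		return MacToVFSError(result);	/* e.g. btNotFound -> ENOENT */
#endif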
2372
2373
2374 /*
2375 * Find the current thread's directory hint for a given index.
2376 *
2377 * Requires an exclusive lock on directory cnode.
2378 *
2379 * Use detach if the cnode lock must be dropped while the hint is still active.
2380 */
2381 directoryhint_t *
2382 hfs_getdirhint(struct cnode *dcp, int index, int detach)
2383 {
2384 struct timeval tv;
2385 directoryhint_t *hint;
2386 boolean_t need_remove, need_init;
2387 const u_int8_t * name;
2388
2389 microuptime(&tv);
2390
2391 /*
2392 * Look for an existing hint first. If not found, create a new one (when
2393 * the list is not full) or recycle the oldest hint. Since new hints are
2394 * always added to the head of the list, the last hint is always the
2395 * oldest.
2396 */
2397 TAILQ_FOREACH(hint, &dcp->c_hintlist, dh_link) {
2398 if (hint->dh_index == index)
2399 break;
2400 }
2401 if (hint != NULL) { /* found an existing hint */
2402 need_init = false;
2403 need_remove = true;
2404 } else { /* cannot find an existing hint */
2405 need_init = true;
2406 if (dcp->c_dirhintcnt < HFS_MAXDIRHINTS) { /* we don't need recycling */
2407 /* Create a default directory hint */
2408 hint = hfs_zalloc(HFS_DIRHINT_ZONE);
2409 ++dcp->c_dirhintcnt;
2410 need_remove = false;
2411 } else { /* recycle the last (i.e., the oldest) hint */
2412 hint = TAILQ_LAST(&dcp->c_hintlist, hfs_hinthead);
2413 if ((hint->dh_desc.cd_flags & CD_HASBUF) &&
2414 (name = hint->dh_desc.cd_nameptr)) {
2415 hint->dh_desc.cd_nameptr = NULL;
2416 hint->dh_desc.cd_namelen = 0;
2417 hint->dh_desc.cd_flags &= ~CD_HASBUF;
2418 vfs_removename((const char *)name);
2419 }
2420 need_remove = true;
2421 }
2422 }
2423
2424 if (need_remove)
2425 TAILQ_REMOVE(&dcp->c_hintlist, hint, dh_link);
2426
2427 if (detach)
2428 --dcp->c_dirhintcnt;
2429 else
2430 TAILQ_INSERT_HEAD(&dcp->c_hintlist, hint, dh_link);
2431
2432 if (need_init) {
2433 hint->dh_index = index;
2434 hint->dh_desc.cd_flags = 0;
2435 hint->dh_desc.cd_encoding = 0;
2436 hint->dh_desc.cd_namelen = 0;
2437 hint->dh_desc.cd_nameptr = NULL;
2438 hint->dh_desc.cd_parentcnid = dcp->c_fileid;
2439 hint->dh_desc.cd_hint = dcp->c_childhint;
2440 hint->dh_desc.cd_cnid = 0;
2441 }
2442 hint->dh_time = tv.tv_sec;
2443 return (hint);
2444 }
2445
2446 /*
2447 * Release a single directory hint.
2448 *
2449 * Requires an exclusive lock on directory cnode.
2450 */
2451 void
2452 hfs_reldirhint(struct cnode *dcp, directoryhint_t * relhint)
2453 {
2454 const u_int8_t * name;
2455 directoryhint_t *hint;
2456
2457 /* Check if item is on list (could be detached) */
2458 TAILQ_FOREACH(hint, &dcp->c_hintlist, dh_link) {
2459 if (hint == relhint) {
2460 TAILQ_REMOVE(&dcp->c_hintlist, relhint, dh_link);
2461 --dcp->c_dirhintcnt;
2462 break;
2463 }
2464 }
2465 name = relhint->dh_desc.cd_nameptr;
2466 if ((relhint->dh_desc.cd_flags & CD_HASBUF) && (name != NULL)) {
2467 relhint->dh_desc.cd_nameptr = NULL;
2468 relhint->dh_desc.cd_namelen = 0;
2469 relhint->dh_desc.cd_flags &= ~CD_HASBUF;
2470 vfs_removename((const char *)name);
2471 }
2472 hfs_zfree(relhint, HFS_DIRHINT_ZONE);
2473 }
2474
2475 /*
2476 * Release directory hints for given directory
2477 *
2478 * Requires an exclusive lock on directory cnode.
2479 */
2480 void
2481 hfs_reldirhints(struct cnode *dcp, int stale_hints_only)
2482 {
2483 struct timeval tv;
2484 directoryhint_t *hint, *prev;
2485 const u_int8_t * name;
2486
2487 if (stale_hints_only)
2488 microuptime(&tv);
2489
2490 /* searching from the oldest to the newest, so we can stop early when releasing stale hints only */
2491 for (hint = TAILQ_LAST(&dcp->c_hintlist, hfs_hinthead); hint != NULL; hint = prev) {
2492 if (stale_hints_only && (tv.tv_sec - hint->dh_time) < HFS_DIRHINT_TTL)
2493 break; /* stop here if this entry is too new */
2494 name = hint->dh_desc.cd_nameptr;
2495 if ((hint->dh_desc.cd_flags & CD_HASBUF) && (name != NULL)) {
2496 hint->dh_desc.cd_nameptr = NULL;
2497 hint->dh_desc.cd_namelen = 0;
2498 hint->dh_desc.cd_flags &= ~CD_HASBUF;
2499 vfs_removename((const char *)name);
2500 }
2501 prev = TAILQ_PREV(hint, hfs_hinthead, dh_link); /* must save this pointer before freeing this node */
2502 TAILQ_REMOVE(&dcp->c_hintlist, hint, dh_link);
2503 hfs_zfree(hint, HFS_DIRHINT_ZONE);
2504 --dcp->c_dirhintcnt;
2505 }
2506 }
2507
2508 /*
2509 * Insert a detached directory hint back into the list of dirhints.
2510 *
2511 * Requires an exclusive lock on directory cnode.
2512 */
2513 void
2514 hfs_insertdirhint(struct cnode *dcp, directoryhint_t * hint)
2515 {
2516 directoryhint_t *test;
2517
2518 TAILQ_FOREACH(test, &dcp->c_hintlist, dh_link) {
2519 if (test == hint)
2520 panic("hfs_insertdirhint: hint %p already on list!", hint);
2521 }
2522
2523 TAILQ_INSERT_HEAD(&dcp->c_hintlist, hint, dh_link);
2524 ++dcp->c_dirhintcnt;
2525 }
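
/*
 * Illustrative sketch (editor's example): the detach/insert pair lets
 * a caller keep a hint alive across a point where the directory cnode
 * lock must be dropped. dcp and index are assumed to come from the
 * caller, with dcp locked exclusive on entry.
 */
#if 0 /* example only */
	directoryhint_t *hint;

	hint = hfs_getdirhint(dcp, index, 1 /* detach */);
	hfs_unlock(dcp);
	/* ... work that must not be done under the cnode lock ... */
	hfs_lock(dcp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
	hfs_insertdirhint(dcp, hint);	/* or hfs_reldirhint(dcp, hint) */
#endif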
2526
2527 /*
2528 * Perform a case-insensitive compare of two UTF-8 filenames.
2529 *
2530 * Returns 0 if the strings match.
2531 */
2532 int
2533 hfs_namecmp(const u_int8_t *str1, size_t len1, const u_int8_t *str2, size_t len2)
2534 {
2535 u_int16_t *ustr1, *ustr2;
2536 size_t ulen1, ulen2;
2537 size_t maxbytes;
2538 int cmp = -1;
2539
2540 if (len1 != len2)
2541 return (cmp);
2542
2543 maxbytes = kHFSPlusMaxFileNameChars << 1;
2544 ustr1 = hfs_malloc(maxbytes << 1);
2545 ustr2 = ustr1 + (maxbytes >> 1);
2546
2547 if (utf8_decodestr(str1, len1, ustr1, &ulen1, maxbytes, ':', 0) != 0)
2548 goto out;
2549 if (utf8_decodestr(str2, len2, ustr2, &ulen2, maxbytes, ':', 0) != 0)
2550 goto out;
2551
2552 cmp = FastUnicodeCompare(ustr1, ulen1>>1, ustr2, ulen2>>1);
2553 out:
2554 hfs_free(ustr1, maxbytes << 1);
2555 return (cmp);
2556 }
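
/*
 * Illustrative sketch (editor's example): hfs_namecmp() applies HFS+
 * case-insensitive (FastUnicodeCompare) semantics to two UTF-8 names.
 */
#if 0 /* example only */
	if (hfs_namecmp((const u_int8_t *)"README", 6,
			(const u_int8_t *)"readme", 6) == 0) {
		/* treated as the same name on a case-insensitive volume */
	}
#endif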
2557
2558 typedef struct jopen_cb_info {
2559 mount_t mp;
2560 off_t jsize;
2561 char *desired_uuid;
2562 struct vnode *jvp;
2563 size_t blksize;
2564 int need_clean;
2565 int need_init;
2566 } jopen_cb_info;
2567
2568 static int
2569 journal_open_cb(const char *bsd_dev_name, const char *uuid_str, void *arg)
2570 {
2571 jopen_cb_info *ji = (jopen_cb_info *)arg;
2572 char bsd_name[256];
2573 int error;
2574
2575 strlcpy(&bsd_name[0], "/dev/", sizeof(bsd_name));
2576 strlcpy(&bsd_name[5], bsd_dev_name, sizeof(bsd_name)-5);
2577
2578 if ((error = vnode_lookup(bsd_name, VNODE_LOOKUP_NOFOLLOW, &ji->jvp,
2579 vfs_context_kernel()))) {
2580 printf("hfs: journal open cb: error %d looking up device %s (dev uuid %s)\n", error, bsd_name, uuid_str);
2581 return 1; // keep iterating
2582 }
2583
2584 struct vnop_open_args oargs = {
2585 .a_vp = ji->jvp,
2586 .a_mode = FREAD | FWRITE,
2587 .a_context = vfs_context_kernel(),
2588 };
2589
2590 if (spec_open(&oargs)) {
2591 vnode_put(ji->jvp);
2592 ji->jvp = NULL;
2593 return 1;
2594 }
2595
2596 // if the journal is dirty and we didn't specify a desired
2597 // journal device uuid, then do not use the journal. but
2598 // if the journal is just invalid (e.g. it hasn't been
2599 // initialized) then just set the need_init flag.
2600 if (ji->need_clean && ji->desired_uuid && ji->desired_uuid[0] == '\0') {
2601 error = journal_is_clean(ji->jvp, 0, ji->jsize,
2602 (void *)1, ji->blksize);
2603 if (error == EBUSY) {
2604 struct vnop_close_args cargs = {
2605 .a_vp = ji->jvp,
2606 .a_fflag = FREAD | FWRITE,
2607 .a_context = vfs_context_kernel()
2608 };
2609 spec_close(&cargs);
2610 vnode_put(ji->jvp);
2611 ji->jvp = NULL;
2612 return 1; // keep iterating
2613 } else if (error == EINVAL) {
2614 ji->need_init = 1;
2615 }
2616 }
2617
2618 if (ji->desired_uuid && ji->desired_uuid[0] == '\0') {
2619 strlcpy(ji->desired_uuid, uuid_str, 128);
2620 }
2621 vnode_setmountedon(ji->jvp);
2622 return 0; // stop iterating
2623 }
2624
2625 static vnode_t
2626 open_journal_dev(mount_t mp,
2627 const char *vol_device,
2628 int need_clean,
2629 char *uuid_str,
2630 char *machine_serial_num,
2631 off_t jsize,
2632 size_t blksize,
2633 int *need_init)
2634 {
2635 int retry_counter=0;
2636 jopen_cb_info ji;
2637
2638 ji.mp = mp;
2639 ji.jsize = jsize;
2640 ji.desired_uuid = uuid_str;
2641 ji.jvp = NULL;
2642 ji.blksize = blksize;
2643 ji.need_clean = need_clean;
2644 ji.need_init = 0;
2645
2646 // if (uuid_str[0] == '\0') {
2647 // printf("hfs: open journal dev: %s: locating any available non-dirty external journal partition\n", vol_device);
2648 // } else {
2649 // printf("hfs: open journal dev: %s: trying to find the external journal partition w/uuid %s\n", vol_device, uuid_str);
2650 // }
2651 while (ji.jvp == NULL && retry_counter++ < 4) {
2652 if (retry_counter > 1) {
2653 if (uuid_str[0]) {
2654 printf("hfs: open_journal_dev: uuid %s not found. waiting 10sec.\n", uuid_str);
2655 } else {
2656 printf("hfs: open_journal_dev: no available external journal partition found. waiting 10sec.\n");
2657 }
2658 delay_for_interval(10 * 1000000, NSEC_PER_USEC); // wait for ten seconds and then try again
2659 }
2660
2661 hfs_iterate_media_with_content(EXTJNL_CONTENT_TYPE_UUID,
2662 journal_open_cb, &ji);
2663 }
2664
2665 if (ji.jvp == NULL) {
2666 printf("hfs: volume: %s: did not find jnl device uuid: %s from machine serial number: %s\n",
2667 vol_device, uuid_str, machine_serial_num);
2668 }
2669
2670 *need_init = ji.need_init;
2671
2672 return ji.jvp;
2673 }
2674
2675 void hfs_close_jvp(hfsmount_t *hfsmp)
2676 {
2677 if (!hfsmp || !hfsmp->jvp || hfsmp->jvp == hfsmp->hfs_devvp)
2678 return;
2679
2680 vnode_clearmountedon(hfsmp->jvp);
2681 struct vnop_close_args cargs = {
2682 .a_vp = hfsmp->jvp,
2683 .a_fflag = FREAD | FWRITE,
2684 .a_context = vfs_context_kernel()
2685 };
2686 spec_close(&cargs);
2687 vnode_put(hfsmp->jvp);
2688 hfsmp->jvp = NULL;
2689 }
2690
2691 int
2692 hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
2693 void *_args, off_t embeddedOffset, daddr64_t mdb_offset,
2694 HFSMasterDirectoryBlock *mdbp, kauth_cred_t cred)
2695 {
2696 JournalInfoBlock *jibp;
2697 struct buf *jinfo_bp, *bp;
2698 int sectors_per_fsblock, arg_flags=0, arg_tbufsz=0;
2699 int retval, write_jibp = 0;
2700 uint32_t blksize = hfsmp->hfs_logical_block_size;
2701 struct vnode *devvp;
2702 struct hfs_mount_args *args = _args;
2703 u_int32_t jib_flags;
2704 u_int64_t jib_offset;
2705 u_int64_t jib_size;
2706 const char *dev_name;
2707
2708 devvp = hfsmp->hfs_devvp;
2709 dev_name = vnode_getname_printable(devvp);
2710
2711 if (args != NULL && (args->flags & HFSFSMNT_EXTENDED_ARGS)) {
2712 arg_flags = args->journal_flags;
2713 arg_tbufsz = args->journal_tbuffer_size;
2714 }
2715
2716 sectors_per_fsblock = SWAP_BE32(vhp->blockSize) / blksize;
2717
2718 jinfo_bp = NULL;
2719 retval = (int)buf_meta_bread(devvp,
2720 (daddr64_t)((embeddedOffset/blksize) +
2721 ((u_int64_t)SWAP_BE32(vhp->journalInfoBlock)*sectors_per_fsblock)),
2722 hfsmp->hfs_physical_block_size, cred, &jinfo_bp);
2723 if (retval) {
2724 if (jinfo_bp) {
2725 buf_brelse(jinfo_bp);
2726 }
2727 goto cleanup_dev_name;
2728 }
2729
2730 jibp = (JournalInfoBlock *)buf_dataptr(jinfo_bp);
2731 jib_flags = SWAP_BE32(jibp->flags);
2732 jib_size = SWAP_BE64(jibp->size);
2733
2734 if (jib_flags & kJIJournalInFSMask) {
2735 hfsmp->jvp = hfsmp->hfs_devvp;
2736 jib_offset = SWAP_BE64(jibp->offset);
2737 } else {
2738 int need_init=0;
2739
2740 // if the volume was unmounted cleanly then we'll pick any
2741 // available external journal partition
2742 //
2743 if (SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) {
2744 *((char *)&jibp->ext_jnl_uuid[0]) = '\0';
2745 }
2746
2747 hfsmp->jvp = open_journal_dev(hfsmp->hfs_mp,
2748 dev_name,
2749 !(jib_flags & kJIJournalNeedInitMask),
2750 (char *)&jibp->ext_jnl_uuid[0],
2751 (char *)&jibp->machine_serial_num[0],
2752 jib_size,
2753 hfsmp->hfs_logical_block_size,
2754 &need_init);
2755 if (hfsmp->jvp == NULL) {
2756 buf_brelse(jinfo_bp);
2757 retval = EROFS;
2758 goto cleanup_dev_name;
2759 } else {
2760 if (hfs_get_platform_serial_number(&jibp->machine_serial_num[0], sizeof(jibp->machine_serial_num)) != KERN_SUCCESS) {
2761 strlcpy(&jibp->machine_serial_num[0], "unknown-machine-uuid", sizeof(jibp->machine_serial_num));
2762 }
2763 }
2764
2765 jib_offset = 0;
2766 write_jibp = 1;
2767 if (need_init) {
2768 jib_flags |= kJIJournalNeedInitMask;
2769 }
2770 }
2771
2772 // save this off for the hack-y check in hfs_remove()
2773 hfsmp->jnl_start = jib_offset / SWAP_BE32(vhp->blockSize);
2774 hfsmp->jnl_size = jib_size;
2775
2776 if ((hfsmp->hfs_flags & HFS_READ_ONLY) && (vfs_flags(hfsmp->hfs_mp) & MNT_ROOTFS) == 0) {
2777 // if the file system is read-only, check if the journal is empty.
2778 // if it is, then we can allow the mount. otherwise we have to
2779 // return failure.
2780 retval = journal_is_clean(hfsmp->jvp,
2781 jib_offset + embeddedOffset,
2782 jib_size,
2783 devvp,
2784 hfsmp->hfs_logical_block_size);
2785
2786 hfsmp->jnl = NULL;
2787
2788 buf_brelse(jinfo_bp);
2789
2790 if (retval) {
2791 const char *name = vnode_getname_printable(devvp);
2792 printf("hfs: early journal init: volume on %s is read-only and journal is dirty. Can not mount volume.\n",
2793 name);
2794 vnode_putname_printable(name);
2795 }
2796
2797 goto cleanup_dev_name;
2798 }
2799
2800 if (jib_flags & kJIJournalNeedInitMask) {
2801 printf("hfs: Initializing the journal (joffset 0x%llx sz 0x%llx)...\n",
2802 jib_offset + embeddedOffset, jib_size);
2803 hfsmp->jnl = journal_create(hfsmp->jvp,
2804 jib_offset + embeddedOffset,
2805 jib_size,
2806 devvp,
2807 blksize,
2808 arg_flags,
2809 arg_tbufsz,
2810 hfs_sync_metadata, hfsmp->hfs_mp,
2811 hfsmp->hfs_mp);
2812 if (hfsmp->jnl)
2813 journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);
2814
2815 // no need to start a transaction here... if this were to fail
2816 // we'd just re-init it on the next mount.
2817 jib_flags &= ~kJIJournalNeedInitMask;
2818 jibp->flags = SWAP_BE32(jib_flags);
2819 buf_bwrite(jinfo_bp);
2820 jinfo_bp = NULL;
2821 jibp = NULL;
2822 } else {
2823 //printf("hfs: Opening the journal (joffset 0x%llx sz 0x%llx vhp_blksize %d)...\n",
2824 // jib_offset + embeddedOffset,
2825 // jib_size, SWAP_BE32(vhp->blockSize));
2826
2827 hfsmp->jnl = journal_open(hfsmp->jvp,
2828 jib_offset + embeddedOffset,
2829 jib_size,
2830 devvp,
2831 blksize,
2832 arg_flags,
2833 arg_tbufsz,
2834 hfs_sync_metadata, hfsmp->hfs_mp,
2835 hfsmp->hfs_mp);
2836 if (hfsmp->jnl)
2837 journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);
2838
2839 if (write_jibp) {
2840 buf_bwrite(jinfo_bp);
2841 } else {
2842 buf_brelse(jinfo_bp);
2843 }
2844 jinfo_bp = NULL;
2845 jibp = NULL;
2846
2847 if (hfsmp->jnl && mdbp) {
2848 // reload the mdb because it could have changed
2849 // if the journal had to be replayed.
2850 if (mdb_offset == 0) {
2851 mdb_offset = (daddr64_t)((embeddedOffset / blksize) + HFS_PRI_SECTOR(blksize));
2852 }
2853 bp = NULL;
2854 retval = (int)buf_meta_bread(devvp,
2855 HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
2856 hfsmp->hfs_physical_block_size, cred, &bp);
2857 if (retval) {
2858 if (bp) {
2859 buf_brelse(bp);
2860 }
2861 printf("hfs: failed to reload the mdb after opening the journal (retval %d)!\n",
2862 retval);
2863 goto cleanup_dev_name;
2864 }
2865 bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size), mdbp, 512);
2866 buf_brelse(bp);
2867 bp = NULL;
2868 }
2869 }
2870
2871 // if we expected the journal to be there and we couldn't
2872 // create it or open it then we have to bail out.
2873 if (hfsmp->jnl == NULL) {
2874 printf("hfs: early jnl init: failed to open/create the journal (retval %d).\n", retval);
2875 retval = EINVAL;
2876 goto cleanup_dev_name;
2877 }
2878
2879 retval = 0;
2880
2881 cleanup_dev_name:
2882 vnode_putname_printable(dev_name);
2883 return retval;
2884 }
2885
2886
2887 //
2888 // This function will go and re-locate the .journal_info_block and
2889 // the .journal files in case they moved (which can happen if you
2890 // run Norton SpeedDisk). If we fail to find either file we just
2891 // disable journaling for this volume and return. We turn off the
2892 // journaling bit in the vcb and assume it will get written to disk
2893 // later (if it doesn't on the next mount we'd do the same thing
2894 // again which is harmless). If we disable journaling we don't
2895 // return an error so that the volume is still mountable.
2896 //
2897 // If the info we find for the .journal_info_block and .journal files
2898 // isn't what we had stored, we re-set our cached info and proceed
2899 // with opening the journal normally.
2900 //
2901 static int
2902 hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_args)
2903 {
2904 JournalInfoBlock *jibp;
2905 struct buf *jinfo_bp;
2906 int sectors_per_fsblock, arg_flags=0, arg_tbufsz=0;
2907 int retval, write_jibp = 0, recreate_journal = 0;
2908 struct vnode *devvp;
2909 struct cat_attr jib_attr, jattr;
2910 struct cat_fork jib_fork, jfork;
2911 ExtendedVCB *vcb;
2912 u_int32_t fid;
2913 struct hfs_mount_args *args = _args;
2914 u_int32_t jib_flags;
2915 u_int64_t jib_offset;
2916 u_int64_t jib_size;
2917
2918 devvp = hfsmp->hfs_devvp;
2919 vcb = HFSTOVCB(hfsmp);
2920
2921 if (args != NULL && (args->flags & HFSFSMNT_EXTENDED_ARGS)) {
2922 if (args->journal_disable) {
2923 return 0;
2924 }
2925
2926 arg_flags = args->journal_flags;
2927 arg_tbufsz = args->journal_tbuffer_size;
2928 }
2929
2930 fid = GetFileInfo(vcb, kRootDirID, ".journal_info_block", &jib_attr, &jib_fork);
2931 if (fid == 0 || jib_fork.cf_extents[0].startBlock == 0 || jib_fork.cf_size == 0) {
2932 printf("hfs: can't find the .journal_info_block! disabling journaling (start: %d).\n",
2933 fid ? jib_fork.cf_extents[0].startBlock : 0);
2934 vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
2935 return 0;
2936 }
2937 hfsmp->hfs_jnlinfoblkid = fid;
2938
2939 // make sure the journal_info_block begins where we think it should.
2940 if (SWAP_BE32(vhp->journalInfoBlock) != jib_fork.cf_extents[0].startBlock) {
2941 printf("hfs: The journal_info_block moved (was: %d; is: %d). Fixing up\n",
2942 SWAP_BE32(vhp->journalInfoBlock), jib_fork.cf_extents[0].startBlock);
2943
2944 vcb->vcbJinfoBlock = jib_fork.cf_extents[0].startBlock;
2945 vhp->journalInfoBlock = SWAP_BE32(jib_fork.cf_extents[0].startBlock);
2946 recreate_journal = 1;
2947 }
2948
2949
2950 sectors_per_fsblock = SWAP_BE32(vhp->blockSize) / hfsmp->hfs_logical_block_size;
2951 jinfo_bp = NULL;
2952 retval = (int)buf_meta_bread(devvp,
2953 (vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size +
2954 ((u_int64_t)SWAP_BE32(vhp->journalInfoBlock)*sectors_per_fsblock)),
2955 hfsmp->hfs_physical_block_size, NOCRED, &jinfo_bp);
2956 if (retval) {
2957 if (jinfo_bp) {
2958 buf_brelse(jinfo_bp);
2959 }
2960 printf("hfs: can't read journal info block. disabling journaling.\n");
2961 vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
2962 return 0;
2963 }
2964
2965 jibp = (JournalInfoBlock *)buf_dataptr(jinfo_bp);
2966 jib_flags = SWAP_BE32(jibp->flags);
2967 jib_offset = SWAP_BE64(jibp->offset);
2968 jib_size = SWAP_BE64(jibp->size);
2969
2970 fid = GetFileInfo(vcb, kRootDirID, ".journal", &jattr, &jfork);
2971 if (fid == 0 || jfork.cf_extents[0].startBlock == 0 || jfork.cf_size == 0) {
2972 printf("hfs: can't find the journal file! disabling journaling (start: %d)\n",
2973 fid ? jfork.cf_extents[0].startBlock : 0);
2974 buf_brelse(jinfo_bp);
2975 vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
2976 return 0;
2977 }
2978 hfsmp->hfs_jnlfileid = fid;
2979
2980 // make sure the journal file begins where we think it should.
2981 if ((jib_flags & kJIJournalInFSMask) && (jib_offset / (u_int64_t)vcb->blockSize) != jfork.cf_extents[0].startBlock) {
2982 printf("hfs: The journal file moved (was: %lld; is: %d). Fixing up\n",
2983 (jib_offset / (u_int64_t)vcb->blockSize), jfork.cf_extents[0].startBlock);
2984
2985 jib_offset = (u_int64_t)jfork.cf_extents[0].startBlock * (u_int64_t)vcb->blockSize;
2986 write_jibp = 1;
2987 recreate_journal = 1;
2988 }
2989
2990 // check the size of the journal file.
2991 if (jib_size != (u_int64_t)jfork.cf_extents[0].blockCount*vcb->blockSize) {
2992 printf("hfs: The journal file changed size! (was %lld; is %lld). Fixing up.\n",
2993 jib_size, (u_int64_t)jfork.cf_extents[0].blockCount*vcb->blockSize);
2994
2995 jib_size = (u_int64_t)jfork.cf_extents[0].blockCount * vcb->blockSize;
2996 write_jibp = 1;
2997 recreate_journal = 1;
2998 }
2999
3000 if (jib_flags & kJIJournalInFSMask) {
3001 hfsmp->jvp = hfsmp->hfs_devvp;
3002 jib_offset += (off_t)vcb->hfsPlusIOPosOffset;
3003 } else {
3004 const char *dev_name;
3005 int need_init = 0;
3006
3007 dev_name = vnode_getname_printable(devvp);
3008
3009 // since the journal is empty, just use any available external journal
3010 *((char *)&jibp->ext_jnl_uuid[0]) = '\0';
3011
3012 // this fills in the uuid of the device we actually get
3013 hfsmp->jvp = open_journal_dev(hfsmp->hfs_mp,
3014 dev_name,
3015 !(jib_flags & kJIJournalNeedInitMask),
3016 (char *)&jibp->ext_jnl_uuid[0],
3017 (char *)&jibp->machine_serial_num[0],
3018 jib_size,
3019 hfsmp->hfs_logical_block_size,
3020 &need_init);
3021 if (hfsmp->jvp == NULL) {
3022 buf_brelse(jinfo_bp);
3023 vnode_putname_printable(dev_name);
3024 return EROFS;
3025 } else {
3026 if (hfs_get_platform_serial_number(&jibp->machine_serial_num[0], sizeof(jibp->machine_serial_num)) != KERN_SUCCESS) {
3027 strlcpy(&jibp->machine_serial_num[0], "unknown-machine-serial-num", sizeof(jibp->machine_serial_num));
3028 }
3029 }
3030 jib_offset = 0;
3031 recreate_journal = 1;
3032 write_jibp = 1;
3033 if (need_init) {
3034 jib_flags |= kJIJournalNeedInitMask;
3035 }
3036 vnode_putname_printable(dev_name);
3037 }
3038
3039 // save this off for the hack-y check in hfs_remove()
3040 hfsmp->jnl_start = jib_offset / SWAP_BE32(vhp->blockSize);
3041 hfsmp->jnl_size = jib_size;
3042
3043 if ((hfsmp->hfs_flags & HFS_READ_ONLY) && (vfs_flags(hfsmp->hfs_mp) & MNT_ROOTFS) == 0) {
3044 // if the file system is read-only, check if the journal is empty.
3045 // if it is, then we can allow the mount. otherwise we have to
3046 // return failure.
3047 retval = journal_is_clean(hfsmp->jvp,
3048 jib_offset,
3049 jib_size,
3050 devvp,
3051 hfsmp->hfs_logical_block_size);
3052
3053 hfsmp->jnl = NULL;
3054
3055 buf_brelse(jinfo_bp);
3056
3057 if (retval) {
3058 const char *name = vnode_getname_printable(devvp);
3059 printf("hfs: late journal init: volume on %s is read-only and journal is dirty. Can not mount volume.\n",
3060 name);
3061 vnode_putname_printable(name);
3062 }
3063
3064 return retval;
3065 }
3066
3067 if ((jib_flags & kJIJournalNeedInitMask) || recreate_journal) {
3068 printf("hfs: Initializing the journal (joffset 0x%llx sz 0x%llx)...\n",
3069 jib_offset, jib_size);
3070 hfsmp->jnl = journal_create(hfsmp->jvp,
3071 jib_offset,
3072 jib_size,
3073 devvp,
3074 hfsmp->hfs_logical_block_size,
3075 arg_flags,
3076 arg_tbufsz,
3077 hfs_sync_metadata, hfsmp->hfs_mp,
3078 hfsmp->hfs_mp);
3079 if (hfsmp->jnl)
3080 journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);
3081
3082 // no need to start a transaction here... if this were to fail
3083 // we'd just re-init it on the next mount.
3084 jib_flags &= ~kJIJournalNeedInitMask;
3085 write_jibp = 1;
3086
3087 } else {
3088 //
3089 // if we weren't the last person to mount this volume
3090 // then we need to throw away the journal because it
3091 // is likely that someone else mucked with the disk.
3092 // if the journal is empty this is no big deal. if the
3093 // disk is dirty this prevents us from replaying the
3094 // journal over top of changes that someone else made.
3095 //
3096 arg_flags |= JOURNAL_RESET;
3097
3098 //printf("hfs: Opening the journal (joffset 0x%llx sz 0x%llx vhp_blksize %d)...\n",
3099 // jib_offset,
3100 // jib_size, SWAP_BE32(vhp->blockSize));
3101
3102 hfsmp->jnl = journal_open(hfsmp->jvp,
3103 jib_offset,
3104 jib_size,
3105 devvp,
3106 hfsmp->hfs_logical_block_size,
3107 arg_flags,
3108 arg_tbufsz,
3109 hfs_sync_metadata, hfsmp->hfs_mp,
3110 hfsmp->hfs_mp);
3111 if (hfsmp->jnl)
3112 journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);
3113 }
3114
3115
3116 if (write_jibp) {
3117 jibp->flags = SWAP_BE32(jib_flags);
3118 jibp->offset = SWAP_BE64(jib_offset);
3119 jibp->size = SWAP_BE64(jib_size);
3120
3121 buf_bwrite(jinfo_bp);
3122 } else {
3123 buf_brelse(jinfo_bp);
3124 }
3125 jinfo_bp = NULL;
3126 jibp = NULL;
3127
3128 // if we expected the journal to be there and we couldn't
3129 // create it or open it then we have to bail out.
3130 if (hfsmp->jnl == NULL) {
3131 printf("hfs: late jnl init: failed to open/create the journal (retval %d).\n", retval);
3132 return EINVAL;
3133 }
3134
3135 return 0;
3136 }
3137
3138 /*
3139 * Calculate the allocation zone for metadata.
3140 *
3141 * This zone includes the following:
3142 * Allocation Bitmap file
3143 * Overflow Extents file
3144 * Journal file
3145 * Quota files
3146 * Clustered Hot files
3147 * Catalog file
3148 *
3149 * METADATA ALLOCATION ZONE
3150 * ____________________________________________________________________________
3151 * | | | | | | |
3152 * | BM | JF | OEF | CATALOG |---> | HOT FILES |
3153 * |____|____|_____|_______________|______________________________|___________|
3154 *
3155 * <------------------------------- N * 128 MB ------------------------------->
3156 *
3157 */
3158 #define GIGABYTE (u_int64_t)(1024*1024*1024)
3159
3160 #define HOTBAND_MINIMUM_SIZE (10*1024*1024)
3161 #define HOTBAND_MAXIMUM_SIZE (512*1024*1024)
3162
3163 /* Initialize the metadata zone.
3164 *
3165 * If the size of the volume is less than the minimum required for
3166 * a metadata zone, the metadata zone is disabled.
3167 *
3168 * If disable is true, disable metadata zone unconditionally.
3169 */
3170 void
3171 hfs_metadatazone_init(struct hfsmount *hfsmp, int disable)
3172 {
3173 ExtendedVCB *vcb;
3174 u_int64_t fs_size;
3175 u_int64_t zonesize;
3176 u_int64_t temp;
3177 u_int64_t filesize;
3178 u_int32_t blk;
3179 int items, really_do_it=1;
3180
3181 vcb = HFSTOVCB(hfsmp);
3182 fs_size = (u_int64_t)vcb->blockSize * (u_int64_t)vcb->allocLimit;
3183
3184 /*
3185 * For volumes less than 10 GB, don't bother.
3186 */
3187 if (fs_size < ((u_int64_t)10 * GIGABYTE)) {
3188 really_do_it = 0;
3189 }
3190
3191 /*
3192 * Skip non-journaled volumes as well.
3193 */
3194 if (hfsmp->jnl == NULL) {
3195 really_do_it = 0;
3196 }
3197
3198 /* If caller wants to disable metadata zone, do it */
3199 if (disable == true) {
3200 really_do_it = 0;
3201 }
3202
3203 /*
3204 * Start with space for the boot blocks and Volume Header.
3205 * 1536 = byte offset from start of volume to end of volume header:
3206 * 1024 bytes is the offset from the start of the volume to the
3207 * start of the volume header (defined by the volume format)
3208 * + 512 bytes (the size of the volume header).
3209 */
3210 zonesize = roundup(1536, hfsmp->blockSize);
3211
3212 /*
3213 * Add the on-disk size of allocation bitmap.
3214 */
3215 zonesize += hfsmp->hfs_allocation_cp->c_datafork->ff_blocks * hfsmp->blockSize;
3216
3217 /*
3218 * Add space for the Journal Info Block and Journal (if they're in
3219 * this file system).
3220 */
3221 if (hfsmp->jnl && hfsmp->jvp == hfsmp->hfs_devvp) {
3222 zonesize += hfsmp->blockSize + hfsmp->jnl_size;
3223 }
3224
3225 /*
3226 * Add the existing size of the Extents Overflow B-tree.
3227 * (It rarely grows, so don't bother reserving additional room for it.)
3228 */
3229 zonesize += hfs_blk_to_bytes(hfsmp->hfs_extents_cp->c_datafork->ff_blocks, hfsmp->blockSize);
3230
3231 /*
3232 * If there is an Attributes B-tree, leave room for 11 clumps worth.
3233 * newfs_hfs allocates one clump, and leaves a gap of 10 clumps.
3234 * When installing a full OS install onto a 20GB volume, we use
3235 * 7 to 8 clumps worth of space (depending on packages), so that leaves
3236 * us with another 3 or 4 clumps worth before we need another extent.
3237 */
3238 if (hfsmp->hfs_attribute_cp) {
3239 zonesize += 11 * hfsmp->hfs_attribute_cp->c_datafork->ff_clumpsize;
3240 }
3241
3242 /*
3243 * Leave room for 11 clumps of the Catalog B-tree.
3244 * Again, newfs_hfs allocates one clump plus a gap of 10 clumps.
3245 * When installing a full OS install onto a 20GB volume, we use
3246 * 7 to 8 clumps worth of space (depending on packages), so that leaves
3247 * us with another 3 or 4 clumps worth before we need another extent.
3248 */
3249 zonesize += 11 * hfsmp->hfs_catalog_cp->c_datafork->ff_clumpsize;
3250
3251 /*
3252 * Add space for hot file region.
3253 *
3254 * ...for now, use 5 MB per 1 GB (0.5 %)
3255 */
3256 filesize = (fs_size / 1024) * 5;
3257 if (filesize > HOTBAND_MAXIMUM_SIZE)
3258 filesize = HOTBAND_MAXIMUM_SIZE;
3259 else if (filesize < HOTBAND_MINIMUM_SIZE)
3260 filesize = HOTBAND_MINIMUM_SIZE;
3261 /*
3262 * Calculate user quota file requirements.
3263 */
3264 if (hfsmp->hfs_flags & HFS_QUOTAS) {
3265 items = QF_USERS_PER_GB * (fs_size / GIGABYTE);
3266 if (items < QF_MIN_USERS)
3267 items = QF_MIN_USERS;
3268 else if (items > QF_MAX_USERS)
3269 items = QF_MAX_USERS;
3270 if (!powerof2(items)) {
3271 int x = items;
3272 items = 4;
3273 while (x>>1 != 1) {
3274 x = x >> 1;
3275 items = items << 1;
3276 }
3277 }
3278 filesize += (items + 1) * sizeof(struct dqblk);
3279 /*
3280 * Calculate group quota file requirements.
3281 *
3282 */
3283 items = QF_GROUPS_PER_GB * (fs_size / GIGABYTE);
3284 if (items < QF_MIN_GROUPS)
3285 items = QF_MIN_GROUPS;
3286 else if (items > QF_MAX_GROUPS)
3287 items = QF_MAX_GROUPS;
3288 if (!powerof2(items)) {
3289 int x = items;
3290 items = 4;
3291 while (x>>1 != 1) {
3292 x = x >> 1;
3293 items = items << 1;
3294 }
3295 }
3296 filesize += (items + 1) * sizeof(struct dqblk);
3297 }
3298 zonesize += filesize;
3299
3300 /*
3301 * Round up entire zone to a bitmap block's worth.
3302 * The extra space goes to the catalog file and hot file area.
3303 */
3304 temp = zonesize;
3305 zonesize = roundup(zonesize, (u_int64_t)vcb->vcbVBMIOSize * 8 * vcb->blockSize);
3306 hfsmp->hfs_min_alloc_start = zonesize / vcb->blockSize;
3307 /*
3308 * If doing the round up for hfs_min_alloc_start would push us past
3309 * allocLimit, then just reset it back to 0. Though using a value
3310 * bigger than allocLimit would not cause damage in the block allocator
3311 * code, this value could get stored in the volume header and make it out
3312 * to disk, making the volume header technically corrupt.
3313 */
3314 if (hfsmp->hfs_min_alloc_start >= hfsmp->allocLimit) {
3315 hfsmp->hfs_min_alloc_start = 0;
3316 }
3317
3318 if (really_do_it == 0) {
3319 /* If metadata zone needs to be disabled because the
3320 * volume was truncated, clear the bit and zero out
3321 * the values that are no longer needed.
3322 */
3323 if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
3324 /* Disable metadata zone */
3325 hfsmp->hfs_flags &= ~HFS_METADATA_ZONE;
3326
3327 /* Zero out mount point values that are not required */
3328 hfsmp->hfs_catalog_maxblks = 0;
3329 hfsmp->hfs_hotfile_maxblks = 0;
3330 hfsmp->hfs_hotfile_start = 0;
3331 hfsmp->hfs_hotfile_end = 0;
3332 hfsmp->hfs_hotfile_freeblks = 0;
3333 hfsmp->hfs_metazone_start = 0;
3334 hfsmp->hfs_metazone_end = 0;
3335 }
3336
3337 return;
3338 }
3339
3340 temp = zonesize - temp; /* temp has extra space */
3341 filesize += temp / 3;
3342 hfsmp->hfs_catalog_maxblks += (temp - (temp / 3)) / vcb->blockSize;
3343
3344 if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) {
3345 hfsmp->hfs_hotfile_maxblks = (uint32_t) (hfsmp->hfs_cs_hotfile_size / HFSTOVCB(hfsmp)->blockSize);
3346 } else {
3347 hfsmp->hfs_hotfile_maxblks = filesize / vcb->blockSize;
3348 }
3349
3350 /* Convert to allocation blocks. */
3351 blk = zonesize / vcb->blockSize;
3352
3353 /* The default metadata zone location is at the start of volume. */
3354 hfsmp->hfs_metazone_start = 1;
3355 hfsmp->hfs_metazone_end = blk - 1;
3356
3357 /* The default hotfile area is at the end of the zone. */
3358 if (vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) {
3359 hfsmp->hfs_hotfile_start = blk - (filesize / vcb->blockSize);
3360 hfsmp->hfs_hotfile_end = hfsmp->hfs_metazone_end;
3361 hfsmp->hfs_hotfile_freeblks = hfs_hotfile_freeblocks(hfsmp);
3362 }
3363 else {
3364 hfsmp->hfs_hotfile_start = 0;
3365 hfsmp->hfs_hotfile_end = 0;
3366 hfsmp->hfs_hotfile_freeblks = 0;
3367 }
3368 #if DEBUG
3369 printf("hfs:%s: metadata zone is %d to %d\n", hfsmp->vcbVN, hfsmp->hfs_metazone_start, hfsmp->hfs_metazone_end);
3370 printf("hfs:%s: hot file band is %d to %d\n", hfsmp->vcbVN, hfsmp->hfs_hotfile_start, hfsmp->hfs_hotfile_end);
3371 printf("hfs:%s: hot file band free blocks = %d\n", hfsmp->vcbVN, hfsmp->hfs_hotfile_freeblks);
3372 #endif
3373
3374 hfsmp->hfs_flags |= HFS_METADATA_ZONE;
3375 }
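
/*
 * Worked example (illustrative): with 4KB allocation blocks and the
 * common vcbVBMIOSize of 4096 bytes, the roundup granularity above is
 * 4096 * 8 * 4096 bytes = 128MB, which is where the "N * 128 MB" in
 * the zone diagram comes from.
 */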
3376
3377
3378 static u_int32_t
3379 hfs_hotfile_freeblocks(struct hfsmount *hfsmp)
3380 {
3381 ExtendedVCB *vcb = HFSTOVCB(hfsmp);
3382 int lockflags;
3383 int freeblocks;
3384
3385 if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) {
3386 //
3387 // This is only used at initialization time; on an SSD
3388 // we'll get the real info from the hotfile b-tree's
3389 // user info record.
3390 //
3391 return 0;
3392 }
3393
3394 lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
3395 freeblocks = MetaZoneFreeBlocks(vcb);
3396 hfs_systemfile_unlock(hfsmp, lockflags);
3397
3398 /* Minus Extents overflow file reserve. */
3399 if ((uint32_t)hfsmp->hfs_overflow_maxblks >= VTOF(hfsmp->hfs_extents_vp)->ff_blocks) {
3400 freeblocks -= hfsmp->hfs_overflow_maxblks - VTOF(hfsmp->hfs_extents_vp)->ff_blocks;
3401 }
3402
3403 /* Minus catalog file reserve. */
3404 if ((uint32_t)hfsmp->hfs_catalog_maxblks >= VTOF(hfsmp->hfs_catalog_vp)->ff_blocks) {
3405 freeblocks -= hfsmp->hfs_catalog_maxblks - VTOF(hfsmp->hfs_catalog_vp)->ff_blocks;
3406 }
3407
3408 if (freeblocks < 0)
3409 freeblocks = 0;
3410
3411 // printf("hfs: hotfile_freeblocks: MIN(%d, %d) = %d\n", freeblocks, hfsmp->hfs_hotfile_maxblks, MIN(freeblocks, hfsmp->hfs_hotfile_maxblks));
3412 return MIN(freeblocks, hfsmp->hfs_hotfile_maxblks);
3413 }
3414
3415 /*
3416 * Determine if a file is a "virtual" metadata file.
3417 * This includes journal and quota files.
3418 */
3419 int
3420 hfs_virtualmetafile(struct cnode *cp)
3421 {
3422 const char * filename;
3423
3424
3426 return (0);
3427
3428 filename = (const char *)cp->c_desc.cd_nameptr;
3429 if (filename == NULL)
3430 return (0);
3431
3432 if ((strncmp(filename, ".journal", sizeof(".journal")) == 0) ||
3433 (strncmp(filename, ".journal_info_block", sizeof(".journal_info_block")) == 0) ||
3434 (strncmp(filename, ".quota.user", sizeof(".quota.user")) == 0) ||
3435 (strncmp(filename, ".quota.group", sizeof(".quota.group")) == 0) ||
3436 (strncmp(filename, ".hotfiles.btree", sizeof(".hotfiles.btree")) == 0))
3437 return (1);
3438
3439 return (0);
3440 }
3441
3442 void hfs_syncer_lock(struct hfsmount *hfsmp)
3443 {
3444 hfs_lock_mount(hfsmp);
3445 }
3446
3447 void hfs_syncer_unlock(struct hfsmount *hfsmp)
3448 {
3449 hfs_unlock_mount(hfsmp);
3450 }
3451
3452 void hfs_syncer_wait(struct hfsmount *hfsmp, struct timespec *ts)
3453 {
3454 msleep(&hfsmp->hfs_syncer_thread, &hfsmp->hfs_mutex, PWAIT,
3455 "hfs_syncer_wait", ts);
3456 }
3457
3458 void hfs_syncer_wakeup(struct hfsmount *hfsmp)
3459 {
3460 wakeup(&hfsmp->hfs_syncer_thread);
3461 }
3462
3463 uint64_t hfs_usecs_to_deadline(uint64_t usecs)
3464 {
3465 uint64_t deadline;
3466 clock_interval_to_deadline(usecs, NSEC_PER_USEC, &deadline);
3467 return deadline;
3468 }
3469
3470 //
3471 // Fire off a timed callback to sync the disk if the
3472 // volume is on ejectable media.
3473 //
3474 void hfs_sync_ejectable(struct hfsmount *hfsmp)
3475 {
3476 // If we don't have a syncer or we get called by the syncer, just return
3477 if (!ISSET(hfsmp->hfs_flags, HFS_RUN_SYNCER)
3478 || current_thread() == hfsmp->hfs_syncer_thread) {
3479 return;
3480 }
3481
3482 hfs_syncer_lock(hfsmp);
3483
3484 if (!timerisset(&hfsmp->hfs_sync_req_oldest))
3485 microuptime(&hfsmp->hfs_sync_req_oldest);
3486
3487 /* If hfs_unmount is running, it will clear the HFS_RUN_SYNCER
3488 flag. Also, we don't want to queue again if there is a sync
3489 outstanding. */
3490 if (!ISSET(hfsmp->hfs_flags, HFS_RUN_SYNCER)
3491 || hfsmp->hfs_syncer_thread) {
3492 hfs_syncer_unlock(hfsmp);
3493 return;
3494 }
3495
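/*
 * Placeholder value: marks a sync as queued so that concurrent callers
 * bail out above, until kernel_thread_start() below stores the real
 * thread pointer.
 */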
3496 hfsmp->hfs_syncer_thread = (void *)1;
3497
3498 hfs_syncer_unlock(hfsmp);
3499
3500 kernel_thread_start(hfs_syncer, hfsmp, &hfsmp->hfs_syncer_thread);
3501 thread_deallocate(hfsmp->hfs_syncer_thread);
3502 }
3503
3504 int
3505 hfs_start_transaction(struct hfsmount *hfsmp)
3506 {
3507 int ret = 0, unlock_on_err = 0;
3508 thread_t thread = current_thread();
3509
3510 #ifdef HFS_CHECK_LOCK_ORDER
3511 /*
3512 * You cannot start a transaction while holding a system
3513 * file lock. (unless the transaction is nested.)
3514 */
3515 if (hfsmp->jnl && journal_owner(hfsmp->jnl) != thread) {
3516 if (hfsmp->hfs_catalog_cp && hfsmp->hfs_catalog_cp->c_lockowner == thread) {
3517 panic("hfs_start_transaction: bad lock order (cat before jnl)\n");
3518 }
3519 if (hfsmp->hfs_attribute_cp && hfsmp->hfs_attribute_cp->c_lockowner == thread) {
3520 panic("hfs_start_transaction: bad lock order (attr before jnl)\n");
3521 }
3522 if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == thread) {
3523 panic("hfs_start_transaction: bad lock order (ext before jnl)\n");
3524 }
3525 }
3526 #endif /* HFS_CHECK_LOCK_ORDER */
3527
3528 again:
3529
3530 if (hfsmp->jnl) {
3531 if (journal_owner(hfsmp->jnl) != thread) {
3532 /*
3533 * The global lock should be held shared if the journal is
3534 * active, to prevent the journal from being disabled. If
3535 * we're not the owner of the journal lock, verify that we're
3536 * not already holding the global lock exclusive before moving on.
3537 */
3538 if (hfsmp->hfs_global_lockowner == thread) {
3539 ret = EBUSY;
3540 goto out;
3541 }
3542
3543 hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
3544
3545 // Things could have changed
3546 if (!hfsmp->jnl) {
3547 hfs_unlock_global(hfsmp);
3548 goto again;
3549 }
3550
3551 OSAddAtomic(1, (SInt32 *)&hfsmp->hfs_active_threads);
3552 unlock_on_err = 1;
3553 }
3554 } else {
3555 // No journal
3556 if (hfsmp->hfs_global_lockowner != thread) {
3557 hfs_lock_global(hfsmp, HFS_EXCLUSIVE_LOCK);
3558
3559 // Things could have changed
3560 if (hfsmp->jnl) {
3561 hfs_unlock_global(hfsmp);
3562 goto again;
3563 }
3564
3565 OSAddAtomic(1, (SInt32 *)&hfsmp->hfs_active_threads);
3566 unlock_on_err = 1;
3567 }
3568 }
3569
3570 /* If a downgrade to read-only mount is in progress, no thread
3571 * other than the downgrade thread is allowed to modify
3572 * the file system.
3573 */
3574 if ((hfsmp->hfs_flags & HFS_RDONLY_DOWNGRADE) &&
3575 hfsmp->hfs_downgrading_thread != thread) {
3576 ret = EROFS;
3577 goto out;
3578 }
3579
3580 if (hfsmp->jnl) {
3581 ret = journal_start_transaction(hfsmp->jnl);
3582 } else {
3583 ret = 0;
3584 }
3585
3586 if (ret == 0)
3587 ++hfsmp->hfs_transaction_nesting;
3588
3589 out:
3590 if (ret != 0 && unlock_on_err) {
3591 hfs_unlock_global (hfsmp);
3592 OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_active_threads);
3593 }
3594
3595 return ret;
3596 }
3597
3598 int
3599 hfs_end_transaction(struct hfsmount *hfsmp)
3600 {
3601 int ret;
3602
3603 hfs_assert(!hfsmp->jnl || journal_owner(hfsmp->jnl) == current_thread());
3604 hfs_assert(hfsmp->hfs_transaction_nesting > 0);
3605
3606 if (hfsmp->jnl && hfsmp->hfs_transaction_nesting == 1)
3607 hfs_flushvolumeheader(hfsmp, HFS_FVH_FLUSH_IF_DIRTY);
3608
3609 bool need_unlock = !--hfsmp->hfs_transaction_nesting;
3610
3611 if (hfsmp->jnl) {
3612 ret = journal_end_transaction(hfsmp->jnl);
3613 } else {
3614 ret = 0;
3615 }
3616
3617 if (need_unlock) {
3618 OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_active_threads);
3619 hfs_unlock_global (hfsmp);
3620 hfs_sync_ejectable(hfsmp);
3621 }
3622
3623 return ret;
3624 }
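
/*
 * Illustrative usage sketch (not part of the original file; assumes a
 * valid hfsmp and that the caller holds no system-file locks; see
 * hfs_erase_unused_nodes below for a real caller):
 */
#if 0
	if (hfs_start_transaction(hfsmp) == 0) {
		int lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
		/* ... journalled metadata changes go here ... */
		hfs_systemfile_unlock(hfsmp, lockflags);
		hfs_end_transaction(hfsmp);
	}
#endif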
3625
3626
3627 void
3628 hfs_journal_lock(struct hfsmount *hfsmp)
3629 {
3630 /* Only peek at hfsmp->jnl while holding the global lock */
3631 hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
3632 if (hfsmp->jnl) {
3633 journal_lock(hfsmp->jnl);
3634 }
3635 hfs_unlock_global (hfsmp);
3636 }
3637
3638 void
3639 hfs_journal_unlock(struct hfsmount *hfsmp)
3640 {
3641 /* Only peek at hfsmp->jnl while holding the global lock */
3642 hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
3643 if (hfsmp->jnl) {
3644 journal_unlock(hfsmp->jnl);
3645 }
3646 hfs_unlock_global (hfsmp);
3647 }
3648
3649 /*
3650 * Flush the contents of the journal to the disk.
3651 *
3652 * - HFS_FLUSH_JOURNAL
3653 * Wait for the in-memory journal to be written to disk
3654 * consistently. The journal may still contain uncommitted
3655 * transactions, and the file system metadata blocks in
3656 * those transactions might be written asynchronously to
3657 * the disk; there is no guarantee that they reach the
3658 * disk before this call returns to the caller.
3659 * Note that this option is sufficient for file system
3660 * data integrity as it guarantees consistent journal
3661 * content on the disk.
3662 *
3663 * - HFS_FLUSH_JOURNAL_META
3664 * Wait for the in-memory journal to be written to disk
3665 * consistently, and also wait for all asynchronous
3666 * metadata blocks to be written consistently to their
3667 * corresponding locations on disk. This is overkill in normal
3668 * scenarios but is useful whenever the metadata blocks
3669 * are required to be consistent on-disk instead of
3670 * just the journal being consistent, e.g. before live
3671 * verification and live volume resizing. The metadata
3672 * update does not include a barrier or track cache flush.
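 *
 * - HFS_FLUSH_JOURNAL_BARRIER
 * HFS_FLUSH_JOURNAL plus a barrier issued to the media;
 * falls back to HFS_FLUSH_FULL below when the device does
 * not support barriers.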
3673 *
3674 * - HFS_FLUSH_FULL
3675 * HFS_FLUSH_JOURNAL + force a track cache flush to media
3676 *
3677 * - HFS_FLUSH_CACHE
3678 * Force a track cache flush to media.
3679 *
3680 * - HFS_FLUSH_BARRIER
3681 * Barrier-only flush to ensure write order
3682 *
3683 */
3684 errno_t hfs_flush(struct hfsmount *hfsmp, hfs_flush_mode_t mode)
3685 {
3686 errno_t error = 0;
3687 int options = 0;
3688 dk_synchronize_t sync_req = { .options = DK_SYNCHRONIZE_OPTION_BARRIER };
3689
3690 switch (mode) {
3691 case HFS_FLUSH_JOURNAL_META:
3692 // wait for journal, metadata blocks and previous async flush to finish
3693 SET(options, JOURNAL_WAIT_FOR_IO);
3694
3695 // no break
3696
3697 case HFS_FLUSH_JOURNAL:
3698 case HFS_FLUSH_JOURNAL_BARRIER:
3699 case HFS_FLUSH_FULL:
3700
3701 if (mode == HFS_FLUSH_JOURNAL_BARRIER &&
3702 !(hfsmp->hfs_flags & HFS_FEATURE_BARRIER))
3703 mode = HFS_FLUSH_FULL;
3704
3705 if (mode == HFS_FLUSH_FULL)
3706 SET(options, JOURNAL_FLUSH_FULL);
3707
3708 /* Only peek at hfsmp->jnl while holding the global lock */
3709 hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
3710
3711 if (hfsmp->jnl)
3712 error = journal_flush(hfsmp->jnl, options);
3713
3714 hfs_unlock_global (hfsmp);
3715
3716 /*
3717 * This may result in a double barrier as
3718 * journal_flush may have issued a barrier itself
3719 */
3720 if (mode == HFS_FLUSH_JOURNAL_BARRIER)
3721 error = VNOP_IOCTL(hfsmp->hfs_devvp,
3722 DKIOCSYNCHRONIZE, (caddr_t)&sync_req,
3723 FWRITE, NULL);
3724
3725 break;
3726
3727 case HFS_FLUSH_CACHE:
3728 // Do a full sync
3729 sync_req.options = 0;
3730
3731 // no break
3732
3733 case HFS_FLUSH_BARRIER:
3734 // If a barrier-only flush isn't supported, fall back to a full flush.
3735 if (!(hfsmp->hfs_flags & HFS_FEATURE_BARRIER))
3736 sync_req.options = 0;
3737
3738 error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZE, (caddr_t)&sync_req,
3739 FWRITE, NULL);
3740 break;
3741
3742 default:
3743 error = EINVAL;
3744 }
3745
3746 return error;
3747 }
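
/*
 * Illustrative sketch (not part of the original file): a caller that
 * wants a durable, ordered flush can use the journal-barrier mode;
 * hfs_flush() itself downgrades it when the device lacks barrier
 * support, as shown above.
 */
#if 0
static errno_t example_durable_flush(struct hfsmount *hfsmp)
{
	/* Flush the journal and order it ahead of later writes. */
	return hfs_flush(hfsmp, HFS_FLUSH_JOURNAL_BARRIER);
}
#endif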
3748
3749 /*
3750 * hfs_erase_unused_nodes
3751 *
3752 * Check whether a volume may suffer from unused Catalog B-tree nodes that
3753 * are not zeroed (due to <rdar://problem/6947811>). If so, just write
3754 * zeroes to the unused nodes.
3755 *
3756 * How do we detect when a volume needs this repair? We can't always be
3757 * certain. If a volume was created after a certain date, then it may have
3758 * been created with the faulty newfs_hfs. Since newfs_hfs only created one
3759 * clump, a Catalog B-tree larger than its clump size implies that the
3760 * entire first clump has been written to; in that case there shouldn't be
3761 * unused, unwritten nodes in the first clump, and this repair is not needed.
3763 *
3764 * We have defined a bit in the Volume Header's attributes to indicate when the
3765 * unused nodes have been repaired. A newer newfs_hfs will set this bit,
3766 * as will fsck_hfs when it repairs the unused nodes.
3767 */
3768 int hfs_erase_unused_nodes(struct hfsmount *hfsmp)
3769 {
3770 int result;
3771 struct filefork *catalog;
3772 int lockflags;
3773
3774 if (hfsmp->vcbAtrb & kHFSUnusedNodeFixMask)
3775 {
3776 /* This volume has already been checked and repaired. */
3777 return 0;
3778 }
3779
3780 if ((hfsmp->localCreateDate < kHFSUnusedNodesFixDate))
3781 {
3782 /* This volume is too old to have had the problem. */
3783 hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask;
3784 return 0;
3785 }
3786
3787 catalog = hfsmp->hfs_catalog_cp->c_datafork;
3788 if (catalog->ff_size > catalog->ff_clumpsize)
3789 {
3790 /* The entire first clump must have been in use at some point. */
3791 hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask;
3792 return 0;
3793 }
3794
3795 /*
3796 * If we get here, we need to zero out those unused nodes.
3797 *
3798 * We start a transaction and lock the catalog since we're going to be
3799 * making on-disk changes. But note that BTZeroUnusedNodes doesn't actually
3800 * do its writing via the journal, because that would be too much I/O
3801 * to fit in a transaction, and it's a pain to break it up into multiple
3802 * transactions. (It behaves more like growing a B-tree would.)
3803 */
3804 printf("hfs_erase_unused_nodes: updating volume %s.\n", hfsmp->vcbVN);
3805 result = hfs_start_transaction(hfsmp);
3806 if (result)
3807 goto done;
3808 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
3809 result = BTZeroUnusedNodes(catalog);
3810 vnode_waitforwrites(hfsmp->hfs_catalog_vp, 0, 0, 0, "hfs_erase_unused_nodes");
3811 hfs_systemfile_unlock(hfsmp, lockflags);
3812 hfs_end_transaction(hfsmp);
3813 if (result == 0)
3814 hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask;
3815 printf("hfs_erase_unused_nodes: done updating volume %s.\n", hfsmp->vcbVN);
3816
3817 done:
3818 return result;
3819 }
3820
3821
3822 int
3823 check_for_dataless_file(struct vnode *vp, uint64_t op_type)
3824 {
3825 int error;
3826
3827 if (vp == NULL || (VTOC(vp)->c_bsdflags & UF_COMPRESSED) == 0 || VTOCMP(vp) == NULL || decmpfs_cnode_cmp_type(VTOCMP(vp)) != DATALESS_CMPFS_TYPE) {
3828 // there's nothing to do, it's not dataless
3829 return 0;
3830 }
3831
3832 /* Swap files are special; ignore them */
3833 if (vnode_isswap(vp)) {
3834 return 0;
3835 }
3836
3837 // printf("hfs: dataless: encountered a file with the dataless bit set! (vp %p)\n", vp);
3838 error = resolve_nspace_item(vp, op_type | NAMESPACE_HANDLER_NSPACE_EVENT);
3839 if (error == EDEADLK && op_type == NAMESPACE_HANDLER_WRITE_OP) {
3840 error = 0;
3841 } else if (error) {
3842 if (error == EAGAIN) {
3843 printf("hfs: dataless: timed out waiting for namespace handler...\n");
3844 // XXXdbg - return the fabled ENOTPRESENT (i.e. EJUKEBOX)?
3845 return 0;
3846 } else if (error == EINTR) {
3847 // printf("hfs: dataless: got a signal while waiting for namespace handler...\n");
3848 return EINTR;
3849 }
3850 } else if (VTOC(vp)->c_bsdflags & UF_COMPRESSED) {
3851 //
3852 // if we're here, the dataless bit is still set on the file
3853 // which means it didn't get handled. we return an error
3854 // but it's presently ignored by all callers of this function.
3855 //
3856 // XXXdbg - EDATANOTPRESENT is what we really need...
3857 //
3858 return EBADF;
3859 }
3860
3861 return error;
3862 }
3863
3864
3865 //
3866 // NOTE: this function takes care of starting a transaction and
3867 // acquiring the systemfile lock so that it can call
3868 // cat_update().
3869 //
3870 // NOTE: do NOT hold any cnode locks while calling this function
3871 // to avoid deadlocks (because we take a lock on the root
3872 // cnode)
3873 //
3874 int
3875 hfs_generate_document_id(struct hfsmount *hfsmp, uint32_t *docid)
3876 {
3877 struct vnode *rvp;
3878 struct cnode *cp;
3879 int error;
3880
3881 error = hfs_vfs_root(HFSTOVFS(hfsmp), &rvp, vfs_context_kernel());
3882 if (error) {
3883 return error;
3884 }
3885
3886 cp = VTOC(rvp);
3887 if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT)) != 0) {
vnode_put(rvp); /* fix: release the root vnode iocount taken above */
3888 return error;
3889 }
3890 struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)((void *)((char *)&cp->c_attr.ca_finderinfo + 16));
3891
3892 int lockflags;
3893 if ((error = hfs_start_transaction(hfsmp)) != 0) {
hfs_unlock(cp); /* fix: drop the cnode lock taken above */
vnode_put(rvp); /* fix: release the root vnode iocount */
3894 return error;
3895 }
3896 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
3897
3898 if (extinfo->document_id == 0) {
3899 // initialize this to start at 3 (one greater than the root-dir id)
3900 extinfo->document_id = 3;
3901 }
3902
3903 *docid = extinfo->document_id++;
3904
3905 // mark the root cnode dirty
3906 cp->c_flag |= C_MODIFIED;
3907 hfs_update(cp->c_vp, 0);
3908
3909 hfs_systemfile_unlock (hfsmp, lockflags);
3910 (void) hfs_end_transaction(hfsmp);
3911
3912 (void) hfs_unlock(cp);
3913
3914 vnode_put(rvp);
3915 rvp = NULL;
3916
3917 return 0;
3918 }
3919
3920
3921 /*
3922 * Return information about number of file system allocation blocks
3923 * taken by metadata on a volume.
3924 *
3925 * This function populates struct hfsinfo_metadata with allocation blocks
3926 * used by the extents overflow btree, catalog btree, bitmap, attribute
3927 * btree, and journal file, as well as the sum of all of the above.
3928 */
3929 int
3930 hfs_getinfo_metadata_blocks(struct hfsmount *hfsmp, struct hfsinfo_metadata *hinfo)
3931 {
3932 int lockflags = 0;
3933 int ret_lockflags = 0;
3934
3935 /* Zero out the output buffer */
3936 bzero(hinfo, sizeof(struct hfsinfo_metadata));
3937
3938 /*
3939 * Getting the number of allocation blocks for all btrees
3940 * should be a quick operation, so we grab locks for
3941 * all of them at the same time.
3942 */
3943 lockflags = SFL_CATALOG | SFL_EXTENTS | SFL_BITMAP | SFL_ATTRIBUTE;
3944 ret_lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
3945 /*
3946 * Make sure that we were able to acquire all locks requested
3947 * to protect us against conditions like unmount in progress.
3948 */
3949 if ((lockflags & ret_lockflags) != lockflags) {
3950 /* Release any locks that were acquired */
3951 hfs_systemfile_unlock(hfsmp, ret_lockflags);
3952 return EPERM;
3953 }
3954
3955 /* Get information about all the btrees */
3956 hinfo->extents = hfsmp->hfs_extents_cp->c_datafork->ff_blocks;
3957 hinfo->catalog = hfsmp->hfs_catalog_cp->c_datafork->ff_blocks;
3958 hinfo->allocation = hfsmp->hfs_allocation_cp->c_datafork->ff_blocks;
3959 hinfo->attribute = hfsmp->hfs_attribute_cp->c_datafork->ff_blocks;
3960
3961 /* Done with btrees, give up the locks */
3962 hfs_systemfile_unlock(hfsmp, ret_lockflags);
3963
3964 /* Get information about journal file */
3965 hinfo->journal = howmany(hfsmp->jnl_size, hfsmp->blockSize);
3966
3967 /* Calculate total number of metadata blocks */
3968 hinfo->total = hinfo->extents + hinfo->catalog +
3969 hinfo->allocation + hinfo->attribute +
3970 hinfo->journal;
3971
3972 return 0;
3973 }
3974
3975 static int
3976 hfs_freezewrite_callback(struct vnode *vp, __unused void *cargs)
3977 {
3978 vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze 8");
3979
3980 return 0;
3981 }
3982
3983 int hfs_freeze(struct hfsmount *hfsmp)
3984 {
3985 // First make sure some other process isn't freezing
3986 hfs_lock_mount(hfsmp);
3987 while (hfsmp->hfs_freeze_state != HFS_THAWED) {
3988 if (msleep(&hfsmp->hfs_freeze_state, &hfsmp->hfs_mutex,
3989 PWAIT | PCATCH, "hfs freeze 1", NULL) == EINTR) {
3990 hfs_unlock_mount(hfsmp);
3991 return EINTR;
3992 }
3993 }
3994
3995 // Stop new syncers from starting
3996 hfsmp->hfs_freeze_state = HFS_WANT_TO_FREEZE;
3997
3998 // Now wait for all syncers to finish
3999 while (hfsmp->hfs_syncers) {
4000 if (msleep(&hfsmp->hfs_freeze_state, &hfsmp->hfs_mutex,
4001 PWAIT | PCATCH, "hfs freeze 2", NULL) == EINTR) {
4002 hfs_thaw_locked(hfsmp);
4003 hfs_unlock_mount(hfsmp);
4004 return EINTR;
4005 }
4006 }
4007 hfs_unlock_mount(hfsmp);
4008
4009 // flush things before we get started to try and prevent
4010 // dirty data from being paged out while we're frozen.
4011 // note: we can't do this once we're in the freezing state because
4012 // other threads will need to take the global lock
4013 vnode_iterate(hfsmp->hfs_mp, 0, hfs_freezewrite_callback, NULL);
4014
4015 // Block everything in hfs_lock_global now
4016 hfs_lock_mount(hfsmp);
4017 hfsmp->hfs_freeze_state = HFS_FREEZING;
4018 hfsmp->hfs_freezing_thread = current_thread();
4019 hfs_unlock_mount(hfsmp);
4020
4021 /* Take the exclusive lock to flush out anything else that
4022 might have the global lock at the moment and also so we
4023 can flush the journal. */
4024 hfs_lock_global(hfsmp, HFS_EXCLUSIVE_LOCK);
4025 journal_flush(hfsmp->jnl, JOURNAL_WAIT_FOR_IO);
4026 hfs_unlock_global(hfsmp);
4027
4028 // don't need to iterate on all vnodes, we just need to
4029 // wait for writes to the system files and the device vnode
4030 //
4031 // Now that journal flush waits for all metadata blocks to
4032 // be written out, waiting for btree writes is probably no
4033 // longer required.
4034 if (HFSTOVCB(hfsmp)->extentsRefNum)
4035 vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze 3");
4036 if (HFSTOVCB(hfsmp)->catalogRefNum)
4037 vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze 4");
4038 if (HFSTOVCB(hfsmp)->allocationsRefNum)
4039 vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze 5");
4040 if (hfsmp->hfs_attribute_vp)
4041 vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze 6");
4042 vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze 7");
4043
4044 // We're done, mark frozen
4045 hfs_lock_mount(hfsmp);
4046 hfsmp->hfs_freeze_state = HFS_FROZEN;
4047 hfsmp->hfs_freezing_proc = current_proc();
4048 hfs_unlock_mount(hfsmp);
4049
4050 return 0;
4051 }
4052
4053 int hfs_thaw(struct hfsmount *hfsmp, const struct proc *process)
4054 {
4055 hfs_lock_mount(hfsmp);
4056
4057 if (hfsmp->hfs_freeze_state != HFS_FROZEN) {
4058 hfs_unlock_mount(hfsmp);
4059 return EINVAL;
4060 }
4061 if (process && hfsmp->hfs_freezing_proc != process) {
4062 hfs_unlock_mount(hfsmp);
4063 return EPERM;
4064 }
4065
4066 hfs_thaw_locked(hfsmp);
4067
4068 hfs_unlock_mount(hfsmp);
4069
4070 return 0;
4071 }
4072
4073 static void hfs_thaw_locked(struct hfsmount *hfsmp)
4074 {
4075 hfsmp->hfs_freezing_proc = NULL;
4076 hfsmp->hfs_freeze_state = HFS_THAWED;
4077
4078 wakeup(&hfsmp->hfs_freeze_state);
4079 }
4080
4081 uintptr_t obfuscate_addr(void *addr)
4082 {
4083 vm_offset_t new_addr;
4084 vm_kernel_addrperm_external((vm_offset_t)addr, &new_addr);
4085 return new_addr;
4086 }
4087
4088 #if CONFIG_HFS_STD
4089 /*
4090 * Convert HFS encoded string into UTF-8
4091 *
4092 * Unicode output is fully decomposed
4093 * '/' chars are converted to ':'
4094 */
4095 int
4096 hfs_to_utf8(ExtendedVCB *vcb, const Str31 hfs_str, ByteCount maxDstLen, ByteCount *actualDstLen, unsigned char* dstStr)
4097 {
4098 int error;
4099 UniChar uniStr[MAX_HFS_UNICODE_CHARS];
4100 ItemCount uniCount;
4101 size_t utf8len;
4102 hfs_to_unicode_func_t hfs_get_unicode = VCBTOHFS(vcb)->hfs_get_unicode;
4103 u_int8_t pascal_length = 0;
4104
4105 /*
4106 * Validate the length of the Pascal-style string before passing it
4107 * down to the decoding engine.
4108 */
4109 pascal_length = *((const u_int8_t*)(hfs_str));
4110 if (pascal_length > 31) {
4111 /* invalid string; longer than 31 bytes */
4112 error = EINVAL;
4113 return error;
4114 }
4115
4116 error = hfs_get_unicode(hfs_str, uniStr, MAX_HFS_UNICODE_CHARS, &uniCount);
4117
4118 if (uniCount == 0)
4119 error = EINVAL;
4120
4121 if (error == 0) {
4122 error = utf8_encodestr(uniStr, uniCount * sizeof(UniChar), dstStr, &utf8len, maxDstLen , ':', 0);
4123 if (error == ENAMETOOLONG)
4124 *actualDstLen = utf8_encodelen(uniStr, uniCount * sizeof(UniChar), ':', 0);
4125 else
4126 *actualDstLen = utf8len;
4127 }
4128
4129 return error;
4130 }
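
/*
 * Illustrative only (assumes a valid vcb and a Pascal-style Str31 name
 * in hfs_str): converting an HFS name to a NUL-terminated UTF-8 string.
 */
#if 0
	unsigned char utf8name[128];
	ByteCount utf8len = 0;
	if (hfs_to_utf8(vcb, hfs_str, sizeof(utf8name) - 1, &utf8len, utf8name) == 0)
		utf8name[utf8len] = '\0';
#endif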
4131
4132 /*
4133 * Convert UTF-8 string into HFS encoding
4134 *
4135 * ':' chars are converted to '/'
4136 * Assumes input represents fully decomposed Unicode
4137 */
4138 int
4139 utf8_to_hfs(ExtendedVCB *vcb, ByteCount srcLen, const unsigned char* srcStr, Str31 dstStr/*, int retry*/)
4140 {
4141 int error;
4142 UniChar uniStr[MAX_HFS_UNICODE_CHARS];
4143 size_t ucslen;
4144
4145 error = utf8_decodestr(srcStr, srcLen, uniStr, &ucslen, sizeof(uniStr), ':', 0);
4146 if (error == 0)
4147 error = unicode_to_hfs(vcb, ucslen, uniStr, dstStr, 1);
4148
4149 return error;
4150 }
4151
4152 /*
4153 * Convert Unicode string into HFS encoding
4154 *
4155 * ':' chars are converted to '/'
4156 * Assumes input represents fully decomposed Unicode
4157 */
4158 int
4159 unicode_to_hfs(ExtendedVCB *vcb, ByteCount srcLen, u_int16_t* srcStr, Str31 dstStr, int retry)
4160 {
4161 int error;
4162 unicode_to_hfs_func_t hfs_get_hfsname = VCBTOHFS(vcb)->hfs_get_hfsname;
4163
4164 error = hfs_get_hfsname(srcStr, srcLen/sizeof(UniChar), dstStr);
4165 if (error && retry) {
4166 error = unicode_to_mac_roman(srcStr, srcLen/sizeof(UniChar), dstStr);
4167 }
4168 return error;
4169 }
4170
4171 #endif // CONFIG_HFS_STD
4172
4173 static uint64_t hfs_allocated __attribute__((aligned(8)));
4174
4175 #if HFS_MALLOC_DEBUG
4176
4177 #warning HFS_MALLOC_DEBUG is on
4178
4179 #include <libkern/OSDebug.h>
4180 #include "hfs_alloc_trace.h"
4181
4182 struct alloc_debug_header {
4183 uint32_t magic;
4184 uint32_t size;
4185 uint64_t sequence;
4186 LIST_ENTRY(alloc_debug_header) chain;
4187 void *backtrace[HFS_ALLOC_BACKTRACE_LEN];
4188 };
4189
4190 enum {
4191 HFS_ALLOC_MAGIC = 0x68667361, // "hfsa"
4192 HFS_ALLOC_DEAD = 0x68667364, // "hfsd"
4193 };
4194
4195 static LIST_HEAD(, alloc_debug_header) hfs_alloc_list;
4196 static lck_mtx_t *hfs_alloc_mtx;
4197 static int hfs_alloc_tracing;
4198 static uint64_t hfs_alloc_sequence;
4199
4200 void hfs_alloc_trace_enable(void)
4201 {
4202 if (hfs_alloc_tracing)
4203 return;
4204
4205 // Not thread-safe, but this is debug so who cares
4206 extern lck_grp_t *hfs_mutex_group;
4207 extern lck_attr_t *hfs_lock_attr;
4208
4209 if (!hfs_alloc_mtx) {
4210 hfs_alloc_mtx = lck_mtx_alloc_init(hfs_mutex_group, hfs_lock_attr);
4211 LIST_INIT(&hfs_alloc_list);
4212 }
4213
4214 // Using OSCompareAndSwap in lieu of a barrier
4215 OSCompareAndSwap(hfs_alloc_tracing, true, &hfs_alloc_tracing);
4216 }
4217
4218 void hfs_alloc_trace_disable(void)
4219 {
4220 if (!hfs_alloc_tracing)
4221 return;
4222
4223 hfs_alloc_tracing = false;
4224
4225 lck_mtx_lock_spin(hfs_alloc_mtx);
4226
4227 struct alloc_debug_header *hdr;
4228 LIST_FOREACH(hdr, &hfs_alloc_list, chain) {
4229 hdr->chain.le_prev = NULL;
4230 }
4231 LIST_INIT(&hfs_alloc_list);
4232
4233 lck_mtx_unlock(hfs_alloc_mtx);
4234 }
4235
4236 static int hfs_handle_alloc_tracing SYSCTL_HANDLER_ARGS
4237 {
4238 int v = hfs_alloc_tracing;
4239
4240 int err = sysctl_handle_int(oidp, &v, 0, req);
4241 if (err || req->newptr == USER_ADDR_NULL || v == hfs_alloc_tracing)
4242 return err;
4243
4244 if (v)
4245 hfs_alloc_trace_enable();
4246 else
4247 hfs_alloc_trace_disable();
4248
4249 return 0;
4250 }
4251
4252 HFS_SYSCTL(PROC, _vfs_generic_hfs, OID_AUTO, alloc_tracing,
4253 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, NULL, 0,
4254 hfs_handle_alloc_tracing, "I", "Allocation tracing")
4255
4256 static int hfs_handle_alloc_trace_info SYSCTL_HANDLER_ARGS
4257 {
4258 if (!hfs_alloc_tracing) {
4259 struct hfs_alloc_trace_info info = {};
4260 return sysctl_handle_opaque(oidp, &info, sizeof(info), req);
4261 }
4262
4263 const int size = 128 * 1024;
4264 struct hfs_alloc_trace_info *info = kalloc(size);
4265
4266 const int max_entries = ((size - sizeof(*info))
4267 / sizeof(struct hfs_alloc_info_entry));
4268
4269 info->entry_count = 0;
4270 info->more = false;
4271
4272 lck_mtx_lock_spin(hfs_alloc_mtx);
4273
4274 struct alloc_debug_header *hdr;
4275 LIST_FOREACH(hdr, &hfs_alloc_list, chain) {
4276 if (info->entry_count == max_entries) {
4277 info->more = true;
4278 break;
4279 }
4280 vm_offset_t o;
4281 vm_kernel_addrperm_external((vm_offset_t)hdr, &o);
4282 info->entries[info->entry_count].ptr = o;
4283 info->entries[info->entry_count].size = hdr->size;
4284 info->entries[info->entry_count].sequence = hdr->sequence;
4285 for (int i = 0; i < HFS_ALLOC_BACKTRACE_LEN; ++i) {
4286 vm_kernel_unslide_or_perm_external((vm_offset_t)hdr->backtrace[i], &o);
4287 info->entries[info->entry_count].backtrace[i] = o;
4288 }
4289 ++info->entry_count;
4290 }
4291
4292 lck_mtx_unlock(hfs_alloc_mtx);
4293
4294 int err = sysctl_handle_opaque(oidp, info,
4295 sizeof(*info) + info->entry_count
4296 * sizeof(struct hfs_alloc_info_entry),
4297 req);
4298
4299 kfree(info, size);
4300
4301 return err;
4302 }
4303
4304 HFS_SYSCTL(PROC, _vfs_generic_hfs, OID_AUTO, alloc_trace_info,
4305 CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_LOCKED, NULL, 0,
4306 hfs_handle_alloc_trace_info, "-", "Allocation trace info")
4307
4308 bool hfs_dump_allocations(void)
4309 {
4310 if (!hfs_allocated)
4311 return false;
4312
4313 lck_mtx_lock(hfs_alloc_mtx);
4314
4315 struct alloc_debug_header *hdr;
4316 LIST_FOREACH(hdr, &hfs_alloc_list, chain) {
4317 vm_offset_t o;
4318 vm_kernel_addrperm_external((vm_offset_t)hdr, &o);
4319 printf(" -- 0x%lx:%llu <%u> --\n", o, hdr->sequence, hdr->size);
4320 for (int j = 0; j < HFS_ALLOC_BACKTRACE_LEN && hdr->backtrace[j]; ++j) {
4321 vm_kernel_unslide_or_perm_external((vm_offset_t)hdr->backtrace[j], &o);
4322 printf("0x%lx\n", o);
4323 }
4324 }
4325
4326 lck_mtx_unlock(hfs_alloc_mtx);
4327
4328 return true;
4329 }
4330
4331 #endif
4332
4333 HFS_SYSCTL(QUAD, _vfs_generic_hfs, OID_AUTO, allocated,
4334 CTLFLAG_RD | CTLFLAG_LOCKED, &hfs_allocated, "Memory allocated")
4335
4336 void *hfs_malloc(size_t size)
4337 {
4338 #if HFS_MALLOC_DEBUG
4339 hfs_assert(size <= 0xffffffff);
4340
4341 struct alloc_debug_header *hdr;
4342
4343 void *ptr;
4344 ptr = kalloc(size + sizeof(*hdr));
4345
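/*
 * Debug layout note: the header is stored as a trailer, i.e.
 * [size payload bytes][struct alloc_debug_header].
 */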
4346 hdr = ptr + size;
4347
4348 hdr->magic = HFS_ALLOC_MAGIC;
4349 hdr->size = size;
4350
4351 if (hfs_alloc_tracing) {
4352 OSBacktrace(hdr->backtrace, HFS_ALLOC_BACKTRACE_LEN);
4353 lck_mtx_lock_spin(hfs_alloc_mtx);
4354 LIST_INSERT_HEAD(&hfs_alloc_list, hdr, chain);
4355 hdr->sequence = ++hfs_alloc_sequence;
4356 lck_mtx_unlock(hfs_alloc_mtx);
4357 } else
4358 hdr->chain.le_prev = NULL;
4359 #else
4360 void *ptr;
4361 ptr = kalloc(size);
4362 #endif
4363
4364 OSAddAtomic64(size, &hfs_allocated);
4365
4366 return ptr;
4367 }
4368
4369 void hfs_free(void *ptr, size_t size)
4370 {
4371 if (!ptr)
4372 return;
4373
4374 OSAddAtomic64(-(int64_t)size, &hfs_allocated);
4375
4376 #if HFS_MALLOC_DEBUG
4377 struct alloc_debug_header *hdr = ptr + size;
4378
4379 hfs_assert(hdr->magic == HFS_ALLOC_MAGIC);
4380 hfs_assert(hdr->size == size);
4381
4382 hdr->magic = HFS_ALLOC_DEAD;
4383
4384 if (hdr->chain.le_prev) {
4385 lck_mtx_lock_spin(hfs_alloc_mtx);
4386 LIST_REMOVE(hdr, chain);
4387 lck_mtx_unlock(hfs_alloc_mtx);
4388 }
4389
4390 kfree(ptr, size + sizeof(*hdr));
4391 #else
4392 kfree(ptr, size);
4393 #endif
4394 }
4395
4396 void *hfs_mallocz(size_t size)
4397 {
4398 void *ptr = hfs_malloc(size);
4399 bzero(ptr, size);
4400 return ptr;
4401 }
4402
4403 // -- Zone allocator-related structures and routines --
4404
4405 hfs_zone_entry_t hfs_zone_entries[HFS_NUM_ZONES] = {
4406 { HFS_CNODE_ZONE, sizeof(struct cnode), "HFS node" },
4407 { HFS_FILEFORK_ZONE, sizeof(struct filefork), "HFS fork" },
4408 { HFS_DIRHINT_ZONE, sizeof(struct directoryhint), "HFS dirhint" }
4409 };
4410
4411 hfs_zone_t hfs_zones[HFS_NUM_ZONES];
4412
4413 void hfs_init_zones(void) {
4414 for (int i = 0; i < HFS_NUM_ZONES; i++) {
4415 hfs_zones[i].hz_zone = zone_create(hfs_zone_entries[i].hze_name,
4416 hfs_zone_entries[i].hze_elem_size, ZC_NOENCRYPT);
4417 }
4418 }
4419
4420 void *hfs_zalloc(hfs_zone_kind_t zone)
4421 {
4422 OSAddAtomic64(hfs_zones[zone].hz_elem_size, &hfs_allocated);
4423
4424 return zalloc(hfs_zones[zone].hz_zone);
4425 }
4426
4427 void hfs_zfree(void *ptr, hfs_zone_kind_t zone)
4428 {
4429 OSAddAtomic64(-(int64_t)hfs_zones[zone].hz_elem_size, &hfs_allocated);
4430
4431 zfree(hfs_zones[zone].hz_zone, ptr);
4432 }
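
/*
 * Illustrative only: an object allocated from one of the zones above
 * must be freed back to the same zone.
 */
#if 0
	struct cnode *cp = hfs_zalloc(HFS_CNODE_ZONE);
	/* ... use cp ... */
	hfs_zfree(cp, HFS_CNODE_ZONE);
#endif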
4433
4434 struct hfs_sysctl_chain *sysctl_list;
4435
4436 void hfs_sysctl_register(void)
4437 {
4438 struct hfs_sysctl_chain *e = sysctl_list;
4439 while (e) {
4440 sysctl_register_oid(e->oid);
4441 e = e->next;
4442 }
4443 }
4444
4445 void hfs_sysctl_unregister(void)
4446 {
4447 struct hfs_sysctl_chain *e = sysctl_list;
4448 while (e) {
4449 sysctl_unregister_oid(e->oid);
4450 e = e->next;
4451 }
4452 }
4453
4454 void hfs_assert_fail(const char *file, unsigned line, const char *expr)
4455 {
4456 Assert(file, line, expr);
4457 __builtin_unreachable();
4458 }