/*
 * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* @(#)hfs_vfsutils.c 4.0
 *
 * (c) 1997-2002 Apple Computer, Inc. All Rights Reserved
 *
 * hfs_vfsutils.c -- Routines that go between the HFS layer and the VFS.
 *
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/stat.h>
#include <sys/mount.h>
#include <sys/mount_internal.h>
#include <sys/buf.h>
#include <sys/buf_internal.h>
#include <sys/ubc.h>
#include <sys/unistd.h>
#include <sys/utfconv.h>
#include <sys/kauth.h>
#include <sys/fcntl.h>
#include <sys/fsctl.h>
#include <sys/vnode_internal.h>
#include <kern/clock.h>
#include <stdbool.h>

#include <libkern/OSAtomic.h>

/* for parsing boot-args */
#include <pexpert/pexpert.h>

#if CONFIG_PROTECT
#include <sys/cprotect.h>
#endif

#include "hfs.h"
#include "hfs_catalog.h"
#include "hfs_dbg.h"
#include "hfs_mount.h"
#include "hfs_endian.h"
#include "hfs_cnode.h"
#include "hfs_fsctl.h"

#include "hfscommon/headers/FileMgrInternal.h"
#include "hfscommon/headers/BTreesInternal.h"
#include "hfscommon/headers/HFSUnicodeWrappers.h"

/* Enable/disable debugging code for live volume resizing, defined in hfs_resize.c */
extern int hfs_resize_debug;

static void ReleaseMetaFileVNode(struct vnode *vp);
static int hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_args);

static u_int32_t hfs_hotfile_freeblocks(struct hfsmount *);
static void hfs_thaw_locked(struct hfsmount *hfsmp);

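/* When set, mount-time failures below log verbose diagnostics via printf. */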
#define HFS_MOUNT_DEBUG 1


//*******************************************************************************
// Note: Finder information in the HFS/HFS+ metadata is considered opaque and
// hence is not in the right byte order on little-endian machines. It is
// the responsibility of the Finder and other clients to swap the data.
//*******************************************************************************

//*******************************************************************************
// Routine: hfs_MountHFSVolume
//
//
//*******************************************************************************
unsigned char hfs_catname[] = "Catalog B-tree";
unsigned char hfs_extname[] = "Extents B-tree";
unsigned char hfs_vbmname[] = "Volume Bitmap";
unsigned char hfs_attrname[] = "Attribute B-tree";
unsigned char hfs_startupname[] = "Startup File";

#if CONFIG_HFS_STD
OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb,
        __unused struct proc *p)
{
    ExtendedVCB *vcb = HFSTOVCB(hfsmp);
    int error;
    ByteCount utf8chars;
    struct cat_desc cndesc;
    struct cat_attr cnattr;
    struct cat_fork fork;
    int newvnode_flags = 0;

    /* Block size must be a multiple of 512 */
    if (SWAP_BE32(mdb->drAlBlkSiz) == 0 ||
        (SWAP_BE32(mdb->drAlBlkSiz) & 0x01FF) != 0)
        return (EINVAL);

    /* Don't mount a writable volume if it's dirty; it must be cleaned by fsck_hfs. */
    if (((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) &&
        ((SWAP_BE16(mdb->drAtrb) & kHFSVolumeUnmountedMask) == 0)) {
        return (EINVAL);
    }
    hfsmp->hfs_flags |= HFS_STANDARD;
    /*
     * The MDB seems OK: transfer info from it into VCB
     * Note - the VCB starts out clear (all zeros)
     *
     */
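    /* All on-disk MDB fields are big-endian; SWAP_BE* converts them to host byte order. */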
    vcb->vcbSigWord = SWAP_BE16 (mdb->drSigWord);
    vcb->hfs_itime = to_bsd_time(LocalToUTC(SWAP_BE32(mdb->drCrDate)));
    vcb->localCreateDate = SWAP_BE32 (mdb->drCrDate);
    vcb->vcbLsMod = to_bsd_time(LocalToUTC(SWAP_BE32(mdb->drLsMod)));
    vcb->vcbAtrb = SWAP_BE16 (mdb->drAtrb);
    vcb->vcbNmFls = SWAP_BE16 (mdb->drNmFls);
    vcb->vcbVBMSt = SWAP_BE16 (mdb->drVBMSt);
    vcb->nextAllocation = SWAP_BE16 (mdb->drAllocPtr);
    vcb->totalBlocks = SWAP_BE16 (mdb->drNmAlBlks);
    vcb->allocLimit = vcb->totalBlocks;
    vcb->blockSize = SWAP_BE32 (mdb->drAlBlkSiz);
    vcb->vcbClpSiz = SWAP_BE32 (mdb->drClpSiz);
    vcb->vcbAlBlSt = SWAP_BE16 (mdb->drAlBlSt);
    vcb->vcbNxtCNID = SWAP_BE32 (mdb->drNxtCNID);
    vcb->freeBlocks = SWAP_BE16 (mdb->drFreeBks);
    vcb->vcbVolBkUp = to_bsd_time(LocalToUTC(SWAP_BE32(mdb->drVolBkUp)));
    vcb->vcbWrCnt = SWAP_BE32 (mdb->drWrCnt);
    vcb->vcbNmRtDirs = SWAP_BE16 (mdb->drNmRtDirs);
    vcb->vcbFilCnt = SWAP_BE32 (mdb->drFilCnt);
    vcb->vcbDirCnt = SWAP_BE32 (mdb->drDirCnt);
    bcopy(mdb->drFndrInfo, vcb->vcbFndrInfo, sizeof(vcb->vcbFndrInfo));
    if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
        vcb->vcbWrCnt++;    /* Compensate for write of MDB on last flush */

    /* convert hfs encoded name into UTF-8 string */
    error = hfs_to_utf8(vcb, mdb->drVN, NAME_MAX, &utf8chars, vcb->vcbVN);
    /*
     * When an HFS name cannot be encoded with the current
     * volume encoding we use MacRoman as a fallback.
     */
    if (error || (utf8chars == 0)) {
        error = mac_roman_to_utf8(mdb->drVN, NAME_MAX, &utf8chars, vcb->vcbVN);
        /* If we fail to encode to UTF8 from Mac Roman, the name is bad.  Deny the mount */
        if (error) {
            goto MtVolErr;
        }
    }

    hfsmp->hfs_logBlockSize = BestBlockSizeFit(vcb->blockSize, MAXBSIZE, hfsmp->hfs_logical_block_size);
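    /* HFS standard does volume-bitmap I/O in 512-byte (kHFSBlockSize) chunks. */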
    vcb->vcbVBMIOSize = kHFSBlockSize;

    /* Generate the partition-based AVH location */
    hfsmp->hfs_partition_avh_sector = HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size,
            hfsmp->hfs_logical_block_count);

    /* HFS standard is read-only, so just stuff the FS location in here, too */
    hfsmp->hfs_fs_avh_sector = hfsmp->hfs_partition_avh_sector;

    bzero(&cndesc, sizeof(cndesc));
    cndesc.cd_parentcnid = kHFSRootParentID;
    cndesc.cd_flags |= CD_ISMETA;
    bzero(&cnattr, sizeof(cnattr));
    cnattr.ca_linkcount = 1;
    cnattr.ca_mode = S_IFREG;
    bzero(&fork, sizeof(fork));

    /*
     * Set up Extents B-tree vnode
     */
    cndesc.cd_nameptr = hfs_extname;
    cndesc.cd_namelen = strlen((char *)hfs_extname);
    cndesc.cd_cnid = cnattr.ca_fileid = kHFSExtentsFileID;
    fork.cf_size = SWAP_BE32(mdb->drXTFlSize);
    fork.cf_blocks = fork.cf_size / vcb->blockSize;
    fork.cf_clump = SWAP_BE32(mdb->drXTClpSiz);
    fork.cf_vblocks = 0;
    fork.cf_extents[0].startBlock = SWAP_BE16(mdb->drXTExtRec[0].startBlock);
    fork.cf_extents[0].blockCount = SWAP_BE16(mdb->drXTExtRec[0].blockCount);
    fork.cf_extents[1].startBlock = SWAP_BE16(mdb->drXTExtRec[1].startBlock);
    fork.cf_extents[1].blockCount = SWAP_BE16(mdb->drXTExtRec[1].blockCount);
    fork.cf_extents[2].startBlock = SWAP_BE16(mdb->drXTExtRec[2].startBlock);
    fork.cf_extents[2].blockCount = SWAP_BE16(mdb->drXTExtRec[2].blockCount);
    cnattr.ca_blocks = fork.cf_blocks;

    error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork,
                            &hfsmp->hfs_extents_vp, &newvnode_flags);
    if (error) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfs (std): error creating Ext Vnode (%d) \n", error);
        }
        goto MtVolErr;
    }
    error = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_extents_vp),
                                     (KeyCompareProcPtr)CompareExtentKeys));
    if (error) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfs (std): error opening Ext Vnode (%d) \n", error);
        }
        hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
        goto MtVolErr;
    }
    hfsmp->hfs_extents_cp = VTOC(hfsmp->hfs_extents_vp);

    /*
     * Set up Catalog B-tree vnode...
     */
    cndesc.cd_nameptr = hfs_catname;
    cndesc.cd_namelen = strlen((char *)hfs_catname);
    cndesc.cd_cnid = cnattr.ca_fileid = kHFSCatalogFileID;
    fork.cf_size = SWAP_BE32(mdb->drCTFlSize);
    fork.cf_blocks = fork.cf_size / vcb->blockSize;
    fork.cf_clump = SWAP_BE32(mdb->drCTClpSiz);
    fork.cf_vblocks = 0;
    fork.cf_extents[0].startBlock = SWAP_BE16(mdb->drCTExtRec[0].startBlock);
    fork.cf_extents[0].blockCount = SWAP_BE16(mdb->drCTExtRec[0].blockCount);
    fork.cf_extents[1].startBlock = SWAP_BE16(mdb->drCTExtRec[1].startBlock);
    fork.cf_extents[1].blockCount = SWAP_BE16(mdb->drCTExtRec[1].blockCount);
    fork.cf_extents[2].startBlock = SWAP_BE16(mdb->drCTExtRec[2].startBlock);
    fork.cf_extents[2].blockCount = SWAP_BE16(mdb->drCTExtRec[2].blockCount);
    cnattr.ca_blocks = fork.cf_blocks;

    error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork,
                            &hfsmp->hfs_catalog_vp, &newvnode_flags);
    if (error) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfs (std): error creating catalog Vnode (%d) \n", error);
        }
        hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
        goto MtVolErr;
    }
    error = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_catalog_vp),
                                     (KeyCompareProcPtr)CompareCatalogKeys));
    if (error) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfs (std): error opening catalog Vnode (%d) \n", error);
        }
        hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
        hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
        goto MtVolErr;
    }
    hfsmp->hfs_catalog_cp = VTOC(hfsmp->hfs_catalog_vp);

    /*
     * Set up dummy Allocation file vnode (used only for locking bitmap)
     */
    cndesc.cd_nameptr = hfs_vbmname;
    cndesc.cd_namelen = strlen((char *)hfs_vbmname);
    cndesc.cd_cnid = cnattr.ca_fileid = kHFSAllocationFileID;
    bzero(&fork, sizeof(fork));
    cnattr.ca_blocks = 0;

    error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork,
                            &hfsmp->hfs_allocation_vp, &newvnode_flags);
    if (error) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfs (std): error creating bitmap Vnode (%d) \n", error);
        }
        hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
        hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
        goto MtVolErr;
    }
    hfsmp->hfs_allocation_cp = VTOC(hfsmp->hfs_allocation_vp);

    /* mark the volume dirty (clear clean unmount bit) */
    vcb->vcbAtrb &= ~kHFSVolumeUnmountedMask;

    if (error == noErr) {
        error = cat_idlookup(hfsmp, kHFSRootFolderID, 0, 0, NULL, NULL, NULL);
        /* Only log when the lookup actually failed. */
        if (error && HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfs (std): error looking up root folder (%d) \n", error);
        }
    }

    if (error == noErr) {
        /* If the disk isn't write protected.. */
        if ( !(vcb->vcbAtrb & kHFSVolumeHardwareLockMask)) {
            MarkVCBDirty (vcb);     // mark VCB dirty so it will be written
        }
    }

    /*
     * all done with system files so we can unlock now...
     */
    hfs_unlock(VTOC(hfsmp->hfs_allocation_vp));
    hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
    hfs_unlock(VTOC(hfsmp->hfs_extents_vp));

    if (error == noErr) {
        /* If successful, then we can just return once we've unlocked the cnodes */
        return error;
    }

    //-- Release any resources allocated so far before exiting with an error:
MtVolErr:
    hfsUnmount(hfsmp, NULL);

    return (error);
}

#endif

//*******************************************************************************
// Routine: hfs_MountHFSPlusVolume
//
//
//*******************************************************************************

OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
        off_t embeddedOffset, u_int64_t disksize, __unused struct proc *p, void *args, kauth_cred_t cred)
{
    register ExtendedVCB *vcb;
    struct cat_desc cndesc;
    struct cat_attr cnattr;
    struct cat_fork cfork;
    u_int32_t blockSize;
    daddr64_t spare_sectors;
    struct BTreeInfoRec btinfo;
    u_int16_t signature;
    u_int16_t hfs_version;
    int newvnode_flags = 0;
    int i;
    OSErr retval;
    char converted_volname[256];
    size_t volname_length = 0;
    size_t conv_volname_length = 0;

    signature = SWAP_BE16(vhp->signature);
    hfs_version = SWAP_BE16(vhp->version);

    if (signature == kHFSPlusSigWord) {
        if (hfs_version != kHFSPlusVersion) {
            printf("hfs_mount: invalid HFS+ version: %x\n", hfs_version);
            return (EINVAL);
        }
    } else if (signature == kHFSXSigWord) {
        if (hfs_version != kHFSXVersion) {
            printf("hfs_mount: invalid HFSX version: %x\n", hfs_version);
            return (EINVAL);
        }
        /* The in-memory signature is always 'H+'. */
        signature = kHFSPlusSigWord;
        hfsmp->hfs_flags |= HFS_X;
    } else {
        /* Removed printf for invalid HFS+ signature because it gives
         * a false error for a UFS root volume.
         */
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfsplus: unknown Volume Signature : %x\n", signature);
        }
        return (EINVAL);
    }

    /* Block size must be at least 512 and a power of 2 */
    blockSize = SWAP_BE32(vhp->blockSize);
    if (blockSize < 512 || !powerof2(blockSize)) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfsplus: invalid blocksize (%d) \n", blockSize);
        }
        return (EINVAL);
    }

    /* Don't mount a writable volume if it's dirty; it must be cleaned by fsck_hfs. */
    if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0 && hfsmp->jnl == NULL &&
        (SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) == 0) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfsplus: cannot mount dirty non-journaled volumes\n");
        }
        return (EINVAL);
    }

    /* Make sure we can live with the physical block size. */
    if ((disksize & (hfsmp->hfs_logical_block_size - 1)) ||
        (embeddedOffset & (hfsmp->hfs_logical_block_size - 1)) ||
        (blockSize < hfsmp->hfs_logical_block_size)) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfsplus: invalid physical blocksize (%d), hfs_logical_blocksize (%d) \n",
                    blockSize, hfsmp->hfs_logical_block_size);
        }
        return (ENXIO);
    }

    /* If the allocation block size is less than the physical
     * block size, we assume that the physical block size
     * is the same as the logical block size. The physical block
     * size value is used to round down the offsets for
     * reading and writing the primary and alternate volume
     * headers at a physical block boundary, and it will cause
     * problems if it is less than the block size.
     */
    if (blockSize < hfsmp->hfs_physical_block_size) {
        hfsmp->hfs_physical_block_size = hfsmp->hfs_logical_block_size;
        hfsmp->hfs_log_per_phys = 1;
    }

    /*
     * The VolumeHeader seems OK: transfer info from it into VCB
     * Note - the VCB starts out clear (all zeros)
     */
    vcb = HFSTOVCB(hfsmp);

    vcb->vcbSigWord = signature;
    vcb->vcbJinfoBlock = SWAP_BE32(vhp->journalInfoBlock);
    vcb->vcbLsMod = to_bsd_time(SWAP_BE32(vhp->modifyDate));
    vcb->vcbAtrb = SWAP_BE32(vhp->attributes);
    vcb->vcbClpSiz = SWAP_BE32(vhp->rsrcClumpSize);
    vcb->vcbNxtCNID = SWAP_BE32(vhp->nextCatalogID);
    vcb->vcbVolBkUp = to_bsd_time(SWAP_BE32(vhp->backupDate));
    vcb->vcbWrCnt = SWAP_BE32(vhp->writeCount);
    vcb->vcbFilCnt = SWAP_BE32(vhp->fileCount);
    vcb->vcbDirCnt = SWAP_BE32(vhp->folderCount);

    /* copy 32 bytes of Finder info */
    bcopy(vhp->finderInfo, vcb->vcbFndrInfo, sizeof(vhp->finderInfo));

    vcb->vcbAlBlSt = 0;     /* hfs+ allocation blocks start at first block of volume */
    if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
        vcb->vcbWrCnt++;    /* compensate for write of Volume Header on last flush */

    /* Now fill in the Extended VCB info */
    vcb->nextAllocation = SWAP_BE32(vhp->nextAllocation);
    vcb->totalBlocks = SWAP_BE32(vhp->totalBlocks);
    vcb->allocLimit = vcb->totalBlocks;
    vcb->freeBlocks = SWAP_BE32(vhp->freeBlocks);
    vcb->blockSize = blockSize;
    vcb->encodingsBitmap = SWAP_BE64(vhp->encodingsBitmap);
    vcb->localCreateDate = SWAP_BE32(vhp->createDate);

    vcb->hfsPlusIOPosOffset = embeddedOffset;

    /* Default to no free block reserve */
    vcb->reserveBlocks = 0;

    /*
     * Update the logical block size in the mount struct
     * (currently set up from the wrapper MDB) using the
     * new blocksize value:
     */
    hfsmp->hfs_logBlockSize = BestBlockSizeFit(vcb->blockSize, MAXBSIZE, hfsmp->hfs_logical_block_size);
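    /* vcbVBMIOSize bounds the size of each bitmap I/O; cap it at MAXPHYSIO so
     * a single bitmap read never exceeds the maximum physical transfer size. */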
    vcb->vcbVBMIOSize = min(vcb->blockSize, MAXPHYSIO);

    /*
     * Validate and initialize the location of the alternate volume header.
     *
     * Note that there may be spare sectors beyond the end of the filesystem that still
     * belong to our partition.
     */

    spare_sectors = hfsmp->hfs_logical_block_count -
                    (((daddr64_t)vcb->totalBlocks * blockSize) /
                       hfsmp->hfs_logical_block_size);

    /*
     * Differentiate between "innocuous" spare sectors and the more unusual
     * degenerate case:
     *
     * *** Innocuous spare sectors exist if:
     *
     * A) the number of bytes assigned to the partition (by multiplying logical
     * block size * logical block count) is greater than the filesystem size
     * (by multiplying allocation block count and allocation block size)
     *
     * and
     *
     * B) the remainder is less than the size of a full allocation block's worth of bytes.
     *
     * This handles the normal case where there may be a few extra sectors, but the two
     * are fundamentally in sync.
     *
     * *** Degenerate spare sectors exist if:
     * A) The number of bytes assigned to the partition (by multiplying logical
     * block size * logical block count) is greater than the filesystem size
     * (by multiplying allocation block count and block size).
     *
     * and
     *
     * B) the remainder is greater than a full allocation block's worth of bytes.
     * In this case, a smaller file system exists in a larger partition.
     * This can happen in various ways, including when a volume is resized but the
     * partition is yet to be resized. Under this condition, we have to assume that
     * partition management software may resize the partition to match
     * the file system size in the future. Therefore we should update the
     * alternate volume header at two locations on the disk:
     *   a. 1024 bytes before the end of the partition
     *   b. 1024 bytes before the end of the file system
     */

    if (spare_sectors > (daddr64_t)(blockSize / hfsmp->hfs_logical_block_size)) {
        /*
         * Handle the degenerate case above. FS < partition size.
         * AVH located at 1024 bytes from the end of the partition
         */
        hfsmp->hfs_partition_avh_sector = (hfsmp->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
                HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, hfsmp->hfs_logical_block_count);

        /* AVH located at 1024 bytes from the end of the filesystem */
        hfsmp->hfs_fs_avh_sector = (hfsmp->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
                HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size,
                        (((daddr64_t)vcb->totalBlocks * blockSize) / hfsmp->hfs_logical_block_size));
    }
    else {
        /* Innocuous spare sectors; Partition & FS notion are in sync */
        hfsmp->hfs_partition_avh_sector = (hfsmp->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
                HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, hfsmp->hfs_logical_block_count);

        hfsmp->hfs_fs_avh_sector = hfsmp->hfs_partition_avh_sector;
    }
    if (hfs_resize_debug) {
        printf ("hfs_MountHFSPlusVolume: partition_avh_sector=%qu, fs_avh_sector=%qu\n",
                hfsmp->hfs_partition_avh_sector, hfsmp->hfs_fs_avh_sector);
    }

    bzero(&cndesc, sizeof(cndesc));
    cndesc.cd_parentcnid = kHFSRootParentID;
    cndesc.cd_flags |= CD_ISMETA;
    bzero(&cnattr, sizeof(cnattr));
    cnattr.ca_linkcount = 1;
    cnattr.ca_mode = S_IFREG;

    /*
     * Set up Extents B-tree vnode
     */
    cndesc.cd_nameptr = hfs_extname;
    cndesc.cd_namelen = strlen((char *)hfs_extname);
    cndesc.cd_cnid = cnattr.ca_fileid = kHFSExtentsFileID;

    cfork.cf_size = SWAP_BE64 (vhp->extentsFile.logicalSize);
    cfork.cf_new_size = 0;
    cfork.cf_clump = SWAP_BE32 (vhp->extentsFile.clumpSize);
    cfork.cf_blocks = SWAP_BE32 (vhp->extentsFile.totalBlocks);
    cfork.cf_vblocks = 0;
    cnattr.ca_blocks = cfork.cf_blocks;
    for (i = 0; i < kHFSPlusExtentDensity; i++) {
        cfork.cf_extents[i].startBlock =
                SWAP_BE32 (vhp->extentsFile.extents[i].startBlock);
        cfork.cf_extents[i].blockCount =
                SWAP_BE32 (vhp->extentsFile.extents[i].blockCount);
    }
    retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
                             &hfsmp->hfs_extents_vp, &newvnode_flags);
    if (retval) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting extentoverflow BT\n", retval);
        }
        goto ErrorExit;
    }
    hfsmp->hfs_extents_cp = VTOC(hfsmp->hfs_extents_vp);
    hfs_unlock(hfsmp->hfs_extents_cp);

    retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_extents_vp),
                                      (KeyCompareProcPtr) CompareExtentKeysPlus));
    if (retval) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfsplus: BTOpenPath returned (%d) getting extentoverflow BT\n", retval);
        }
        goto ErrorExit;
    }
    /*
     * Set up Catalog B-tree vnode
     */
    cndesc.cd_nameptr = hfs_catname;
    cndesc.cd_namelen = strlen((char *)hfs_catname);
    cndesc.cd_cnid = cnattr.ca_fileid = kHFSCatalogFileID;

    cfork.cf_size = SWAP_BE64 (vhp->catalogFile.logicalSize);
    cfork.cf_clump = SWAP_BE32 (vhp->catalogFile.clumpSize);
    cfork.cf_blocks = SWAP_BE32 (vhp->catalogFile.totalBlocks);
    cfork.cf_vblocks = 0;
    cnattr.ca_blocks = cfork.cf_blocks;
    for (i = 0; i < kHFSPlusExtentDensity; i++) {
        cfork.cf_extents[i].startBlock =
                SWAP_BE32 (vhp->catalogFile.extents[i].startBlock);
        cfork.cf_extents[i].blockCount =
                SWAP_BE32 (vhp->catalogFile.extents[i].blockCount);
    }
    retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
                             &hfsmp->hfs_catalog_vp, &newvnode_flags);
    if (retval) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting catalog BT\n", retval);
        }
        goto ErrorExit;
    }
    hfsmp->hfs_catalog_cp = VTOC(hfsmp->hfs_catalog_vp);
    hfs_unlock(hfsmp->hfs_catalog_cp);

    retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_catalog_vp),
                                      (KeyCompareProcPtr) CompareExtendedCatalogKeys));
    if (retval) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfsplus: BTOpenPath returned (%d) getting catalog BT\n", retval);
        }
        goto ErrorExit;
    }
    if ((hfsmp->hfs_flags & HFS_X) &&
        BTGetInformation(VTOF(hfsmp->hfs_catalog_vp), 0, &btinfo) == 0) {
        if (btinfo.keyCompareType == kHFSBinaryCompare) {
            hfsmp->hfs_flags |= HFS_CASE_SENSITIVE;
            /* Install a case-sensitive key compare */
            (void) BTOpenPath(VTOF(hfsmp->hfs_catalog_vp),
                              (KeyCompareProcPtr)cat_binarykeycompare);
        }
    }

    /*
     * Set up Allocation file vnode
     */
    cndesc.cd_nameptr = hfs_vbmname;
    cndesc.cd_namelen = strlen((char *)hfs_vbmname);
    cndesc.cd_cnid = cnattr.ca_fileid = kHFSAllocationFileID;

    cfork.cf_size = SWAP_BE64 (vhp->allocationFile.logicalSize);
    cfork.cf_clump = SWAP_BE32 (vhp->allocationFile.clumpSize);
    cfork.cf_blocks = SWAP_BE32 (vhp->allocationFile.totalBlocks);
    cfork.cf_vblocks = 0;
    cnattr.ca_blocks = cfork.cf_blocks;
    for (i = 0; i < kHFSPlusExtentDensity; i++) {
        cfork.cf_extents[i].startBlock =
                SWAP_BE32 (vhp->allocationFile.extents[i].startBlock);
        cfork.cf_extents[i].blockCount =
                SWAP_BE32 (vhp->allocationFile.extents[i].blockCount);
    }
    retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
                             &hfsmp->hfs_allocation_vp, &newvnode_flags);
    if (retval) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting bitmap\n", retval);
        }
        goto ErrorExit;
    }
    hfsmp->hfs_allocation_cp = VTOC(hfsmp->hfs_allocation_vp);
    hfs_unlock(hfsmp->hfs_allocation_cp);

    /*
     * Set up Attribute B-tree vnode
     */
    if (vhp->attributesFile.totalBlocks != 0) {
        cndesc.cd_nameptr = hfs_attrname;
        cndesc.cd_namelen = strlen((char *)hfs_attrname);
        cndesc.cd_cnid = cnattr.ca_fileid = kHFSAttributesFileID;

        cfork.cf_size = SWAP_BE64 (vhp->attributesFile.logicalSize);
        cfork.cf_clump = SWAP_BE32 (vhp->attributesFile.clumpSize);
        cfork.cf_blocks = SWAP_BE32 (vhp->attributesFile.totalBlocks);
        cfork.cf_vblocks = 0;
        cnattr.ca_blocks = cfork.cf_blocks;
        for (i = 0; i < kHFSPlusExtentDensity; i++) {
            cfork.cf_extents[i].startBlock =
                    SWAP_BE32 (vhp->attributesFile.extents[i].startBlock);
            cfork.cf_extents[i].blockCount =
                    SWAP_BE32 (vhp->attributesFile.extents[i].blockCount);
        }
        retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
                                 &hfsmp->hfs_attribute_vp, &newvnode_flags);
        if (retval) {
            if (HFS_MOUNT_DEBUG) {
                printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting EA BT\n", retval);
            }
            goto ErrorExit;
        }
        hfsmp->hfs_attribute_cp = VTOC(hfsmp->hfs_attribute_vp);
        hfs_unlock(hfsmp->hfs_attribute_cp);
        retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_attribute_vp),
                                          (KeyCompareProcPtr) hfs_attrkeycompare));
        if (retval) {
            if (HFS_MOUNT_DEBUG) {
                printf("hfs_mounthfsplus: BTOpenPath returned (%d) getting EA BT\n", retval);
            }
            goto ErrorExit;
        }

        /* Initialize a vnode for the virtual attribute data file that spans
         * the entire file system space, used for performing I/O to the
         * attribute B-tree. We hold an iocount on the attrdata vnode for the
         * entire duration of the mount (similar to the B-tree vnodes).
         */
        retval = init_attrdata_vnode(hfsmp);
        if (retval) {
            if (HFS_MOUNT_DEBUG) {
                printf("hfs_mounthfsplus: init_attrdata_vnode returned (%d) for virtual EA file\n", retval);
            }
            goto ErrorExit;
        }
    }

    /*
     * Set up Startup file vnode
     */
    if (vhp->startupFile.totalBlocks != 0) {
        cndesc.cd_nameptr = hfs_startupname;
        cndesc.cd_namelen = strlen((char *)hfs_startupname);
        cndesc.cd_cnid = cnattr.ca_fileid = kHFSStartupFileID;

        cfork.cf_size = SWAP_BE64 (vhp->startupFile.logicalSize);
        cfork.cf_clump = SWAP_BE32 (vhp->startupFile.clumpSize);
        cfork.cf_blocks = SWAP_BE32 (vhp->startupFile.totalBlocks);
        cfork.cf_vblocks = 0;
        cnattr.ca_blocks = cfork.cf_blocks;
        for (i = 0; i < kHFSPlusExtentDensity; i++) {
            cfork.cf_extents[i].startBlock =
                    SWAP_BE32 (vhp->startupFile.extents[i].startBlock);
            cfork.cf_extents[i].blockCount =
                    SWAP_BE32 (vhp->startupFile.extents[i].blockCount);
        }
        retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
                                 &hfsmp->hfs_startup_vp, &newvnode_flags);
        if (retval) {
            if (HFS_MOUNT_DEBUG) {
                printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting startup file\n", retval);
            }
            goto ErrorExit;
        }
        hfsmp->hfs_startup_cp = VTOC(hfsmp->hfs_startup_vp);
        hfs_unlock(hfsmp->hfs_startup_cp);
    }

    /*
     * Pick up volume name and create date
     *
     * Acquiring the volume name should not manipulate the bitmap, only the catalog
     * btree and possibly the extents overflow b-tree.
     */
    retval = cat_idlookup(hfsmp, kHFSRootFolderID, 0, 0, &cndesc, &cnattr, NULL);
    if (retval) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfsplus: cat_idlookup returned (%d) getting rootfolder \n", retval);
        }
        goto ErrorExit;
    }
    vcb->hfs_itime = cnattr.ca_itime;
    vcb->volumeNameEncodingHint = cndesc.cd_encoding;
    bcopy(cndesc.cd_nameptr, vcb->vcbVN, min(255, cndesc.cd_namelen));
    volname_length = strlen ((const char*)vcb->vcbVN);
    cat_releasedesc(&cndesc);

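/* Private ioctl understood by CoreStorage to set the logical volume name;
 * defined locally here rather than included from a header. */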
#define DKIOCCSSETLVNAME _IOW('d', 198, char[256])


    /* Send the volume name down to CoreStorage if necessary */
    retval = utf8_normalizestr(vcb->vcbVN, volname_length, (u_int8_t*)converted_volname, &conv_volname_length, 256, UTF_PRECOMPOSED);
    if (retval == 0) {
        (void) VNOP_IOCTL (hfsmp->hfs_devvp, DKIOCCSSETLVNAME, converted_volname, 0, vfs_context_current());
    }

    /* Reset retval to 0; we don't care about errors in volname conversion. */
    retval = 0;


    /*
     * We now always initiate a full bitmap scan even if the volume is read-only because this is
     * our only shot to do I/Os of dramatically different sizes than what the buffer cache ordinarily
     * expects. TRIMs will not be delivered to the underlying media if the volume is not
     * read-write though.
     */
    thread_t allocator_scanner;
    hfsmp->scan_var = 0;

    /* Take the HFS mount mutex and wait on scan_var */
    hfs_lock_mount (hfsmp);

    kernel_thread_start ((thread_continue_t) hfs_scan_blocks, hfsmp, &allocator_scanner);
    /* Wait until it registers that it's got the appropriate locks */
    while ((hfsmp->scan_var & HFS_ALLOCATOR_SCAN_INFLIGHT) == 0) {
        (void) msleep (&hfsmp->scan_var, &hfsmp->hfs_mutex, (PDROP | PINOD), "hfs_scan_blocks", 0);
        if (hfsmp->scan_var & HFS_ALLOCATOR_SCAN_INFLIGHT) {
            break;
        }
        else {
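            /* msleep() was called with PDROP, so hfs_mutex was released on
             * wakeup; re-take it before checking and sleeping again. */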
            hfs_lock_mount (hfsmp);
        }
    }

    thread_deallocate (allocator_scanner);

    /* mark the volume dirty (clear clean unmount bit) */
    vcb->vcbAtrb &= ~kHFSVolumeUnmountedMask;
    if (hfsmp->jnl && (hfsmp->hfs_flags & HFS_READ_ONLY) == 0) {
        hfs_flushvolumeheader(hfsmp, TRUE, 0);
    }

    /* kHFSHasFolderCount is only supported/updated on HFSX volumes */
    if ((hfsmp->hfs_flags & HFS_X) != 0) {
        hfsmp->hfs_flags |= HFS_FOLDERCOUNT;
    }

    //
    // Check if we need to do late journal initialization.  This only
    // happens if a previous version of MacOS X (or 9) touched the disk.
    // In that case hfs_late_journal_init() will go re-locate the journal
    // and journal_info_block files and validate that they're still kosher.
    //
    if (   (vcb->vcbAtrb & kHFSVolumeJournaledMask)
        && (SWAP_BE32(vhp->lastMountedVersion) != kHFSJMountVersion)
        && (hfsmp->jnl == NULL)) {

        retval = hfs_late_journal_init(hfsmp, vhp, args);
        if (retval != 0) {
            if (retval == EROFS) {
                // EROFS is a special error code that means the volume has an external
                // journal which we couldn't find.  in that case we do not want to
                // rewrite the volume header - we'll just refuse to mount the volume.
                if (HFS_MOUNT_DEBUG) {
                    printf("hfs_mounthfsplus: hfs_late_journal_init returned (%d), maybe an external jnl?\n", retval);
                }
                retval = EINVAL;
                goto ErrorExit;
            }

            hfsmp->jnl = NULL;

            // if the journal failed to open, then set the lastMountedVersion
            // to be "FSK!" which fsck_hfs will see and force the fsck instead
            // of just bailing out because the volume is journaled.
            if (!(hfsmp->hfs_flags & HFS_READ_ONLY)) {
                HFSPlusVolumeHeader *jvhp;
                daddr64_t mdb_offset;
                struct buf *bp = NULL;

                hfsmp->hfs_flags |= HFS_NEED_JNL_RESET;

                mdb_offset = (daddr64_t)((embeddedOffset / blockSize) + HFS_PRI_SECTOR(blockSize));

                bp = NULL;
                retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
                        HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
                        hfsmp->hfs_physical_block_size, cred, &bp);
                if (retval == 0) {
                    jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));

                    if (SWAP_BE16(jvhp->signature) == kHFSPlusSigWord || SWAP_BE16(jvhp->signature) == kHFSXSigWord) {
                        printf ("hfs(3): Journal replay fail.  Writing lastMountVersion as FSK!\n");
                        jvhp->lastMountedVersion = SWAP_BE32(kFSKMountVersion);
                        buf_bwrite(bp);
                    } else {
                        buf_brelse(bp);
                    }
                    bp = NULL;
                } else if (bp) {
                    buf_brelse(bp);
                    // clear this so the error exit path won't try to use it
                    bp = NULL;
                }
            }

            if (HFS_MOUNT_DEBUG) {
                printf("hfs_mounthfsplus: hfs_late_journal_init returned (%d)\n", retval);
            }
            retval = EINVAL;
            goto ErrorExit;
        } else if (hfsmp->jnl) {
            vfs_setflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
        }
    } else if (hfsmp->jnl || ((vcb->vcbAtrb & kHFSVolumeJournaledMask) && (hfsmp->hfs_flags & HFS_READ_ONLY))) {
        struct cat_attr jinfo_attr, jnl_attr;

        if (hfsmp->hfs_flags & HFS_READ_ONLY) {
            vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
        }

        // if we're here we need to fill in the fileid's for the
        // journal and journal_info_block.
        hfsmp->hfs_jnlinfoblkid = GetFileInfo(vcb, kRootDirID, ".journal_info_block", &jinfo_attr, NULL);
        hfsmp->hfs_jnlfileid = GetFileInfo(vcb, kRootDirID, ".journal", &jnl_attr, NULL);
        if (hfsmp->hfs_jnlinfoblkid == 0 || hfsmp->hfs_jnlfileid == 0) {
            printf("hfs: danger! couldn't find the file-id's for the journal or journal_info_block\n");
            printf("hfs: jnlfileid %d, jnlinfoblkid %d\n", hfsmp->hfs_jnlfileid, hfsmp->hfs_jnlinfoblkid);
        }

        if (hfsmp->hfs_flags & HFS_READ_ONLY) {
            vcb->vcbAtrb |= kHFSVolumeJournaledMask;
        }

        if (hfsmp->jnl == NULL) {
            vfs_clearflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
        }
    }

    if ( !(vcb->vcbAtrb & kHFSVolumeHardwareLockMask) )     // if the disk is not write protected
    {
        MarkVCBDirty( vcb );    // mark VCB dirty so it will be written
    }

    /*
     * Distinguish three potential cases involving content protection:
     * 1. mount point bit set; vcbAtrb does not support it. Fail.
     * 2. mount point bit set; vcbAtrb supports it. We're good.
     * 3. mount point bit not set; vcbAtrb supports it. Turn the bit on, then we're good.
     */
    if (vfs_flags(hfsmp->hfs_mp) & MNT_CPROTECT) {
        /* Does the filesystem support it? */
        if ((vcb->vcbAtrb & kHFSContentProtectionMask) == 0) {
            /* Case 1 above */
            retval = EINVAL;
            goto ErrorExit;
        }
    }
    else {
        /* not requested in the mount point. Is it in FS? */
        if (vcb->vcbAtrb & kHFSContentProtectionMask) {
            /* Case 3 above */
            vfs_setflags (hfsmp->hfs_mp, MNT_CPROTECT);
        }
    }

    /* At this point, if the mount point flag is set, we can enable it. */
    if (vfs_flags(hfsmp->hfs_mp) & MNT_CPROTECT) {
        /* Cases 2+3 above */
#if CONFIG_PROTECT
        /* Get the EAs as needed. */
        int cperr = 0;
        uint16_t majorversion;
        uint16_t minorversion;
        uint64_t flags;
        uint8_t cryptogen = 0;
        struct cp_root_xattr *xattr = NULL;
        MALLOC (xattr, struct cp_root_xattr*, sizeof(struct cp_root_xattr), M_TEMP, M_WAITOK);
        if (xattr == NULL) {
            retval = ENOMEM;
            goto ErrorExit;
        }
        bzero (xattr, sizeof(struct cp_root_xattr));

        /* go get the EA to get the version information */
        cperr = cp_getrootxattr (hfsmp, xattr);
        /*
         * If there was no EA there, then write one out.
         * A missing EA on the root means this is an erase
         * install or a very old FS.
         */

        if (cperr == 0) {
            /* Have to run a valid CP version. */
            if ((xattr->major_version < CP_PREV_MAJOR_VERS) || (xattr->major_version > CP_NEW_MAJOR_VERS)) {
                cperr = EINVAL;
            }
        }
        else if (cperr == ENOATTR) {
            printf("No root EA set, creating new EA with new version: %d\n", CP_NEW_MAJOR_VERS);
            bzero(xattr, sizeof(struct cp_root_xattr));
            xattr->major_version = CP_NEW_MAJOR_VERS;
            xattr->minor_version = CP_MINOR_VERS;
            cperr = cp_setrootxattr (hfsmp, xattr);
        }
        majorversion = xattr->major_version;
        minorversion = xattr->minor_version;
        flags = xattr->flags;
        if (xattr->flags & CP_ROOT_CRYPTOG1) {
            cryptogen = 1;
        }

        if (xattr) {
            FREE(xattr, M_TEMP);
        }

        /* Recheck for good status */
        if (cperr == 0) {
            /* If we got here, then the CP version is valid. Set it in the mount point */
            hfsmp->hfs_running_cp_major_vers = majorversion;
            printf("Running with CP root xattr: %d.%d\n", majorversion, minorversion);
            hfsmp->cproot_flags = flags;
            hfsmp->cp_crypto_generation = cryptogen;

            /*
             * Acquire the boot-arg for the AKS default key; if invalid, obtain from the device tree.
             * Ensure that the boot-arg's value is valid for FILES (not directories),
             * since only files are actually protected for now.
             */

            PE_parse_boot_argn("aks_default_class", &hfsmp->default_cp_class, sizeof(hfsmp->default_cp_class));

            if (cp_is_valid_class(0, hfsmp->default_cp_class) == 0) {
                PE_get_default("kern.default_cp_class", &hfsmp->default_cp_class, sizeof(hfsmp->default_cp_class));
            }

            if (cp_is_valid_class(0, hfsmp->default_cp_class) == 0) {
                hfsmp->default_cp_class = PROTECTION_CLASS_C;
            }
        }
        else {
            retval = EPERM;
            goto ErrorExit;
        }
#else
        /* If CONFIG_PROTECT is not built in, ignore CP */
        vfs_clearflags(hfsmp->hfs_mp, MNT_CPROTECT);
#endif
    }

    /*
     * Establish a metadata allocation zone.
     */
    hfs_metadatazone_init(hfsmp, false);

    /*
     * Make any metadata zone adjustments.
     */
    if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
        /* Keep the roving allocator out of the metadata zone. */
        if (vcb->nextAllocation >= hfsmp->hfs_metazone_start &&
            vcb->nextAllocation <= hfsmp->hfs_metazone_end) {
            HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_end + 1);
        }
    } else {
        if (vcb->nextAllocation <= 1) {
            vcb->nextAllocation = hfsmp->hfs_min_alloc_start;
        }
    }
    vcb->sparseAllocation = hfsmp->hfs_min_alloc_start;

    /* Setup private/hidden directories for hardlinks. */
    hfs_privatedir_init(hfsmp, FILE_HARDLINKS);
    hfs_privatedir_init(hfsmp, DIR_HARDLINKS);

    if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
        hfs_remove_orphans(hfsmp);

    /* See if we need to erase unused Catalog nodes due to <rdar://problem/6947811>. */
    if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
    {
        retval = hfs_erase_unused_nodes(hfsmp);
        if (retval) {
            if (HFS_MOUNT_DEBUG) {
                printf("hfs_mounthfsplus: hfs_erase_unused_nodes returned (%d) for %s \n", retval, hfsmp->vcbVN);
            }

            goto ErrorExit;
        }
    }

    /*
     * Allow hot file clustering if conditions allow.
     */
    if ((hfsmp->hfs_flags & HFS_METADATA_ZONE) &&
        ((hfsmp->hfs_flags & (HFS_READ_ONLY | HFS_SSD)) == 0)) {
        (void) hfs_recording_init(hfsmp);
    }

    /* Force ACLs on HFS+ file systems. */
    vfs_setextendedsecurity(HFSTOVFS(hfsmp));

    /* Enable extent-based extended attributes by default */
    hfsmp->hfs_flags |= HFS_XATTR_EXTENTS;

    return (0);

ErrorExit:
    /*
     * A fatal error occurred and the volume cannot be mounted, so
     * release any resources that we acquired...
     */
    hfsUnmount(hfsmp, NULL);

    if (HFS_MOUNT_DEBUG) {
        printf("hfs_mounthfsplus: encountered error (%d)\n", retval);
    }
    return (retval);
}


/*
 * ReleaseMetaFileVNode
 *
 * vp	L - -
 */
static void ReleaseMetaFileVNode(struct vnode *vp)
{
    struct filefork *fp;

    if (vp && (fp = VTOF(vp))) {
        if (fp->fcbBTCBPtr != NULL) {
            (void)hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
            (void) BTClosePath(fp);
            hfs_unlock(VTOC(vp));
        }

        /* release the node even if BTClosePath fails */
        vnode_recycle(vp);
        vnode_put(vp);
    }
}


/*************************************************************
 *
 * Unmounts an HFS volume.
 * At this point vflush() has been called (to dump all non-metadata files)
 *
 *************************************************************/

int
hfsUnmount( register struct hfsmount *hfsmp, __unused struct proc *p)
{
    /* Get rid of our attribute data vnode (if any).  This is done
     * after the vflush() during unmount, so we don't need to worry
     * about any locks.
     */
    if (hfsmp->hfs_attrdata_vp) {
        ReleaseMetaFileVNode(hfsmp->hfs_attrdata_vp);
        hfsmp->hfs_attrdata_vp = NULLVP;
    }

    if (hfsmp->hfs_startup_vp) {
        ReleaseMetaFileVNode(hfsmp->hfs_startup_vp);
        hfsmp->hfs_startup_cp = NULL;
        hfsmp->hfs_startup_vp = NULL;
    }

    if (hfsmp->hfs_attribute_vp) {
        ReleaseMetaFileVNode(hfsmp->hfs_attribute_vp);
        hfsmp->hfs_attribute_cp = NULL;
        hfsmp->hfs_attribute_vp = NULL;
    }

    if (hfsmp->hfs_catalog_vp) {
        ReleaseMetaFileVNode(hfsmp->hfs_catalog_vp);
        hfsmp->hfs_catalog_cp = NULL;
        hfsmp->hfs_catalog_vp = NULL;
    }

    if (hfsmp->hfs_extents_vp) {
        ReleaseMetaFileVNode(hfsmp->hfs_extents_vp);
        hfsmp->hfs_extents_cp = NULL;
        hfsmp->hfs_extents_vp = NULL;
    }

    if (hfsmp->hfs_allocation_vp) {
        ReleaseMetaFileVNode(hfsmp->hfs_allocation_vp);
        hfsmp->hfs_allocation_cp = NULL;
        hfsmp->hfs_allocation_vp = NULL;
    }

    return (0);
}


/*
 * Test if fork has overflow extents.
 *
 * Returns:
 *    true  - overflow extents exist
 *    false - overflow extents do not exist
 */
__private_extern__
bool overflow_extents(struct filefork *fp)
{
    u_int32_t blocks;

    //
    // If the vnode pointer is NULL then we're being called
    // from hfs_remove_orphans() with a faked-up filefork
    // and therefore it has to be an HFS+ volume.  Otherwise
    // we check through the volume header to see what type
    // of volume we're on.
    //

#if CONFIG_HFS_STD
    if (FTOV(fp) && VTOVCB(FTOV(fp))->vcbSigWord == kHFSSigWord) {
        if (fp->ff_extents[2].blockCount == 0)
            return false;

        blocks = fp->ff_extents[0].blockCount +
                 fp->ff_extents[1].blockCount +
                 fp->ff_extents[2].blockCount;

        return fp->ff_blocks > blocks;
    }
#endif

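    /* HFS+ records hold eight extents inline (kHFSPlusExtentDensity);
     * any blocks beyond those live in the extents overflow B-tree. */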
    if (fp->ff_extents[7].blockCount == 0)
        return false;

    blocks = fp->ff_extents[0].blockCount +
             fp->ff_extents[1].blockCount +
             fp->ff_extents[2].blockCount +
             fp->ff_extents[3].blockCount +
             fp->ff_extents[4].blockCount +
             fp->ff_extents[5].blockCount +
             fp->ff_extents[6].blockCount +
             fp->ff_extents[7].blockCount;

    return fp->ff_blocks > blocks;
}

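/*
 * A volume counts as frozen if it is fully frozen, or if another thread is
 * part-way through freezing it; the freezing thread itself is exempt so
 * that it can complete the freeze.
 */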
static __attribute__((pure))
boolean_t hfs_is_frozen(struct hfsmount *hfsmp)
{
    return (hfsmp->hfs_freeze_state == HFS_FROZEN
            || (hfsmp->hfs_freeze_state == HFS_FREEZING
                && current_thread() != hfsmp->hfs_freezing_thread));
}

/*
 * Lock the HFS global journal lock
 */
int
hfs_lock_global (struct hfsmount *hfsmp, enum hfs_locktype locktype)
{
    thread_t thread = current_thread();

    if (hfsmp->hfs_global_lockowner == thread) {
        panic ("hfs_lock_global: locking against myself!");
    }

    /*
     * This check isn't really necessary, but it stops us from taking
     * the mount lock in most cases.  The essential check is below.
     */
    if (hfs_is_frozen(hfsmp)) {
        /*
         * Unfortunately, there is no easy way of getting a notification
         * for when a process is exiting and it's possible for the exiting
         * process to get blocked somewhere else.  To catch this, we
         * periodically monitor the frozen process here and thaw if
         * we spot that it's exiting.
         */
frozen:
        hfs_lock_mount(hfsmp);

        struct timespec ts = { 0, 500 * NSEC_PER_MSEC };

        while (hfs_is_frozen(hfsmp)) {
            if (hfsmp->hfs_freeze_state == HFS_FROZEN
                && proc_exiting(hfsmp->hfs_freezing_proc)) {
                hfs_thaw_locked(hfsmp);
                break;
            }

            msleep(&hfsmp->hfs_freeze_state, &hfsmp->hfs_mutex,
                   PWAIT, "hfs_lock_global (frozen)", &ts);
        }
        hfs_unlock_mount(hfsmp);
    }

    /* HFS_SHARED_LOCK */
    if (locktype == HFS_SHARED_LOCK) {
        lck_rw_lock_shared (&hfsmp->hfs_global_lock);
        hfsmp->hfs_global_lockowner = HFS_SHARED_OWNER;
    }
    /* HFS_EXCLUSIVE_LOCK */
    else {
        lck_rw_lock_exclusive (&hfsmp->hfs_global_lock);
        hfsmp->hfs_global_lockowner = thread;
    }

    /*
     * We have to check if we're frozen again because of the time
     * between when we checked and when we took the global lock.
     */
    if (hfs_is_frozen(hfsmp)) {
        hfs_unlock_global(hfsmp);
        goto frozen;
    }

    return 0;
}


/*
 * Unlock the HFS global journal lock
 */
void
hfs_unlock_global (struct hfsmount *hfsmp)
{
    thread_t thread = current_thread();

    /* HFS_LOCK_EXCLUSIVE */
    if (hfsmp->hfs_global_lockowner == thread) {
        hfsmp->hfs_global_lockowner = NULL;
        lck_rw_unlock_exclusive (&hfsmp->hfs_global_lock);
    }
    /* HFS_LOCK_SHARED */
    else {
        lck_rw_unlock_shared (&hfsmp->hfs_global_lock);
    }
}

/*
 * Lock the HFS mount lock
 *
 * Note: this is a mutex, not a rw lock!
 */
inline
void hfs_lock_mount (struct hfsmount *hfsmp) {
    lck_mtx_lock (&(hfsmp->hfs_mutex));
}

/*
 * Unlock the HFS mount lock
 *
 * Note: this is a mutex, not a rw lock!
 */
inline
void hfs_unlock_mount (struct hfsmount *hfsmp) {
    lck_mtx_unlock (&(hfsmp->hfs_mutex));
}

/*
 * Lock HFS system file(s).
 */
int
hfs_systemfile_lock(struct hfsmount *hfsmp, int flags, enum hfs_locktype locktype)
{
    /*
     * Locking order is Catalog file, Attributes file, Startup file, Bitmap file, Extents file
     */
    if (flags & SFL_CATALOG) {
#ifdef HFS_CHECK_LOCK_ORDER
        if (hfsmp->hfs_attribute_cp && hfsmp->hfs_attribute_cp->c_lockowner == current_thread()) {
            panic("hfs_systemfile_lock: bad lock order (Attributes before Catalog)");
        }
        if (hfsmp->hfs_startup_cp && hfsmp->hfs_startup_cp->c_lockowner == current_thread()) {
            panic("hfs_systemfile_lock: bad lock order (Startup before Catalog)");
        }
        if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) {
            panic("hfs_systemfile_lock: bad lock order (Extents before Catalog)");
        }
#endif /* HFS_CHECK_LOCK_ORDER */

        if (hfsmp->hfs_catalog_cp) {
            (void) hfs_lock(hfsmp->hfs_catalog_cp, locktype, HFS_LOCK_DEFAULT);
            /*
             * When the catalog file has overflow extents then
             * also acquire the extents b-tree lock if it's not
             * already requested.
             */
            if (((flags & SFL_EXTENTS) == 0) &&
                (hfsmp->hfs_catalog_vp != NULL) &&
                (overflow_extents(VTOF(hfsmp->hfs_catalog_vp)))) {
                flags |= SFL_EXTENTS;
            }
        } else {
            flags &= ~SFL_CATALOG;
        }
    }

    if (flags & SFL_ATTRIBUTE) {
#ifdef HFS_CHECK_LOCK_ORDER
        if (hfsmp->hfs_startup_cp && hfsmp->hfs_startup_cp->c_lockowner == current_thread()) {
            panic("hfs_systemfile_lock: bad lock order (Startup before Attributes)");
        }
        if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) {
            panic("hfs_systemfile_lock: bad lock order (Extents before Attributes)");
        }
#endif /* HFS_CHECK_LOCK_ORDER */

        if (hfsmp->hfs_attribute_cp) {
            (void) hfs_lock(hfsmp->hfs_attribute_cp, locktype, HFS_LOCK_DEFAULT);
            /*
             * When the attribute file has overflow extents then
             * also acquire the extents b-tree lock if it's not
             * already requested.
             */
            if (((flags & SFL_EXTENTS) == 0) &&
                (hfsmp->hfs_attribute_vp != NULL) &&
                (overflow_extents(VTOF(hfsmp->hfs_attribute_vp)))) {
                flags |= SFL_EXTENTS;
            }
        } else {
            flags &= ~SFL_ATTRIBUTE;
        }
    }

    if (flags & SFL_STARTUP) {
#ifdef HFS_CHECK_LOCK_ORDER
        if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) {
            panic("hfs_systemfile_lock: bad lock order (Extents before Startup)");
        }
#endif /* HFS_CHECK_LOCK_ORDER */

        if (hfsmp->hfs_startup_cp) {
            (void) hfs_lock(hfsmp->hfs_startup_cp, locktype, HFS_LOCK_DEFAULT);
            /*
             * When the startup file has overflow extents then
             * also acquire the extents b-tree lock if it's not
             * already requested.
             */
            if (((flags & SFL_EXTENTS) == 0) &&
                (hfsmp->hfs_startup_vp != NULL) &&
                (overflow_extents(VTOF(hfsmp->hfs_startup_vp)))) {
                flags |= SFL_EXTENTS;
            }
        } else {
            flags &= ~SFL_STARTUP;
        }
    }

    /*
     * To prevent locks being taken in the wrong order, taking the
     * extents lock also takes the bitmap lock.
     */
    if (flags & (SFL_BITMAP | SFL_EXTENTS)) {
        if (hfsmp->hfs_allocation_cp) {
            (void) hfs_lock(hfsmp->hfs_allocation_cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
            /*
             * The bitmap lock is also grabbed when only the extent lock
             * was requested. Set the bitmap lock bit in the lock
             * flags which callers will use during unlock.
             */
            flags |= SFL_BITMAP;
        } else {
            flags &= ~SFL_BITMAP;
        }
    }

    if (flags & SFL_EXTENTS) {
        /*
         * Since the extents btree lock is recursive we always
         * need exclusive access.
         */
        if (hfsmp->hfs_extents_cp) {
            (void) hfs_lock(hfsmp->hfs_extents_cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);

            if (hfsmp->hfs_mp->mnt_kern_flag & MNTK_SWAP_MOUNT) {
                /*
                 * Because we may need this lock on the pageout path (if a swapfile allocation
                 * spills into the extents overflow tree), we will grant the holder of this
                 * lock the privilege of dipping into the reserve free pool in order to prevent
                 * a deadlock from occurring if we need those pageouts to complete before we
                 * will make any new pages available on the free list... the deadlock can occur
                 * if this thread needs to allocate memory while this lock is held.
                 */
                if (set_vm_privilege(TRUE) == FALSE) {
                    /*
                     * indicate that we need to drop vm_privilege
                     * when we unlock
                     */
                    flags |= SFL_VM_PRIV;
                }
            }
        } else {
            flags &= ~SFL_EXTENTS;
        }
    }

    return (flags);
}

/*
 * unlock HFS system file(s).
 */
void
hfs_systemfile_unlock(struct hfsmount *hfsmp, int flags)
{
    struct timeval tv;
    u_int32_t lastfsync;
    int numOfLockedBuffs;

    if (hfsmp->jnl == NULL) {
        microuptime(&tv);
        lastfsync = tv.tv_sec;
    }
    if (flags & SFL_STARTUP && hfsmp->hfs_startup_cp) {
        hfs_unlock(hfsmp->hfs_startup_cp);
    }
    if (flags & SFL_ATTRIBUTE && hfsmp->hfs_attribute_cp) {
        if (hfsmp->jnl == NULL) {
            BTGetLastSync((FCB*)VTOF(hfsmp->hfs_attribute_vp), &lastfsync);
            numOfLockedBuffs = count_lock_queue();
            if ((numOfLockedBuffs > kMaxLockedMetaBuffers) ||
                ((numOfLockedBuffs > 1) && ((tv.tv_sec - lastfsync) >
                  kMaxSecsForFsync))) {
                hfs_btsync(hfsmp->hfs_attribute_vp, HFS_SYNCTRANS);
            }
        }
        hfs_unlock(hfsmp->hfs_attribute_cp);
    }
    if (flags & SFL_CATALOG && hfsmp->hfs_catalog_cp) {
        if (hfsmp->jnl == NULL) {
            BTGetLastSync((FCB*)VTOF(hfsmp->hfs_catalog_vp), &lastfsync);
            numOfLockedBuffs = count_lock_queue();
            if ((numOfLockedBuffs > kMaxLockedMetaBuffers) ||
                ((numOfLockedBuffs > 1) && ((tv.tv_sec - lastfsync) >
                  kMaxSecsForFsync))) {
                hfs_btsync(hfsmp->hfs_catalog_vp, HFS_SYNCTRANS);
            }
        }
        hfs_unlock(hfsmp->hfs_catalog_cp);
    }
    if (flags & SFL_BITMAP && hfsmp->hfs_allocation_cp) {
        hfs_unlock(hfsmp->hfs_allocation_cp);
    }
    if (flags & SFL_EXTENTS && hfsmp->hfs_extents_cp) {
        if (hfsmp->jnl == NULL) {
            BTGetLastSync((FCB*)VTOF(hfsmp->hfs_extents_vp), &lastfsync);
            numOfLockedBuffs = count_lock_queue();
            if ((numOfLockedBuffs > kMaxLockedMetaBuffers) ||
                ((numOfLockedBuffs > 1) && ((tv.tv_sec - lastfsync) >
                  kMaxSecsForFsync))) {
                hfs_btsync(hfsmp->hfs_extents_vp, HFS_SYNCTRANS);
            }
        }
        hfs_unlock(hfsmp->hfs_extents_cp);

        if (flags & SFL_VM_PRIV) {
            /*
             * revoke the vm_privilege we granted this thread
             * now that we have unlocked the overflow extents
             */
            set_vm_privilege(FALSE);
        }
    }
}


/*
 * RequireFileLock
 *
 * Check to see if a vnode is locked in the current context
 * This is to be used for debugging purposes only!!
 */
#if HFS_DIAGNOSTIC
void RequireFileLock(FileReference vp, int shareable)
{
    int locked;

    /* The extents btree and allocation bitmap are always exclusive. */
    if (VTOC(vp)->c_fileid == kHFSExtentsFileID ||
        VTOC(vp)->c_fileid == kHFSAllocationFileID) {
        shareable = 0;
    }

    locked = VTOC(vp)->c_lockowner == current_thread();

    if (!locked && !shareable) {
        switch (VTOC(vp)->c_fileid) {
        case kHFSExtentsFileID:
            panic("hfs: extents btree not locked! v: 0x%08X\n #\n", (u_int)vp);
            break;
        case kHFSCatalogFileID:
            panic("hfs: catalog btree not locked! v: 0x%08X\n #\n", (u_int)vp);
            break;
        case kHFSAllocationFileID:
            /* The allocation file can hide behind the journal lock. */
            if (VTOHFS(vp)->jnl == NULL)
                panic("hfs: allocation file not locked! v: 0x%08X\n #\n", (u_int)vp);
            break;
        case kHFSStartupFileID:
            panic("hfs: startup file not locked! v: 0x%08X\n #\n", (u_int)vp);
            break;
        case kHFSAttributesFileID:
            panic("hfs: attributes btree not locked! v: 0x%08X\n #\n", (u_int)vp);
            break;
        }
    }
}
#endif


/*
 * There are three ways to qualify for ownership rights on an object:
 *
 * 1. (a) Your UID matches the cnode's UID.
 *    (b) The object in question is owned by "unknown"
 * 2. (a) Permissions on the filesystem are being ignored and
 *        your UID matches the replacement UID.
 *    (b) Permissions on the filesystem are being ignored and
 *        the replacement UID is "unknown".
 * 3. You are root.
 *
 */
int
hfs_owner_rights(struct hfsmount *hfsmp, uid_t cnode_uid, kauth_cred_t cred,
        __unused struct proc *p, int invokesuperuserstatus)
{
    if ((kauth_cred_getuid(cred) == cnode_uid) ||                                  /* [1a] */
        (cnode_uid == UNKNOWNUID) ||                                               /* [1b] */
        ((((unsigned int)vfs_flags(HFSTOVFS(hfsmp))) & MNT_UNKNOWNPERMISSIONS) &&  /* [2] */
         ((kauth_cred_getuid(cred) == hfsmp->hfs_uid) ||                           /* [2a] */
          (hfsmp->hfs_uid == UNKNOWNUID))) ||                                      /* [2b] */
        (invokesuperuserstatus && (suser(cred, 0) == 0))) {                        /* [3] */
        return (0);
    } else {
        return (EPERM);
    }
}
1597
1598
1599 u_int32_t BestBlockSizeFit(u_int32_t allocationBlockSize,
1600 u_int32_t blockSizeLimit,
1601 u_int32_t baseMultiple) {
1602 /*
1603 Compute the optimal (largest) block size (no larger than allocationBlockSize) that is less than the
1604 specified limit but still an even multiple of the baseMultiple.
1605 */
1606 int baseBlockCount, blockCount;
1607 u_int32_t trialBlockSize;
1608
1609 if (allocationBlockSize % baseMultiple != 0) {
1610 /*
1611 Whoops: the allocation blocks aren't even multiples of the specified base:
1612 no amount of dividing them into even parts will be a multiple, either then!
1613 */
1614 return 512; /* Hope for the best */
1615 };
1616
1617 /* Try the obvious winner first, to prevent 12K allocation blocks, for instance,
1618 from being handled as two 6K logical blocks instead of 3 4K logical blocks.
1619 Even though the former (the result of the loop below) is the larger allocation
1620 block size, the latter is more efficient: */
1621 if (allocationBlockSize % PAGE_SIZE == 0) return PAGE_SIZE;
1622
1623 /* No clear winner exists: pick the largest even fraction <= MAXBSIZE: */
1624 baseBlockCount = allocationBlockSize / baseMultiple; /* Now guaranteed to be an even multiple */
1625
1626 for (blockCount = baseBlockCount; blockCount > 0; --blockCount) {
1627 trialBlockSize = blockCount * baseMultiple;
1628 if (allocationBlockSize % trialBlockSize == 0) { /* An even multiple? */
1629 if ((trialBlockSize <= blockSizeLimit) &&
1630 (trialBlockSize % baseMultiple == 0)) {
1631 return trialBlockSize;
1632 };
1633 };
1634 };
1635
1636 /* Note: we should never get here, since blockCount = 1 should always work,
1637 but this is nice and safe and makes the compiler happy, too ... */
1638 return 512;
1639 }
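
/*
 * Worked example (hypothetical sizes, assuming a 4K PAGE_SIZE): with
 * allocationBlockSize = 10240, blockSizeLimit = 8192 and
 * baseMultiple = 512, the PAGE_SIZE shortcut does not apply
 * (10240 % 4096 != 0), so the loop walks down from 10240 in steps of
 * 512 and returns 5120, the largest divisor of 10240 that is a
 * multiple of 512 and <= 8192.
 */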
1640
1641
1642 u_int32_t
1643 GetFileInfo(ExtendedVCB *vcb, __unused u_int32_t dirid, const char *name,
1644 struct cat_attr *fattr, struct cat_fork *forkinfo)
1645 {
1646 struct hfsmount * hfsmp;
1647 struct cat_desc jdesc;
1648 int lockflags;
1649 int error;
1650
1651 if (vcb->vcbSigWord != kHFSPlusSigWord)
1652 return (0);
1653
1654 hfsmp = VCBTOHFS(vcb);
1655
1656 memset(&jdesc, 0, sizeof(struct cat_desc));
1657 jdesc.cd_parentcnid = kRootDirID;
1658 jdesc.cd_nameptr = (const u_int8_t *)name;
1659 jdesc.cd_namelen = strlen(name);
1660
1661 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
1662 error = cat_lookup(hfsmp, &jdesc, 0, 0, NULL, fattr, forkinfo, NULL);
1663 hfs_systemfile_unlock(hfsmp, lockflags);
1664
1665 if (error == 0) {
1666 return (fattr->ca_fileid);
1667 } else if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1668 return (0);
1669 }
1670
1671 return (0); /* XXX what callers expect on an error */
1672 }
1673
1674
1675 /*
1676 * On HFS Plus volumes, there can be orphaned files or directories.
1677 * These are files or directories that were unlinked while busy.
1678 * If the volume was not cleanly unmounted then some of these may
1679 * have persisted and need to be removed.
1680 */
1681 void
1682 hfs_remove_orphans(struct hfsmount * hfsmp)
1683 {
1684 struct BTreeIterator * iterator = NULL;
1685 struct FSBufferDescriptor btdata;
1686 struct HFSPlusCatalogFile filerec;
1687 struct HFSPlusCatalogKey * keyp;
1688 struct proc *p = current_proc();
1689 FCB *fcb;
1690 ExtendedVCB *vcb;
1691 char filename[32];
1692 char tempname[32];
1693 size_t namelen;
1694 cat_cookie_t cookie;
1695 int catlock = 0;
1696 int catreserve = 0;
1697 int started_tr = 0;
1698 int lockflags;
1699 int result;
1700 int orphaned_files = 0;
1701 int orphaned_dirs = 0;
1702
1703 bzero(&cookie, sizeof(cookie));
1704
1705 if (hfsmp->hfs_flags & HFS_CLEANED_ORPHANS)
1706 return;
1707
1708 vcb = HFSTOVCB(hfsmp);
1709 fcb = VTOF(hfsmp->hfs_catalog_vp);
1710
1711 btdata.bufferAddress = &filerec;
1712 btdata.itemSize = sizeof(filerec);
1713 btdata.itemCount = 1;
1714
1715 MALLOC(iterator, struct BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK);
1716 bzero(iterator, sizeof(*iterator));
1717
1718 /* Build a key to "temp" */
1719 keyp = (HFSPlusCatalogKey*)&iterator->key;
1720 keyp->parentID = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid;
1721 keyp->nodeName.length = 4; /* "temp" */
1722 keyp->keyLength = kHFSPlusCatalogKeyMinimumLength + keyp->nodeName.length * 2;
1723 keyp->nodeName.unicode[0] = 't';
1724 keyp->nodeName.unicode[1] = 'e';
1725 keyp->nodeName.unicode[2] = 'm';
1726 keyp->nodeName.unicode[3] = 'p';
1727
1728 /*
1729 * Position the iterator just before the first real temp file/dir.
1730 */
1731 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
1732 (void) BTSearchRecord(fcb, iterator, NULL, NULL, iterator);
1733 hfs_systemfile_unlock(hfsmp, lockflags);
1734
1735 /* Visit all the temp files/dirs in the HFS+ private directory. */
1736 for (;;) {
1737 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
1738 result = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL);
1739 hfs_systemfile_unlock(hfsmp, lockflags);
1740 if (result)
1741 break;
1742 if (keyp->parentID != hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid)
1743 break;
1744
1745 (void) utf8_encodestr(keyp->nodeName.unicode, keyp->nodeName.length * 2,
1746 (u_int8_t *)filename, &namelen, sizeof(filename), 0, 0);
1747
1748 (void) snprintf(tempname, sizeof(tempname), "%s%d",
1749 HFS_DELETE_PREFIX, filerec.fileID);
1750
1751 /*
1752 * Delete all files (and directories) named "tempxxx",
1753 * where xxx is the file's cnid in decimal.
1754 *
1755 */
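/* For example, a busy file with cnid 1234 appears here as "temp1234". */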
1756 if (bcmp(tempname, filename, namelen) == 0) {
1757 struct filefork dfork;
1758 struct filefork rfork;
1759 struct cnode cnode;
1760 int mode = 0;
1761
1762 bzero(&dfork, sizeof(dfork));
1763 bzero(&rfork, sizeof(rfork));
1764 bzero(&cnode, sizeof(cnode));
1765
1766 /* Delete any attributes, ignore errors */
1767 (void) hfs_removeallattr(hfsmp, filerec.fileID);
1768
1769 if (hfs_start_transaction(hfsmp) != 0) {
1770 printf("hfs_remove_orphans: failed to start transaction\n");
1771 goto exit;
1772 }
1773 started_tr = 1;
1774
1775 /*
1776 * Reserve some space in the Catalog file.
1777 */
1778 if (cat_preflight(hfsmp, CAT_DELETE, &cookie, p) != 0) {
1779 printf("hfs_remove_orphans: cat_preflight failed\n");
1780 goto exit;
1781 }
1782 catreserve = 1;
1783
1784 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
1785 catlock = 1;
1786
1787 /* Build a fake cnode */
1788 cat_convertattr(hfsmp, (CatalogRecord *)&filerec, &cnode.c_attr,
1789 &dfork.ff_data, &rfork.ff_data);
1790 cnode.c_desc.cd_parentcnid = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid;
1791 cnode.c_desc.cd_nameptr = (const u_int8_t *)filename;
1792 cnode.c_desc.cd_namelen = namelen;
1793 cnode.c_desc.cd_cnid = cnode.c_attr.ca_fileid;
1794 cnode.c_blocks = dfork.ff_blocks + rfork.ff_blocks;
1795
1796 /* Position iterator at previous entry */
1797 if (BTIterateRecord(fcb, kBTreePrevRecord, iterator,
1798 NULL, NULL) != 0) {
1799 break;
1800 }
1801
1802 /* Truncate the file to zero (both forks) */
1803 if (dfork.ff_blocks > 0) {
1804 u_int64_t fsize;
1805
1806 dfork.ff_cp = &cnode;
1807 cnode.c_datafork = &dfork;
1808 cnode.c_rsrcfork = NULL;
1809 fsize = (u_int64_t)dfork.ff_blocks * (u_int64_t)HFSTOVCB(hfsmp)->blockSize;
1810 while (fsize > 0) {
1811 if (fsize > HFS_BIGFILE_SIZE) {
1812 fsize -= HFS_BIGFILE_SIZE;
1813 } else {
1814 fsize = 0;
1815 }
1816
1817 if (TruncateFileC(vcb, (FCB*)&dfork, fsize, 1, 0,
1818 cnode.c_attr.ca_fileid, false) != 0) {
1819 printf("hfs: error truncating data fork!\n");
1820 break;
1821 }
1822
1823 //
1824 // if we're iteratively truncating this file down,
1825 // then end the transaction and start a new one so
1826 // that no one transaction gets too big.
1827 //
1828 if (fsize > 0 && started_tr) {
1829 /* Drop system file locks before starting
1830 * another transaction to preserve lock order.
1831 */
1832 hfs_systemfile_unlock(hfsmp, lockflags);
1833 catlock = 0;
1834 hfs_end_transaction(hfsmp);
1835
1836 if (hfs_start_transaction(hfsmp) != 0) {
1837 started_tr = 0;
1838 break;
1839 }
1840 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
1841 catlock = 1;
1842 }
1843 }
1844 }
1845
1846 if (rfork.ff_blocks > 0) {
1847 rfork.ff_cp = &cnode;
1848 cnode.c_datafork = NULL;
1849 cnode.c_rsrcfork = &rfork;
1850 if (TruncateFileC(vcb, (FCB*)&rfork, 0, 1, 1, cnode.c_attr.ca_fileid, false) != 0) {
1851 printf("hfs: error truncating rsrc fork!\n");
1852 break;
1853 }
1854 }
1855
1856 /* Remove the file or folder record from the Catalog */
1857 if (cat_delete(hfsmp, &cnode.c_desc, &cnode.c_attr) != 0) {
1858 printf("hfs_remove_orphans: error deleting cat rec for id %d!\n", cnode.c_desc.cd_cnid);
1859 hfs_systemfile_unlock(hfsmp, lockflags);
1860 catlock = 0;
1861 hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1862 break;
1863 }
1864
1865 mode = cnode.c_attr.ca_mode & S_IFMT;
1866
1867 if (mode == S_IFDIR) {
1868 orphaned_dirs++;
1869 }
1870 else {
1871 orphaned_files++;
1872 }
1873
1874 /* Update parent and volume counts */
1875 hfsmp->hfs_private_attr[FILE_HARDLINKS].ca_entries--;
1876 if (mode == S_IFDIR) {
1877 DEC_FOLDERCOUNT(hfsmp, hfsmp->hfs_private_attr[FILE_HARDLINKS]);
1878 }
1879
1880 (void)cat_update(hfsmp, &hfsmp->hfs_private_desc[FILE_HARDLINKS],
1881 &hfsmp->hfs_private_attr[FILE_HARDLINKS], NULL, NULL);
1882
1883 /* Drop locks and end the transaction */
1884 hfs_systemfile_unlock(hfsmp, lockflags);
1885 cat_postflight(hfsmp, &cookie, p);
1886 catlock = catreserve = 0;
1887
1888 /*
1889 Now that Catalog is unlocked, update the volume info, making
1890 sure to differentiate between files and directories
1891 */
1892 if (mode == S_IFDIR) {
1893 hfs_volupdate(hfsmp, VOL_RMDIR, 0);
1894 }
1895 else{
1896 hfs_volupdate(hfsmp, VOL_RMFILE, 0);
1897 }
1898
1899 if (started_tr) {
1900 hfs_end_transaction(hfsmp);
1901 started_tr = 0;
1902 }
1903
1904 } /* end if */
1905 } /* end for */
1906 if (orphaned_files > 0 || orphaned_dirs > 0)
1907 printf("hfs: Removed %d orphaned / unlinked files and %d directories \n", orphaned_files, orphaned_dirs);
1908 exit:
1909 if (catlock) {
1910 hfs_systemfile_unlock(hfsmp, lockflags);
1911 }
1912 if (catreserve) {
1913 cat_postflight(hfsmp, &cookie, p);
1914 }
1915 if (started_tr) {
1916 hfs_end_transaction(hfsmp);
1917 }
1918
1919 FREE(iterator, M_TEMP);
1920 hfsmp->hfs_flags |= HFS_CLEANED_ORPHANS;
1921 }
1922
1923
1924 /*
1925 * This will return the correct logical block size for a given vnode.
1926 * For most files, it is the allocation block size, for meta data like
1927 * BTrees, this is kept as part of the BTree private nodeSize
1928 */
1929 u_int32_t
1930 GetLogicalBlockSize(struct vnode *vp)
1931 {
1932 u_int32_t logBlockSize;
1933
1934 DBG_ASSERT(vp != NULL);
1935
1936 /* start with default */
1937 logBlockSize = VTOHFS(vp)->hfs_logBlockSize;
1938
1939 if (vnode_issystem(vp)) {
1940 if (VTOF(vp)->fcbBTCBPtr != NULL) {
1941 BTreeInfoRec bTreeInfo;
1942
1943 /*
1944 * We do not lock the BTrees, because if we are asking for the block size
1945 * then the tree should already be locked.
1946 * We just want the nodeSize, which will NEVER change, so even if the world
1947 * is changing, the nodeSize remains the same; which argues against locking
1948 * it in the first place.
1949 */
1950
1951 (void) BTGetInformation (VTOF(vp), kBTreeInfoVersion, &bTreeInfo);
1952
1953 logBlockSize = bTreeInfo.nodeSize;
1954
1955 } else if (VTOC(vp)->c_fileid == kHFSAllocationFileID) {
1956 logBlockSize = VTOVCB(vp)->vcbVBMIOSize;
1957 }
1958 }
1959
1960 DBG_ASSERT(logBlockSize > 0);
1961
1962 return logBlockSize;
1963 }
1964
1965 #if HFS_SPARSE_DEV
1966 static bool hfs_get_backing_free_blks(hfsmount_t *hfsmp, uint64_t *pfree_blks)
1967 {
1968 struct vfsstatfs *vfsp; /* 272 bytes */
1969 uint64_t vfreeblks;
1970 struct timeval now;
1971
1972 hfs_lock_mount(hfsmp);
1973
1974 vnode_t backing_vp = hfsmp->hfs_backingfs_rootvp;
1975 if (!backing_vp) {
1976 hfs_unlock_mount(hfsmp);
1977 return false;
1978 }
1979
1980 // usecount is not enough; we need iocount
1981 if (vnode_get(backing_vp)) {
1982 hfs_unlock_mount(hfsmp);
1983 *pfree_blks = 0;
1984 return true;
1985 }
1986
1987 uint32_t loanedblks = hfsmp->loanedBlocks;
1988 uint32_t bandblks = hfsmp->hfs_sparsebandblks;
1989 uint64_t maxblks = hfsmp->hfs_backingfs_maxblocks;
1990
1991 hfs_unlock_mount(hfsmp);
1992
1993 mount_t backingfs_mp = vnode_mount(backing_vp);
1994
1995 microtime(&now);
1996 if ((now.tv_sec - hfsmp->hfs_last_backingstatfs) >= 1) {
1997 vfs_update_vfsstat(backingfs_mp, vfs_context_kernel(), VFS_KERNEL_EVENT);
1998 hfsmp->hfs_last_backingstatfs = now.tv_sec;
1999 }
2000
2001 if (!(vfsp = vfs_statfs(backingfs_mp))) {
2002 vnode_put(backing_vp);
2003 return false;
2004 }
2005
2006 vfreeblks = vfsp->f_bavail;
2007 /* Normalize block count if needed. */
2008 if (vfsp->f_bsize != hfsmp->blockSize)
2009 vfreeblks = vfreeblks * vfsp->f_bsize / hfsmp->blockSize;
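/* e.g. 1000 free 8192-byte backing blocks become 2000 4096-byte HFS blocks */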
2010 if (vfreeblks > bandblks)
2011 vfreeblks -= bandblks;
2012 else
2013 vfreeblks = 0;
2014
2015 /*
2016 * Take into account any delayed allocations. The original
2017 * reason for the "2 *" is not certain. Most
2018 * likely it is to allow for additional requirements in the
2019 * host file system and metadata required by disk images. The
2020 * number of loaned blocks is likely to be small and we will
2021 * stop using them as we get close to the limit.
2022 */
2023 loanedblks = 2 * loanedblks;
2024 if (vfreeblks > loanedblks)
2025 vfreeblks -= loanedblks;
2026 else
2027 vfreeblks = 0;
2028
2029 if (maxblks)
2030 vfreeblks = MIN(vfreeblks, maxblks);
2031
2032 vnode_put(backing_vp);
2033
2034 *pfree_blks = vfreeblks;
2035
2036 return true;
2037 }
2038 #endif
2039
2040 u_int32_t
2041 hfs_freeblks(struct hfsmount * hfsmp, int wantreserve)
2042 {
2043 u_int32_t freeblks;
2044 u_int32_t rsrvblks;
2045 u_int32_t loanblks;
2046
2047 /*
2048 * We don't bother taking the mount lock
2049 * to look at these values since the values
2050 * themselves are each updated atomically
2051 * on aligned addresses.
2052 */
2053 freeblks = hfsmp->freeBlocks;
2054 rsrvblks = hfsmp->reserveBlocks;
2055 loanblks = hfsmp->loanedBlocks;
2056 if (wantreserve) {
2057 if (freeblks > rsrvblks)
2058 freeblks -= rsrvblks;
2059 else
2060 freeblks = 0;
2061 }
2062 if (freeblks > loanblks)
2063 freeblks -= loanblks;
2064 else
2065 freeblks = 0;
2066
2067 #if HFS_SPARSE_DEV
2068 /*
2069 * When the underlying device is sparse, check the
2070 * available space on the backing store volume.
2071 */
2072 uint64_t vfreeblks;
2073 if (hfs_get_backing_free_blks(hfsmp, &vfreeblks))
2074 freeblks = MIN(freeblks, vfreeblks);
2075 #endif /* HFS_SPARSE_DEV */
2076
2077 if (hfsmp->hfs_flags & HFS_CS) {
2078 uint64_t cs_free_bytes;
2079 uint64_t cs_free_blks;
2080 if (VNOP_IOCTL(hfsmp->hfs_devvp, _DKIOCCSGETFREEBYTES,
2081 (caddr_t)&cs_free_bytes, 0, vfs_context_kernel()) == 0) {
2082 cs_free_blks = cs_free_bytes / hfsmp->blockSize;
2083 if (cs_free_blks > loanblks)
2084 cs_free_blks -= loanblks;
2085 else
2086 cs_free_blks = 0;
2087 freeblks = MIN(cs_free_blks, freeblks);
2088 }
2089 }
2090
2091 return (freeblks);
2092 }
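
/*
 * Worked example (hypothetical numbers): with freeBlocks = 1000,
 * reserveBlocks = 100 and loanedBlocks = 50, a caller passing
 * wantreserve sees 1000 - 100 - 50 = 850 blocks; without wantreserve
 * it sees 950 (before any sparse-device or CS clamping above).
 */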
2093
2094 /*
2095 * Map HFS Common errors (negative) to BSD error codes (positive).
2096 * Positive errors (ie BSD errors) are passed through unchanged.
2097 */
2098 short MacToVFSError(OSErr err)
2099 {
2100 if (err >= 0)
2101 return err;
2102
2103 /* BSD/VFS internal errnos */
2104 switch (err) {
2105 case ERESERVEDNAME: /* -8 */
2106 return err;
2107 }
2108
2109 switch (err) {
2110 case dskFulErr: /* -34 */
2111 case btNoSpaceAvail: /* -32733 */
2112 return ENOSPC;
2113 case fxOvFlErr: /* -32750 */
2114 return EOVERFLOW;
2115
2116 case btBadNode: /* -32731 */
2117 return EIO;
2118
2119 case memFullErr: /* -108 */
2120 return ENOMEM; /* +12 */
2121
2122 case cmExists: /* -32718 */
2123 case btExists: /* -32734 */
2124 return EEXIST; /* +17 */
2125
2126 case cmNotFound: /* -32719 */
2127 case btNotFound: /* -32735 */
2128 return ENOENT; /* +2 */
2129
2130 case cmNotEmpty: /* -32717 */
2131 return ENOTEMPTY; /* 66 */
2132
2133 case cmFThdDirErr: /* -32714 */
2134 return EISDIR; /* 21 */
2135
2136 case fxRangeErr: /* -32751 */
2137 return ERANGE;
2138
2139 case bdNamErr: /* -37 */
2140 return ENAMETOOLONG; /* 63 */
2141
2142 case paramErr: /* -50 */
2143 case fileBoundsErr: /* -1309 */
2144 return EINVAL; /* +22 */
2145
2146 case fsBTBadNodeSize:
2147 return ENXIO;
2148
2149 default:
2150 return EIO; /* +5 */
2151 }
2152 }
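
/*
 * Typical use, sketched (illustrative fragment; any BTree or catalog
 * routine that returns an OSErr is handled the same way):
 */
#if 0
	int err = BTSearchRecord(fcb, iterator, &btdata, NULL, iterator);
	if (err)
		return MacToVFSError(err);	/* e.g. btNotFound becomes ENOENT */
#endif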
2153
2154
2155 /*
2156 * Find the current thread's directory hint for a given index.
2157 *
2158 * Requires an exclusive lock on directory cnode.
2159 *
2160 * Use detach if the cnode lock must be dropped while the hint is still active.
2161 */
2162 __private_extern__
2163 directoryhint_t *
2164 hfs_getdirhint(struct cnode *dcp, int index, int detach)
2165 {
2166 struct timeval tv;
2167 directoryhint_t *hint;
2168 boolean_t need_remove, need_init;
2169 const u_int8_t * name;
2170
2171 microuptime(&tv);
2172
2173 /*
2174 * Look for an existing hint first. If not found, create a new one (when
2175 * the list is not full) or recycle the oldest hint. Since new hints are
2176 * always added to the head of the list, the last hint is always the
2177 * oldest.
2178 */
2179 TAILQ_FOREACH(hint, &dcp->c_hintlist, dh_link) {
2180 if (hint->dh_index == index)
2181 break;
2182 }
2183 if (hint != NULL) { /* found an existing hint */
2184 need_init = false;
2185 need_remove = true;
2186 } else { /* cannot find an existing hint */
2187 need_init = true;
2188 if (dcp->c_dirhintcnt < HFS_MAXDIRHINTS) { /* we don't need recycling */
2189 /* Create a default directory hint */
2190 MALLOC_ZONE(hint, directoryhint_t *, sizeof(directoryhint_t), M_HFSDIRHINT, M_WAITOK);
2191 ++dcp->c_dirhintcnt;
2192 need_remove = false;
2193 } else { /* recycle the last (i.e., the oldest) hint */
2194 hint = TAILQ_LAST(&dcp->c_hintlist, hfs_hinthead);
2195 if ((hint->dh_desc.cd_flags & CD_HASBUF) &&
2196 (name = hint->dh_desc.cd_nameptr)) {
2197 hint->dh_desc.cd_nameptr = NULL;
2198 hint->dh_desc.cd_namelen = 0;
2199 hint->dh_desc.cd_flags &= ~CD_HASBUF;
2200 vfs_removename((const char *)name);
2201 }
2202 need_remove = true;
2203 }
2204 }
2205
2206 if (need_remove)
2207 TAILQ_REMOVE(&dcp->c_hintlist, hint, dh_link);
2208
2209 if (detach)
2210 --dcp->c_dirhintcnt;
2211 else
2212 TAILQ_INSERT_HEAD(&dcp->c_hintlist, hint, dh_link);
2213
2214 if (need_init) {
2215 hint->dh_index = index;
2216 hint->dh_desc.cd_flags = 0;
2217 hint->dh_desc.cd_encoding = 0;
2218 hint->dh_desc.cd_namelen = 0;
2219 hint->dh_desc.cd_nameptr = NULL;
2220 hint->dh_desc.cd_parentcnid = dcp->c_fileid;
2221 hint->dh_desc.cd_hint = dcp->c_childhint;
2222 hint->dh_desc.cd_cnid = 0;
2223 }
2224 hint->dh_time = tv.tv_sec;
2225 return (hint);
2226 }
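
/*
 * Sketch of the usual get/release pattern (illustrative fragment; the
 * directory cnode is assumed to be held exclusive, as required above):
 */
#if 0
	directoryhint_t *hint;

	hint = hfs_getdirhint(dcp, index, 0);	/* find, create, or recycle */
	/* ... resume the catalog scan from hint->dh_desc ... */
	hfs_reldirhint(dcp, hint);		/* release when finished */
#endif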
2227
2228 /*
2229 * Release a single directory hint.
2230 *
2231 * Requires an exclusive lock on directory cnode.
2232 */
2233 __private_extern__
2234 void
2235 hfs_reldirhint(struct cnode *dcp, directoryhint_t * relhint)
2236 {
2237 const u_int8_t * name;
2238 directoryhint_t *hint;
2239
2240 /* Check if item is on list (could be detached) */
2241 TAILQ_FOREACH(hint, &dcp->c_hintlist, dh_link) {
2242 if (hint == relhint) {
2243 TAILQ_REMOVE(&dcp->c_hintlist, relhint, dh_link);
2244 --dcp->c_dirhintcnt;
2245 break;
2246 }
2247 }
2248 name = relhint->dh_desc.cd_nameptr;
2249 if ((relhint->dh_desc.cd_flags & CD_HASBUF) && (name != NULL)) {
2250 relhint->dh_desc.cd_nameptr = NULL;
2251 relhint->dh_desc.cd_namelen = 0;
2252 relhint->dh_desc.cd_flags &= ~CD_HASBUF;
2253 vfs_removename((const char *)name);
2254 }
2255 FREE_ZONE(relhint, sizeof(directoryhint_t), M_HFSDIRHINT);
2256 }
2257
2258 /*
2259 * Release directory hints for given directory
2260 *
2261 * Requires an exclusive lock on directory cnode.
2262 */
2263 __private_extern__
2264 void
2265 hfs_reldirhints(struct cnode *dcp, int stale_hints_only)
2266 {
2267 struct timeval tv;
2268 directoryhint_t *hint, *prev;
2269 const u_int8_t * name;
2270
2271 if (stale_hints_only)
2272 microuptime(&tv);
2273
2274 /* searching from the oldest to the newest, so we can stop early when releasing stale hints only */
2275 for (hint = TAILQ_LAST(&dcp->c_hintlist, hfs_hinthead); hint != NULL; hint = prev) {
2276 if (stale_hints_only && (tv.tv_sec - hint->dh_time) < HFS_DIRHINT_TTL)
2277 break; /* stop here if this entry is too new */
2278 name = hint->dh_desc.cd_nameptr;
2279 if ((hint->dh_desc.cd_flags & CD_HASBUF) && (name != NULL)) {
2280 hint->dh_desc.cd_nameptr = NULL;
2281 hint->dh_desc.cd_namelen = 0;
2282 hint->dh_desc.cd_flags &= ~CD_HASBUF;
2283 vfs_removename((const char *)name);
2284 }
2285 prev = TAILQ_PREV(hint, hfs_hinthead, dh_link); /* must save this pointer before calling FREE_ZONE on this node */
2286 TAILQ_REMOVE(&dcp->c_hintlist, hint, dh_link);
2287 FREE_ZONE(hint, sizeof(directoryhint_t), M_HFSDIRHINT);
2288 --dcp->c_dirhintcnt;
2289 }
2290 }
2291
2292 /*
2293 * Insert a detached directory hint back into the list of dirhints.
2294 *
2295 * Requires an exclusive lock on directory cnode.
2296 */
2297 __private_extern__
2298 void
2299 hfs_insertdirhint(struct cnode *dcp, directoryhint_t * hint)
2300 {
2301 directoryhint_t *test;
2302
2303 TAILQ_FOREACH(test, &dcp->c_hintlist, dh_link) {
2304 if (test == hint)
2305 panic("hfs_insertdirhint: hint %p already on list!", hint);
2306 }
2307
2308 TAILQ_INSERT_HEAD(&dcp->c_hintlist, hint, dh_link);
2309 ++dcp->c_dirhintcnt;
2310 }
2311
2312 /*
2313 * Perform a case-insensitive compare of two UTF-8 filenames.
2314 *
2315 * Returns 0 if the strings match.
2316 */
2317 __private_extern__
2318 int
2319 hfs_namecmp(const u_int8_t *str1, size_t len1, const u_int8_t *str2, size_t len2)
2320 {
2321 u_int16_t *ustr1, *ustr2;
2322 size_t ulen1, ulen2;
2323 size_t maxbytes;
2324 int cmp = -1;
2325
2326 if (len1 != len2)
2327 return (cmp);
2328
2329 maxbytes = kHFSPlusMaxFileNameChars << 1;
2330 MALLOC(ustr1, u_int16_t *, maxbytes << 1, M_TEMP, M_WAITOK);
2331 ustr2 = ustr1 + (maxbytes >> 1);
2332
2333 if (utf8_decodestr(str1, len1, ustr1, &ulen1, maxbytes, ':', 0) != 0)
2334 goto out;
2335 if (utf8_decodestr(str2, len2, ustr2, &ulen2, maxbytes, ':', 0) != 0)
2336 goto out;
2337
2338 cmp = FastUnicodeCompare(ustr1, ulen1>>1, ustr2, ulen2>>1);
2339 out:
2340 FREE(ustr1, M_TEMP);
2341 return (cmp);
2342 }
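
/*
 * For illustration: HFS+ name comparison is case-insensitive, so the
 * two literals below compare equal.
 */
#if 0
	if (hfs_namecmp((const u_int8_t *)"README", 6,
	                (const u_int8_t *)"ReadMe", 6) == 0) {
		/* treated as the same name */
	}
#endif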
2343
2344
2345 typedef struct jopen_cb_info {
2346 off_t jsize;
2347 char *desired_uuid;
2348 struct vnode *jvp;
2349 size_t blksize;
2350 int need_clean;
2351 int need_init;
2352 } jopen_cb_info;
2353
2354 static int
2355 journal_open_cb(const char *bsd_dev_name, const char *uuid_str, void *arg)
2356 {
2357 struct nameidata nd;
2358 jopen_cb_info *ji = (jopen_cb_info *)arg;
2359 char bsd_name[256];
2360 int error;
2361
2362 strlcpy(&bsd_name[0], "/dev/", sizeof(bsd_name));
2363 strlcpy(&bsd_name[5], bsd_dev_name, sizeof(bsd_name)-5);
2364
2365 if (ji->desired_uuid && ji->desired_uuid[0] && strcmp(uuid_str, ji->desired_uuid) != 0) {
2366 return 1; // keep iterating
2367 }
2368
2369 // if we're here, either the desired uuid matched or there was no
2370 // desired uuid so let's try to open the device for writing and
2371 // see if it works. if it does, we'll use it.
2372
2373 NDINIT(&nd, LOOKUP, OP_LOOKUP, LOCKLEAF, UIO_SYSSPACE32, CAST_USER_ADDR_T(bsd_name), vfs_context_kernel());
2374 if ((error = namei(&nd))) {
2375 printf("hfs: journal open cb: error %d looking up device %s (dev uuid %s)\n", error, bsd_name, uuid_str);
2376 return 1; // keep iterating
2377 }
2378
2379 ji->jvp = nd.ni_vp;
2380 nameidone(&nd);
2381
2382 if (ji->jvp == NULL) {
2383 printf("hfs: journal open cb: did not find %s (error %d)\n", bsd_name, error);
2384 } else {
2385 error = VNOP_OPEN(ji->jvp, FREAD|FWRITE, vfs_context_kernel());
2386 if (error == 0) {
2387 // if the journal is dirty and we didn't specify a desired
2388 // journal device uuid, then do not use the journal. but
2389 // if the journal is just invalid (e.g. it hasn't been
2390 // initialized) then just set the need_init flag.
2391 if (ji->need_clean && ji->desired_uuid && ji->desired_uuid[0] == '\0') {
2392 error = journal_is_clean(ji->jvp, 0, ji->jsize, (void *)1, ji->blksize);
2393 if (error == EBUSY) {
2394 VNOP_CLOSE(ji->jvp, FREAD|FWRITE, vfs_context_kernel());
2395 vnode_put(ji->jvp);
2396 ji->jvp = NULL;
2397 return 1; // keep iterating
2398 } else if (error == EINVAL) {
2399 ji->need_init = 1;
2400 }
2401 }
2402
2403 if (ji->desired_uuid && ji->desired_uuid[0] == '\0') {
2404 strlcpy(ji->desired_uuid, uuid_str, 128);
2405 }
2406 vnode_setmountedon(ji->jvp);
2407 return 0; // stop iterating
2408 } else {
2409 vnode_put(ji->jvp);
2410 ji->jvp = NULL;
2411 }
2412 }
2413
2414 return 1; // keep iterating
2415 }
2416
2417 extern void IOBSDIterateMediaWithContent(const char *uuid_cstring, int (*func)(const char *bsd_dev_name, const char *uuid_str, void *arg), void *arg);
2418 kern_return_t IOBSDGetPlatformSerialNumber(char *serial_number_str, u_int32_t len);
2419
2420
2421 static vnode_t
2422 open_journal_dev(const char *vol_device,
2423 int need_clean,
2424 char *uuid_str,
2425 char *machine_serial_num,
2426 off_t jsize,
2427 size_t blksize,
2428 int *need_init)
2429 {
2430 int retry_counter=0;
2431 jopen_cb_info ji;
2432
2433 ji.jsize = jsize;
2434 ji.desired_uuid = uuid_str;
2435 ji.jvp = NULL;
2436 ji.blksize = blksize;
2437 ji.need_clean = need_clean;
2438 ji.need_init = 0;
2439
2440 // if (uuid_str[0] == '\0') {
2441 // printf("hfs: open journal dev: %s: locating any available non-dirty external journal partition\n", vol_device);
2442 // } else {
2443 // printf("hfs: open journal dev: %s: trying to find the external journal partition w/uuid %s\n", vol_device, uuid_str);
2444 // }
2445 while (ji.jvp == NULL && retry_counter++ < 4) {
2446 if (retry_counter > 1) {
2447 if (uuid_str[0]) {
2448 printf("hfs: open_journal_dev: uuid %s not found. waiting 10sec.\n", uuid_str);
2449 } else {
2450 printf("hfs: open_journal_dev: no available external journal partition found. waiting 10sec.\n");
2451 }
2452 delay_for_interval(10 * 1000000, NSEC_PER_USEC); // wait for ten seconds and then try again
2453 }
2454
2455 IOBSDIterateMediaWithContent(EXTJNL_CONTENT_TYPE_UUID, journal_open_cb, &ji);
2456 }
2457
2458 if (ji.jvp == NULL) {
2459 printf("hfs: volume: %s: did not find jnl device uuid: %s from machine serial number: %s\n",
2460 vol_device, uuid_str, machine_serial_num);
2461 }
2462
2463 *need_init = ji.need_init;
2464
2465 return ji.jvp;
2466 }
2467
2468
2469 int
2470 hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
2471 void *_args, off_t embeddedOffset, daddr64_t mdb_offset,
2472 HFSMasterDirectoryBlock *mdbp, kauth_cred_t cred)
2473 {
2474 JournalInfoBlock *jibp;
2475 struct buf *jinfo_bp, *bp;
2476 int sectors_per_fsblock, arg_flags=0, arg_tbufsz=0;
2477 int retval, write_jibp = 0;
2478 uint32_t blksize = hfsmp->hfs_logical_block_size;
2479 struct vnode *devvp;
2480 struct hfs_mount_args *args = _args;
2481 u_int32_t jib_flags;
2482 u_int64_t jib_offset;
2483 u_int64_t jib_size;
2484 const char *dev_name;
2485
2486 devvp = hfsmp->hfs_devvp;
2487 dev_name = vnode_getname_printable(devvp);
2488
2489 if (args != NULL && (args->flags & HFSFSMNT_EXTENDED_ARGS)) {
2490 arg_flags = args->journal_flags;
2491 arg_tbufsz = args->journal_tbuffer_size;
2492 }
2493
2494 sectors_per_fsblock = SWAP_BE32(vhp->blockSize) / blksize;
2495
2496 jinfo_bp = NULL;
2497 retval = (int)buf_meta_bread(devvp,
2498 (daddr64_t)((embeddedOffset/blksize) +
2499 ((u_int64_t)SWAP_BE32(vhp->journalInfoBlock)*sectors_per_fsblock)),
2500 hfsmp->hfs_physical_block_size, cred, &jinfo_bp);
2501 if (retval) {
2502 if (jinfo_bp) {
2503 buf_brelse(jinfo_bp);
2504 }
2505 goto cleanup_dev_name;
2506 }
2507
2508 jibp = (JournalInfoBlock *)buf_dataptr(jinfo_bp);
2509 jib_flags = SWAP_BE32(jibp->flags);
2510 jib_size = SWAP_BE64(jibp->size);
2511
2512 if (jib_flags & kJIJournalInFSMask) {
2513 hfsmp->jvp = hfsmp->hfs_devvp;
2514 jib_offset = SWAP_BE64(jibp->offset);
2515 } else {
2516 int need_init=0;
2517
2518 // if the volume was unmounted cleanly then we'll pick any
2519 // available external journal partition
2520 //
2521 if (SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) {
2522 *((char *)&jibp->ext_jnl_uuid[0]) = '\0';
2523 }
2524
2525 hfsmp->jvp = open_journal_dev(dev_name,
2526 !(jib_flags & kJIJournalNeedInitMask),
2527 (char *)&jibp->ext_jnl_uuid[0],
2528 (char *)&jibp->machine_serial_num[0],
2529 jib_size,
2530 hfsmp->hfs_logical_block_size,
2531 &need_init);
2532 if (hfsmp->jvp == NULL) {
2533 buf_brelse(jinfo_bp);
2534 retval = EROFS;
2535 goto cleanup_dev_name;
2536 } else {
2537 if (IOBSDGetPlatformSerialNumber(&jibp->machine_serial_num[0], sizeof(jibp->machine_serial_num)) != KERN_SUCCESS) {
2538 strlcpy(&jibp->machine_serial_num[0], "unknown-machine-uuid", sizeof(jibp->machine_serial_num));
2539 }
2540 }
2541
2542 jib_offset = 0;
2543 write_jibp = 1;
2544 if (need_init) {
2545 jib_flags |= kJIJournalNeedInitMask;
2546 }
2547 }
2548
2549 // save this off for the hack-y check in hfs_remove()
2550 hfsmp->jnl_start = jib_offset / SWAP_BE32(vhp->blockSize);
2551 hfsmp->jnl_size = jib_size;
2552
2553 if ((hfsmp->hfs_flags & HFS_READ_ONLY) && (vfs_flags(hfsmp->hfs_mp) & MNT_ROOTFS) == 0) {
2554 // if the file system is read-only, check if the journal is empty.
2555 // if it is, then we can allow the mount. otherwise we have to
2556 // return failure.
2557 retval = journal_is_clean(hfsmp->jvp,
2558 jib_offset + embeddedOffset,
2559 jib_size,
2560 devvp,
2561 hfsmp->hfs_logical_block_size);
2562
2563 hfsmp->jnl = NULL;
2564
2565 buf_brelse(jinfo_bp);
2566
2567 if (retval) {
2568 const char *name = vnode_getname_printable(devvp);
2569 printf("hfs: early journal init: volume on %s is read-only and journal is dirty. Can not mount volume.\n",
2570 name);
2571 vnode_putname_printable(name);
2572 }
2573
2574 goto cleanup_dev_name;
2575 }
2576
2577 if (jib_flags & kJIJournalNeedInitMask) {
2578 printf("hfs: Initializing the journal (joffset 0x%llx sz 0x%llx)...\n",
2579 jib_offset + embeddedOffset, jib_size);
2580 hfsmp->jnl = journal_create(hfsmp->jvp,
2581 jib_offset + embeddedOffset,
2582 jib_size,
2583 devvp,
2584 blksize,
2585 arg_flags,
2586 arg_tbufsz,
2587 hfs_sync_metadata, hfsmp->hfs_mp,
2588 hfsmp->hfs_mp);
2589 if (hfsmp->jnl)
2590 journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);
2591
2592 // no need to start a transaction here... if this were to fail
2593 // we'd just re-init it on the next mount.
2594 jib_flags &= ~kJIJournalNeedInitMask;
2595 jibp->flags = SWAP_BE32(jib_flags);
2596 buf_bwrite(jinfo_bp);
2597 jinfo_bp = NULL;
2598 jibp = NULL;
2599 } else {
2600 //printf("hfs: Opening the journal (joffset 0x%llx sz 0x%llx vhp_blksize %d)...\n",
2601 // jib_offset + embeddedOffset,
2602 // jib_size, SWAP_BE32(vhp->blockSize));
2603
2604 hfsmp->jnl = journal_open(hfsmp->jvp,
2605 jib_offset + embeddedOffset,
2606 jib_size,
2607 devvp,
2608 blksize,
2609 arg_flags,
2610 arg_tbufsz,
2611 hfs_sync_metadata, hfsmp->hfs_mp,
2612 hfsmp->hfs_mp);
2613 if (hfsmp->jnl)
2614 journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);
2615
2616 if (write_jibp) {
2617 buf_bwrite(jinfo_bp);
2618 } else {
2619 buf_brelse(jinfo_bp);
2620 }
2621 jinfo_bp = NULL;
2622 jibp = NULL;
2623
2624 if (hfsmp->jnl && mdbp) {
2625 // reload the mdb because it could have changed
2626 // if the journal had to be replayed.
2627 if (mdb_offset == 0) {
2628 mdb_offset = (daddr64_t)((embeddedOffset / blksize) + HFS_PRI_SECTOR(blksize));
2629 }
2630 bp = NULL;
2631 retval = (int)buf_meta_bread(devvp,
2632 HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
2633 hfsmp->hfs_physical_block_size, cred, &bp);
2634 if (retval) {
2635 if (bp) {
2636 buf_brelse(bp);
2637 }
2638 printf("hfs: failed to reload the mdb after opening the journal (retval %d)!\n",
2639 retval);
2640 goto cleanup_dev_name;
2641 }
2642 bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size), mdbp, 512);
2643 buf_brelse(bp);
2644 bp = NULL;
2645 }
2646 }
2647
2648 // if we expected the journal to be there and we couldn't
2649 // create it or open it then we have to bail out.
2650 if (hfsmp->jnl == NULL) {
2651 printf("hfs: early jnl init: failed to open/create the journal (retval %d).\n", retval);
2652 retval = EINVAL;
2653 goto cleanup_dev_name;
2654 }
2655
2656 retval = 0;
2657
2658 cleanup_dev_name:
2659 vnode_putname_printable(dev_name);
2660 return retval;
2661 }
2662
2663
2664 //
2665 // This function will go and re-locate the .journal_info_block and
2666 // the .journal files in case they moved (which can happen if you
2667 // run Norton SpeedDisk). If we fail to find either file we just
2668 // disable journaling for this volume and return. We turn off the
2669 // journaling bit in the vcb and assume it will get written to disk
2670 // later (if it doesn't on the next mount we'd do the same thing
2671 // again which is harmless). If we disable journaling we don't
2672 // return an error so that the volume is still mountable.
2673 //
2674 // If the info we find for the .journal_info_block and .journal files
2675 // isn't what we had stored, we re-set our cached info and proceed
2676 // with opening the journal normally.
2677 //
2678 static int
2679 hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_args)
2680 {
2681 JournalInfoBlock *jibp;
2682 struct buf *jinfo_bp;
2683 int sectors_per_fsblock, arg_flags=0, arg_tbufsz=0;
2684 int retval, write_jibp = 0, recreate_journal = 0;
2685 struct vnode *devvp;
2686 struct cat_attr jib_attr, jattr;
2687 struct cat_fork jib_fork, jfork;
2688 ExtendedVCB *vcb;
2689 u_int32_t fid;
2690 struct hfs_mount_args *args = _args;
2691 u_int32_t jib_flags;
2692 u_int64_t jib_offset;
2693 u_int64_t jib_size;
2694
2695 devvp = hfsmp->hfs_devvp;
2696 vcb = HFSTOVCB(hfsmp);
2697
2698 if (args != NULL && (args->flags & HFSFSMNT_EXTENDED_ARGS)) {
2699 if (args->journal_disable) {
2700 return 0;
2701 }
2702
2703 arg_flags = args->journal_flags;
2704 arg_tbufsz = args->journal_tbuffer_size;
2705 }
2706
2707 fid = GetFileInfo(vcb, kRootDirID, ".journal_info_block", &jib_attr, &jib_fork);
2708 if (fid == 0 || jib_fork.cf_extents[0].startBlock == 0 || jib_fork.cf_size == 0) {
2709 printf("hfs: can't find the .journal_info_block! disabling journaling (start: %d).\n",
2710 jib_fork.cf_extents[0].startBlock);
2711 vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
2712 return 0;
2713 }
2714 hfsmp->hfs_jnlinfoblkid = fid;
2715
2716 // make sure the journal_info_block begins where we think it should.
2717 if (SWAP_BE32(vhp->journalInfoBlock) != jib_fork.cf_extents[0].startBlock) {
2718 printf("hfs: The journal_info_block moved (was: %d; is: %d). Fixing up\n",
2719 SWAP_BE32(vhp->journalInfoBlock), jib_fork.cf_extents[0].startBlock);
2720
2721 vcb->vcbJinfoBlock = jib_fork.cf_extents[0].startBlock;
2722 vhp->journalInfoBlock = SWAP_BE32(jib_fork.cf_extents[0].startBlock);
2723 recreate_journal = 1;
2724 }
2725
2726
2727 sectors_per_fsblock = SWAP_BE32(vhp->blockSize) / hfsmp->hfs_logical_block_size;
2728 jinfo_bp = NULL;
2729 retval = (int)buf_meta_bread(devvp,
2730 (vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size +
2731 ((u_int64_t)SWAP_BE32(vhp->journalInfoBlock)*sectors_per_fsblock)),
2732 hfsmp->hfs_physical_block_size, NOCRED, &jinfo_bp);
2733 if (retval) {
2734 if (jinfo_bp) {
2735 buf_brelse(jinfo_bp);
2736 }
2737 printf("hfs: can't read journal info block. disabling journaling.\n");
2738 vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
2739 return 0;
2740 }
2741
2742 jibp = (JournalInfoBlock *)buf_dataptr(jinfo_bp);
2743 jib_flags = SWAP_BE32(jibp->flags);
2744 jib_offset = SWAP_BE64(jibp->offset);
2745 jib_size = SWAP_BE64(jibp->size);
2746
2747 fid = GetFileInfo(vcb, kRootDirID, ".journal", &jattr, &jfork);
2748 if (fid == 0 || jfork.cf_extents[0].startBlock == 0 || jfork.cf_size == 0) {
2749 printf("hfs: can't find the journal file! disabling journaling (start: %d)\n",
2750 jfork.cf_extents[0].startBlock);
2751 buf_brelse(jinfo_bp);
2752 vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
2753 return 0;
2754 }
2755 hfsmp->hfs_jnlfileid = fid;
2756
2757 // make sure the journal file begins where we think it should.
2758 if ((jib_flags & kJIJournalInFSMask) && (jib_offset / (u_int64_t)vcb->blockSize) != jfork.cf_extents[0].startBlock) {
2759 printf("hfs: The journal file moved (was: %lld; is: %d). Fixing up\n",
2760 (jib_offset / (u_int64_t)vcb->blockSize), jfork.cf_extents[0].startBlock);
2761
2762 jib_offset = (u_int64_t)jfork.cf_extents[0].startBlock * (u_int64_t)vcb->blockSize;
2763 write_jibp = 1;
2764 recreate_journal = 1;
2765 }
2766
2767 // check the size of the journal file.
2768 if (jib_size != (u_int64_t)jfork.cf_extents[0].blockCount*vcb->blockSize) {
2769 printf("hfs: The journal file changed size! (was %lld; is %lld). Fixing up.\n",
2770 jib_size, (u_int64_t)jfork.cf_extents[0].blockCount*vcb->blockSize);
2771
2772 jib_size = (u_int64_t)jfork.cf_extents[0].blockCount * vcb->blockSize;
2773 write_jibp = 1;
2774 recreate_journal = 1;
2775 }
2776
2777 if (jib_flags & kJIJournalInFSMask) {
2778 hfsmp->jvp = hfsmp->hfs_devvp;
2779 jib_offset += (off_t)vcb->hfsPlusIOPosOffset;
2780 } else {
2781 const char *dev_name;
2782 int need_init = 0;
2783
2784 dev_name = vnode_getname_printable(devvp);
2785
2786 // since the journal is empty, just use any available external journal
2787 *((char *)&jibp->ext_jnl_uuid[0]) = '\0';
2788
2789 // this fills in the uuid of the device we actually get
2790 hfsmp->jvp = open_journal_dev(dev_name,
2791 !(jib_flags & kJIJournalNeedInitMask),
2792 (char *)&jibp->ext_jnl_uuid[0],
2793 (char *)&jibp->machine_serial_num[0],
2794 jib_size,
2795 hfsmp->hfs_logical_block_size,
2796 &need_init);
2797 if (hfsmp->jvp == NULL) {
2798 buf_brelse(jinfo_bp);
2799 vnode_putname_printable(dev_name);
2800 return EROFS;
2801 } else {
2802 if (IOBSDGetPlatformSerialNumber(&jibp->machine_serial_num[0], sizeof(jibp->machine_serial_num)) != KERN_SUCCESS) {
2803 strlcpy(&jibp->machine_serial_num[0], "unknown-machine-serial-num", sizeof(jibp->machine_serial_num));
2804 }
2805 }
2806 jib_offset = 0;
2807 recreate_journal = 1;
2808 write_jibp = 1;
2809 if (need_init) {
2810 jib_flags |= kJIJournalNeedInitMask;
2811 }
2812 vnode_putname_printable(dev_name);
2813 }
2814
2815 // save this off for the hack-y check in hfs_remove()
2816 hfsmp->jnl_start = jib_offset / SWAP_BE32(vhp->blockSize);
2817 hfsmp->jnl_size = jib_size;
2818
2819 if ((hfsmp->hfs_flags & HFS_READ_ONLY) && (vfs_flags(hfsmp->hfs_mp) & MNT_ROOTFS) == 0) {
2820 // if the file system is read-only, check if the journal is empty.
2821 // if it is, then we can allow the mount. otherwise we have to
2822 // return failure.
2823 retval = journal_is_clean(hfsmp->jvp,
2824 jib_offset,
2825 jib_size,
2826 devvp,
2827 hfsmp->hfs_logical_block_size);
2828
2829 hfsmp->jnl = NULL;
2830
2831 buf_brelse(jinfo_bp);
2832
2833 if (retval) {
2834 const char *name = vnode_getname_printable(devvp);
2835 printf("hfs: late journal init: volume on %s is read-only and journal is dirty. Can not mount volume.\n",
2836 name);
2837 vnode_putname_printable(name);
2838 }
2839
2840 return retval;
2841 }
2842
2843 if ((jib_flags & kJIJournalNeedInitMask) || recreate_journal) {
2844 printf("hfs: Initializing the journal (joffset 0x%llx sz 0x%llx)...\n",
2845 jib_offset, jib_size);
2846 hfsmp->jnl = journal_create(hfsmp->jvp,
2847 jib_offset,
2848 jib_size,
2849 devvp,
2850 hfsmp->hfs_logical_block_size,
2851 arg_flags,
2852 arg_tbufsz,
2853 hfs_sync_metadata, hfsmp->hfs_mp,
2854 hfsmp->hfs_mp);
2855 if (hfsmp->jnl)
2856 journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);
2857
2858 // no need to start a transaction here... if this were to fail
2859 // we'd just re-init it on the next mount.
2860 jib_flags &= ~kJIJournalNeedInitMask;
2861 write_jibp = 1;
2862
2863 } else {
2864 //
2865 // if we weren't the last person to mount this volume
2866 // then we need to throw away the journal because it
2867 // is likely that someone else mucked with the disk.
2868 // if the journal is empty this is no big deal. if the
2869 // disk is dirty this prevents us from replaying the
2870 // journal over top of changes that someone else made.
2871 //
2872 arg_flags |= JOURNAL_RESET;
2873
2874 //printf("hfs: Opening the journal (joffset 0x%llx sz 0x%llx vhp_blksize %d)...\n",
2875 // jib_offset,
2876 // jib_size, SWAP_BE32(vhp->blockSize));
2877
2878 hfsmp->jnl = journal_open(hfsmp->jvp,
2879 jib_offset,
2880 jib_size,
2881 devvp,
2882 hfsmp->hfs_logical_block_size,
2883 arg_flags,
2884 arg_tbufsz,
2885 hfs_sync_metadata, hfsmp->hfs_mp,
2886 hfsmp->hfs_mp);
2887 if (hfsmp->jnl)
2888 journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);
2889 }
2890
2891
2892 if (write_jibp) {
2893 jibp->flags = SWAP_BE32(jib_flags);
2894 jibp->offset = SWAP_BE64(jib_offset);
2895 jibp->size = SWAP_BE64(jib_size);
2896
2897 buf_bwrite(jinfo_bp);
2898 } else {
2899 buf_brelse(jinfo_bp);
2900 }
2901 jinfo_bp = NULL;
2902 jibp = NULL;
2903
2904 // if we expected the journal to be there and we couldn't
2905 // create it or open it then we have to bail out.
2906 if (hfsmp->jnl == NULL) {
2907 printf("hfs: late jnl init: failed to open/create the journal (retval %d).\n", retval);
2908 return EINVAL;
2909 }
2910
2911 return 0;
2912 }
2913
2914 /*
2915 * Calculate the allocation zone for metadata.
2916 *
2917 * This zone includes the following:
2918 * Allocation Bitmap file
2919 * Overflow Extents file
2920 * Journal file
2921 * Quota files
2922 * Clustered Hot files
2923 * Catalog file
2924 *
2925 * METADATA ALLOCATION ZONE
2926 * ____________________________________________________________________________
2927 * | | | | | | |
2928 * | BM | JF | OEF | CATALOG |---> | HOT FILES |
2929 * |____|____|_____|_______________|______________________________|___________|
2930 *
2931 * <------------------------------- N * 128 MB ------------------------------->
2932 *
2933 */
2934 #define GIGABYTE (u_int64_t)(1024*1024*1024)
2935
2936 #define OVERFLOW_DEFAULT_SIZE (4*1024*1024)
2937 #define OVERFLOW_MAXIMUM_SIZE (128*1024*1024)
2938 #define JOURNAL_DEFAULT_SIZE (8*1024*1024)
2939 #define JOURNAL_MAXIMUM_SIZE (512*1024*1024)
2940 #define HOTBAND_MINIMUM_SIZE (10*1024*1024)
2941 #define HOTBAND_MAXIMUM_SIZE (512*1024*1024)
2942
2943 /* Initialize the metadata zone.
2944 *
2945 * If the size of the volume is less than the minimum size required
2946 * for a metadata zone, the metadata zone is disabled.
2947 *
2948 * If disable is true, disable metadata zone unconditionally.
2949 */
2950 void
2951 hfs_metadatazone_init(struct hfsmount *hfsmp, int disable)
2952 {
2953 ExtendedVCB *vcb;
2954 u_int64_t fs_size;
2955 u_int64_t zonesize;
2956 u_int64_t temp;
2957 u_int64_t filesize;
2958 u_int32_t blk;
2959 int items, really_do_it=1;
2960
2961 vcb = HFSTOVCB(hfsmp);
2962 fs_size = (u_int64_t)vcb->blockSize * (u_int64_t)vcb->allocLimit;
2963
2964 /*
2965 * For volumes less than 10 GB, don't bother.
2966 */
2967 if (fs_size < ((u_int64_t)10 * GIGABYTE)) {
2968 really_do_it = 0;
2969 }
2970
2971 /*
2972 * Skip non-journaled volumes as well.
2973 */
2974 if (hfsmp->jnl == NULL) {
2975 really_do_it = 0;
2976 }
2977
2978 /* If caller wants to disable metadata zone, do it */
2979 if (disable == true) {
2980 really_do_it = 0;
2981 }
2982
2983 /*
2984 * Start with space for the boot blocks and Volume Header.
2985 * 1536 = byte offset from start of volume to end of volume header:
2986 * 1024 bytes is the offset from the start of the volume to the
2987 * start of the volume header (defined by the volume format)
2988 * + 512 bytes (the size of the volume header).
2989 */
2990 zonesize = roundup(1536, hfsmp->blockSize);
2991
2992 /*
2993 * Add the on-disk size of allocation bitmap.
2994 */
2995 zonesize += hfsmp->hfs_allocation_cp->c_datafork->ff_blocks * hfsmp->blockSize;
2996
2997 /*
2998 * Add space for the Journal Info Block and Journal (if they're in
2999 * this file system).
3000 */
3001 if (hfsmp->jnl && hfsmp->jvp == hfsmp->hfs_devvp) {
3002 zonesize += hfsmp->blockSize + hfsmp->jnl_size;
3003 }
3004
3005 /*
3006 * Add the existing size of the Extents Overflow B-tree.
3007 * (It rarely grows, so don't bother reserving additional room for it.)
3008 */
3009 zonesize += hfsmp->hfs_extents_cp->c_datafork->ff_blocks * hfsmp->blockSize;
3010
3011 /*
3012 * If there is an Attributes B-tree, leave room for 11 clumps worth.
3013 * newfs_hfs allocates one clump, and leaves a gap of 10 clumps.
3014 * When installing a full OS install onto a 20GB volume, we use
3015 * 7 to 8 clumps worth of space (depending on packages), so that leaves
3016 * us with another 3 or 4 clumps worth before we need another extent.
3017 */
3018 if (hfsmp->hfs_attribute_cp) {
3019 zonesize += 11 * hfsmp->hfs_attribute_cp->c_datafork->ff_clumpsize;
3020 }
3021
3022 /*
3023 * Leave room for 11 clumps of the Catalog B-tree.
3024 * Again, newfs_hfs allocates one clump plus a gap of 10 clumps.
3025 * When installing a full OS install onto a 20GB volume, we use
3026 * 7 to 8 clumps worth of space (depending on packages), so that leaves
3027 * us with another 3 or 4 clumps worth before we need another extent.
3028 */
3029 zonesize += 11 * hfsmp->hfs_catalog_cp->c_datafork->ff_clumpsize;
3030
3031 /*
3032 * Add space for hot file region.
3033 *
3034 * ...for now, use 5 MB per 1 GB (0.5 %)
3035 */
3036 filesize = (fs_size / 1024) * 5;
3037 if (filesize > HOTBAND_MAXIMUM_SIZE)
3038 filesize = HOTBAND_MAXIMUM_SIZE;
3039 else if (filesize < HOTBAND_MINIMUM_SIZE)
3040 filesize = HOTBAND_MINIMUM_SIZE;
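/* e.g. a 100 GB volume yields a 500 MB hot file band, just under the 512 MB cap */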
3041 /*
3042 * Calculate user quota file requirements.
3043 */
3044 if (hfsmp->hfs_flags & HFS_QUOTAS) {
3045 items = QF_USERS_PER_GB * (fs_size / GIGABYTE);
3046 if (items < QF_MIN_USERS)
3047 items = QF_MIN_USERS;
3048 else if (items > QF_MAX_USERS)
3049 items = QF_MAX_USERS;
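/* Round a non-power-of-two item count up to the next power of two, e.g. 300 -> 512 */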
3050 if (!powerof2(items)) {
3051 int x = items;
3052 items = 4;
3053 while (x>>1 != 1) {
3054 x = x >> 1;
3055 items = items << 1;
3056 }
3057 }
3058 filesize += (items + 1) * sizeof(struct dqblk);
3059 /*
3060 * Calculate group quota file requirements.
3061 *
3062 */
3063 items = QF_GROUPS_PER_GB * (fs_size / GIGABYTE);
3064 if (items < QF_MIN_GROUPS)
3065 items = QF_MIN_GROUPS;
3066 else if (items > QF_MAX_GROUPS)
3067 items = QF_MAX_GROUPS;
3068 if (!powerof2(items)) {
3069 int x = items;
3070 items = 4;
3071 while (x>>1 != 1) {
3072 x = x >> 1;
3073 items = items << 1;
3074 }
3075 }
3076 filesize += (items + 1) * sizeof(struct dqblk);
3077 }
3078 zonesize += filesize;
3079
3080 /*
3081 * Round up entire zone to a bitmap block's worth.
3082 * The extra space goes to the catalog file and hot file area.
3083 */
3084 temp = zonesize;
3085 zonesize = roundup(zonesize, (u_int64_t)vcb->vcbVBMIOSize * 8 * vcb->blockSize);
3086 hfsmp->hfs_min_alloc_start = zonesize / vcb->blockSize;
3087 /*
3088 * If doing the round up for hfs_min_alloc_start would push us past
3089 * allocLimit, then just reset it back to 0. Though using a value
3090 * bigger than allocLimit would not cause damage in the block allocator
3091 * code, this value could get stored in the volume header and make it out
3092 * to disk, making the volume header technically corrupt.
3093 */
3094 if (hfsmp->hfs_min_alloc_start >= hfsmp->allocLimit) {
3095 hfsmp->hfs_min_alloc_start = 0;
3096 }
3097
3098 if (really_do_it == 0) {
3099 /* If metadata zone needs to be disabled because the
3100 * volume was truncated, clear the bit and zero out
3101 * the values that are no longer needed.
3102 */
3103 if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
3104 /* Disable metadata zone */
3105 hfsmp->hfs_flags &= ~HFS_METADATA_ZONE;
3106
3107 /* Zero out mount point values that are not required */
3108 hfsmp->hfs_catalog_maxblks = 0;
3109 hfsmp->hfs_hotfile_maxblks = 0;
3110 hfsmp->hfs_hotfile_start = 0;
3111 hfsmp->hfs_hotfile_end = 0;
3112 hfsmp->hfs_hotfile_freeblks = 0;
3113 hfsmp->hfs_metazone_start = 0;
3114 hfsmp->hfs_metazone_end = 0;
3115 }
3116
3117 return;
3118 }
3119
3120 temp = zonesize - temp; /* temp has extra space */
3121 filesize += temp / 3;
3122 hfsmp->hfs_catalog_maxblks += (temp - (temp / 3)) / vcb->blockSize;
3123
3124 hfsmp->hfs_hotfile_maxblks = filesize / vcb->blockSize;
3125
3126 /* Convert to allocation blocks. */
3127 blk = zonesize / vcb->blockSize;
3128
3129 /* The default metadata zone location is at the start of volume. */
3130 hfsmp->hfs_metazone_start = 1;
3131 hfsmp->hfs_metazone_end = blk - 1;
3132
3133 /* The default hotfile area is at the end of the zone. */
3134 if (vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) {
3135 hfsmp->hfs_hotfile_start = blk - (filesize / vcb->blockSize);
3136 hfsmp->hfs_hotfile_end = hfsmp->hfs_metazone_end;
3137 hfsmp->hfs_hotfile_freeblks = hfs_hotfile_freeblocks(hfsmp);
3138 }
3139 else {
3140 hfsmp->hfs_hotfile_start = 0;
3141 hfsmp->hfs_hotfile_end = 0;
3142 hfsmp->hfs_hotfile_freeblks = 0;
3143 }
3144 #if 0
3145 printf("hfs: metadata zone is %d to %d\n", hfsmp->hfs_metazone_start, hfsmp->hfs_metazone_end);
3146 printf("hfs: hot file band is %d to %d\n", hfsmp->hfs_hotfile_start, hfsmp->hfs_hotfile_end);
3147 printf("hfs: hot file band free blocks = %d\n", hfsmp->hfs_hotfile_freeblks);
3148 #endif
3149 hfsmp->hfs_flags |= HFS_METADATA_ZONE;
3150 }
3151
3152
3153 static u_int32_t
3154 hfs_hotfile_freeblocks(struct hfsmount *hfsmp)
3155 {
3156 ExtendedVCB *vcb = HFSTOVCB(hfsmp);
3157 int lockflags;
3158 int freeblocks;
3159
3160 lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
3161 freeblocks = MetaZoneFreeBlocks(vcb);
3162 hfs_systemfile_unlock(hfsmp, lockflags);
3163
3164 /* Minus Extents overflow file reserve. */
3165 freeblocks -=
3166 hfsmp->hfs_overflow_maxblks - VTOF(hfsmp->hfs_extents_vp)->ff_blocks;
3167 /* Minus catalog file reserve. */
3168 freeblocks -=
3169 hfsmp->hfs_catalog_maxblks - VTOF(hfsmp->hfs_catalog_vp)->ff_blocks;
3170 if (freeblocks < 0)
3171 freeblocks = 0;
3172
3173 return MIN(freeblocks, hfsmp->hfs_hotfile_maxblks);
3174 }
3175
3176 /*
3177 * Determine if a file is a "virtual" metadata file.
3178 * This includes journal and quota files.
3179 */
3180 int
3181 hfs_virtualmetafile(struct cnode *cp)
3182 {
3183 const char * filename;
3184
3185
3186 if (cp->c_parentcnid != kHFSRootFolderID)
3187 return (0);
3188
3189 filename = (const char *)cp->c_desc.cd_nameptr;
3190 if (filename == NULL)
3191 return (0);
3192
3193 if ((strncmp(filename, ".journal", sizeof(".journal")) == 0) ||
3194 (strncmp(filename, ".journal_info_block", sizeof(".journal_info_block")) == 0) ||
3195 (strncmp(filename, ".quota.user", sizeof(".quota.user")) == 0) ||
3196 (strncmp(filename, ".quota.group", sizeof(".quota.group")) == 0) ||
3197 (strncmp(filename, ".hotfiles.btree", sizeof(".hotfiles.btree")) == 0))
3198 return (1);
3199
3200 return (0);
3201 }
3202
3203 __private_extern__
3204 void hfs_syncer_lock(struct hfsmount *hfsmp)
3205 {
3206 hfs_lock_mount(hfsmp);
3207 }
3208
3209 __private_extern__
3210 void hfs_syncer_unlock(struct hfsmount *hfsmp)
3211 {
3212 hfs_unlock_mount(hfsmp);
3213 }
3214
3215 __private_extern__
3216 void hfs_syncer_wait(struct hfsmount *hfsmp)
3217 {
3218 msleep(&hfsmp->hfs_sync_incomplete, &hfsmp->hfs_mutex, PWAIT,
3219 "hfs_syncer_wait", NULL);
3220 }
3221
3222 __private_extern__
3223 void hfs_syncer_wakeup(struct hfsmount *hfsmp)
3224 {
3225 wakeup(&hfsmp->hfs_sync_incomplete);
3226 }
3227
3228 __private_extern__
3229 uint64_t hfs_usecs_to_deadline(uint64_t usecs)
3230 {
3231 uint64_t deadline;
3232 clock_interval_to_deadline(usecs, NSEC_PER_USEC, &deadline);
3233 return deadline;
3234 }
3235
3236 __private_extern__
3237 void hfs_syncer_queue(thread_call_t syncer)
3238 {
3239 if (thread_call_enter_delayed_with_leeway(syncer,
3240 NULL,
3241 hfs_usecs_to_deadline(HFS_META_DELAY),
3242 0,
3243 THREAD_CALL_DELAY_SYS_BACKGROUND)) {
3244 printf("hfs: syncer already scheduled!\n");
3245 }
3246 }
3247
3248 //
3249 // Fire off a timed callback to sync the disk if the
3250 // volume is on ejectable media.
3251 //
3252 __private_extern__
3253 void
3254 hfs_sync_ejectable(struct hfsmount *hfsmp)
3255 {
3256 // If we don't have a syncer or we get called by the syncer, just return
3257 if (!hfsmp->hfs_syncer || current_thread() == hfsmp->hfs_syncer_thread)
3258 return;
3259
3260 hfs_syncer_lock(hfsmp);
3261
3262 if (!timerisset(&hfsmp->hfs_sync_req_oldest))
3263 microuptime(&hfsmp->hfs_sync_req_oldest);
3264
3265 /* If hfs_unmount is running, it will set hfs_syncer to NULL. Also we
3266 don't want to queue again if there is a sync outstanding. */
3267 if (!hfsmp->hfs_syncer || hfsmp->hfs_sync_incomplete) {
3268 hfs_syncer_unlock(hfsmp);
3269 return;
3270 }
3271
3272 hfsmp->hfs_sync_incomplete = TRUE;
3273
3274 thread_call_t syncer = hfsmp->hfs_syncer;
3275
3276 hfs_syncer_unlock(hfsmp);
3277
3278 hfs_syncer_queue(syncer);
3279 }
3280
3281 int
3282 hfs_start_transaction(struct hfsmount *hfsmp)
3283 {
3284 int ret = 0, unlock_on_err = 0;
3285 thread_t thread = current_thread();
3286
3287 #ifdef HFS_CHECK_LOCK_ORDER
3288 /*
3289 * You cannot start a transaction while holding a system
3290 * file lock. (unless the transaction is nested.)
3291 */
3292 if (hfsmp->jnl && journal_owner(hfsmp->jnl) != thread) {
3293 if (hfsmp->hfs_catalog_cp && hfsmp->hfs_catalog_cp->c_lockowner == thread) {
3294 panic("hfs_start_transaction: bad lock order (cat before jnl)\n");
3295 }
3296 if (hfsmp->hfs_attribute_cp && hfsmp->hfs_attribute_cp->c_lockowner == thread) {
3297 panic("hfs_start_transaction: bad lock order (attr before jnl)\n");
3298 }
3299 if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == thread) {
3300 panic("hfs_start_transaction: bad lock order (ext before jnl)\n");
3301 }
3302 }
3303 #endif /* HFS_CHECK_LOCK_ORDER */
3304
3305 if (hfsmp->jnl == NULL || journal_owner(hfsmp->jnl) != thread) {
3306 /*
3307 * The global lock should be held shared if the journal is
3308 * active, to prevent it being disabled. If we're not the owner
3309 * of the journal lock, verify that we're not already
3310 * holding the global lock exclusive before moving on.
3311 */
3312 if (hfsmp->hfs_global_lockowner == thread) {
3313 ret = EBUSY;
3314 goto out;
3315 }
3316
3317 hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
3318 OSAddAtomic(1, (SInt32 *)&hfsmp->hfs_active_threads);
3319 unlock_on_err = 1;
3320 }
3321
3322 /* If a downgrade to a read-only mount is in progress, no thread
3323 * other than the downgrading thread is allowed to modify
3324 * the file system.
3325 */
3326 if ((hfsmp->hfs_flags & HFS_RDONLY_DOWNGRADE) &&
3327 hfsmp->hfs_downgrading_thread != thread) {
3328 ret = EROFS;
3329 goto out;
3330 }
3331
3332 if (hfsmp->jnl) {
3333 ret = journal_start_transaction(hfsmp->jnl);
3334 if (ret == 0) {
3335 OSAddAtomic(1, &hfsmp->hfs_global_lock_nesting);
3336 }
3337 } else {
3338 ret = 0;
3339 }
3340
3341 out:
3342 if (ret != 0 && unlock_on_err) {
3343 hfs_unlock_global (hfsmp);
3344 OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_active_threads);
3345 }
3346
3347 return ret;
3348 }
3349
3350 int
3351 hfs_end_transaction(struct hfsmount *hfsmp)
3352 {
3353 int need_unlock=0, ret;
3354
3355 if ((hfsmp->jnl == NULL) || ( journal_owner(hfsmp->jnl) == current_thread()
3356 && (OSAddAtomic(-1, &hfsmp->hfs_global_lock_nesting) == 1)) ) {
3357 need_unlock = 1;
3358 }
3359
3360 if (hfsmp->jnl) {
3361 ret = journal_end_transaction(hfsmp->jnl);
3362 } else {
3363 ret = 0;
3364 }
3365
3366 if (need_unlock) {
3367 OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_active_threads);
3368 hfs_unlock_global (hfsmp);
3369 hfs_sync_ejectable(hfsmp);
3370 }
3371
3372 return ret;
3373 }
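
/*
 * The canonical calling pattern, sketched (illustrative fragment; real
 * callers also take the needed system-file locks inside the
 * transaction, as hfs_erase_unused_nodes below does):
 */
#if 0
	int error = hfs_start_transaction(hfsmp);
	if (error)
		return error;
	/* ... journaled metadata changes ... */
	hfs_end_transaction(hfsmp);
#endif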
3374
3375
3376 void
3377 hfs_journal_lock(struct hfsmount *hfsmp)
3378 {
3379 /* Only peek at hfsmp->jnl while holding the global lock */
3380 hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
3381 if (hfsmp->jnl) {
3382 journal_lock(hfsmp->jnl);
3383 }
3384 hfs_unlock_global (hfsmp);
3385 }
3386
3387 void
3388 hfs_journal_unlock(struct hfsmount *hfsmp)
3389 {
3390 /* Only peek at hfsmp->jnl while holding the global lock */
3391 hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
3392 if (hfsmp->jnl) {
3393 journal_unlock(hfsmp->jnl);
3394 }
3395 hfs_unlock_global (hfsmp);
3396 }
3397
3398 /*
3399 * Flush the contents of the journal to the disk.
3400 *
3401 * Input:
3402 * wait_for_IO -
3403 * If TRUE, wait to write in-memory journal to the disk
3404 * consistently, and also wait to write all asynchronous
3405 * metadata blocks to their corresponding locations
3406 * consistently on the disk. This means that the journal
3407 * is empty at this point and does not contain any
3408 * transactions. This is overkill in normal scenarios
3409 * but is useful whenever the metadata blocks are required
3410 * to be consistent on-disk instead of just the journal
3411 * being consistent; like before live verification
3412 * and live volume resizing.
3413 *
3414 * If FALSE, only wait to write in-memory journal to the
3415 * disk consistently. This means that the journal still
3416 * contains uncommitted transactions and the file system
3417 * metadata blocks in the journal transactions might be
3418 * written asynchronously to the disk. But there is no
3419 * guarantee that they are written to the disk before
3420 * returning to the caller. Note that this option is
3421 * sufficient for file system data integrity as it
3422 * guarantees consistent journal content on the disk.
3423 */
3424 int
3425 hfs_journal_flush(struct hfsmount *hfsmp, boolean_t wait_for_IO)
3426 {
3427 int ret;
3428
3429 /* Only peek at hfsmp->jnl while holding the global lock */
3430 hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
3431 if (hfsmp->jnl) {
3432 ret = journal_flush(hfsmp->jnl, wait_for_IO);
3433 } else {
3434 ret = 0;
3435 }
3436 hfs_unlock_global (hfsmp);
3437
3438 return ret;
3439 }
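
/*
 * Sketched usage of the two modes described above (illustrative):
 */
#if 0
	/* Ordinary integrity: the journal content is consistent on disk. */
	(void) hfs_journal_flush(hfsmp, FALSE);

	/* Before live verification or resizing: the metadata blocks
	   themselves must be consistent, so also wait for the async I/O. */
	(void) hfs_journal_flush(hfsmp, TRUE);
#endif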
3440
3441
3442 /*
3443 * hfs_erase_unused_nodes
3444 *
3445 * Check whether a volume may suffer from unused Catalog B-tree nodes that
3446 * are not zeroed (due to <rdar://problem/6947811>). If so, just write
3447 * zeroes to the unused nodes.
3448 *
3449 * How do we detect when a volume needs this repair? We can't always be
3450 * certain. If a volume was created after a certain date, then it may have
3451 * been created with the faulty newfs_hfs. Since newfs_hfs only created one
3452 * clump, we can assume that if a Catalog B-tree is larger than its clump size,
3453 * the entire first clump must have been written to, which means
3454 * there shouldn't be unused and unwritten nodes in that first clump, and this
3455 * repair is not needed.
3456 *
3457 * We have defined a bit in the Volume Header's attributes to indicate when the
3458 * unused nodes have been repaired. A newer newfs_hfs will set this bit.
3459 * As will fsck_hfs when it repairs the unused nodes.
3460 */
3461 int hfs_erase_unused_nodes(struct hfsmount *hfsmp)
3462 {
3463 int result;
3464 struct filefork *catalog;
3465 int lockflags;
3466
3467 if (hfsmp->vcbAtrb & kHFSUnusedNodeFixMask)
3468 {
3469 /* This volume has already been checked and repaired. */
3470 return 0;
3471 }
3472
3473 if (hfsmp->localCreateDate < kHFSUnusedNodesFixDate)
3474 {
3475 /* This volume is too old to have had the problem. */
3476 hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask;
3477 return 0;
3478 }
3479
3480 catalog = hfsmp->hfs_catalog_cp->c_datafork;
3481 if (catalog->ff_size > catalog->ff_clumpsize)
3482 {
3483 /* The entire first clump must have been in use at some point. */
3484 hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask;
3485 return 0;
3486 }
3487
3488 /*
3489 * If we get here, we need to zero out those unused nodes.
3490 *
3491 * We start a transaction and lock the catalog since we're going to be
3492 * making on-disk changes. But note that BTZeroUnusedNodes doesn't actually
3493 * do its writing via the journal, because that would be too much I/O
3494 * to fit in a transaction, and it's a pain to break it up into multiple
3495 * transactions. (It behaves more like growing a B-tree would.)
3496 */
3497 printf("hfs_erase_unused_nodes: updating volume %s.\n", hfsmp->vcbVN);
3498 result = hfs_start_transaction(hfsmp);
3499 if (result)
3500 goto done;
3501 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
3502 result = BTZeroUnusedNodes(catalog);
3503 vnode_waitforwrites(hfsmp->hfs_catalog_vp, 0, 0, 0, "hfs_erase_unused_nodes");
3504 hfs_systemfile_unlock(hfsmp, lockflags);
3505 hfs_end_transaction(hfsmp);
3506 if (result == 0)
3507 hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask;
3508 printf("hfs_erase_unused_nodes: done updating volume %s.\n", hfsmp->vcbVN);
3509
3510 done:
3511 return result;
3512 }
3513
3514
3515 extern time_t snapshot_timestamp;
3516
3517 int
3518 check_for_tracked_file(struct vnode *vp, time_t ctime, uint64_t op_type, void *arg)
3519 {
3520 int snapshot_error = 0;
3521
3522 if (vp == NULL) {
3523 return 0;
3524 }
3525
3526 /* Swap files are special; skip them */
3527 if (vnode_isswap(vp)) {
3528 return 0;
3529 }
3530
3531 if (ctime != 0 && snapshot_timestamp != 0 && (ctime <= snapshot_timestamp || vnode_needssnapshots(vp))) {
3532 // the change time is within this epoch
3533 int error;
3534
3535 error = resolve_nspace_item_ext(vp, op_type | NAMESPACE_HANDLER_SNAPSHOT_EVENT, arg);
3536 if (error == EDEADLK) {
3537 snapshot_error = 0;
3538 } else if (error) {
3539 if (error == EAGAIN) {
3540 printf("hfs: cow-snapshot: timed out waiting for namespace handler...\n");
3541 } else if (error == EINTR) {
3542 // printf("hfs: cow-snapshot: got a signal while waiting for namespace handler...\n");
3543 snapshot_error = EINTR;
3544 }
3545 }
3546 }
3547
3548 if (snapshot_error) return snapshot_error;
3549
3550 return 0;
3551 }
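/*
 * Illustrative caller sketch (an assumption, not part of the original
 * source): write paths capture the cnode's change time before modifying
 * the file so the namespace handler can snapshot the old contents first:
 *
 *     time_t orig_ctime = VTOC(vp)->c_ctime;
 *     check_for_tracked_file(vp, orig_ctime, NAMESPACE_HANDLER_WRITE_OP, NULL);
 */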
3552
3553 int
3554 check_for_dataless_file(struct vnode *vp, uint64_t op_type)
3555 {
3556 int error;
3557
3558 if (vp == NULL || (VTOC(vp)->c_bsdflags & UF_COMPRESSED) == 0 || VTOCMP(vp) == NULL || VTOCMP(vp)->cmp_type != DATALESS_CMPFS_TYPE) {
3559 // there's nothing to do, it's not dataless
3560 return 0;
3561 }
3562
3563 /* Swap files are special; ignore them */
3564 if (vnode_isswap(vp)) {
3565 return 0;
3566 }
3567
3568 // printf("hfs: dataless: encountered a file with the dataless bit set! (vp %p)\n", vp);
3569 error = resolve_nspace_item(vp, op_type | NAMESPACE_HANDLER_NSPACE_EVENT);
3570 if (error == EDEADLK && op_type == NAMESPACE_HANDLER_WRITE_OP) {
3571 error = 0;
3572 } else if (error) {
3573 if (error == EAGAIN) {
3574 printf("hfs: dataless: timed out waiting for namespace handler...\n");
3575 // XXXdbg - return the fabled ENOTPRESENT (i.e. EJUKEBOX)?
3576 return 0;
3577 } else if (error == EINTR) {
3578 // printf("hfs: dataless: got a signal while waiting for namespace handler...\n");
3579 return EINTR;
3580 }
3581 } else if (VTOC(vp)->c_bsdflags & UF_COMPRESSED) {
3582 //
3583 // if we're here, the dataless bit is still set on the file
3584 // which means it didn't get handled. we return an error
3585 // but it's presently ignored by all callers of this function.
3586 //
3587 // XXXdbg - EDATANOTPRESENT is what we really need...
3588 //
3589 return EBADF;
3590 }
3591
3592 return error;
3593 }
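/*
 * Illustrative caller sketch (an assumption, not part of the original
 * source): data paths call this before touching file contents so a
 * dataless file can be materialized by the namespace handler first:
 *
 *     error = check_for_dataless_file(vp, NAMESPACE_HANDLER_READ_OP);
 *     if (error)
 *         return error;
 */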
3594
3595
3596 //
3597 // NOTE: this function takes care of starting a transaction and
3598 // acquiring the systemfile lock so that it can call
3599 // cat_update().
3600 //
3601 // NOTE: do NOT hold any cnode locks while calling this function
3602 // to avoid deadlocks (because we take a lock on the root
3603 // cnode)
3604 //
3605 int
3606 hfs_generate_document_id(struct hfsmount *hfsmp, uint32_t *docid)
3607 {
3608 struct vnode *rvp;
3609 struct cnode *cp;
3610 int error;
3611
3612 error = VFS_ROOT(HFSTOVFS(hfsmp), &rvp, vfs_context_kernel());
3613 if (error) {
3614 return error;
3615 }
3616
3617 cp = VTOC(rvp);
3618 if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT)) != 0) {
3619 vnode_put(rvp); return error; // don't leak the iocount from VFS_ROOT
3620 }
3621 struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)((void *)((char *)&cp->c_attr.ca_finderinfo + 16));
3622
3623 int lockflags;
3624 if ((error = hfs_start_transaction(hfsmp)) != 0) {
3625 hfs_unlock(cp); vnode_put(rvp); return error; // don't leak the cnode lock or iocount
3626 }
3627 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
3628
3629 if (extinfo->document_id == 0) {
3630 // initialize this to start at 3 (one greater than the root-dir id)
3631 extinfo->document_id = 3;
3632 }
3633
3634 *docid = extinfo->document_id++;
3635
3636 // mark the root cnode dirty
3637 cp->c_flag |= C_MODIFIED | C_FORCEUPDATE;
3638 (void) cat_update(hfsmp, &cp->c_desc, &cp->c_attr, NULL, NULL);
3639
3640 hfs_systemfile_unlock (hfsmp, lockflags);
3641 (void) hfs_end_transaction(hfsmp);
3642
3643 (void) hfs_unlock(cp);
3644
3645 vnode_put(rvp);
3646 rvp = NULL;
3647
3648 return 0;
3649 }
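/*
 * Illustrative usage (a sketch, not part of the original source):
 *
 *     uint32_t docid;
 *     if (hfs_generate_document_id(hfsmp, &docid) == 0) {
 *         // persist docid in the file's extended Finder info
 *     }
 */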
3650
3651
3652 /*
3653 * Return information about number of file system allocation blocks
3654 * taken by metadata on a volume.
3655 *
3656 * This function populates struct hfsinfo_metadata with allocation blocks
3657 * used by extents overflow btree, catalog btree, bitmap, attribute btree,
3658 * journal file, and sum of all of the above.
3659 */
3660 int
3661 hfs_getinfo_metadata_blocks(struct hfsmount *hfsmp, struct hfsinfo_metadata *hinfo)
3662 {
3663 int lockflags = 0;
3664 int ret_lockflags = 0;
3665
3666 /* Zero out the output buffer */
3667 bzero(hinfo, sizeof(struct hfsinfo_metadata));
3668
3669 /*
3670 * Getting the number of allocation blocks for all btrees
3671 * should be a quick operation, so we grab locks for
3672 * all of them at the same time
3673 */
3674 lockflags = SFL_CATALOG | SFL_EXTENTS | SFL_BITMAP | SFL_ATTRIBUTE;
3675 ret_lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
3676 /*
3677 * Make sure that we were able to acquire all locks requested
3678 * to protect us against conditions like unmount in progress.
3679 */
3680 if ((lockflags & ret_lockflags) != lockflags) {
3681 /* Release any locks that were acquired */
3682 hfs_systemfile_unlock(hfsmp, ret_lockflags);
3683 return EPERM;
3684 }
3685
3686 /* Get information about all the btrees */
3687 hinfo->extents = hfsmp->hfs_extents_cp->c_datafork->ff_blocks;
3688 hinfo->catalog = hfsmp->hfs_catalog_cp->c_datafork->ff_blocks;
3689 hinfo->allocation = hfsmp->hfs_allocation_cp->c_datafork->ff_blocks;
3690 hinfo->attribute = hfsmp->hfs_attribute_cp->c_datafork->ff_blocks;
3691
3692 /* Done with btrees, give up the locks */
3693 hfs_systemfile_unlock(hfsmp, ret_lockflags);
3694
3695 /* Get information about journal file */
3696 hinfo->journal = howmany(hfsmp->jnl_size, hfsmp->blockSize);
3697
3698 /* Calculate total number of metadata blocks */
3699 hinfo->total = hinfo->extents + hinfo->catalog +
3700 hinfo->allocation + hinfo->attribute +
3701 hinfo->journal;
3702
3703 return 0;
3704 }
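/*
 * Illustrative usage (a sketch, not part of the original source); the
 * caller owns the output buffer and all counts are in file system
 * allocation blocks:
 *
 *     struct hfsinfo_metadata hinfo;
 *     if (hfs_getinfo_metadata_blocks(hfsmp, &hinfo) == 0)
 *         printf("hfs: %u allocation blocks used by metadata\n",
 *                hinfo.total);
 */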
3705
3706 static int
3707 hfs_freezewrite_callback(struct vnode *vp, __unused void *cargs)
3708 {
3709 vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze 8");
3710
3711 return 0;
3712 }
3713
3714 __private_extern__
3715 int hfs_freeze(struct hfsmount *hfsmp)
3716 {
3717 // First make sure some other process isn't freezing
3718 hfs_lock_mount(hfsmp);
3719 while (hfsmp->hfs_freeze_state != HFS_THAWED) {
3720 if (msleep(&hfsmp->hfs_freeze_state, &hfsmp->hfs_mutex,
3721 PWAIT | PCATCH, "hfs freeze 1", NULL) == EINTR) {
3722 hfs_unlock_mount(hfsmp);
3723 return EINTR;
3724 }
3725 }
3726
3727 // Stop new syncers from starting
3728 hfsmp->hfs_freeze_state = HFS_WANT_TO_FREEZE;
3729
3730 // Now wait for all syncers to finish
3731 while (hfsmp->hfs_syncers) {
3732 if (msleep(&hfsmp->hfs_freeze_state, &hfsmp->hfs_mutex,
3733 PWAIT | PCATCH, "hfs freeze 2", NULL) == EINTR) {
3734 hfs_thaw_locked(hfsmp);
3735 hfs_unlock_mount(hfsmp);
3736 return EINTR;
3737 }
3738 }
3739 hfs_unlock_mount(hfsmp);
3740
3741 // flush things before we get started to try to prevent
3742 // dirty data from being paged out while we're frozen.
3743 // note: we can't do this once we're in the freezing state because
3744 // other threads will need to take the global lock
3745 vnode_iterate(hfsmp->hfs_mp, 0, hfs_freezewrite_callback, NULL);
3746
3747 // Block everything in hfs_lock_global now
3748 hfs_lock_mount(hfsmp);
3749 hfsmp->hfs_freeze_state = HFS_FREEZING;
3750 hfsmp->hfs_freezing_thread = current_thread();
3751 hfs_unlock_mount(hfsmp);
3752
3753 /* Take the exclusive lock to flush out anything else that
3754 might have the global lock at the moment and also so we
3755 can flush the journal. */
3756 hfs_lock_global(hfsmp, HFS_EXCLUSIVE_LOCK);
3757 journal_flush(hfsmp->jnl, TRUE);
3758 hfs_unlock_global(hfsmp);
3759
3760 // don't need to iterate on all vnodes, we just need to
3761 // wait for writes to the system files and the device vnode
3762 //
3763 // Now that journal flush waits for all metadata blocks to
3764 // be written out, waiting for btree writes is probably no
3765 // longer required.
3766 if (HFSTOVCB(hfsmp)->extentsRefNum)
3767 vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze 3");
3768 if (HFSTOVCB(hfsmp)->catalogRefNum)
3769 vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze 4");
3770 if (HFSTOVCB(hfsmp)->allocationsRefNum)
3771 vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze 5");
3772 if (hfsmp->hfs_attribute_vp)
3773 vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze 6");
3774 vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze 7");
3775
3776 // We're done, mark frozen
3777 hfs_lock_mount(hfsmp);
3778 hfsmp->hfs_freeze_state = HFS_FROZEN;
3779 hfsmp->hfs_freezing_proc = current_proc();
3780 hfs_unlock_mount(hfsmp);
3781
3782 return 0;
3783 }
3784
3785 __private_extern__
3786 int hfs_thaw(struct hfsmount *hfsmp, const struct proc *process)
3787 {
3788 hfs_lock_mount(hfsmp);
3789
3790 if (hfsmp->hfs_freeze_state != HFS_FROZEN) {
3791 hfs_unlock_mount(hfsmp);
3792 return EINVAL;
3793 }
3794 if (process && hfsmp->hfs_freezing_proc != process) {
3795 hfs_unlock_mount(hfsmp);
3796 return EPERM;
3797 }
3798
3799 hfs_thaw_locked(hfsmp);
3800
3801 hfs_unlock_mount(hfsmp);
3802
3803 return 0;
3804 }
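/*
 * Illustrative pairing (a sketch, not part of the original source): a
 * freeze is held across a snapshot-style operation and must be released
 * by the same process that took it:
 *
 *     if (hfs_freeze(hfsmp) == 0) {
 *         // ... capture a block-level snapshot of the device ...
 *         hfs_thaw(hfsmp, current_proc());
 *     }
 */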
3805
3806 static void hfs_thaw_locked(struct hfsmount *hfsmp)
3807 {
3808 hfsmp->hfs_freezing_proc = NULL;
3809 hfsmp->hfs_freeze_state = HFS_THAWED;
3810
3811 wakeup(&hfsmp->hfs_freeze_state);
3812 }