[apple/xnu.git] / bsd / hfs / hfs_vfsutils.c (blob eb721483c93907c9b45ee493f45a33739f402b78)
1 /*
2 * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* @(#)hfs_vfsutils.c 4.0
29 *
30 * (c) 1997-2002 Apple Computer, Inc. All Rights Reserved
31 *
32 * hfs_vfsutils.c -- Routines that go between the HFS layer and the VFS.
33 *
34 */
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/kernel.h>
38 #include <sys/malloc.h>
39 #include <sys/stat.h>
40 #include <sys/mount.h>
41 #include <sys/mount_internal.h>
42 #include <sys/buf.h>
43 #include <sys/buf_internal.h>
44 #include <sys/ubc.h>
45 #include <sys/unistd.h>
46 #include <sys/utfconv.h>
47 #include <sys/kauth.h>
48 #include <sys/fcntl.h>
49 #include <sys/vnode_internal.h>
50 #include <kern/clock.h>
51
52 #include <libkern/OSAtomic.h>
53
54 #include "hfs.h"
55 #include "hfs_catalog.h"
56 #include "hfs_dbg.h"
57 #include "hfs_mount.h"
58 #include "hfs_endian.h"
59 #include "hfs_cnode.h"
60 #include "hfs_fsctl.h"
61
62 #include "hfscommon/headers/FileMgrInternal.h"
63 #include "hfscommon/headers/BTreesInternal.h"
64 #include "hfscommon/headers/HFSUnicodeWrappers.h"
65
66 static void ReleaseMetaFileVNode(struct vnode *vp);
67 static int hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_args);
68
69 static u_int32_t hfs_hotfile_freeblocks(struct hfsmount *);
70
71
72 //*******************************************************************************
73 // Note: Finder information in the HFS/HFS+ metadata is considered opaque and
74 // hence is not in the right byte order on little endian machines. It is
75 // the responsibility of the Finder and other clients to swap the data.
76 //*******************************************************************************
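
//*******************************************************************************
// Editorial sketch, not part of the original file: a client reading the raw
// 32-byte Finder info would treat each 4-byte field as big-endian and swap it
// itself. The helper below is only an illustration; the meaning assumed for the
// first field (CNID of the bootable system folder) is an assumption, and the
// code is kept under #if 0 to mark it as non-authoritative.
//*******************************************************************************
#if 0
#include <libkern/OSByteOrder.h>
#include <string.h>
#include <stdint.h>

static uint32_t
finderinfo_boot_folder(const uint8_t finderInfo[32])
{
	uint32_t be_field;

	memcpy(&be_field, &finderInfo[0], sizeof(be_field));	/* raw on-disk (big-endian) bytes */
	return OSSwapBigToHostInt32(be_field);			/* no-op on big-endian hosts */
}
#endif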
77
78 //*******************************************************************************
79 // Routine: hfs_MountHFSVolume
80 //
81 //
82 //*******************************************************************************
83 unsigned char hfs_catname[] = "Catalog B-tree";
84 unsigned char hfs_extname[] = "Extents B-tree";
85 unsigned char hfs_vbmname[] = "Volume Bitmap";
86 unsigned char hfs_attrname[] = "Attribute B-tree";
87 unsigned char hfs_startupname[] = "Startup File";
88
89
90 __private_extern__
91 OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb,
92 __unused struct proc *p)
93 {
94 ExtendedVCB *vcb = HFSTOVCB(hfsmp);
95 int error;
96 ByteCount utf8chars;
97 struct cat_desc cndesc;
98 struct cat_attr cnattr;
99 struct cat_fork fork;
100
101 /* Block size must be a nonzero multiple of 512 */
102 if (SWAP_BE32(mdb->drAlBlkSiz) == 0 ||
103 (SWAP_BE32(mdb->drAlBlkSiz) & 0x01FF) != 0)
104 return (EINVAL);
105
106 /* don't mount a writable volume if it's dirty; it must be cleaned by fsck_hfs */
107 if (((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) &&
108 ((SWAP_BE16(mdb->drAtrb) & kHFSVolumeUnmountedMask) == 0)) {
109 return (EINVAL);
110 }
111 hfsmp->hfs_flags |= HFS_STANDARD;
112 /*
113 * The MDB seems OK: transfer info from it into VCB
114 * Note - the VCB starts out clear (all zeros)
115 *
116 */
117 vcb->vcbSigWord = SWAP_BE16 (mdb->drSigWord);
118 vcb->vcbCrDate = to_bsd_time(LocalToUTC(SWAP_BE32(mdb->drCrDate)));
119 vcb->localCreateDate = SWAP_BE32 (mdb->drCrDate);
120 vcb->vcbLsMod = to_bsd_time(LocalToUTC(SWAP_BE32(mdb->drLsMod)));
121 vcb->vcbAtrb = SWAP_BE16 (mdb->drAtrb);
122 vcb->vcbNmFls = SWAP_BE16 (mdb->drNmFls);
123 vcb->vcbVBMSt = SWAP_BE16 (mdb->drVBMSt);
124 vcb->nextAllocation = SWAP_BE16 (mdb->drAllocPtr);
125 vcb->totalBlocks = SWAP_BE16 (mdb->drNmAlBlks);
126 vcb->allocLimit = vcb->totalBlocks;
127 vcb->blockSize = SWAP_BE32 (mdb->drAlBlkSiz);
128 vcb->vcbClpSiz = SWAP_BE32 (mdb->drClpSiz);
129 vcb->vcbAlBlSt = SWAP_BE16 (mdb->drAlBlSt);
130 vcb->vcbNxtCNID = SWAP_BE32 (mdb->drNxtCNID);
131 vcb->freeBlocks = SWAP_BE16 (mdb->drFreeBks);
132 vcb->vcbVolBkUp = to_bsd_time(LocalToUTC(SWAP_BE32(mdb->drVolBkUp)));
133 vcb->vcbWrCnt = SWAP_BE32 (mdb->drWrCnt);
134 vcb->vcbNmRtDirs = SWAP_BE16 (mdb->drNmRtDirs);
135 vcb->vcbFilCnt = SWAP_BE32 (mdb->drFilCnt);
136 vcb->vcbDirCnt = SWAP_BE32 (mdb->drDirCnt);
137 bcopy(mdb->drFndrInfo, vcb->vcbFndrInfo, sizeof(vcb->vcbFndrInfo));
138 if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
139 vcb->vcbWrCnt++; /* Compensate for write of MDB on last flush */
140
141 /* convert hfs encoded name into UTF-8 string */
142 error = hfs_to_utf8(vcb, mdb->drVN, NAME_MAX, &utf8chars, vcb->vcbVN);
143 /*
144 * When an HFS name cannot be encoded with the current
145 * volume encoding we use MacRoman as a fallback.
146 */
147 if (error || (utf8chars == 0))
148 (void) mac_roman_to_utf8(mdb->drVN, NAME_MAX, &utf8chars, vcb->vcbVN);
149
150 hfsmp->hfs_logBlockSize = BestBlockSizeFit(vcb->blockSize, MAXBSIZE, hfsmp->hfs_logical_block_size);
151 vcb->vcbVBMIOSize = kHFSBlockSize;
152
153 hfsmp->hfs_alt_id_sector = HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size,
154 hfsmp->hfs_logical_block_count);
155
156 bzero(&cndesc, sizeof(cndesc));
157 cndesc.cd_parentcnid = kHFSRootParentID;
158 cndesc.cd_flags |= CD_ISMETA;
159 bzero(&cnattr, sizeof(cnattr));
160 cnattr.ca_linkcount = 1;
161 cnattr.ca_mode = S_IFREG;
162 bzero(&fork, sizeof(fork));
163
164 /*
165 * Set up Extents B-tree vnode
166 */
167 cndesc.cd_nameptr = hfs_extname;
168 cndesc.cd_namelen = strlen((char *)hfs_extname);
169 cndesc.cd_cnid = cnattr.ca_fileid = kHFSExtentsFileID;
170 fork.cf_size = SWAP_BE32(mdb->drXTFlSize);
171 fork.cf_blocks = fork.cf_size / vcb->blockSize;
172 fork.cf_clump = SWAP_BE32(mdb->drXTClpSiz);
173 fork.cf_vblocks = 0;
174 fork.cf_extents[0].startBlock = SWAP_BE16(mdb->drXTExtRec[0].startBlock);
175 fork.cf_extents[0].blockCount = SWAP_BE16(mdb->drXTExtRec[0].blockCount);
176 fork.cf_extents[1].startBlock = SWAP_BE16(mdb->drXTExtRec[1].startBlock);
177 fork.cf_extents[1].blockCount = SWAP_BE16(mdb->drXTExtRec[1].blockCount);
178 fork.cf_extents[2].startBlock = SWAP_BE16(mdb->drXTExtRec[2].startBlock);
179 fork.cf_extents[2].blockCount = SWAP_BE16(mdb->drXTExtRec[2].blockCount);
180 cnattr.ca_blocks = fork.cf_blocks;
181
182 error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork,
183 &hfsmp->hfs_extents_vp);
184 if (error) goto MtVolErr;
185 error = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_extents_vp),
186 (KeyCompareProcPtr)CompareExtentKeys));
187 if (error) {
188 hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
189 goto MtVolErr;
190 }
191 hfsmp->hfs_extents_cp = VTOC(hfsmp->hfs_extents_vp);
192
193 /*
194 * Set up Catalog B-tree vnode...
195 */
196 cndesc.cd_nameptr = hfs_catname;
197 cndesc.cd_namelen = strlen((char *)hfs_catname);
198 cndesc.cd_cnid = cnattr.ca_fileid = kHFSCatalogFileID;
199 fork.cf_size = SWAP_BE32(mdb->drCTFlSize);
200 fork.cf_blocks = fork.cf_size / vcb->blockSize;
201 fork.cf_clump = SWAP_BE32(mdb->drCTClpSiz);
202 fork.cf_vblocks = 0;
203 fork.cf_extents[0].startBlock = SWAP_BE16(mdb->drCTExtRec[0].startBlock);
204 fork.cf_extents[0].blockCount = SWAP_BE16(mdb->drCTExtRec[0].blockCount);
205 fork.cf_extents[1].startBlock = SWAP_BE16(mdb->drCTExtRec[1].startBlock);
206 fork.cf_extents[1].blockCount = SWAP_BE16(mdb->drCTExtRec[1].blockCount);
207 fork.cf_extents[2].startBlock = SWAP_BE16(mdb->drCTExtRec[2].startBlock);
208 fork.cf_extents[2].blockCount = SWAP_BE16(mdb->drCTExtRec[2].blockCount);
209 cnattr.ca_blocks = fork.cf_blocks;
210
211 error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork,
212 &hfsmp->hfs_catalog_vp);
213 if (error) {
214 hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
215 goto MtVolErr;
216 }
217 error = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_catalog_vp),
218 (KeyCompareProcPtr)CompareCatalogKeys));
219 if (error) {
220 hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
221 hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
222 goto MtVolErr;
223 }
224 hfsmp->hfs_catalog_cp = VTOC(hfsmp->hfs_catalog_vp);
225
226 /*
227 * Set up dummy Allocation file vnode (used only for locking bitmap)
228 */
229 cndesc.cd_nameptr = hfs_vbmname;
230 cndesc.cd_namelen = strlen((char *)hfs_vbmname);
231 cndesc.cd_cnid = cnattr.ca_fileid = kHFSAllocationFileID;
232 bzero(&fork, sizeof(fork));
233 cnattr.ca_blocks = 0;
234
235 error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork,
236 &hfsmp->hfs_allocation_vp);
237 if (error) {
238 hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
239 hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
240 goto MtVolErr;
241 }
242 hfsmp->hfs_allocation_cp = VTOC(hfsmp->hfs_allocation_vp);
243
244 /* mark the volume dirty (clear clean unmount bit) */
245 vcb->vcbAtrb &= ~kHFSVolumeUnmountedMask;
246
247 if (error == noErr)
248 {
249 error = cat_idlookup(hfsmp, kHFSRootFolderID, 0, NULL, NULL, NULL);
250 }
251
252 if ( error == noErr )
253 {
254 if ( !(vcb->vcbAtrb & kHFSVolumeHardwareLockMask) ) // if the disk is not write protected
255 {
256 MarkVCBDirty( vcb ); // mark VCB dirty so it will be written
257 }
258 }
259
260 /*
261 * all done with system files so we can unlock now...
262 */
263 hfs_unlock(VTOC(hfsmp->hfs_allocation_vp));
264 hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
265 hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
266
267 goto CmdDone;
268
269 //-- Release any resources allocated so far before exiting with an error:
270 MtVolErr:
271 ReleaseMetaFileVNode(hfsmp->hfs_catalog_vp);
272 ReleaseMetaFileVNode(hfsmp->hfs_extents_vp);
273
274 CmdDone:
275 return (error);
276 }
277
278 //*******************************************************************************
279 // Routine: hfs_MountHFSPlusVolume
280 //
281 //
282 //*******************************************************************************
283
284 __private_extern__
285 OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
286 off_t embeddedOffset, u_int64_t disksize, __unused struct proc *p, void *args, kauth_cred_t cred)
287 {
288 register ExtendedVCB *vcb;
289 struct cat_desc cndesc;
290 struct cat_attr cnattr;
291 struct cat_fork cfork;
292 u_int32_t blockSize;
293 daddr64_t spare_sectors;
294 struct BTreeInfoRec btinfo;
295 u_int16_t signature;
296 u_int16_t hfs_version;
297 int i;
298 OSErr retval;
299
300 signature = SWAP_BE16(vhp->signature);
301 hfs_version = SWAP_BE16(vhp->version);
302
303 if (signature == kHFSPlusSigWord) {
304 if (hfs_version != kHFSPlusVersion) {
305 printf("hfs_mount: invalid HFS+ version: %d\n", hfs_version);
306 return (EINVAL);
307 }
308 } else if (signature == kHFSXSigWord) {
309 if (hfs_version != kHFSXVersion) {
310 printf("hfs_mount: invalid HFSX version: %d\n", hfs_version);
311 return (EINVAL);
312 }
313 /* The in-memory signature is always 'H+'. */
314 signature = kHFSPlusSigWord;
315 hfsmp->hfs_flags |= HFS_X;
316 } else {
317 /* The printf for an invalid HFS+ signature was removed because it
318 * produced a spurious error for a UFS root volume.
319 */
320 return (EINVAL);
321 }
322
323 /* Block size must be at least 512 and a power of 2 */
324 blockSize = SWAP_BE32(vhp->blockSize);
325 if (blockSize < 512 || !powerof2(blockSize))
326 return (EINVAL);
327
328 /* don't mount a writable volume if it's dirty; it must be cleaned by fsck_hfs */
329 if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0 && hfsmp->jnl == NULL &&
330 (SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) == 0)
331 return (EINVAL);
332
333 /* Make sure we can live with the physical block size. */
334 if ((disksize & (hfsmp->hfs_logical_block_size - 1)) ||
335 (embeddedOffset & (hfsmp->hfs_logical_block_size - 1)) ||
336 (blockSize < hfsmp->hfs_logical_block_size)) {
337 return (ENXIO);
338 }
339
340 /* If the allocation block size is less than the physical
341 * block size, we assume that the physical block size
342 * is the same as the logical block size. The physical block
343 * size value is used to round down the offsets for
344 * reading and writing the primary and alternate volume
345 * headers at a physical block boundary and will cause
346 * problems if it is less than the block size.
347 */
348 if (blockSize < hfsmp->hfs_physical_block_size) {
349 hfsmp->hfs_physical_block_size = hfsmp->hfs_logical_block_size;
350 hfsmp->hfs_log_per_phys = 1;
351 }
352
353 /*
354 * The VolumeHeader seems OK: transfer info from it into VCB
355 * Note - the VCB starts out clear (all zeros)
356 */
357 vcb = HFSTOVCB(hfsmp);
358
359 vcb->vcbSigWord = signature;
360 vcb->vcbJinfoBlock = SWAP_BE32(vhp->journalInfoBlock);
361 vcb->vcbLsMod = to_bsd_time(SWAP_BE32(vhp->modifyDate));
362 vcb->vcbAtrb = SWAP_BE32(vhp->attributes);
363 vcb->vcbClpSiz = SWAP_BE32(vhp->rsrcClumpSize);
364 vcb->vcbNxtCNID = SWAP_BE32(vhp->nextCatalogID);
365 vcb->vcbVolBkUp = to_bsd_time(SWAP_BE32(vhp->backupDate));
366 vcb->vcbWrCnt = SWAP_BE32(vhp->writeCount);
367 vcb->vcbFilCnt = SWAP_BE32(vhp->fileCount);
368 vcb->vcbDirCnt = SWAP_BE32(vhp->folderCount);
369
370 /* copy 32 bytes of Finder info */
371 bcopy(vhp->finderInfo, vcb->vcbFndrInfo, sizeof(vhp->finderInfo));
372
373 vcb->vcbAlBlSt = 0; /* hfs+ allocation blocks start at first block of volume */
374 if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
375 vcb->vcbWrCnt++; /* compensate for write of Volume Header on last flush */
376
377 /* Now fill in the Extended VCB info */
378 vcb->nextAllocation = SWAP_BE32(vhp->nextAllocation);
379 vcb->totalBlocks = SWAP_BE32(vhp->totalBlocks);
380 vcb->allocLimit = vcb->totalBlocks;
381 vcb->freeBlocks = SWAP_BE32(vhp->freeBlocks);
382 vcb->blockSize = blockSize;
383 vcb->encodingsBitmap = SWAP_BE64(vhp->encodingsBitmap);
384 vcb->localCreateDate = SWAP_BE32(vhp->createDate);
385
386 vcb->hfsPlusIOPosOffset = embeddedOffset;
387
388 /* Default to no free block reserve */
389 vcb->reserveBlocks = 0;
390
391 /*
392 * Update the logical block size in the mount struct
393 * (currently set up from the wrapper MDB) using the
394 * new blocksize value:
395 */
396 hfsmp->hfs_logBlockSize = BestBlockSizeFit(vcb->blockSize, MAXBSIZE, hfsmp->hfs_logical_block_size);
397 vcb->vcbVBMIOSize = min(vcb->blockSize, MAXPHYSIO);
398
399 /*
400 * Validate and initialize the location of the alternate volume header.
401 */
402 spare_sectors = hfsmp->hfs_logical_block_count -
403 (((daddr64_t)vcb->totalBlocks * blockSize) /
404 hfsmp->hfs_logical_block_size);
405
406 if (spare_sectors > (daddr64_t)(blockSize / hfsmp->hfs_logical_block_size)) {
407 hfsmp->hfs_alt_id_sector = 0; /* partition has grown! */
408 } else {
409 hfsmp->hfs_alt_id_sector = (hfsmp->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
410 HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size,
411 hfsmp->hfs_logical_block_count);
412 }
413
414 bzero(&cndesc, sizeof(cndesc));
415 cndesc.cd_parentcnid = kHFSRootParentID;
416 cndesc.cd_flags |= CD_ISMETA;
417 bzero(&cnattr, sizeof(cnattr));
418 cnattr.ca_linkcount = 1;
419 cnattr.ca_mode = S_IFREG;
420
421 /*
422 * Set up Extents B-tree vnode
423 */
424 cndesc.cd_nameptr = hfs_extname;
425 cndesc.cd_namelen = strlen((char *)hfs_extname);
426 cndesc.cd_cnid = cnattr.ca_fileid = kHFSExtentsFileID;
427
428 cfork.cf_size = SWAP_BE64 (vhp->extentsFile.logicalSize);
429 cfork.cf_new_size= 0;
430 cfork.cf_clump = SWAP_BE32 (vhp->extentsFile.clumpSize);
431 cfork.cf_blocks = SWAP_BE32 (vhp->extentsFile.totalBlocks);
432 cfork.cf_vblocks = 0;
433 cnattr.ca_blocks = cfork.cf_blocks;
434 for (i = 0; i < kHFSPlusExtentDensity; i++) {
435 cfork.cf_extents[i].startBlock =
436 SWAP_BE32 (vhp->extentsFile.extents[i].startBlock);
437 cfork.cf_extents[i].blockCount =
438 SWAP_BE32 (vhp->extentsFile.extents[i].blockCount);
439 }
440 retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
441 &hfsmp->hfs_extents_vp);
442 if (retval)
443 {
444 goto ErrorExit;
445 }
446 hfsmp->hfs_extents_cp = VTOC(hfsmp->hfs_extents_vp);
447 hfs_unlock(hfsmp->hfs_extents_cp);
448
449 retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_extents_vp),
450 (KeyCompareProcPtr) CompareExtentKeysPlus));
451 if (retval)
452 {
453 goto ErrorExit;
454 }
455 /*
456 * Set up Catalog B-tree vnode
457 */
458 cndesc.cd_nameptr = hfs_catname;
459 cndesc.cd_namelen = strlen((char *)hfs_catname);
460 cndesc.cd_cnid = cnattr.ca_fileid = kHFSCatalogFileID;
461
462 cfork.cf_size = SWAP_BE64 (vhp->catalogFile.logicalSize);
463 cfork.cf_clump = SWAP_BE32 (vhp->catalogFile.clumpSize);
464 cfork.cf_blocks = SWAP_BE32 (vhp->catalogFile.totalBlocks);
465 cfork.cf_vblocks = 0;
466 cnattr.ca_blocks = cfork.cf_blocks;
467 for (i = 0; i < kHFSPlusExtentDensity; i++) {
468 cfork.cf_extents[i].startBlock =
469 SWAP_BE32 (vhp->catalogFile.extents[i].startBlock);
470 cfork.cf_extents[i].blockCount =
471 SWAP_BE32 (vhp->catalogFile.extents[i].blockCount);
472 }
473 retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
474 &hfsmp->hfs_catalog_vp);
475 if (retval) {
476 goto ErrorExit;
477 }
478 hfsmp->hfs_catalog_cp = VTOC(hfsmp->hfs_catalog_vp);
479 hfs_unlock(hfsmp->hfs_catalog_cp);
480
481 retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_catalog_vp),
482 (KeyCompareProcPtr) CompareExtendedCatalogKeys));
483 if (retval) {
484 goto ErrorExit;
485 }
486 if ((hfsmp->hfs_flags & HFS_X) &&
487 BTGetInformation(VTOF(hfsmp->hfs_catalog_vp), 0, &btinfo) == 0) {
488 if (btinfo.keyCompareType == kHFSBinaryCompare) {
489 hfsmp->hfs_flags |= HFS_CASE_SENSITIVE;
490 /* Install a case-sensitive key compare */
491 (void) BTOpenPath(VTOF(hfsmp->hfs_catalog_vp),
492 (KeyCompareProcPtr)cat_binarykeycompare);
493 }
494 }
495
496 /*
497 * Set up Allocation file vnode
498 */
499 cndesc.cd_nameptr = hfs_vbmname;
500 cndesc.cd_namelen = strlen((char *)hfs_vbmname);
501 cndesc.cd_cnid = cnattr.ca_fileid = kHFSAllocationFileID;
502
503 cfork.cf_size = SWAP_BE64 (vhp->allocationFile.logicalSize);
504 cfork.cf_clump = SWAP_BE32 (vhp->allocationFile.clumpSize);
505 cfork.cf_blocks = SWAP_BE32 (vhp->allocationFile.totalBlocks);
506 cfork.cf_vblocks = 0;
507 cnattr.ca_blocks = cfork.cf_blocks;
508 for (i = 0; i < kHFSPlusExtentDensity; i++) {
509 cfork.cf_extents[i].startBlock =
510 SWAP_BE32 (vhp->allocationFile.extents[i].startBlock);
511 cfork.cf_extents[i].blockCount =
512 SWAP_BE32 (vhp->allocationFile.extents[i].blockCount);
513 }
514 retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
515 &hfsmp->hfs_allocation_vp);
516 if (retval) {
517 goto ErrorExit;
518 }
519 hfsmp->hfs_allocation_cp = VTOC(hfsmp->hfs_allocation_vp);
520 hfs_unlock(hfsmp->hfs_allocation_cp);
521
522 /*
523 * Set up Attribute B-tree vnode
524 */
525 if (vhp->attributesFile.totalBlocks != 0) {
526 cndesc.cd_nameptr = hfs_attrname;
527 cndesc.cd_namelen = strlen((char *)hfs_attrname);
528 cndesc.cd_cnid = cnattr.ca_fileid = kHFSAttributesFileID;
529
530 cfork.cf_size = SWAP_BE64 (vhp->attributesFile.logicalSize);
531 cfork.cf_clump = SWAP_BE32 (vhp->attributesFile.clumpSize);
532 cfork.cf_blocks = SWAP_BE32 (vhp->attributesFile.totalBlocks);
533 cfork.cf_vblocks = 0;
534 cnattr.ca_blocks = cfork.cf_blocks;
535 for (i = 0; i < kHFSPlusExtentDensity; i++) {
536 cfork.cf_extents[i].startBlock =
537 SWAP_BE32 (vhp->attributesFile.extents[i].startBlock);
538 cfork.cf_extents[i].blockCount =
539 SWAP_BE32 (vhp->attributesFile.extents[i].blockCount);
540 }
541 retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
542 &hfsmp->hfs_attribute_vp);
543 if (retval) {
544 goto ErrorExit;
545 }
546 hfsmp->hfs_attribute_cp = VTOC(hfsmp->hfs_attribute_vp);
547 hfs_unlock(hfsmp->hfs_attribute_cp);
548 retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_attribute_vp),
549 (KeyCompareProcPtr) hfs_attrkeycompare));
550 if (retval) {
551 goto ErrorExit;
552 }
553 }
554
555 /*
556 * Set up Startup file vnode
557 */
558 if (vhp->startupFile.totalBlocks != 0) {
559 cndesc.cd_nameptr = hfs_startupname;
560 cndesc.cd_namelen = strlen((char *)hfs_startupname);
561 cndesc.cd_cnid = cnattr.ca_fileid = kHFSStartupFileID;
562
563 cfork.cf_size = SWAP_BE64 (vhp->startupFile.logicalSize);
564 cfork.cf_clump = SWAP_BE32 (vhp->startupFile.clumpSize);
565 cfork.cf_blocks = SWAP_BE32 (vhp->startupFile.totalBlocks);
566 cfork.cf_vblocks = 0;
567 cnattr.ca_blocks = cfork.cf_blocks;
568 for (i = 0; i < kHFSPlusExtentDensity; i++) {
569 cfork.cf_extents[i].startBlock =
570 SWAP_BE32 (vhp->startupFile.extents[i].startBlock);
571 cfork.cf_extents[i].blockCount =
572 SWAP_BE32 (vhp->startupFile.extents[i].blockCount);
573 }
574 retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
575 &hfsmp->hfs_startup_vp);
576 if (retval) {
577 goto ErrorExit;
578 }
579 hfsmp->hfs_startup_cp = VTOC(hfsmp->hfs_startup_vp);
580 hfs_unlock(hfsmp->hfs_startup_cp);
581 }
582
583 /* Pick up volume name and create date */
584 retval = cat_idlookup(hfsmp, kHFSRootFolderID, 0, &cndesc, &cnattr, NULL);
585 if (retval) {
586 goto ErrorExit;
587 }
588 vcb->vcbCrDate = cnattr.ca_itime;
589 vcb->volumeNameEncodingHint = cndesc.cd_encoding;
590 bcopy(cndesc.cd_nameptr, vcb->vcbVN, min(255, cndesc.cd_namelen));
591 cat_releasedesc(&cndesc);
592
593 /* mark the volume dirty (clear clean unmount bit) */
594 vcb->vcbAtrb &= ~kHFSVolumeUnmountedMask;
595 if (hfsmp->jnl && (hfsmp->hfs_flags & HFS_READ_ONLY) == 0) {
596 hfs_flushvolumeheader(hfsmp, TRUE, 0);
597 }
598
599 /* kHFSHasFolderCount is only supported/updated on HFSX volumes */
600 if ((hfsmp->hfs_flags & HFS_X) != 0) {
601 hfsmp->hfs_flags |= HFS_FOLDERCOUNT;
602 }
603
604 //
605 // Check if we need to do late journal initialization. This only
606 // happens if a previous version of Mac OS X (or Mac OS 9) touched the disk.
607 // In that case hfs_late_journal_init() will re-locate the journal
608 // and journal_info_block files and validate that they're still kosher.
609 //
610 if ( (vcb->vcbAtrb & kHFSVolumeJournaledMask)
611 && (SWAP_BE32(vhp->lastMountedVersion) != kHFSJMountVersion)
612 && (hfsmp->jnl == NULL)) {
613
614 retval = hfs_late_journal_init(hfsmp, vhp, args);
615 if (retval != 0) {
616 if (retval == EROFS) {
617 // EROFS is a special error code that means the volume has an external
618 // journal which we couldn't find. In that case we do not want to
619 // rewrite the volume header - we'll just refuse to mount the volume.
620 retval = EINVAL;
621 goto ErrorExit;
622 }
623
624 hfsmp->jnl = NULL;
625
626 // if the journal failed to open, then set the lastMountedVersion
627 // to be "FSK!" which fsck_hfs will see and force the fsck instead
628 // of just bailing out because the volume is journaled.
629 if (!(hfsmp->hfs_flags & HFS_READ_ONLY)) {
630 HFSPlusVolumeHeader *jvhp;
631 daddr64_t mdb_offset;
632 struct buf *bp = NULL;
633
634 hfsmp->hfs_flags |= HFS_NEED_JNL_RESET;
635
636 mdb_offset = (daddr64_t)((embeddedOffset / blockSize) + HFS_PRI_SECTOR(blockSize));
637
638 bp = NULL;
639 retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
640 HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
641 hfsmp->hfs_physical_block_size, cred, &bp);
642 if (retval == 0) {
643 jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));
644
645 if (SWAP_BE16(jvhp->signature) == kHFSPlusSigWord || SWAP_BE16(jvhp->signature) == kHFSXSigWord) {
646 printf ("hfs(3): Journal replay failed. Writing lastMountedVersion as FSK!\n");
647 jvhp->lastMountedVersion = SWAP_BE32(kFSKMountVersion);
648 buf_bwrite(bp);
649 } else {
650 buf_brelse(bp);
651 }
652 bp = NULL;
653 } else if (bp) {
654 buf_brelse(bp);
655 // clear this so the error exit path won't try to use it
656 bp = NULL;
657 }
658 }
659
660 retval = EINVAL;
661 goto ErrorExit;
662 } else if (hfsmp->jnl) {
663 vfs_setflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
664 }
665 } else if (hfsmp->jnl || ((vcb->vcbAtrb & kHFSVolumeJournaledMask) && (hfsmp->hfs_flags & HFS_READ_ONLY))) {
666 struct cat_attr jinfo_attr, jnl_attr;
667
668 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
669 vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
670 }
671
672 // if we're here we need to fill in the file IDs for the
673 // journal and journal_info_block.
674 hfsmp->hfs_jnlinfoblkid = GetFileInfo(vcb, kRootDirID, ".journal_info_block", &jinfo_attr, NULL);
675 hfsmp->hfs_jnlfileid = GetFileInfo(vcb, kRootDirID, ".journal", &jnl_attr, NULL);
676 if (hfsmp->hfs_jnlinfoblkid == 0 || hfsmp->hfs_jnlfileid == 0) {
677 printf("hfs: danger! couldn't find the file-id's for the journal or journal_info_block\n");
678 printf("hfs: jnlfileid %d, jnlinfoblkid %d\n", hfsmp->hfs_jnlfileid, hfsmp->hfs_jnlinfoblkid);
679 }
680
681 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
682 vcb->vcbAtrb |= kHFSVolumeJournaledMask;
683 }
684
685 if (hfsmp->jnl == NULL) {
686 vfs_clearflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
687 }
688 }
689
690 /*
691 * Establish a metadata allocation zone.
692 */
693 hfs_metadatazone_init(hfsmp);
694
695 /*
696 * Make any metadata zone adjustments.
697 */
698 if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
699 /* Keep the roving allocator out of the metadata zone. */
700 if (vcb->nextAllocation >= hfsmp->hfs_metazone_start &&
701 vcb->nextAllocation <= hfsmp->hfs_metazone_end) {
702 HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_end + 1);
703 }
704 } else {
705 if (vcb->nextAllocation <= 1) {
706 vcb->nextAllocation = hfsmp->hfs_min_alloc_start;
707 }
708 }
709 vcb->sparseAllocation = hfsmp->hfs_min_alloc_start;
710
711 /* Setup private/hidden directories for hardlinks. */
712 hfs_privatedir_init(hfsmp, FILE_HARDLINKS);
713 hfs_privatedir_init(hfsmp, DIR_HARDLINKS);
714
715 if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
716 hfs_remove_orphans(hfsmp);
717
718 /* See if we need to erase unused Catalog nodes due to <rdar://problem/6947811>. */
719 if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
720 {
721 retval = hfs_erase_unused_nodes(hfsmp);
722 if (retval)
723 goto ErrorExit;
724 }
725
726 if ( !(vcb->vcbAtrb & kHFSVolumeHardwareLockMask) ) // if the disk is not write protected
727 {
728 MarkVCBDirty( vcb ); // mark VCB dirty so it will be written
729 }
730
731 /*
732 * Allow hot file clustering if conditions allow.
733 */
734 if ((hfsmp->hfs_flags & HFS_METADATA_ZONE) &&
735 ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) &&
736 ((hfsmp->hfs_mp->mnt_kern_flag & MNTK_SSD) == 0)) {
737 (void) hfs_recording_init(hfsmp);
738 }
739
740 /* Force ACLs on HFS+ file systems. */
741 vfs_setextendedsecurity(HFSTOVFS(hfsmp));
742
743 /* Check if volume supports writing of extent-based extended attributes */
744 hfs_check_volxattr(hfsmp, HFS_SET_XATTREXTENTS_STATE);
745
746 return (0);
747
748 ErrorExit:
749 /*
750 * A fatal error occurred and the volume cannot be mounted, so
751 * release any resources that we acquired...
752 */
753 if (hfsmp->hfs_attribute_vp)
754 ReleaseMetaFileVNode(hfsmp->hfs_attribute_vp);
755 ReleaseMetaFileVNode(hfsmp->hfs_allocation_vp);
756 ReleaseMetaFileVNode(hfsmp->hfs_catalog_vp);
757 ReleaseMetaFileVNode(hfsmp->hfs_extents_vp);
758
759 return (retval);
760 }
761
762
763 /*
764 * ReleaseMetaFileVNode
765 *
766 * vp L - -
767 */
768 static void ReleaseMetaFileVNode(struct vnode *vp)
769 {
770 struct filefork *fp;
771
772 if (vp && (fp = VTOF(vp))) {
773 if (fp->fcbBTCBPtr != NULL) {
774 (void)hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
775 (void) BTClosePath(fp);
776 hfs_unlock(VTOC(vp));
777 }
778
779 /* release the node even if BTClosePath fails */
780 vnode_recycle(vp);
781 vnode_put(vp);
782 }
783 }
784
785
786 /*************************************************************
787 *
788 * Unmounts an HFS volume.
789 * At this point vflush() has been called (to dump all non-metadata files)
790 *
791 *************************************************************/
792
793 __private_extern__
794 int
795 hfsUnmount( register struct hfsmount *hfsmp, __unused struct proc *p)
796 {
797 /* Get rid of our attribute data vnode (if any). */
798 if (hfsmp->hfs_attrdata_vp) {
799 vnode_t advp = hfsmp->hfs_attrdata_vp;
800
801 if (vnode_get(advp) == 0) {
802 vnode_rele_ext(advp, O_EVTONLY, 0);
803 vnode_put(advp);
804 }
805 hfsmp->hfs_attrdata_vp = NULLVP;
806 }
807
808 if (hfsmp->hfs_startup_vp)
809 ReleaseMetaFileVNode(hfsmp->hfs_startup_vp);
810
811 if (hfsmp->hfs_allocation_vp)
812 ReleaseMetaFileVNode(hfsmp->hfs_allocation_vp);
813
814 if (hfsmp->hfs_attribute_vp)
815 ReleaseMetaFileVNode(hfsmp->hfs_attribute_vp);
816
817 ReleaseMetaFileVNode(hfsmp->hfs_catalog_vp);
818 ReleaseMetaFileVNode(hfsmp->hfs_extents_vp);
819
820 /*
821 * Set these pointers to NULL so that any references
822 * past this point will fail and tell us the point of failure.
823 * This also facilitates a check in hfs_update for a null
824 * catalog vnode pointer.
825 */
826 hfsmp->hfs_allocation_vp = NULL;
827 hfsmp->hfs_attribute_vp = NULL;
828 hfsmp->hfs_catalog_vp = NULL;
829 hfsmp->hfs_extents_vp = NULL;
830 hfsmp->hfs_startup_vp = NULL;
831
832 return (0);
833 }
834
835
836 /*
837 * Test if fork has overflow extents.
838 */
839 __private_extern__
840 int
841 overflow_extents(struct filefork *fp)
842 {
843 u_int32_t blocks;
844
845 //
846 // If the vnode pointer is NULL then we're being called
847 // from hfs_remove_orphans() with a faked-up filefork
848 // and therefore it has to be an HFS+ volume. Otherwise
849 // we check through the volume header to see what type
850 // of volume we're on.
851 //
852 if (FTOV(fp) == NULL || VTOVCB(FTOV(fp))->vcbSigWord == kHFSPlusSigWord) {
853 if (fp->ff_extents[7].blockCount == 0)
854 return (0);
855
856 blocks = fp->ff_extents[0].blockCount +
857 fp->ff_extents[1].blockCount +
858 fp->ff_extents[2].blockCount +
859 fp->ff_extents[3].blockCount +
860 fp->ff_extents[4].blockCount +
861 fp->ff_extents[5].blockCount +
862 fp->ff_extents[6].blockCount +
863 fp->ff_extents[7].blockCount;
864 } else {
865 if (fp->ff_extents[2].blockCount == 0)
866 return false;
867
868 blocks = fp->ff_extents[0].blockCount +
869 fp->ff_extents[1].blockCount +
870 fp->ff_extents[2].blockCount;
871 }
872
873 return (fp->ff_blocks > blocks);
874 }
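
/*
 * Editorial note (not in the original source): for an HFS+ fork only the first
 * eight extents live in the catalog record, so when their blockCounts sum to
 * less than ff_blocks the remainder is described by overflow records in the
 * extents b-tree. That is why hfs_systemfile_lock() below also takes the
 * extents b-tree lock whenever a system file reports overflow extents.
 */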
875
876
877 /*
878 * Lock HFS system file(s).
879 */
880 __private_extern__
881 int
882 hfs_systemfile_lock(struct hfsmount *hfsmp, int flags, enum hfslocktype locktype)
883 {
884 /*
885 * Locking order is Catalog file, Attributes file, Startup file, Bitmap file, Extents file
886 */
887 if (flags & SFL_CATALOG) {
888
889 #ifdef HFS_CHECK_LOCK_ORDER
890 if (hfsmp->hfs_attribute_cp && hfsmp->hfs_attribute_cp->c_lockowner == current_thread()) {
891 panic("hfs_systemfile_lock: bad lock order (Attributes before Catalog)");
892 }
893 if (hfsmp->hfs_startup_cp && hfsmp->hfs_startup_cp->c_lockowner == current_thread()) {
894 panic("hfs_systemfile_lock: bad lock order (Startup before Catalog)");
895 }
896 if (hfsmp-> hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) {
897 panic("hfs_systemfile_lock: bad lock order (Extents before Catalog)");
898 }
899 #endif /* HFS_CHECK_LOCK_ORDER */
900
901 (void) hfs_lock(hfsmp->hfs_catalog_cp, locktype);
902 /*
903 * When the catalog file has overflow extents then
904 * also acquire the extents b-tree lock if it's not
905 * already requested.
906 */
907 if ((flags & SFL_EXTENTS) == 0 &&
908 overflow_extents(VTOF(hfsmp->hfs_catalog_vp))) {
909 flags |= SFL_EXTENTS;
910 }
911 }
912 if (flags & SFL_ATTRIBUTE) {
913
914 #ifdef HFS_CHECK_LOCK_ORDER
915 if (hfsmp->hfs_startup_cp && hfsmp->hfs_startup_cp->c_lockowner == current_thread()) {
916 panic("hfs_systemfile_lock: bad lock order (Startup before Attributes)");
917 }
918 if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) {
919 panic("hfs_systemfile_lock: bad lock order (Extents before Attributes)");
920 }
921 #endif /* HFS_CHECK_LOCK_ORDER */
922
923 if (hfsmp->hfs_attribute_cp) {
924 (void) hfs_lock(hfsmp->hfs_attribute_cp, locktype);
925 /*
926 * When the attribute file has overflow extents then
927 * also acquire the extents b-tree lock if it's not
928 * already requested.
929 */
930 if ((flags & SFL_EXTENTS) == 0 &&
931 overflow_extents(VTOF(hfsmp->hfs_attribute_vp))) {
932 flags |= SFL_EXTENTS;
933 }
934 } else {
935 flags &= ~SFL_ATTRIBUTE;
936 }
937 }
938 if (flags & SFL_STARTUP) {
939 #ifdef HFS_CHECK_LOCK_ORDER
940 if (hfsmp-> hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) {
941 panic("hfs_systemfile_lock: bad lock order (Extents before Startup)");
942 }
943 #endif /* HFS_CHECK_LOCK_ORDER */
944
945 (void) hfs_lock(hfsmp->hfs_startup_cp, locktype);
946 /*
947 * When the startup file has overflow extents then
948 * also acquire the extents b-tree lock if it's not
949 * already requested.
950 */
951 if ((flags & SFL_EXTENTS) == 0 &&
952 overflow_extents(VTOF(hfsmp->hfs_startup_vp))) {
953 flags |= SFL_EXTENTS;
954 }
955 }
956 /*
957 * To prevent locks from being taken in the wrong order, the extents lock
958 * gets the bitmap lock as well.
959 */
960 if (flags & (SFL_BITMAP | SFL_EXTENTS)) {
961 /*
962 * Since the only bitmap operations are clearing and
963 * setting bits we always need exclusive access. And
964 * when we have a journal, we can "hide" behind that
965 * lock since we can only change the bitmap from
966 * within a transaction.
967 */
968 if (hfsmp->jnl || (hfsmp->hfs_allocation_cp == NULL)) {
969 flags &= ~SFL_BITMAP;
970 } else {
971 (void) hfs_lock(hfsmp->hfs_allocation_cp, HFS_EXCLUSIVE_LOCK);
972 /* The bitmap lock is also grabbed when only extent lock
973 * was requested. Set the bitmap lock bit in the lock
974 * flags which callers will use during unlock.
975 */
976 flags |= SFL_BITMAP;
977 }
978 }
979 if (flags & SFL_EXTENTS) {
980 /*
981 * Since the extents btree lock is recursive we always
982 * need exclusive access.
983 */
984 (void) hfs_lock(hfsmp->hfs_extents_cp, HFS_EXCLUSIVE_LOCK);
985 }
986 return (flags);
987 }
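
/*
 * Editorial usage sketch (not in the original source): a hypothetical caller
 * that needs the catalog and attribute b-trees takes them in one call and must
 * unlock with the flags value that was returned, because hfs_systemfile_lock()
 * may have quietly added SFL_EXTENTS and/or SFL_BITMAP on its behalf.
 */
#if 0
	int lockflags;

	lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE, HFS_EXCLUSIVE_LOCK);
	/* ... read or update catalog and attribute records here ... */
	hfs_systemfile_unlock(hfsmp, lockflags);
#endif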
988
989 /*
990 * unlock HFS system file(s).
991 */
992 __private_extern__
993 void
994 hfs_systemfile_unlock(struct hfsmount *hfsmp, int flags)
995 {
996 struct timeval tv;
997 u_int32_t lastfsync;
998 int numOfLockedBuffs;
999
1000 if (hfsmp->jnl == NULL) {
1001 microuptime(&tv);
1002 lastfsync = tv.tv_sec;
1003 }
1004 if (flags & SFL_STARTUP && hfsmp->hfs_startup_cp) {
1005 hfs_unlock(hfsmp->hfs_startup_cp);
1006 }
1007 if (flags & SFL_ATTRIBUTE && hfsmp->hfs_attribute_cp) {
1008 if (hfsmp->jnl == NULL) {
1009 BTGetLastSync((FCB*)VTOF(hfsmp->hfs_attribute_vp), &lastfsync);
1010 numOfLockedBuffs = count_lock_queue();
1011 if ((numOfLockedBuffs > kMaxLockedMetaBuffers) ||
1012 ((numOfLockedBuffs > 1) && ((tv.tv_sec - lastfsync) >
1013 kMaxSecsForFsync))) {
1014 hfs_btsync(hfsmp->hfs_attribute_vp, HFS_SYNCTRANS);
1015 }
1016 }
1017 hfs_unlock(hfsmp->hfs_attribute_cp);
1018 }
1019 if (flags & SFL_CATALOG) {
1020 if (hfsmp->jnl == NULL) {
1021 BTGetLastSync((FCB*)VTOF(hfsmp->hfs_catalog_vp), &lastfsync);
1022 numOfLockedBuffs = count_lock_queue();
1023 if ((numOfLockedBuffs > kMaxLockedMetaBuffers) ||
1024 ((numOfLockedBuffs > 1) && ((tv.tv_sec - lastfsync) >
1025 kMaxSecsForFsync))) {
1026 hfs_btsync(hfsmp->hfs_catalog_vp, HFS_SYNCTRANS);
1027 }
1028 }
1029 hfs_unlock(hfsmp->hfs_catalog_cp);
1030 }
1031 if (flags & SFL_BITMAP) {
1032 hfs_unlock(hfsmp->hfs_allocation_cp);
1033 }
1034 if (flags & SFL_EXTENTS) {
1035 if (hfsmp->jnl == NULL) {
1036 BTGetLastSync((FCB*)VTOF(hfsmp->hfs_extents_vp), &lastfsync);
1037 numOfLockedBuffs = count_lock_queue();
1038 if ((numOfLockedBuffs > kMaxLockedMetaBuffers) ||
1039 ((numOfLockedBuffs > 1) && ((tv.tv_sec - lastfsync) >
1040 kMaxSecsForFsync))) {
1041 hfs_btsync(hfsmp->hfs_extents_vp, HFS_SYNCTRANS);
1042 }
1043 }
1044 hfs_unlock(hfsmp->hfs_extents_cp);
1045 }
1046 }
1047
1048
1049 /*
1050 * RequireFileLock
1051 *
1052 * Check to see if a vnode is locked in the current context.
1053 * This is to be used for debugging purposes only!!
1054 */
1055 #if HFS_DIAGNOSTIC
1056 void RequireFileLock(FileReference vp, int shareable)
1057 {
1058 int locked;
1059
1060 /* The extents btree and allocation bitmap are always exclusive. */
1061 if (VTOC(vp)->c_fileid == kHFSExtentsFileID ||
1062 VTOC(vp)->c_fileid == kHFSAllocationFileID) {
1063 shareable = 0;
1064 }
1065
1066 locked = VTOC(vp)->c_lockowner == (void *)current_thread();
1067
1068 if (!locked && !shareable) {
1069 switch (VTOC(vp)->c_fileid) {
1070 case kHFSExtentsFileID:
1071 panic("hfs: extents btree not locked! v: 0x%08X\n #\n", (u_int)vp);
1072 break;
1073 case kHFSCatalogFileID:
1074 panic("hfs: catalog btree not locked! v: 0x%08X\n #\n", (u_int)vp);
1075 break;
1076 case kHFSAllocationFileID:
1077 /* The allocation file can hide behind the journal lock. */
1078 if (VTOHFS(vp)->jnl == NULL)
1079 panic("hfs: allocation file not locked! v: 0x%08X\n #\n", (u_int)vp);
1080 break;
1081 case kHFSStartupFileID:
1082 panic("hfs: startup file not locked! v: 0x%08X\n #\n", (u_int)vp);
1083 case kHFSAttributesFileID:
1084 panic("hfs: attributes btree not locked! v: 0x%08X\n #\n", (u_int)vp);
1085 break;
1086 }
1087 }
1088 }
1089 #endif
1090
1091
1092 /*
1093 * There are three ways to qualify for ownership rights on an object:
1094 *
1095 * 1. (a) Your UID matches the cnode's UID.
1096 * (b) The object in question is owned by "unknown"
1097 * 2. (a) Permissions on the filesystem are being ignored and
1098 * your UID matches the replacement UID.
1099 * (b) Permissions on the filesystem are being ignored and
1100 * the replacement UID is "unknown".
1101 * 3. You are root.
1102 *
1103 */
1104 int
1105 hfs_owner_rights(struct hfsmount *hfsmp, uid_t cnode_uid, kauth_cred_t cred,
1106 __unused struct proc *p, int invokesuperuserstatus)
1107 {
1108 if ((kauth_cred_getuid(cred) == cnode_uid) || /* [1a] */
1109 (cnode_uid == UNKNOWNUID) || /* [1b] */
1110 ((((unsigned int)vfs_flags(HFSTOVFS(hfsmp))) & MNT_UNKNOWNPERMISSIONS) && /* [2] */
1111 ((kauth_cred_getuid(cred) == hfsmp->hfs_uid) || /* [2a] */
1112 (hfsmp->hfs_uid == UNKNOWNUID))) || /* [2b] */
1113 (invokesuperuserstatus && (suser(cred, 0) == 0))) { /* [3] */
1114 return (0);
1115 } else {
1116 return (EPERM);
1117 }
1118 }
1119
1120
1121 u_int32_t BestBlockSizeFit(u_int32_t allocationBlockSize,
1122 u_int32_t blockSizeLimit,
1123 u_int32_t baseMultiple) {
1124 /*
1125 Compute the optimal (largest) block size (no larger than allocationBlockSize) that is less than the
1126 specified limit but still an even multiple of the baseMultiple.
1127 */
1128 int baseBlockCount, blockCount;
1129 u_int32_t trialBlockSize;
1130
1131 if (allocationBlockSize % baseMultiple != 0) {
1132 /*
1133 Whoops: the allocation blocks aren't even multiples of the specified base:
1134 no amount of dividing them into even parts will be a multiple then, either!
1135 */
1136 return 512; /* Hope for the best */
1137 };
1138
1139 /* Try the obvious winner first, to prevent 12K allocation blocks, for instance,
1140 from being handled as two 6K logical blocks instead of three 4K logical blocks.
1141 Even though the former (the result of the loop below) is the larger logical
1142 block size, the latter is more efficient: */
1143 if (allocationBlockSize % PAGE_SIZE == 0) return PAGE_SIZE;
1144
1145 /* No clear winner exists: pick the largest even fraction <= MAXBSIZE: */
1146 baseBlockCount = allocationBlockSize / baseMultiple; /* Now guaranteed to be an even multiple */
1147
1148 for (blockCount = baseBlockCount; blockCount > 0; --blockCount) {
1149 trialBlockSize = blockCount * baseMultiple;
1150 if (allocationBlockSize % trialBlockSize == 0) { /* An even multiple? */
1151 if ((trialBlockSize <= blockSizeLimit) &&
1152 (trialBlockSize % baseMultiple == 0)) {
1153 return trialBlockSize;
1154 };
1155 };
1156 };
1157
1158 /* Note: we should never get here, since blockCount = 1 should always work,
1159 but this is nice and safe and makes the compiler happy, too ... */
1160 return 512;
1161 }
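
/*
 * Editorial worked example (assuming a 4K PAGE_SIZE and a 512-byte baseMultiple):
 * for a 12K allocation block (allocationBlockSize = 12288) the PAGE_SIZE shortcut
 * above fires because 12288 % 4096 == 0, so the routine returns 4096 and each
 * allocation block is handled as three 4K logical blocks.
 */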
1162
1163
1164 __private_extern__
1165 u_int32_t
1166 GetFileInfo(ExtendedVCB *vcb, __unused u_int32_t dirid, const char *name,
1167 struct cat_attr *fattr, struct cat_fork *forkinfo)
1168 {
1169 struct hfsmount * hfsmp;
1170 struct cat_desc jdesc;
1171 int lockflags;
1172 int error;
1173
1174 if (vcb->vcbSigWord != kHFSPlusSigWord)
1175 return (0);
1176
1177 hfsmp = VCBTOHFS(vcb);
1178
1179 memset(&jdesc, 0, sizeof(struct cat_desc));
1180 jdesc.cd_parentcnid = kRootDirID;
1181 jdesc.cd_nameptr = (const u_int8_t *)name;
1182 jdesc.cd_namelen = strlen(name);
1183
1184 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
1185 error = cat_lookup(hfsmp, &jdesc, 0, NULL, fattr, forkinfo, NULL);
1186 hfs_systemfile_unlock(hfsmp, lockflags);
1187
1188 if (error == 0) {
1189 return (fattr->ca_fileid);
1190 } else if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1191 return (0);
1192 }
1193
1194 return (0); /* XXX what callers expect on an error */
1195 }
1196
1197
1198 /*
1199 * On HFS Plus volumes, there can be orphaned files or directories.
1200 * These are files or directories that were unlinked while busy.
1201 * If the volume was not cleanly unmounted then some of these may
1202 * have persisted and need to be removed.
1203 */
1204 __private_extern__
1205 void
1206 hfs_remove_orphans(struct hfsmount * hfsmp)
1207 {
1208 struct BTreeIterator * iterator = NULL;
1209 struct FSBufferDescriptor btdata;
1210 struct HFSPlusCatalogFile filerec;
1211 struct HFSPlusCatalogKey * keyp;
1212 struct proc *p = current_proc();
1213 FCB *fcb;
1214 ExtendedVCB *vcb;
1215 char filename[32];
1216 char tempname[32];
1217 size_t namelen;
1218 cat_cookie_t cookie;
1219 int catlock = 0;
1220 int catreserve = 0;
1221 int started_tr = 0;
1222 int lockflags;
1223 int result;
1224 int orphaned_files = 0;
1225 int orphaned_dirs = 0;
1226
1227 bzero(&cookie, sizeof(cookie));
1228
1229 if (hfsmp->hfs_flags & HFS_CLEANED_ORPHANS)
1230 return;
1231
1232 vcb = HFSTOVCB(hfsmp);
1233 fcb = VTOF(hfsmp->hfs_catalog_vp);
1234
1235 btdata.bufferAddress = &filerec;
1236 btdata.itemSize = sizeof(filerec);
1237 btdata.itemCount = 1;
1238
1239 MALLOC(iterator, struct BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK);
1240 bzero(iterator, sizeof(*iterator));
1241
1242 /* Build a key to "temp" */
1243 keyp = (HFSPlusCatalogKey*)&iterator->key;
1244 keyp->parentID = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid;
1245 keyp->nodeName.length = 4; /* "temp" */
1246 keyp->keyLength = kHFSPlusCatalogKeyMinimumLength + keyp->nodeName.length * 2;
1247 keyp->nodeName.unicode[0] = 't';
1248 keyp->nodeName.unicode[1] = 'e';
1249 keyp->nodeName.unicode[2] = 'm';
1250 keyp->nodeName.unicode[3] = 'p';
1251
1252 /*
1253 * Position the iterator just before the first real temp file/dir.
1254 */
1255 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
1256 (void) BTSearchRecord(fcb, iterator, NULL, NULL, iterator);
1257 hfs_systemfile_unlock(hfsmp, lockflags);
1258
1259 /* Visit all the temp files/dirs in the HFS+ private directory. */
1260 for (;;) {
1261 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
1262 result = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL);
1263 hfs_systemfile_unlock(hfsmp, lockflags);
1264 if (result)
1265 break;
1266 if (keyp->parentID != hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid)
1267 break;
1268
1269 (void) utf8_encodestr(keyp->nodeName.unicode, keyp->nodeName.length * 2,
1270 (u_int8_t *)filename, &namelen, sizeof(filename), 0, 0);
1271
1272 (void) snprintf(tempname, sizeof(tempname), "%s%d",
1273 HFS_DELETE_PREFIX, filerec.fileID);
1274
1275 /*
1276 * Delete all files (and directories) named "tempxxx",
1277 * where xxx is the file's cnid in decimal.
1278 *
1279 */
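		/* Editorial example: an orphan with cnid 31337 would be named "temp31337" here. */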
1280 if (bcmp(tempname, filename, namelen) == 0) {
1281 struct filefork dfork;
1282 struct filefork rfork;
1283 struct cnode cnode;
1284
1285 bzero(&dfork, sizeof(dfork));
1286 bzero(&rfork, sizeof(rfork));
1287 bzero(&cnode, sizeof(cnode));
1288
1289 /* Delete any attributes, ignore errors */
1290 (void) hfs_removeallattr(hfsmp, filerec.fileID);
1291
1292 if (hfs_start_transaction(hfsmp) != 0) {
1293 printf("hfs_remove_orphans: failed to start transaction\n");
1294 goto exit;
1295 }
1296 started_tr = 1;
1297
1298 /*
1299 * Reserve some space in the Catalog file.
1300 */
1301 if (cat_preflight(hfsmp, CAT_DELETE, &cookie, p) != 0) {
1302 printf("hfs_remove_orphans: cat_preflight failed\n");
1303 goto exit;
1304 }
1305 catreserve = 1;
1306
1307 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
1308 catlock = 1;
1309
1310 /* Build a fake cnode */
1311 cat_convertattr(hfsmp, (CatalogRecord *)&filerec, &cnode.c_attr,
1312 &dfork.ff_data, &rfork.ff_data);
1313 cnode.c_desc.cd_parentcnid = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid;
1314 cnode.c_desc.cd_nameptr = (const u_int8_t *)filename;
1315 cnode.c_desc.cd_namelen = namelen;
1316 cnode.c_desc.cd_cnid = cnode.c_attr.ca_fileid;
1317 cnode.c_blocks = dfork.ff_blocks + rfork.ff_blocks;
1318
1319 /* Position iterator at previous entry */
1320 if (BTIterateRecord(fcb, kBTreePrevRecord, iterator,
1321 NULL, NULL) != 0) {
1322 break;
1323 }
1324
1325 /* Truncate the file to zero (both forks) */
1326 if (dfork.ff_blocks > 0) {
1327 u_int64_t fsize;
1328
1329 dfork.ff_cp = &cnode;
1330 cnode.c_datafork = &dfork;
1331 cnode.c_rsrcfork = NULL;
1332 fsize = (u_int64_t)dfork.ff_blocks * (u_int64_t)HFSTOVCB(hfsmp)->blockSize;
1333 while (fsize > 0) {
1334 if (fsize > HFS_BIGFILE_SIZE && overflow_extents(&dfork)) {
1335 fsize -= HFS_BIGFILE_SIZE;
1336 } else {
1337 fsize = 0;
1338 }
1339
1340 if (TruncateFileC(vcb, (FCB*)&dfork, fsize, false) != 0) {
1341 printf("hfs: error truncating data fork!\n");
1342 break;
1343 }
1344
1345 //
1346 // if we're iteratively truncating this file down,
1347 // then end the transaction and start a new one so
1348 // that no one transaction gets too big.
1349 //
1350 if (fsize > 0 && started_tr) {
1351 /* Drop system file locks before starting
1352 * another transaction to preserve lock order.
1353 */
1354 hfs_systemfile_unlock(hfsmp, lockflags);
1355 catlock = 0;
1356 hfs_end_transaction(hfsmp);
1357
1358 if (hfs_start_transaction(hfsmp) != 0) {
1359 started_tr = 0;
1360 break;
1361 }
1362 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
1363 catlock = 1;
1364 }
1365 }
1366 }
1367
1368 if (rfork.ff_blocks > 0) {
1369 rfork.ff_cp = &cnode;
1370 cnode.c_datafork = NULL;
1371 cnode.c_rsrcfork = &rfork;
1372 if (TruncateFileC(vcb, (FCB*)&rfork, 0, false) != 0) {
1373 printf("hfs: error truncating rsrc fork!\n");
1374 break;
1375 }
1376 }
1377
1378 /* Remove the file or folder record from the Catalog */
1379 if (cat_delete(hfsmp, &cnode.c_desc, &cnode.c_attr) != 0) {
1380 printf("hfs_remove_orphans: error deleting cat rec for id %d!\n", cnode.c_desc.cd_cnid);
1381 hfs_systemfile_unlock(hfsmp, lockflags);
1382 catlock = 0;
1383 hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1384 break;
1385 }
1386
1387 if (cnode.c_attr.ca_mode & S_IFDIR) {
1388 orphaned_dirs++;
1389 }
1390 else {
1391 orphaned_files++;
1392 }
1393
1394 /* Update parent and volume counts */
1395 hfsmp->hfs_private_attr[FILE_HARDLINKS].ca_entries--;
1396 if (cnode.c_attr.ca_mode & S_IFDIR) {
1397 DEC_FOLDERCOUNT(hfsmp, hfsmp->hfs_private_attr[FILE_HARDLINKS]);
1398 }
1399
1400 (void)cat_update(hfsmp, &hfsmp->hfs_private_desc[FILE_HARDLINKS],
1401 &hfsmp->hfs_private_attr[FILE_HARDLINKS], NULL, NULL);
1402
1403 /* Drop locks and end the transaction */
1404 hfs_systemfile_unlock(hfsmp, lockflags);
1405 cat_postflight(hfsmp, &cookie, p);
1406 catlock = catreserve = 0;
1407
1408 /*
1409 Now that Catalog is unlocked, update the volume info, making
1410 sure to differentiate between files and directories.
1411 */
1412 if (cnode.c_attr.ca_mode & S_IFDIR) {
1413 hfs_volupdate(hfsmp, VOL_RMDIR, 0);
1414 }
1415 else{
1416 hfs_volupdate(hfsmp, VOL_RMFILE, 0);
1417 }
1418
1419 if (started_tr) {
1420 hfs_end_transaction(hfsmp);
1421 started_tr = 0;
1422 }
1423
1424 } /* end if */
1425 } /* end for */
1426 if (orphaned_files > 0 || orphaned_dirs > 0)
1427 printf("hfs: Removed %d orphaned / unlinked files and %d directories \n", orphaned_files, orphaned_dirs);
1428 exit:
1429 if (catlock) {
1430 hfs_systemfile_unlock(hfsmp, lockflags);
1431 }
1432 if (catreserve) {
1433 cat_postflight(hfsmp, &cookie, p);
1434 }
1435 if (started_tr) {
1436 hfs_end_transaction(hfsmp);
1437 }
1438
1439 FREE(iterator, M_TEMP);
1440 hfsmp->hfs_flags |= HFS_CLEANED_ORPHANS;
1441 }
1442
1443
1444 /*
1445 * This will return the correct logical block size for a given vnode.
1446 * For most files it is the allocation block size; for metadata like
1447 * BTrees, it is kept as part of the BTree's private nodeSize.
1448 */
1449 u_int32_t
1450 GetLogicalBlockSize(struct vnode *vp)
1451 {
1452 u_int32_t logBlockSize;
1453
1454 DBG_ASSERT(vp != NULL);
1455
1456 /* start with default */
1457 logBlockSize = VTOHFS(vp)->hfs_logBlockSize;
1458
1459 if (vnode_issystem(vp)) {
1460 if (VTOF(vp)->fcbBTCBPtr != NULL) {
1461 BTreeInfoRec bTreeInfo;
1462
1463 /*
1464 * We do not lock the BTrees, because if we are getting a block then the tree
1465 * should be locked in the first place.
1466 * We just want the nodeSize, which will NEVER change, so even if the world
1467 * is changing, the nodeSize should remain the same (which argues against
1468 * needing the lock in the first place).
1469 */
1470
1471 (void) BTGetInformation (VTOF(vp), kBTreeInfoVersion, &bTreeInfo);
1472
1473 logBlockSize = bTreeInfo.nodeSize;
1474
1475 } else if (VTOC(vp)->c_fileid == kHFSAllocationFileID) {
1476 logBlockSize = VTOVCB(vp)->vcbVBMIOSize;
1477 }
1478 }
1479
1480 DBG_ASSERT(logBlockSize > 0);
1481
1482 return logBlockSize;
1483 }
1484
1485 __private_extern__
1486 u_int32_t
1487 hfs_freeblks(struct hfsmount * hfsmp, int wantreserve)
1488 {
1489 u_int32_t freeblks;
1490 u_int32_t rsrvblks;
1491 u_int32_t loanblks;
1492
1493 /*
1494 * We don't bother taking the mount lock
1495 * to look at these values since the values
1496 * themselves are each updated atomically
1497 * on aligned addresses.
1498 */
1499 freeblks = hfsmp->freeBlocks;
1500 rsrvblks = hfsmp->reserveBlocks;
1501 loanblks = hfsmp->loanedBlocks;
1502 if (wantreserve) {
1503 if (freeblks > rsrvblks)
1504 freeblks -= rsrvblks;
1505 else
1506 freeblks = 0;
1507 }
1508 if (freeblks > loanblks)
1509 freeblks -= loanblks;
1510 else
1511 freeblks = 0;
1512
1513 #ifdef HFS_SPARSE_DEV
1514 /*
1515 * When the underlying device is sparse, check the
1516 * available space on the backing store volume.
1517 */
1518 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) && hfsmp->hfs_backingfs_rootvp) {
1519 struct vfsstatfs *vfsp; /* 272 bytes */
1520 u_int64_t vfreeblks;
1521 u_int32_t loanedblks;
1522 struct mount * backingfs_mp;
1523 struct timeval now;
1524
1525 backingfs_mp = vnode_mount(hfsmp->hfs_backingfs_rootvp);
1526
1527 microtime(&now);
1528 if ((now.tv_sec - hfsmp->hfs_last_backingstatfs) >= 1) {
1529 vfs_update_vfsstat(backingfs_mp, vfs_context_kernel(), VFS_KERNEL_EVENT);
1530 hfsmp->hfs_last_backingstatfs = now.tv_sec;
1531 }
1532
1533 if ((vfsp = vfs_statfs(backingfs_mp))) {
1534 HFS_MOUNT_LOCK(hfsmp, TRUE);
1535 vfreeblks = vfsp->f_bavail;
1536 /* Normalize block count if needed. */
1537 if (vfsp->f_bsize != hfsmp->blockSize) {
1538 vfreeblks = ((u_int64_t)vfreeblks * (u_int64_t)(vfsp->f_bsize)) / hfsmp->blockSize;
1539 }
1540 if (vfreeblks > (unsigned int)hfsmp->hfs_sparsebandblks)
1541 vfreeblks -= hfsmp->hfs_sparsebandblks;
1542 else
1543 vfreeblks = 0;
1544
1545 /* Take into account any delayed allocations. */
1546 loanedblks = 2 * hfsmp->loanedBlocks;
1547 if (vfreeblks > loanedblks)
1548 vfreeblks -= loanedblks;
1549 else
1550 vfreeblks = 0;
1551
1552 if (hfsmp->hfs_backingfs_maxblocks) {
1553 vfreeblks = MIN(vfreeblks, hfsmp->hfs_backingfs_maxblocks);
1554 }
1555 freeblks = MIN(vfreeblks, freeblks);
1556 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1557 }
1558 }
1559 #endif /* HFS_SPARSE_DEV */
1560
1561 return (freeblks);
1562 }
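
/*
 * Editorial worked example for the f_bsize normalization in hfs_freeblks()
 * above: a backing store reporting 1000 free 4096-byte blocks, backing an HFS+
 * volume with 8192-byte allocation blocks, contributes 1000 * 4096 / 8192 = 500
 * free blocks to this calculation.
 */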
1563
1564 /*
1565 * Map HFS Common errors (negative) to BSD error codes (positive).
1566 * Positive errors (i.e. BSD errors) are passed through unchanged.
1567 */
1568 short MacToVFSError(OSErr err)
1569 {
1570 if (err >= 0)
1571 return err;
1572
1573 switch (err) {
1574 case dskFulErr: /* -34 */
1575 case btNoSpaceAvail: /* -32733 */
1576 return ENOSPC;
1577 case fxOvFlErr: /* -32750 */
1578 return EOVERFLOW;
1579
1580 case btBadNode: /* -32731 */
1581 return EIO;
1582
1583 case memFullErr: /* -108 */
1584 return ENOMEM; /* +12 */
1585
1586 case cmExists: /* -32718 */
1587 case btExists: /* -32734 */
1588 return EEXIST; /* +17 */
1589
1590 case cmNotFound: /* -32719 */
1591 case btNotFound: /* -32735 */
1592 return ENOENT; /* +2 */
1593
1594 case cmNotEmpty: /* -32717 */
1595 return ENOTEMPTY; /* 66 */
1596
1597 case cmFThdDirErr: /* -32714 */
1598 return EISDIR; /* 21 */
1599
1600 case fxRangeErr: /* -32751 */
1601 return ERANGE;
1602
1603 case bdNamErr: /* -37 */
1604 return ENAMETOOLONG; /* 63 */
1605
1606 case paramErr: /* -50 */
1607 case fileBoundsErr: /* -1309 */
1608 return EINVAL; /* +22 */
1609
1610 case fsBTBadNodeSize:
1611 return ENXIO;
1612
1613 default:
1614 return EIO; /* +5 */
1615 }
1616 }
1617
1618
1619 /*
1620 * Find the current thread's directory hint for a given index.
1621 *
1622 * Requires an exclusive lock on directory cnode.
1623 *
1624 * Use detach if the cnode lock must be dropped while the hint is still active.
1625 */
1626 __private_extern__
1627 directoryhint_t *
1628 hfs_getdirhint(struct cnode *dcp, int index, int detach)
1629 {
1630 struct timeval tv;
1631 directoryhint_t *hint;
1632 boolean_t need_remove, need_init;
1633 const u_int8_t * name;
1634
1635 microuptime(&tv);
1636
1637 /*
1638 * Look for an existing hint first. If not found, create a new one (when
1639 * the list is not full) or recycle the oldest hint. Since new hints are
1640 * always added to the head of the list, the last hint is always the
1641 * oldest.
1642 */
1643 TAILQ_FOREACH(hint, &dcp->c_hintlist, dh_link) {
1644 if (hint->dh_index == index)
1645 break;
1646 }
1647 if (hint != NULL) { /* found an existing hint */
1648 need_init = false;
1649 need_remove = true;
1650 } else { /* cannot find an existing hint */
1651 need_init = true;
1652 if (dcp->c_dirhintcnt < HFS_MAXDIRHINTS) { /* we don't need recycling */
1653 /* Create a default directory hint */
1654 MALLOC_ZONE(hint, directoryhint_t *, sizeof(directoryhint_t), M_HFSDIRHINT, M_WAITOK);
1655 ++dcp->c_dirhintcnt;
1656 need_remove = false;
1657 } else { /* recycle the last (i.e., the oldest) hint */
1658 hint = TAILQ_LAST(&dcp->c_hintlist, hfs_hinthead);
1659 if ((hint->dh_desc.cd_flags & CD_HASBUF) &&
1660 (name = hint->dh_desc.cd_nameptr)) {
1661 hint->dh_desc.cd_nameptr = NULL;
1662 hint->dh_desc.cd_namelen = 0;
1663 hint->dh_desc.cd_flags &= ~CD_HASBUF;
1664 vfs_removename((const char *)name);
1665 }
1666 need_remove = true;
1667 }
1668 }
1669
1670 if (need_remove)
1671 TAILQ_REMOVE(&dcp->c_hintlist, hint, dh_link);
1672
1673 if (detach)
1674 --dcp->c_dirhintcnt;
1675 else
1676 TAILQ_INSERT_HEAD(&dcp->c_hintlist, hint, dh_link);
1677
1678 if (need_init) {
1679 hint->dh_index = index;
1680 hint->dh_desc.cd_flags = 0;
1681 hint->dh_desc.cd_encoding = 0;
1682 hint->dh_desc.cd_namelen = 0;
1683 hint->dh_desc.cd_nameptr = NULL;
1684 hint->dh_desc.cd_parentcnid = dcp->c_fileid;
1685 hint->dh_desc.cd_hint = dcp->c_childhint;
1686 hint->dh_desc.cd_cnid = 0;
1687 }
1688 hint->dh_time = tv.tv_sec;
1689 return (hint);
1690 }
1691
1692 /*
1693 * Release a single directory hint.
1694 *
1695 * Requires an exclusive lock on directory cnode.
1696 */
1697 __private_extern__
1698 void
1699 hfs_reldirhint(struct cnode *dcp, directoryhint_t * relhint)
1700 {
1701 const u_int8_t * name;
1702 directoryhint_t *hint;
1703
1704 /* Check if item is on list (could be detached) */
1705 TAILQ_FOREACH(hint, &dcp->c_hintlist, dh_link) {
1706 if (hint == relhint) {
1707 TAILQ_REMOVE(&dcp->c_hintlist, relhint, dh_link);
1708 --dcp->c_dirhintcnt;
1709 break;
1710 }
1711 }
1712 name = relhint->dh_desc.cd_nameptr;
1713 if ((relhint->dh_desc.cd_flags & CD_HASBUF) && (name != NULL)) {
1714 relhint->dh_desc.cd_nameptr = NULL;
1715 relhint->dh_desc.cd_namelen = 0;
1716 relhint->dh_desc.cd_flags &= ~CD_HASBUF;
1717 vfs_removename((const char *)name);
1718 }
1719 FREE_ZONE(relhint, sizeof(directoryhint_t), M_HFSDIRHINT);
1720 }
1721
1722 /*
1723 * Release directory hints for given directory
1724 *
1725 * Requires an exclusive lock on directory cnode.
1726 */
1727 __private_extern__
1728 void
1729 hfs_reldirhints(struct cnode *dcp, int stale_hints_only)
1730 {
1731 struct timeval tv;
1732 directoryhint_t *hint, *prev;
1733 const u_int8_t * name;
1734
1735 if (stale_hints_only)
1736 microuptime(&tv);
1737
1738 /* searching from the oldest to the newest, so we can stop early when releasing stale hints only */
1739 for (hint = TAILQ_LAST(&dcp->c_hintlist, hfs_hinthead); hint != NULL; hint = prev) {
1740 if (stale_hints_only && (tv.tv_sec - hint->dh_time) < HFS_DIRHINT_TTL)
1741 break; /* stop here if this entry is too new */
1742 name = hint->dh_desc.cd_nameptr;
1743 if ((hint->dh_desc.cd_flags & CD_HASBUF) && (name != NULL)) {
1744 hint->dh_desc.cd_nameptr = NULL;
1745 hint->dh_desc.cd_namelen = 0;
1746 hint->dh_desc.cd_flags &= ~CD_HASBUF;
1747 vfs_removename((const char *)name);
1748 }
1749 prev = TAILQ_PREV(hint, hfs_hinthead, dh_link); /* must save this pointer before calling FREE_ZONE on this node */
1750 TAILQ_REMOVE(&dcp->c_hintlist, hint, dh_link);
1751 FREE_ZONE(hint, sizeof(directoryhint_t), M_HFSDIRHINT);
1752 --dcp->c_dirhintcnt;
1753 }
1754 }
1755
1756 /*
1757 * Insert a detached directory hint back into the list of dirhints.
1758 *
1759 * Requires an exclusive lock on directory cnode.
1760 */
1761 __private_extern__
1762 void
1763 hfs_insertdirhint(struct cnode *dcp, directoryhint_t * hint)
1764 {
1765 directoryhint_t *test;
1766
1767 TAILQ_FOREACH(test, &dcp->c_hintlist, dh_link) {
1768 if (test == hint)
1769 panic("hfs_insertdirhint: hint %p already on list!", hint);
1770 }
1771
1772 TAILQ_INSERT_HEAD(&dcp->c_hintlist, hint, dh_link);
1773 ++dcp->c_dirhintcnt;
1774 }
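/*
 * Typical lifecycle of a directory hint, as suggested by the routines
 * above (a sketch, not taken from any particular caller):
 *
 *	hfs_lock(dcp, HFS_EXCLUSIVE_LOCK);
 *	hint = hfs_getdirhint(dcp, index, 0);	// find or create a hint
 *	... use hint->dh_desc to resume the catalog scan ...
 *	hfs_unlock(dcp);			// hint stays on dcp's list
 *
 * If the cnode lock must be dropped while the hint is still in use, pass a
 * non-zero 'detach' to hfs_getdirhint() and later either re-attach the hint
 * with hfs_insertdirhint() or free it with hfs_reldirhint().
 */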
1775
1776 /*
1777 * Perform a case-insensitive compare of two UTF-8 filenames.
1778 *
1779 * Returns 0 if the strings match.
1780 */
1781 __private_extern__
1782 int
1783 hfs_namecmp(const u_int8_t *str1, size_t len1, const u_int8_t *str2, size_t len2)
1784 {
1785 u_int16_t *ustr1, *ustr2;
1786 size_t ulen1, ulen2;
1787 size_t maxbytes;
1788 int cmp = -1;
1789
1790 if (len1 != len2)
1791 return (cmp);
1792
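/*
 * One allocation backs both decoded strings: ustr1 uses the first
 * maxbytes bytes and ustr2 the second half (it starts maxbytes >> 1
 * u_int16_t elements, i.e. maxbytes bytes, into the buffer).
 */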
1793 maxbytes = kHFSPlusMaxFileNameChars << 1;
1794 MALLOC(ustr1, u_int16_t *, maxbytes << 1, M_TEMP, M_WAITOK);
1795 ustr2 = ustr1 + (maxbytes >> 1);
1796
1797 if (utf8_decodestr(str1, len1, ustr1, &ulen1, maxbytes, ':', 0) != 0)
1798 goto out;
1799 if (utf8_decodestr(str2, len2, ustr2, &ulen2, maxbytes, ':', 0) != 0)
1800 goto out;
1801
1802 cmp = FastUnicodeCompare(ustr1, ulen1>>1, ustr2, ulen2>>1);
1803 out:
1804 FREE(ustr1, M_TEMP);
1805 return (cmp);
1806 }
1807
1808
1809 typedef struct jopen_cb_info {
1810 off_t jsize;
1811 char *desired_uuid;
1812 struct vnode *jvp;
1813 size_t blksize;
1814 int need_clean;
1815 int need_init;
1816 } jopen_cb_info;
1817
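/*
 * Callback handed to IOBSDIterateMediaWithContent() by open_journal_dev().
 * Returns 1 to keep iterating over candidate external journal partitions,
 * or 0 to stop once a device has been opened read/write and accepted.
 */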
1818 static int
1819 journal_open_cb(const char *bsd_dev_name, const char *uuid_str, void *arg)
1820 {
1821 struct nameidata nd;
1822 jopen_cb_info *ji = (jopen_cb_info *)arg;
1823 char bsd_name[256];
1824 int error;
1825
1826 strlcpy(&bsd_name[0], "/dev/", sizeof(bsd_name));
1827 strlcpy(&bsd_name[5], bsd_dev_name, sizeof(bsd_name)-5);
1828
1829 if (ji->desired_uuid && ji->desired_uuid[0] && strcmp(uuid_str, ji->desired_uuid) != 0) {
1830 return 1; // keep iterating
1831 }
1832
1833 // if we're here, either the desired uuid matched or there was no
1834 // desired uuid so let's try to open the device for writing and
1835 // see if it works. if it does, we'll use it.
1836
1837 NDINIT(&nd, LOOKUP, LOCKLEAF, UIO_SYSSPACE32, CAST_USER_ADDR_T(bsd_name), vfs_context_kernel());
1838 if ((error = namei(&nd))) {
1839 printf("hfs: journal open cb: error %d looking up device %s (dev uuid %s)\n", error, bsd_name, uuid_str);
1840 return 1; // keep iterating
1841 }
1842
1843 ji->jvp = nd.ni_vp;
1844 nameidone(&nd);
1845
1846 if (ji->jvp == NULL) {
1847 printf("hfs: journal open cb: did not find %s (error %d)\n", bsd_name, error);
1848 } else {
1849 error = VNOP_OPEN(ji->jvp, FREAD|FWRITE, vfs_context_kernel());
1850 if (error == 0) {
1851 // if the journal is dirty and we didn't specify a desired
1852 // journal device uuid, then do not use the journal. but
1853 // if the journal is just invalid (e.g. it hasn't been
1854 // initialized) then just set the need_init flag.
1855 if (ji->need_clean && ji->desired_uuid && ji->desired_uuid[0] == '\0') {
1856 error = journal_is_clean(ji->jvp, 0, ji->jsize, (void *)1, ji->blksize);
1857 if (error == EBUSY) {
1858 VNOP_CLOSE(ji->jvp, FREAD|FWRITE, vfs_context_kernel());
1859 vnode_put(ji->jvp);
1860 ji->jvp = NULL;
1861 return 1; // keep iterating
1862 } else if (error == EINVAL) {
1863 ji->need_init = 1;
1864 }
1865 }
1866
1867 if (ji->desired_uuid && ji->desired_uuid[0] == '\0') {
1868 strlcpy(ji->desired_uuid, uuid_str, 128);
1869 }
1870 vnode_setmountedon(ji->jvp);
1871 // printf("hfs: journal open cb: got device %s (%s)\n", bsd_name, uuid_str);
1872 return 0; // stop iterating
1873 } else {
1874 vnode_put(ji->jvp);
1875 ji->jvp = NULL;
1876 }
1877 }
1878
1879 return 1; // keep iterating
1880 }
1881
1882 extern dev_t IOBSDGetMediaWithUUID(const char *uuid_cstring, char *bsd_name, int bsd_name_len, int timeout);
1883 extern void IOBSDIterateMediaWithContent(const char *uuid_cstring, int (*func)(const char *bsd_dev_name, const char *uuid_str, void *arg), void *arg);
1884 extern kern_return_t IOBSDGetPlatformUUID(__darwin_uuid_t uuid, mach_timespec_t timeoutp);
1885 kern_return_t IOBSDGetPlatformSerialNumber(char *serial_number_str, u_int32_t len);
1886
1887
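/*
 * Locate and open the external journal device.  Iterates media whose
 * content type is EXTJNL_CONTENT_TYPE_UUID, making up to four passes with
 * a ten second pause between retries, and returns the opened vnode (or
 * NULL).  *need_init is set if the journal still needs to be initialized.
 */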
1888 static vnode_t
1889 open_journal_dev(const char *vol_device,
1890 int need_clean,
1891 char *uuid_str,
1892 char *machine_serial_num,
1893 off_t jsize,
1894 size_t blksize,
1895 int *need_init)
1896 {
1897 int retry_counter=0;
1898 jopen_cb_info ji;
1899
1900 ji.jsize = jsize;
1901 ji.desired_uuid = uuid_str;
1902 ji.jvp = NULL;
1903 ji.blksize = blksize;
1904 ji.need_clean = need_clean;
1905 ji.need_init = 0;
1906
1907 // if (uuid_str[0] == '\0') {
1908 // printf("hfs: open journal dev: %s: locating any available non-dirty external journal partition\n", vol_device);
1909 // } else {
1910 // printf("hfs: open journal dev: %s: trying to find the external journal partition w/uuid %s\n", vol_device, uuid_str);
1911 // }
1912 while (ji.jvp == NULL && retry_counter++ < 4) {
1913 if (retry_counter > 1) {
1914 if (uuid_str[0]) {
1915 printf("hfs: open_journal_dev: uuid %s not found. waiting 10sec.\n", uuid_str);
1916 } else {
1917 printf("hfs: open_journal_dev: no available external journal partition found. waiting 10sec.\n");
1918 }
1919 delay_for_interval(10* 1000000, NSEC_PER_USEC); // wait for ten seconds and then try again
1920 }
1921
1922 IOBSDIterateMediaWithContent(EXTJNL_CONTENT_TYPE_UUID, journal_open_cb, &ji);
1923 }
1924
1925 if (ji.jvp == NULL) {
1926 printf("hfs: volume: %s: did not find jnl device uuid: %s from machine serial number: %s\n",
1927 vol_device, uuid_str, machine_serial_num);
1928 }
1929
1930 *need_init = ji.need_init;
1931
1932 return ji.jvp;
1933 }
1934
1935
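/*
 * Set up the journal at mount time, before the catalog and other metadata
 * files are available.  Reads the JournalInfoBlock referenced by the volume
 * header, opens (or creates and initializes) the journal on the local or
 * external journal device, and, if a replay occurred, re-reads the MDB into
 * *mdbp so the caller sees the replayed contents.  A dirty journal on a
 * read-only, non-root mount causes the mount to be refused.
 */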
1936 __private_extern__
1937 int
1938 hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
1939 void *_args, off_t embeddedOffset, daddr64_t mdb_offset,
1940 HFSMasterDirectoryBlock *mdbp, kauth_cred_t cred)
1941 {
1942 JournalInfoBlock *jibp;
1943 struct buf *jinfo_bp, *bp;
1944 int sectors_per_fsblock, arg_flags=0, arg_tbufsz=0;
1945 int retval, write_jibp = 0;
1946 uint32_t blksize = hfsmp->hfs_logical_block_size;
1947 struct vnode *devvp;
1948 struct hfs_mount_args *args = _args;
1949 u_int32_t jib_flags;
1950 u_int64_t jib_offset;
1951 u_int64_t jib_size;
1952 const char *dev_name;
1953
1954 devvp = hfsmp->hfs_devvp;
1955 dev_name = vnode_name(devvp);
1956 if (dev_name == NULL) {
1957 dev_name = "unknown-dev";
1958 }
1959
1960 if (args != NULL && (args->flags & HFSFSMNT_EXTENDED_ARGS)) {
1961 arg_flags = args->journal_flags;
1962 arg_tbufsz = args->journal_tbuffer_size;
1963 }
1964
1965 sectors_per_fsblock = SWAP_BE32(vhp->blockSize) / blksize;
1966
1967 jinfo_bp = NULL;
1968 retval = (int)buf_meta_bread(devvp,
1969 (daddr64_t)((embeddedOffset/blksize) +
1970 ((u_int64_t)SWAP_BE32(vhp->journalInfoBlock)*sectors_per_fsblock)),
1971 hfsmp->hfs_physical_block_size, cred, &jinfo_bp);
1972 if (retval) {
1973 if (jinfo_bp) {
1974 buf_brelse(jinfo_bp);
1975 }
1976 return retval;
1977 }
1978
1979 jibp = (JournalInfoBlock *)buf_dataptr(jinfo_bp);
1980 jib_flags = SWAP_BE32(jibp->flags);
1981 jib_size = SWAP_BE64(jibp->size);
1982
1983 if (jib_flags & kJIJournalInFSMask) {
1984 hfsmp->jvp = hfsmp->hfs_devvp;
1985 jib_offset = SWAP_BE64(jibp->offset);
1986 } else {
1987 int need_init=0;
1988
1989 // if the volume was unmounted cleanly then we'll pick any
1990 // available external journal partition
1991 //
1992 if (SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) {
1993 *((char *)&jibp->ext_jnl_uuid[0]) = '\0';
1994 }
1995
1996 hfsmp->jvp = open_journal_dev(dev_name,
1997 !(jib_flags & kJIJournalNeedInitMask),
1998 (char *)&jibp->ext_jnl_uuid[0],
1999 (char *)&jibp->machine_serial_num[0],
2000 jib_size,
2001 hfsmp->hfs_logical_block_size,
2002 &need_init);
2003 if (hfsmp->jvp == NULL) {
2004 buf_brelse(jinfo_bp);
2005 return EROFS;
2006 } else {
2007 if (IOBSDGetPlatformSerialNumber(&jibp->machine_serial_num[0], sizeof(jibp->machine_serial_num)) != KERN_SUCCESS) {
2008 strlcpy(&jibp->machine_serial_num[0], "unknown-machine-uuid", sizeof(jibp->machine_serial_num));
2009 }
2010 }
2011
2012 jib_offset = 0;
2013 write_jibp = 1;
2014 if (need_init) {
2015 jib_flags |= kJIJournalNeedInitMask;
2016 }
2017 }
2018
2019 // save this off for the hack-y check in hfs_remove()
2020 hfsmp->jnl_start = jib_offset / SWAP_BE32(vhp->blockSize);
2021 hfsmp->jnl_size = jib_size;
2022
2023 if ((hfsmp->hfs_flags & HFS_READ_ONLY) && (vfs_flags(hfsmp->hfs_mp) & MNT_ROOTFS) == 0) {
2024 // if the file system is read-only, check if the journal is empty.
2025 // if it is, then we can allow the mount. otherwise we have to
2026 // return failure.
2027 retval = journal_is_clean(hfsmp->jvp,
2028 jib_offset + embeddedOffset,
2029 jib_size,
2030 devvp,
2031 hfsmp->hfs_logical_block_size);
2032
2033 hfsmp->jnl = NULL;
2034
2035 buf_brelse(jinfo_bp);
2036
2037 if (retval) {
2038 const char *name = vnode_getname(devvp);
2039 printf("hfs: early journal init: volume on %s is read-only and journal is dirty. Can not mount volume.\n",
2040 name ? name : "");
2041 if (name)
2042 vnode_putname(name);
2043 }
2044
2045 return retval;
2046 }
2047
2048 if (jib_flags & kJIJournalNeedInitMask) {
2049 printf("hfs: Initializing the journal (joffset 0x%llx sz 0x%llx)...\n",
2050 jib_offset + embeddedOffset, jib_size);
2051 hfsmp->jnl = journal_create(hfsmp->jvp,
2052 jib_offset + embeddedOffset,
2053 jib_size,
2054 devvp,
2055 blksize,
2056 arg_flags,
2057 arg_tbufsz,
2058 hfs_sync_metadata, hfsmp->hfs_mp);
2059
2060 // no need to start a transaction here... if this were to fail
2061 // we'd just re-init it on the next mount.
2062 jib_flags &= ~kJIJournalNeedInitMask;
2063 jibp->flags = SWAP_BE32(jib_flags);
2064 buf_bwrite(jinfo_bp);
2065 jinfo_bp = NULL;
2066 jibp = NULL;
2067 } else {
2068 //printf("hfs: Opening the journal (joffset 0x%llx sz 0x%llx vhp_blksize %d)...\n",
2069 // jib_offset + embeddedOffset,
2070 // jib_size, SWAP_BE32(vhp->blockSize));
2071
2072 hfsmp->jnl = journal_open(hfsmp->jvp,
2073 jib_offset + embeddedOffset,
2074 jib_size,
2075 devvp,
2076 blksize,
2077 arg_flags,
2078 arg_tbufsz,
2079 hfs_sync_metadata, hfsmp->hfs_mp);
2080
2081 if (write_jibp) {
2082 buf_bwrite(jinfo_bp);
2083 } else {
2084 buf_brelse(jinfo_bp);
2085 }
2086 jinfo_bp = NULL;
2087 jibp = NULL;
2088
2089 if (hfsmp->jnl && mdbp) {
2090 // reload the mdb because it could have changed
2091 // if the journal had to be replayed.
2092 if (mdb_offset == 0) {
2093 mdb_offset = (daddr64_t)((embeddedOffset / blksize) + HFS_PRI_SECTOR(blksize));
2094 }
2095 bp = NULL;
2096 retval = (int)buf_meta_bread(devvp,
2097 HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
2098 hfsmp->hfs_physical_block_size, cred, &bp);
2099 if (retval) {
2100 if (bp) {
2101 buf_brelse(bp);
2102 }
2103 printf("hfs: failed to reload the mdb after opening the journal (retval %d)!\n",
2104 retval);
2105 return retval;
2106 }
2107 bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size), mdbp, 512);
2108 buf_brelse(bp);
2109 bp = NULL;
2110 }
2111 }
2112
2113
2114 //printf("journal @ 0x%x\n", hfsmp->jnl);
2115
2116 // if we expected the journal to be there and we couldn't
2117 // create it or open it then we have to bail out.
2118 if (hfsmp->jnl == NULL) {
2119 printf("hfs: early jnl init: failed to open/create the journal (retval %d).\n", retval);
2120 return EINVAL;
2121 }
2122
2123 return 0;
2124 }
2125
2126
2127 //
2128 // This function will go and re-locate the .journal_info_block and
2129 // the .journal files in case they moved (which can happen if you
2130 // run Norton SpeedDisk). If we fail to find either file we just
2131 // disable journaling for this volume and return. We turn off the
2132 // journaling bit in the vcb and assume it will get written to disk
2133 // later (if it doesn't on the next mount we'd do the same thing
2134 // again which is harmless). If we disable journaling we don't
2135 // return an error so that the volume is still mountable.
2136 //
2137 // If the info we find for the .journal_info_block and .journal files
2138 // isn't what we had stored, we re-set our cached info and proceed
2139 // with opening the journal normally.
2140 //
2141 static int
2142 hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_args)
2143 {
2144 JournalInfoBlock *jibp;
2145 struct buf *jinfo_bp;
2146 int sectors_per_fsblock, arg_flags=0, arg_tbufsz=0;
2147 int retval, write_jibp = 0, recreate_journal = 0;
2148 struct vnode *devvp;
2149 struct cat_attr jib_attr, jattr;
2150 struct cat_fork jib_fork, jfork;
2151 ExtendedVCB *vcb;
2152 u_int32_t fid;
2153 struct hfs_mount_args *args = _args;
2154 u_int32_t jib_flags;
2155 u_int64_t jib_offset;
2156 u_int64_t jib_size;
2157
2158 devvp = hfsmp->hfs_devvp;
2159 vcb = HFSTOVCB(hfsmp);
2160
2161 if (args != NULL && (args->flags & HFSFSMNT_EXTENDED_ARGS)) {
2162 if (args->journal_disable) {
2163 return 0;
2164 }
2165
2166 arg_flags = args->journal_flags;
2167 arg_tbufsz = args->journal_tbuffer_size;
2168 }
2169
2170 fid = GetFileInfo(vcb, kRootDirID, ".journal_info_block", &jib_attr, &jib_fork);
2171 if (fid == 0 || jib_fork.cf_extents[0].startBlock == 0 || jib_fork.cf_size == 0) {
2172 printf("hfs: can't find the .journal_info_block! disabling journaling (start: %d).\n",
2173 jib_fork.cf_extents[0].startBlock);
2174 vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
2175 return 0;
2176 }
2177 hfsmp->hfs_jnlinfoblkid = fid;
2178
2179 // make sure the journal_info_block begins where we think it should.
2180 if (SWAP_BE32(vhp->journalInfoBlock) != jib_fork.cf_extents[0].startBlock) {
2181 printf("hfs: The journal_info_block moved (was: %d; is: %d). Fixing up\n",
2182 SWAP_BE32(vhp->journalInfoBlock), jib_fork.cf_extents[0].startBlock);
2183
2184 vcb->vcbJinfoBlock = jib_fork.cf_extents[0].startBlock;
2185 vhp->journalInfoBlock = SWAP_BE32(jib_fork.cf_extents[0].startBlock);
2186 recreate_journal = 1;
2187 }
2188
2189
2190 sectors_per_fsblock = SWAP_BE32(vhp->blockSize) / hfsmp->hfs_logical_block_size;
2191 jinfo_bp = NULL;
2192 retval = (int)buf_meta_bread(devvp,
2193 (vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size +
2194 ((u_int64_t)SWAP_BE32(vhp->journalInfoBlock)*sectors_per_fsblock)),
2195 hfsmp->hfs_physical_block_size, NOCRED, &jinfo_bp);
2196 if (retval) {
2197 if (jinfo_bp) {
2198 buf_brelse(jinfo_bp);
2199 }
2200 printf("hfs: can't read journal info block. disabling journaling.\n");
2201 vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
2202 return 0;
2203 }
2204
2205 jibp = (JournalInfoBlock *)buf_dataptr(jinfo_bp);
2206 jib_flags = SWAP_BE32(jibp->flags);
2207 jib_offset = SWAP_BE64(jibp->offset);
2208 jib_size = SWAP_BE64(jibp->size);
2209
2210 fid = GetFileInfo(vcb, kRootDirID, ".journal", &jattr, &jfork);
2211 if (fid == 0 || jfork.cf_extents[0].startBlock == 0 || jfork.cf_size == 0) {
2212 printf("hfs: can't find the journal file! disabling journaling (start: %d)\n",
2213 jfork.cf_extents[0].startBlock);
2214 buf_brelse(jinfo_bp);
2215 vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
2216 return 0;
2217 }
2218 hfsmp->hfs_jnlfileid = fid;
2219
2220 // make sure the journal file begins where we think it should.
2221 if ((jib_flags & kJIJournalInFSMask) && (jib_offset / (u_int64_t)vcb->blockSize) != jfork.cf_extents[0].startBlock) {
2222 printf("hfs: The journal file moved (was: %lld; is: %d). Fixing up\n",
2223 (jib_offset / (u_int64_t)vcb->blockSize), jfork.cf_extents[0].startBlock);
2224
2225 jib_offset = (u_int64_t)jfork.cf_extents[0].startBlock * (u_int64_t)vcb->blockSize;
2226 write_jibp = 1;
2227 recreate_journal = 1;
2228 }
2229
2230 // check the size of the journal file.
2231 if (jib_size != (u_int64_t)jfork.cf_extents[0].blockCount*vcb->blockSize) {
2232 printf("hfs: The journal file changed size! (was %lld; is %lld). Fixing up.\n",
2233 jib_size, (u_int64_t)jfork.cf_extents[0].blockCount*vcb->blockSize);
2234
2235 jib_size = (u_int64_t)jfork.cf_extents[0].blockCount * vcb->blockSize;
2236 write_jibp = 1;
2237 recreate_journal = 1;
2238 }
2239
2240 if (jib_flags & kJIJournalInFSMask) {
2241 hfsmp->jvp = hfsmp->hfs_devvp;
2242 jib_offset += (off_t)vcb->hfsPlusIOPosOffset;
2243 } else {
2244 const char *dev_name;
2245 int need_init = 0;
2246
2247 dev_name = vnode_name(devvp);
2248 if (dev_name == NULL) {
2249 dev_name = "unknown-dev";
2250 }
2251
2252 // since the journal is empty, just use any available external journal
2253 *((char *)&jibp->ext_jnl_uuid[0]) = '\0';
2254
2255 // this fills in the uuid of the device we actually get
2256 hfsmp->jvp = open_journal_dev(dev_name,
2257 !(jib_flags & kJIJournalNeedInitMask),
2258 (char *)&jibp->ext_jnl_uuid[0],
2259 (char *)&jibp->machine_serial_num[0],
2260 jib_size,
2261 hfsmp->hfs_logical_block_size,
2262 &need_init);
2263 if (hfsmp->jvp == NULL) {
2264 buf_brelse(jinfo_bp);
2265 return EROFS;
2266 } else {
2267 if (IOBSDGetPlatformSerialNumber(&jibp->machine_serial_num[0], sizeof(jibp->machine_serial_num)) != KERN_SUCCESS) {
2268 strlcpy(&jibp->machine_serial_num[0], "unknown-machine-serial-num", sizeof(jibp->machine_serial_num));
2269 }
2270 }
2271 jib_offset = 0;
2272 recreate_journal = 1;
2273 write_jibp = 1;
2274 if (need_init) {
2275 jib_flags |= kJIJournalNeedInitMask;
2276 }
2277 }
2278
2279 // save this off for the hack-y check in hfs_remove()
2280 hfsmp->jnl_start = jib_offset / SWAP_BE32(vhp->blockSize);
2281 hfsmp->jnl_size = jib_size;
2282
2283 if ((hfsmp->hfs_flags & HFS_READ_ONLY) && (vfs_flags(hfsmp->hfs_mp) & MNT_ROOTFS) == 0) {
2284 // if the file system is read-only, check if the journal is empty.
2285 // if it is, then we can allow the mount. otherwise we have to
2286 // return failure.
2287 retval = journal_is_clean(hfsmp->jvp,
2288 jib_offset,
2289 jib_size,
2290 devvp,
2291 hfsmp->hfs_logical_block_size);
2292
2293 hfsmp->jnl = NULL;
2294
2295 buf_brelse(jinfo_bp);
2296
2297 if (retval) {
2298 const char *name = vnode_getname(devvp);
2299 printf("hfs: late journal init: volume on %s is read-only and journal is dirty. Can not mount volume.\n",
2300 name ? name : "");
2301 if (name)
2302 vnode_putname(name);
2303 }
2304
2305 return retval;
2306 }
2307
2308 if ((jib_flags & kJIJournalNeedInitMask) || recreate_journal) {
2309 printf("hfs: Initializing the journal (joffset 0x%llx sz 0x%llx)...\n",
2310 jib_offset, jib_size);
2311 hfsmp->jnl = journal_create(hfsmp->jvp,
2312 jib_offset,
2313 jib_size,
2314 devvp,
2315 hfsmp->hfs_logical_block_size,
2316 arg_flags,
2317 arg_tbufsz,
2318 hfs_sync_metadata, hfsmp->hfs_mp);
2319
2320 // no need to start a transaction here... if this were to fail
2321 // we'd just re-init it on the next mount.
2322 jib_flags &= ~kJIJournalNeedInitMask;
2323 write_jibp = 1;
2324
2325 } else {
2326 //
2327 // if we weren't the last person to mount this volume
2328 // then we need to throw away the journal because it
2329 // is likely that someone else mucked with the disk.
2330 // if the journal is empty this is no big deal. if the
2331 // disk is dirty this prevents us from replaying the
2332 // journal over top of changes that someone else made.
2333 //
2334 arg_flags |= JOURNAL_RESET;
2335
2336 //printf("hfs: Opening the journal (joffset 0x%llx sz 0x%llx vhp_blksize %d)...\n",
2337 // jib_offset,
2338 // jib_size, SWAP_BE32(vhp->blockSize));
2339
2340 hfsmp->jnl = journal_open(hfsmp->jvp,
2341 jib_offset,
2342 jib_size,
2343 devvp,
2344 hfsmp->hfs_logical_block_size,
2345 arg_flags,
2346 arg_tbufsz,
2347 hfs_sync_metadata, hfsmp->hfs_mp);
2348 }
2349
2350
2351 if (write_jibp) {
2352 jibp->flags = SWAP_BE32(jib_flags);
2353 jibp->offset = SWAP_BE64(jib_offset);
2354 jibp->size = SWAP_BE64(jib_size);
2355
2356 buf_bwrite(jinfo_bp);
2357 } else {
2358 buf_brelse(jinfo_bp);
2359 }
2360 jinfo_bp = NULL;
2361 jibp = NULL;
2362
2363 //printf("hfs: journal @ 0x%x\n", hfsmp->jnl);
2364
2365 // if we expected the journal to be there and we couldn't
2366 // create it or open it then we have to bail out.
2367 if (hfsmp->jnl == NULL) {
2368 printf("hfs: late jnl init: failed to open/create the journal (retval %d).\n", retval);
2369 return EINVAL;
2370 }
2371
2372 return 0;
2373 }
2374
2375 /*
2376 * Calculate the allocation zone for metadata.
2377 *
2378 * This zone includes the following:
2379 * Allocation Bitmap file
2380 * Overflow Extents file
2381 * Journal file
2382 * Quota files
2383 * Clustered Hot files
2384 * Catalog file
2385 *
2386 * METADATA ALLOCATION ZONE
2387 * ____________________________________________________________________________
2388 * | | | | | | |
2389 * | BM | JF | OEF | CATALOG |---> | HOT FILES |
2390 * |____|____|_____|_______________|______________________________|___________|
2391 *
2392 * <------------------------------- N * 128 MB ------------------------------->
2393 *
2394 */
2395 #define GIGABYTE (u_int64_t)(1024*1024*1024)
2396
2397 #define OVERFLOW_DEFAULT_SIZE (4*1024*1024)
2398 #define OVERFLOW_MAXIMUM_SIZE (128*1024*1024)
2399 #define JOURNAL_DEFAULT_SIZE (8*1024*1024)
2400 #define JOURNAL_MAXIMUM_SIZE (512*1024*1024)
2401 #define HOTBAND_MINIMUM_SIZE (10*1024*1024)
2402 #define HOTBAND_MAXIMUM_SIZE (512*1024*1024)
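/*
 * Example (hypothetical volume sizes, following the calculation below):
 * on a 100 GB volume the hot file band gets (fs_size / 1024) * 5 bytes,
 * i.e. roughly 500 MB, which is under HOTBAND_MAXIMUM_SIZE; on a 200 GB
 * volume the same formula yields about 1000 MB and is clamped to the
 * 512 MB maximum.
 */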
2403
2404 void
2405 hfs_metadatazone_init(struct hfsmount *hfsmp)
2406 {
2407 ExtendedVCB *vcb;
2408 u_int64_t fs_size;
2409 u_int64_t zonesize;
2410 u_int64_t temp;
2411 u_int64_t filesize;
2412 u_int32_t blk;
2413 int items, really_do_it=1;
2414
2415 vcb = HFSTOVCB(hfsmp);
2416 fs_size = (u_int64_t)vcb->blockSize * (u_int64_t)vcb->allocLimit;
2417
2418 /*
2419 * For volumes less than 10 GB, don't bother.
2420 */
2421 if (fs_size < ((u_int64_t)10 * GIGABYTE)) {
2422 really_do_it = 0;
2423 }
2424
2425 /*
2426 * Skip non-journaled volumes as well.
2427 */
2428 if (hfsmp->jnl == NULL) {
2429 really_do_it = 0;
2430 }
2431
2432 /*
2433 * Start with space for the boot blocks and Volume Header.
2434 * 1536 = byte offset from start of volume to end of volume header:
2435 * 1024 bytes is the offset from the start of the volume to the
2436 * start of the volume header (defined by the volume format)
2437 * + 512 bytes (the size of the volume header).
2438 */
2439 zonesize = roundup(1536, hfsmp->blockSize);
2440
2441 /*
2442 * Add the on-disk size of allocation bitmap.
2443 */
2444 zonesize += hfsmp->hfs_allocation_cp->c_datafork->ff_blocks * hfsmp->blockSize;
2445
2446 /*
2447 * Add space for the Journal Info Block and Journal (if they're in
2448 * this file system).
2449 */
2450 if (hfsmp->jnl && hfsmp->jvp == hfsmp->hfs_devvp) {
2451 zonesize += hfsmp->blockSize + hfsmp->jnl_size;
2452 }
2453
2454 /*
2455 * Add the existing size of the Extents Overflow B-tree.
2456 * (It rarely grows, so don't bother reserving additional room for it.)
2457 */
2458 zonesize += hfsmp->hfs_extents_cp->c_datafork->ff_blocks * hfsmp->blockSize;
2459
2460 /*
2461 * If there is an Attributes B-tree, leave room for 11 clumps worth.
2462 * newfs_hfs allocates one clump, and leaves a gap of 10 clumps.
2463 * When installing a full OS onto a 20GB volume, we use
2464 * 7 to 8 clumps worth of space (depending on packages), so that leaves
2465 * us with another 3 or 4 clumps worth before we need another extent.
2466 */
2467 if (hfsmp->hfs_attribute_cp) {
2468 zonesize += 11 * hfsmp->hfs_attribute_cp->c_datafork->ff_clumpsize;
2469 }
2470
2471 /*
2472 * Leave room for 11 clumps of the Catalog B-tree.
2473 * Again, newfs_hfs allocates one clump plus a gap of 10 clumps.
2474 * When installing a full OS onto a 20GB volume, we use
2475 * 7 to 8 clumps worth of space (depending on packages), so that leaves
2476 * us with another 3 or 4 clumps worth before we need another extent.
2477 */
2478 zonesize += 11 * hfsmp->hfs_catalog_cp->c_datafork->ff_clumpsize;
2479
2480 /*
2481 * Add space for hot file region.
2482 *
2483 * ...for now, use 5 MB per 1 GB (0.5 %)
2484 */
2485 filesize = (fs_size / 1024) * 5;
2486 if (filesize > HOTBAND_MAXIMUM_SIZE)
2487 filesize = HOTBAND_MAXIMUM_SIZE;
2488 else if (filesize < HOTBAND_MINIMUM_SIZE)
2489 filesize = HOTBAND_MINIMUM_SIZE;
2490 /*
2491 * Calculate user quota file requirements.
2492 */
2493 if (hfsmp->hfs_flags & HFS_QUOTAS) {
2494 items = QF_USERS_PER_GB * (fs_size / GIGABYTE);
2495 if (items < QF_MIN_USERS)
2496 items = QF_MIN_USERS;
2497 else if (items > QF_MAX_USERS)
2498 items = QF_MAX_USERS;
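/*
 * Round a non-power-of-two item count up to the next power of two
 * (minimum 4) before sizing the quota file.
 */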
2499 if (!powerof2(items)) {
2500 int x = items;
2501 items = 4;
2502 while (x>>1 != 1) {
2503 x = x >> 1;
2504 items = items << 1;
2505 }
2506 }
2507 filesize += (items + 1) * sizeof(struct dqblk);
2508 /*
2509 * Calculate group quota file requirements.
2510 *
2511 */
2512 items = QF_GROUPS_PER_GB * (fs_size / GIGABYTE);
2513 if (items < QF_MIN_GROUPS)
2514 items = QF_MIN_GROUPS;
2515 else if (items > QF_MAX_GROUPS)
2516 items = QF_MAX_GROUPS;
2517 if (!powerof2(items)) {
2518 int x = items;
2519 items = 4;
2520 while (x>>1 != 1) {
2521 x = x >> 1;
2522 items = items << 1;
2523 }
2524 }
2525 filesize += (items + 1) * sizeof(struct dqblk);
2526 }
2527 zonesize += filesize;
2528
2529 /*
2530 * Round up entire zone to a bitmap block's worth.
2531 * The extra space goes to the catalog file and hot file area.
2532 */
2533 temp = zonesize;
2534 zonesize = roundup(zonesize, (u_int64_t)vcb->vcbVBMIOSize * 8 * vcb->blockSize);
2535 hfsmp->hfs_min_alloc_start = zonesize / vcb->blockSize;
2536 /*
2537 * If doing the round up for hfs_min_alloc_start would push us past
2538 * allocLimit, then just reset it back to 0. Though using a value
2539 * bigger than allocLimit would not cause damage in the block allocator
2540 * code, this value could get stored in the volume header and make it out
2541 * to disk, making the volume header technically corrupt.
2542 */
2543 if (hfsmp->hfs_min_alloc_start >= hfsmp->allocLimit) {
2544 hfsmp->hfs_min_alloc_start = 0;
2545 }
2546
2547 if (really_do_it == 0) {
2548 /* If metadata zone needs to be disabled because the
2549 * volume was truncated, clear the bit and zero out
2550 * the values that are no longer needed.
2551 */
2552 if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
2553 /* Disable metadata zone */
2554 hfsmp->hfs_flags &= ~HFS_METADATA_ZONE;
2555
2556 /* Zero out mount point values that are not required */
2557 hfsmp->hfs_catalog_maxblks = 0;
2558 hfsmp->hfs_hotfile_maxblks = 0;
2559 hfsmp->hfs_hotfile_start = 0;
2560 hfsmp->hfs_hotfile_end = 0;
2561 hfsmp->hfs_hotfile_freeblks = 0;
2562 hfsmp->hfs_metazone_start = 0;
2563 hfsmp->hfs_metazone_end = 0;
2564 }
2565
2566 return;
2567 }
2568
2569 temp = zonesize - temp; /* temp has extra space */
2570 filesize += temp / 3;
2571 hfsmp->hfs_catalog_maxblks += (temp - (temp / 3)) / vcb->blockSize;
2572
2573 hfsmp->hfs_hotfile_maxblks = filesize / vcb->blockSize;
2574
2575 /* Convert to allocation blocks. */
2576 blk = zonesize / vcb->blockSize;
2577
2578 /* The default metadata zone location is at the start of volume. */
2579 hfsmp->hfs_metazone_start = 1;
2580 hfsmp->hfs_metazone_end = blk - 1;
2581
2582 /* The default hotfile area is at the end of the zone. */
2583 hfsmp->hfs_hotfile_start = blk - (filesize / vcb->blockSize);
2584 hfsmp->hfs_hotfile_end = hfsmp->hfs_metazone_end;
2585 hfsmp->hfs_hotfile_freeblks = hfs_hotfile_freeblocks(hfsmp);
2586 #if 0
2587 printf("hfs: metadata zone is %d to %d\n", hfsmp->hfs_metazone_start, hfsmp->hfs_metazone_end);
2588 printf("hfs: hot file band is %d to %d\n", hfsmp->hfs_hotfile_start, hfsmp->hfs_hotfile_end);
2589 printf("hfs: hot file band free blocks = %d\n", hfsmp->hfs_hotfile_freeblks);
2590 #endif
2591 hfsmp->hfs_flags |= HFS_METADATA_ZONE;
2592 }
2593
2594
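/*
 * Number of free allocation blocks currently available to the hot file
 * band: the metadata zone's free block count minus what is still reserved
 * for Extents Overflow and Catalog growth, capped at hfs_hotfile_maxblks.
 */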
2595 static u_int32_t
2596 hfs_hotfile_freeblocks(struct hfsmount *hfsmp)
2597 {
2598 ExtendedVCB *vcb = HFSTOVCB(hfsmp);
2599 int lockflags;
2600 int freeblocks;
2601
2602 lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
2603 freeblocks = MetaZoneFreeBlocks(vcb);
2604 hfs_systemfile_unlock(hfsmp, lockflags);
2605
2606 /* Minus Extents overflow file reserve. */
2607 freeblocks -=
2608 hfsmp->hfs_overflow_maxblks - VTOF(hfsmp->hfs_extents_vp)->ff_blocks;
2609 /* Minus catalog file reserve. */
2610 freeblocks -=
2611 hfsmp->hfs_catalog_maxblks - VTOF(hfsmp->hfs_catalog_vp)->ff_blocks;
2612 if (freeblocks < 0)
2613 freeblocks = 0;
2614
2615 return MIN(freeblocks, hfsmp->hfs_hotfile_maxblks);
2616 }
2617
2618 /*
2619 * Determine if a file is a "virtual" metadata file.
2620 * This includes journal and quota files.
2621 */
2622 __private_extern__
2623 int
2624 hfs_virtualmetafile(struct cnode *cp)
2625 {
2626 const char * filename;
2627
2628
2629 if (cp->c_parentcnid != kHFSRootFolderID)
2630 return (0);
2631
2632 filename = (const char *)cp->c_desc.cd_nameptr;
2633 if (filename == NULL)
2634 return (0);
2635
2636 if ((strncmp(filename, ".journal", sizeof(".journal")) == 0) ||
2637 (strncmp(filename, ".journal_info_block", sizeof(".journal_info_block")) == 0) ||
2638 (strncmp(filename, ".quota.user", sizeof(".quota.user")) == 0) ||
2639 (strncmp(filename, ".quota.group", sizeof(".quota.group")) == 0) ||
2640 (strncmp(filename, ".hotfiles.btree", sizeof(".hotfiles.btree")) == 0))
2641 return (1);
2642
2643 return (0);
2644 }
2645
2646
2647 //
2648 // Fire off a timed callback to sync the disk if the
2649 // volume is on ejectable media.
2650 //
2651 __private_extern__
2652 void
2653 hfs_sync_ejectable(struct hfsmount *hfsmp)
2654 {
2655 if (hfsmp->hfs_syncer) {
2656 clock_sec_t secs;
2657 clock_usec_t usecs;
2658 uint64_t now;
2659
2660 clock_get_calendar_microtime(&secs, &usecs);
2661 now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs;
2662
2663 if (hfsmp->hfs_sync_incomplete && hfsmp->hfs_mp->mnt_pending_write_size >= hfsmp->hfs_max_pending_io) {
2664 // if we have a sync scheduled but i/o is starting to pile up,
2665 // don't call thread_call_enter_delayed() again because that
2666 // will defer the sync.
2667 return;
2668 }
2669
2670 if (hfsmp->hfs_sync_scheduled == 0) {
2671 uint64_t deadline;
2672
2673 hfsmp->hfs_last_sync_request_time = now;
2674
2675 clock_interval_to_deadline(HFS_META_DELAY, HFS_MILLISEC_SCALE, &deadline);
2676
2677 /*
2678 * Increment hfs_sync_scheduled on the assumption that we're the
2679 * first thread to schedule the timer. If some other thread beat
2680 * us, then we'll decrement it. If we *were* the first to
2681 * schedule the timer, then we need to keep track that the
2682 * callback is waiting to complete.
2683 */
2684 OSIncrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_scheduled);
2685 if (thread_call_enter_delayed(hfsmp->hfs_syncer, deadline))
2686 OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_scheduled);
2687 else
2688 OSIncrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_incomplete);
2689 }
2690 }
2691 }
2692
2693
2694 __private_extern__
2695 int
2696 hfs_start_transaction(struct hfsmount *hfsmp)
2697 {
2698 int ret, unlock_on_err=0;
2699 void * thread = current_thread();
2700
2701 #ifdef HFS_CHECK_LOCK_ORDER
2702 /*
2703 * You cannot start a transaction while holding a system
2704 * file lock (unless the transaction is nested).
2705 */
2706 if (hfsmp->jnl && journal_owner(hfsmp->jnl) != thread) {
2707 if (hfsmp->hfs_catalog_cp && hfsmp->hfs_catalog_cp->c_lockowner == thread) {
2708 panic("hfs_start_transaction: bad lock order (cat before jnl)\n");
2709 }
2710 if (hfsmp->hfs_attribute_cp && hfsmp->hfs_attribute_cp->c_lockowner == thread) {
2711 panic("hfs_start_transaction: bad lock order (attr before jnl)\n");
2712 }
2713 if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == thread) {
2714 panic("hfs_start_transaction: bad lock order (ext before jnl)\n");
2715 }
2716 }
2717 #endif /* HFS_CHECK_LOCK_ORDER */
2718
2719 if (hfsmp->jnl == NULL || journal_owner(hfsmp->jnl) != thread) {
2720 lck_rw_lock_shared(&hfsmp->hfs_global_lock);
2721 OSAddAtomic(1, (SInt32 *)&hfsmp->hfs_active_threads);
2722 unlock_on_err = 1;
2723 }
2724
2725 /* If a downgrade to read-only mount is in progress, no process
2726 * other than the downgrade process is allowed to modify
2727 * the file system.
2728 */
2729 if ((hfsmp->hfs_flags & HFS_RDONLY_DOWNGRADE) &&
2730 (hfsmp->hfs_downgrading_proc != thread)) {
2731 ret = EROFS;
2732 goto out;
2733 }
2734
2735 if (hfsmp->jnl) {
2736 ret = journal_start_transaction(hfsmp->jnl);
2737 if (ret == 0) {
2738 OSAddAtomic(1, &hfsmp->hfs_global_lock_nesting);
2739 }
2740 } else {
2741 ret = 0;
2742 }
2743
2744 out:
2745 if (ret != 0 && unlock_on_err) {
2746 lck_rw_unlock_shared(&hfsmp->hfs_global_lock);
2747 OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_active_threads);
2748 }
2749
2750 return ret;
2751 }
2752
2753 __private_extern__
2754 int
2755 hfs_end_transaction(struct hfsmount *hfsmp)
2756 {
2757 int need_unlock=0, ret;
2758
2759 if ( hfsmp->jnl == NULL
2760 || ( journal_owner(hfsmp->jnl) == current_thread()
2761 && (OSAddAtomic(-1, &hfsmp->hfs_global_lock_nesting) == 1)) ) {
2762
2763 need_unlock = 1;
2764 }
2765
2766 if (hfsmp->jnl) {
2767 ret = journal_end_transaction(hfsmp->jnl);
2768 } else {
2769 ret = 0;
2770 }
2771
2772 if (need_unlock) {
2773 OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_active_threads);
2774 lck_rw_unlock_shared(&hfsmp->hfs_global_lock);
2775 hfs_sync_ejectable(hfsmp);
2776 }
2777
2778 return ret;
2779 }
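/*
 * Typical use of the transaction routines above (a sketch; real callers
 * also take the appropriate cnode and system file locks):
 *
 *	if ((error = hfs_start_transaction(hfsmp)) != 0)
 *		return (error);
 *	... make metadata changes, journaled via hfsmp->jnl ...
 *	hfs_end_transaction(hfsmp);
 *
 * Transactions nest per thread: only the outermost start/end pair takes
 * and drops the shared hfs_global_lock.
 */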
2780
2781
2782 __private_extern__
2783 int
2784 hfs_journal_flush(struct hfsmount *hfsmp)
2785 {
2786 int ret;
2787
2788 /* Only peek at hfsmp->jnl while holding the global lock */
2789 lck_rw_lock_shared(&hfsmp->hfs_global_lock);
2790 if (hfsmp->jnl) {
2791 ret = journal_flush(hfsmp->jnl);
2792 } else {
2793 ret = 0;
2794 }
2795 lck_rw_unlock_shared(&hfsmp->hfs_global_lock);
2796
2797 return ret;
2798 }
2799
2800
2801 /*
2802 * hfs_erase_unused_nodes
2803 *
2804 * Check whether a volume may suffer from unused Catalog B-tree nodes that
2805 * are not zeroed (due to <rdar://problem/6947811>). If so, just write
2806 * zeroes to the unused nodes.
2807 *
2808 * How do we detect when a volume needs this repair? We can't always be
2809 * certain. If a volume was created after a certain date, then it may have
2810 * been created with the faulty newfs_hfs. Since newfs_hfs only created one
2811 * clump, we can assume that if a Catalog B-tree is larger than its clump size,
2812 * the entire first clump must have been written to, so there shouldn't be
2813 * unused and unwritten nodes in that first clump, and this repair is
2814 * not needed.
2815 *
2816 * We have defined a bit in the Volume Header's attributes to indicate when the
2817 * unused nodes have been repaired. A newer newfs_hfs will set this bit.
2818 * As will fsck_hfs when it repairs the unused nodes.
2819 */
2820 __private_extern__
2821 int hfs_erase_unused_nodes(struct hfsmount *hfsmp)
2822 {
2823 int result;
2824 struct filefork *catalog;
2825 int lockflags;
2826
2827 if (hfsmp->vcbAtrb & kHFSUnusedNodeFixMask)
2828 {
2829 /* This volume has already been checked and repaired. */
2830 return 0;
2831 }
2832
2833 if ((hfsmp->localCreateDate < kHFSUnusedNodesFixDate))
2834 {
2835 /* This volume is too old to have had the problem. */
2836 hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask;
2837 return 0;
2838 }
2839
2840 catalog = hfsmp->hfs_catalog_cp->c_datafork;
2841 if (catalog->ff_size > catalog->ff_clumpsize)
2842 {
2843 /* The entire first clump must have been in use at some point. */
2844 hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask;
2845 return 0;
2846 }
2847
2848 /*
2849 * If we get here, we need to zero out those unused nodes.
2850 *
2851 * We start a transaction and lock the catalog since we're going to be
2852 * making on-disk changes. But note that BTZeroUnusedNodes doesn't actually
2853 * do its writing via the journal, because that would be too much I/O
2854 * to fit in a transaction, and it's a pain to break it up into multiple
2855 * transactions. (It behaves more like growing a B-tree would.)
2856 */
2857 printf("hfs_erase_unused_nodes: updating volume %s.\n", hfsmp->vcbVN);
2858 result = hfs_start_transaction(hfsmp);
2859 if (result)
2860 goto done;
2861 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
2862 result = BTZeroUnusedNodes(catalog);
2863 vnode_waitforwrites(hfsmp->hfs_catalog_vp, 0, 0, 0, "hfs_erase_unused_nodes");
2864 hfs_systemfile_unlock(hfsmp, lockflags);
2865 hfs_end_transaction(hfsmp);
2866 if (result == 0)
2867 hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask;
2868 printf("hfs_erase_unused_nodes: done updating volume %s.\n", hfsmp->vcbVN);
2869
2870 done:
2871 return result;
2872 }