bsd/hfs/hfs_vfsutils.c

   1 /*
   2  * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /*      @(#)hfs_vfsutils.c      4.0
  29 *
  30 *       (c) 1997-2002 Apple Computer, Inc.  All Rights Reserved
  31 *
  32 *       hfs_vfsutils.c -- Routines that go between the HFS layer and the VFS.
  33 *
  34 */
  35 #include <sys/param.h>
  36 #include <sys/systm.h>
  37 #include <sys/kernel.h>
  38 #include <sys/malloc.h>
  39 #include <sys/stat.h>
  40 #include <sys/mount.h>
  41 #include <sys/mount_internal.h>
  42 #include <sys/buf.h>
  43 #include <sys/buf_internal.h>
  44 #include <sys/ubc.h>
  45 #include <sys/unistd.h>
  46 #include <sys/utfconv.h>
  47 #include <sys/kauth.h>
  48 #include <sys/fcntl.h>
  49 #include <sys/fsctl.h>
  50 #include <sys/vnode_internal.h>
  51 #include <kern/clock.h>
  52
  53 #include <libkern/OSAtomic.h>
  54
  55 #include "hfs.h"
  56 #include "hfs_catalog.h"
  57 #include "hfs_dbg.h"
  58 #include "hfs_mount.h"
  59 #include "hfs_endian.h"
  60 #include "hfs_cnode.h"
  61 #include "hfs_fsctl.h"
  62
  63 #include "hfscommon/headers/FileMgrInternal.h"
  64 #include "hfscommon/headers/BTreesInternal.h"
  65 #include "hfscommon/headers/HFSUnicodeWrappers.h"
  66
  67 static void ReleaseMetaFileVNode(struct vnode *vp);
  68 static int  hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_args);
  69
  70 static u_int32_t hfs_hotfile_freeblocks(struct hfsmount *);
  71
  72 #define HFS_MOUNT_DEBUG 1
  73
  74
  75 //*******************************************************************************
  76 // Note: Finder information in the HFS/HFS+ metadata are considered opaque and
  77 //       hence are not in the right byte order on little endian machines. It is
  78 //       the responsibility of the finder and other clients to swap the data.
  79 //*******************************************************************************
  80
  81 //*******************************************************************************
  82 //      Routine:        hfs_MountHFSVolume
  83 //
  84 //
  85 //*******************************************************************************
  86 unsigned char hfs_catname[] = "Catalog B-tree";
  87 unsigned char hfs_extname[] = "Extents B-tree";
  88 unsigned char hfs_vbmname[] = "Volume Bitmap";
  89 unsigned char hfs_attrname[] = "Attribute B-tree";
  90 unsigned char hfs_startupname[] = "Startup File";
  91
  92
  93 OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb,
  94                 __unused struct proc *p)
  95 {
  96         ExtendedVCB *vcb = HFSTOVCB(hfsmp);
  97         int error;
  98         ByteCount utf8chars;
  99         struct cat_desc cndesc;
 100         struct cat_attr cnattr;
 101         struct cat_fork fork;
 102         int newvnode_flags = 0;
 103
 104         /* Block size must be a multiple of 512 */
 105         if (SWAP_BE32(mdb->drAlBlkSiz) == 0 ||
 106             (SWAP_BE32(mdb->drAlBlkSiz) & 0x01FF) != 0)
 107                 return (EINVAL);
 108
 109         /* don't mount a writeable volume if its dirty, it must be cleaned by fsck_hfs */
 110         if (((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) &&
 111             ((SWAP_BE16(mdb->drAtrb) & kHFSVolumeUnmountedMask) == 0)) {
 112                 return (EINVAL);
 113         }
 114         hfsmp->hfs_flags |= HFS_STANDARD;
 115         /*
 116          * The MDB seems OK: transfer info from it into VCB
 117          * Note - the VCB starts out clear (all zeros)
 118          *
 119          */
 120         vcb->vcbSigWord         = SWAP_BE16 (mdb->drSigWord);
 121         vcb->hfs_itime          = to_bsd_time(LocalToUTC(SWAP_BE32(mdb->drCrDate)));
 122         vcb->localCreateDate    = SWAP_BE32 (mdb->drCrDate);
 123         vcb->vcbLsMod           = to_bsd_time(LocalToUTC(SWAP_BE32(mdb->drLsMod)));
 124         vcb->vcbAtrb            = SWAP_BE16 (mdb->drAtrb);
 125         vcb->vcbNmFls           = SWAP_BE16 (mdb->drNmFls);
 126         vcb->vcbVBMSt           = SWAP_BE16 (mdb->drVBMSt);
 127         vcb->nextAllocation     = SWAP_BE16 (mdb->drAllocPtr);
 128         vcb->totalBlocks        = SWAP_BE16 (mdb->drNmAlBlks);
 129         vcb->allocLimit         = vcb->totalBlocks;
 130         vcb->blockSize          = SWAP_BE32 (mdb->drAlBlkSiz);
 131         vcb->vcbClpSiz          = SWAP_BE32 (mdb->drClpSiz);
 132         vcb->vcbAlBlSt          = SWAP_BE16 (mdb->drAlBlSt);
 133         vcb->vcbNxtCNID         = SWAP_BE32 (mdb->drNxtCNID);
 134         vcb->freeBlocks         = SWAP_BE16 (mdb->drFreeBks);
 135         vcb->vcbVolBkUp         = to_bsd_time(LocalToUTC(SWAP_BE32(mdb->drVolBkUp)));
 136         vcb->vcbWrCnt           = SWAP_BE32 (mdb->drWrCnt);
 137         vcb->vcbNmRtDirs        = SWAP_BE16 (mdb->drNmRtDirs);
 138         vcb->vcbFilCnt          = SWAP_BE32 (mdb->drFilCnt);
 139         vcb->vcbDirCnt          = SWAP_BE32 (mdb->drDirCnt);
 140         bcopy(mdb->drFndrInfo, vcb->vcbFndrInfo, sizeof(vcb->vcbFndrInfo));
 141         if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
 142                 vcb->vcbWrCnt++;        /* Compensate for write of MDB on last flush */
 143
 144         /* convert hfs encoded name into UTF-8 string */
 145         error = hfs_to_utf8(vcb, mdb->drVN, NAME_MAX, &utf8chars, vcb->vcbVN);
 146         /*
 147          * When an HFS name cannot be encoded with the current
 148          * volume encoding we use MacRoman as a fallback.
 149          */
 150         if (error || (utf8chars == 0)) {
 151                 error = mac_roman_to_utf8(mdb->drVN, NAME_MAX, &utf8chars, vcb->vcbVN);
 152                 /* If we fail to encode to UTF8 from Mac Roman, the name is bad.  Deny the mount */
 153                 if (error) {
 154                         goto MtVolErr;
 155                 }
 156         }
 157
 158         hfsmp->hfs_logBlockSize = BestBlockSizeFit(vcb->blockSize, MAXBSIZE, hfsmp->hfs_logical_block_size);
 159         vcb->vcbVBMIOSize = kHFSBlockSize;
 160
 161         hfsmp->hfs_alt_id_sector = HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size,
 162                                                   hfsmp->hfs_logical_block_count);
 163
 164         bzero(&cndesc, sizeof(cndesc));
 165         cndesc.cd_parentcnid = kHFSRootParentID;
 166         cndesc.cd_flags |= CD_ISMETA;
 167         bzero(&cnattr, sizeof(cnattr));
 168         cnattr.ca_linkcount = 1;
 169         cnattr.ca_mode = S_IFREG;
 170         bzero(&fork, sizeof(fork));
 171
 172         /*
 173          * Set up Extents B-tree vnode
 174          */
 175         cndesc.cd_nameptr = hfs_extname;
 176         cndesc.cd_namelen = strlen((char *)hfs_extname);
 177         cndesc.cd_cnid = cnattr.ca_fileid = kHFSExtentsFileID;
 178         fork.cf_size = SWAP_BE32(mdb->drXTFlSize);
 179         fork.cf_blocks = fork.cf_size / vcb->blockSize;
 180         fork.cf_clump = SWAP_BE32(mdb->drXTClpSiz);
 181         fork.cf_vblocks = 0;
 182         fork.cf_extents[0].startBlock = SWAP_BE16(mdb->drXTExtRec[0].startBlock);
 183         fork.cf_extents[0].blockCount = SWAP_BE16(mdb->drXTExtRec[0].blockCount);
 184         fork.cf_extents[1].startBlock = SWAP_BE16(mdb->drXTExtRec[1].startBlock);
 185         fork.cf_extents[1].blockCount = SWAP_BE16(mdb->drXTExtRec[1].blockCount);
 186         fork.cf_extents[2].startBlock = SWAP_BE16(mdb->drXTExtRec[2].startBlock);
 187         fork.cf_extents[2].blockCount = SWAP_BE16(mdb->drXTExtRec[2].blockCount);
 188         cnattr.ca_blocks = fork.cf_blocks;
 189
 190         error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork,
 191                                 &hfsmp->hfs_extents_vp, &newvnode_flags);
 192         if (error) {
 193                 if (HFS_MOUNT_DEBUG) {
 194                         printf("hfs_mounthfs (std): error creating Ext Vnode (%d) \n", error);
 195                 }
 196                 goto MtVolErr;
 197         }
 198         error = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_extents_vp),
 199                                          (KeyCompareProcPtr)CompareExtentKeys));
 200         if (error) {
 201                 if (HFS_MOUNT_DEBUG) {
 202                         printf("hfs_mounthfs (std): error opening Ext Vnode (%d) \n", error);
 203                 }
 204                 hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
 205                 goto MtVolErr;
 206         }
 207         hfsmp->hfs_extents_cp = VTOC(hfsmp->hfs_extents_vp);
 208
 209         /*
 210          * Set up Catalog B-tree vnode...
 211          */
 212         cndesc.cd_nameptr = hfs_catname;
 213         cndesc.cd_namelen = strlen((char *)hfs_catname);
 214         cndesc.cd_cnid = cnattr.ca_fileid = kHFSCatalogFileID;
 215         fork.cf_size = SWAP_BE32(mdb->drCTFlSize);
 216         fork.cf_blocks = fork.cf_size / vcb->blockSize;
 217         fork.cf_clump = SWAP_BE32(mdb->drCTClpSiz);
 218         fork.cf_vblocks = 0;
 219         fork.cf_extents[0].startBlock = SWAP_BE16(mdb->drCTExtRec[0].startBlock);
 220         fork.cf_extents[0].blockCount = SWAP_BE16(mdb->drCTExtRec[0].blockCount);
 221         fork.cf_extents[1].startBlock = SWAP_BE16(mdb->drCTExtRec[1].startBlock);
 222         fork.cf_extents[1].blockCount = SWAP_BE16(mdb->drCTExtRec[1].blockCount);
 223         fork.cf_extents[2].startBlock = SWAP_BE16(mdb->drCTExtRec[2].startBlock);
 224         fork.cf_extents[2].blockCount = SWAP_BE16(mdb->drCTExtRec[2].blockCount);
 225         cnattr.ca_blocks = fork.cf_blocks;
 226
 227         error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork,
 228                                 &hfsmp->hfs_catalog_vp, &newvnode_flags);
 229         if (error) {
 230                 if (HFS_MOUNT_DEBUG) {
 231                         printf("hfs_mounthfs (std): error creating catalog Vnode (%d) \n", error);
 232                 }
 233                 hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
 234                 goto MtVolErr;
 235         }
 236         error = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_catalog_vp),
 237                                          (KeyCompareProcPtr)CompareCatalogKeys));
 238         if (error) {
 239                 if (HFS_MOUNT_DEBUG) {
 240                         printf("hfs_mounthfs (std): error opening catalog Vnode (%d) \n", error);
 241                 }
 242                 hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
 243                 hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
 244                 goto MtVolErr;
 245         }
 246         hfsmp->hfs_catalog_cp = VTOC(hfsmp->hfs_catalog_vp);
 247
 248         /*
 249          * Set up dummy Allocation file vnode (used only for locking bitmap)
 250          */
 251         cndesc.cd_nameptr = hfs_vbmname;
 252         cndesc.cd_namelen = strlen((char *)hfs_vbmname);
 253         cndesc.cd_cnid = cnattr.ca_fileid = kHFSAllocationFileID;
 254         bzero(&fork, sizeof(fork));
 255         cnattr.ca_blocks = 0;
 256
 257         error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork,
 258                                  &hfsmp->hfs_allocation_vp, &newvnode_flags);
 259         if (error) {
 260                 if (HFS_MOUNT_DEBUG) {
 261                         printf("hfs_mounthfs (std): error creating bitmap Vnode (%d) \n", error);
 262                 }
 263                 hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
 264                 hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
 265                 goto MtVolErr;
 266         }
 267         hfsmp->hfs_allocation_cp = VTOC(hfsmp->hfs_allocation_vp);
 268
 269         /* mark the volume dirty (clear clean unmount bit) */
 270         vcb->vcbAtrb &= ~kHFSVolumeUnmountedMask;
 271
 272     if (error == noErr) {
 273                 error = cat_idlookup(hfsmp, kHFSRootFolderID, 0, 0, NULL, NULL, NULL);
 274                 if (HFS_MOUNT_DEBUG) {
 275                         printf("hfs_mounthfs (std): error looking up root folder (%d) \n", error);
 276                 }
 277         }
 278
 279     if (error == noErr) {
 280                 /* If the disk isn't write protected.. */
 281         if ( !(vcb->vcbAtrb & kHFSVolumeHardwareLockMask)) {
 282             MarkVCBDirty (vcb); //      mark VCB dirty so it will be written
 283                 }
 284         }
 285
 286         /*
 287          * all done with system files so we can unlock now...
 288          */
 289         hfs_unlock(VTOC(hfsmp->hfs_allocation_vp));
 290         hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
 291         hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
 292
 293         if (error == noErr) {
 294                 /* If successful, then we can just return once we've unlocked the cnodes */
 295                 return error;
 296         }
 297
 298     //--        Release any resources allocated so far before exiting with an error:
 299 MtVolErr:
 300         hfsUnmount(hfsmp, NULL);
 301
 302     return (error);
 303 }
 304
 305 //*******************************************************************************
 306 //      Routine:        hfs_MountHFSPlusVolume
 307 //
 308 //
 309 //*******************************************************************************
 310
 311 OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
 312         off_t embeddedOffset, u_int64_t disksize, __unused struct proc *p, void *args, kauth_cred_t cred)
 313 {
 314         register ExtendedVCB *vcb;
 315         struct cat_desc cndesc;
 316         struct cat_attr cnattr;
 317         struct cat_fork cfork;
 318         u_int32_t blockSize;
 319         daddr64_t spare_sectors;
 320         struct BTreeInfoRec btinfo;
 321         u_int16_t  signature;
 322         u_int16_t  hfs_version;
 323         int newvnode_flags = 0;
 324         int  i;
 325         OSErr retval;
 326         char converted_volname[256];
 327         size_t volname_length = 0;
 328         size_t conv_volname_length = 0;
 329
 330         signature = SWAP_BE16(vhp->signature);
 331         hfs_version = SWAP_BE16(vhp->version);
 332
 333         if (signature == kHFSPlusSigWord) {
 334                 if (hfs_version != kHFSPlusVersion) {
 335                         printf("hfs_mount: invalid HFS+ version: %d\n", hfs_version);
 336                         return (EINVAL);
 337                 }
 338         } else if (signature == kHFSXSigWord) {
 339                 if (hfs_version != kHFSXVersion) {
 340                         printf("hfs_mount: invalid HFSX version: %d\n", hfs_version);
 341                         return (EINVAL);
 342                 }
 343                 /* The in-memory signature is always 'H+'. */
 344                 signature = kHFSPlusSigWord;
 345                 hfsmp->hfs_flags |= HFS_X;
 346         } else {
 347                 /* Removed printf for invalid HFS+ signature because it gives
 348                  * false error for UFS root volume
 349                  */
 350                 if (HFS_MOUNT_DEBUG) {
 351                         printf("hfs_mounthfsplus: unknown Volume Signature\n");
 352                 }
 353                 return (EINVAL);
 354         }
 355
 356         /* Block size must be at least 512 and a power of 2 */
 357         blockSize = SWAP_BE32(vhp->blockSize);
 358         if (blockSize < 512 || !powerof2(blockSize)) {
 359                 if (HFS_MOUNT_DEBUG) {
 360                         printf("hfs_mounthfsplus: invalid blocksize (%d) \n", blockSize);
 361                 }
 362                 return (EINVAL);
 363         }
 364
 365         /* don't mount a writable volume if its dirty, it must be cleaned by fsck_hfs */
 366         if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0 && hfsmp->jnl == NULL &&
 367             (SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) == 0) {
 368                 if (HFS_MOUNT_DEBUG) {
 369                         printf("hfs_mounthfsplus: cannot mount dirty non-journaled volumes\n");
 370                 }
 371                 return (EINVAL);
 372         }
 373
 374         /* Make sure we can live with the physical block size. */
 375         if ((disksize & (hfsmp->hfs_logical_block_size - 1)) ||
 376             (embeddedOffset & (hfsmp->hfs_logical_block_size - 1)) ||
 377             (blockSize < hfsmp->hfs_logical_block_size)) {
 378                 if (HFS_MOUNT_DEBUG) {
 379                         printf("hfs_mounthfsplus: invalid physical blocksize (%d), hfs_logical_blocksize (%d) \n",
 380                                         blockSize, hfsmp->hfs_logical_block_size);
 381                 }
 382                 return (ENXIO);
 383         }
 384
 385         /* If allocation block size is less than the physical
 386          * block size, we assume that the physical block size
 387          * is same as logical block size.  The physical block
 388          * size value is used to round down the offsets for
 389          * reading and writing the primary and alternate volume
 390          * headers at physical block boundary and will cause
 391          * problems if it is less than the block size.
 392          */
 393         if (blockSize < hfsmp->hfs_physical_block_size) {
 394                 hfsmp->hfs_physical_block_size = hfsmp->hfs_logical_block_size;
 395                 hfsmp->hfs_log_per_phys = 1;
 396         }
 397
 398         /*
 399          * The VolumeHeader seems OK: transfer info from it into VCB
 400          * Note - the VCB starts out clear (all zeros)
 401          */
 402         vcb = HFSTOVCB(hfsmp);
 403
 404         vcb->vcbSigWord = signature;
 405         vcb->vcbJinfoBlock = SWAP_BE32(vhp->journalInfoBlock);
 406         vcb->vcbLsMod   = to_bsd_time(SWAP_BE32(vhp->modifyDate));
 407         vcb->vcbAtrb    = SWAP_BE32(vhp->attributes);
 408         vcb->vcbClpSiz  = SWAP_BE32(vhp->rsrcClumpSize);
 409         vcb->vcbNxtCNID = SWAP_BE32(vhp->nextCatalogID);
 410         vcb->vcbVolBkUp = to_bsd_time(SWAP_BE32(vhp->backupDate));
 411         vcb->vcbWrCnt   = SWAP_BE32(vhp->writeCount);
 412         vcb->vcbFilCnt  = SWAP_BE32(vhp->fileCount);
 413         vcb->vcbDirCnt  = SWAP_BE32(vhp->folderCount);
 414
 415         /* copy 32 bytes of Finder info */
 416         bcopy(vhp->finderInfo, vcb->vcbFndrInfo, sizeof(vhp->finderInfo));
 417
 418         vcb->vcbAlBlSt = 0;             /* hfs+ allocation blocks start at first block of volume */
 419         if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
 420                 vcb->vcbWrCnt++;        /* compensate for write of Volume Header on last flush */
 421
 422         /* Now fill in the Extended VCB info */
 423         vcb->nextAllocation     = SWAP_BE32(vhp->nextAllocation);
 424         vcb->totalBlocks        = SWAP_BE32(vhp->totalBlocks);
 425         vcb->allocLimit         = vcb->totalBlocks;
 426         vcb->freeBlocks         = SWAP_BE32(vhp->freeBlocks);
 427         vcb->blockSize          = blockSize;
 428         vcb->encodingsBitmap    = SWAP_BE64(vhp->encodingsBitmap);
 429         vcb->localCreateDate    = SWAP_BE32(vhp->createDate);
 430
 431         vcb->hfsPlusIOPosOffset = embeddedOffset;
 432
 433         /* Default to no free block reserve */
 434         vcb->reserveBlocks = 0;
 435
 436         /*
 437          * Update the logical block size in the mount struct
 438          * (currently set up from the wrapper MDB) using the
 439          * new blocksize value:
 440          */
 441         hfsmp->hfs_logBlockSize = BestBlockSizeFit(vcb->blockSize, MAXBSIZE, hfsmp->hfs_logical_block_size);
 442         vcb->vcbVBMIOSize = min(vcb->blockSize, MAXPHYSIO);
 443
 444         /*
 445          * Validate and initialize the location of the alternate volume header.
 446          */
 447         spare_sectors = hfsmp->hfs_logical_block_count -
 448                         (((daddr64_t)vcb->totalBlocks * blockSize) /
 449                            hfsmp->hfs_logical_block_size);
 450
 451         if (spare_sectors > (daddr64_t)(blockSize / hfsmp->hfs_logical_block_size)) {
 452                 hfsmp->hfs_alt_id_sector = 0;  /* partition has grown! */
 453         } else {
 454                 hfsmp->hfs_alt_id_sector = (hfsmp->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
 455                                            HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size,
 456                                                           hfsmp->hfs_logical_block_count);
 457         }
 458
 459         bzero(&cndesc, sizeof(cndesc));
 460         cndesc.cd_parentcnid = kHFSRootParentID;
 461         cndesc.cd_flags |= CD_ISMETA;
 462         bzero(&cnattr, sizeof(cnattr));
 463         cnattr.ca_linkcount = 1;
 464         cnattr.ca_mode = S_IFREG;
 465
 466         /*
 467          * Set up Extents B-tree vnode
 468          */
 469         cndesc.cd_nameptr = hfs_extname;
 470         cndesc.cd_namelen = strlen((char *)hfs_extname);
 471         cndesc.cd_cnid = cnattr.ca_fileid = kHFSExtentsFileID;
 472
 473         cfork.cf_size    = SWAP_BE64 (vhp->extentsFile.logicalSize);
 474         cfork.cf_new_size= 0;
 475         cfork.cf_clump   = SWAP_BE32 (vhp->extentsFile.clumpSize);
 476         cfork.cf_blocks  = SWAP_BE32 (vhp->extentsFile.totalBlocks);
 477         cfork.cf_vblocks = 0;
 478         cnattr.ca_blocks = cfork.cf_blocks;
 479         for (i = 0; i < kHFSPlusExtentDensity; i++) {
 480                 cfork.cf_extents[i].startBlock =
 481                                 SWAP_BE32 (vhp->extentsFile.extents[i].startBlock);
 482                 cfork.cf_extents[i].blockCount =
 483                                 SWAP_BE32 (vhp->extentsFile.extents[i].blockCount);
 484         }
 485         retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
 486                                  &hfsmp->hfs_extents_vp, &newvnode_flags);
 487         if (retval)
 488         {
 489                 if (HFS_MOUNT_DEBUG) {
 490                         printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting extentoverflow BT\n", retval);
 491                 }
 492                 goto ErrorExit;
 493         }
 494         hfsmp->hfs_extents_cp = VTOC(hfsmp->hfs_extents_vp);
 495         hfs_unlock(hfsmp->hfs_extents_cp);
 496
 497         retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_extents_vp),
 498                                           (KeyCompareProcPtr) CompareExtentKeysPlus));
 499         if (retval)
 500         {
 501                 if (HFS_MOUNT_DEBUG) {
 502                         printf("hfs_mounthfsplus: BTOpenPath returned (%d) getting extentoverflow BT\n", retval);
 503                 }
 504                 goto ErrorExit;
 505         }
 506         /*
 507          * Set up Catalog B-tree vnode
 508          */
 509         cndesc.cd_nameptr = hfs_catname;
 510         cndesc.cd_namelen = strlen((char *)hfs_catname);
 511         cndesc.cd_cnid = cnattr.ca_fileid = kHFSCatalogFileID;
 512
 513         cfork.cf_size    = SWAP_BE64 (vhp->catalogFile.logicalSize);
 514         cfork.cf_clump   = SWAP_BE32 (vhp->catalogFile.clumpSize);
 515         cfork.cf_blocks  = SWAP_BE32 (vhp->catalogFile.totalBlocks);
 516         cfork.cf_vblocks = 0;
 517         cnattr.ca_blocks = cfork.cf_blocks;
 518         for (i = 0; i < kHFSPlusExtentDensity; i++) {
 519                 cfork.cf_extents[i].startBlock =
 520                                 SWAP_BE32 (vhp->catalogFile.extents[i].startBlock);
 521                 cfork.cf_extents[i].blockCount =
 522                                 SWAP_BE32 (vhp->catalogFile.extents[i].blockCount);
 523         }
 524         retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
 525                                  &hfsmp->hfs_catalog_vp, &newvnode_flags);
 526         if (retval) {
 527                 if (HFS_MOUNT_DEBUG) {
 528                         printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting catalog BT\n", retval);
 529                 }
 530                 goto ErrorExit;
 531         }
 532         hfsmp->hfs_catalog_cp = VTOC(hfsmp->hfs_catalog_vp);
 533         hfs_unlock(hfsmp->hfs_catalog_cp);
 534
 535         retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_catalog_vp),
 536                                           (KeyCompareProcPtr) CompareExtendedCatalogKeys));
 537         if (retval) {
 538                 if (HFS_MOUNT_DEBUG) {
 539                         printf("hfs_mounthfsplus: BTOpenPath returned (%d) getting catalog BT\n", retval);
 540                 }
 541                 goto ErrorExit;
 542         }
 543         if ((hfsmp->hfs_flags & HFS_X) &&
 544             BTGetInformation(VTOF(hfsmp->hfs_catalog_vp), 0, &btinfo) == 0) {
 545                 if (btinfo.keyCompareType == kHFSBinaryCompare) {
 546                         hfsmp->hfs_flags |= HFS_CASE_SENSITIVE;
 547                         /* Install a case-sensitive key compare */
 548                         (void) BTOpenPath(VTOF(hfsmp->hfs_catalog_vp),
 549                                           (KeyCompareProcPtr)cat_binarykeycompare);
 550                 }
 551         }
 552
 553         /*
 554          * Set up Allocation file vnode
 555          */
 556         cndesc.cd_nameptr = hfs_vbmname;
 557         cndesc.cd_namelen = strlen((char *)hfs_vbmname);
 558         cndesc.cd_cnid = cnattr.ca_fileid = kHFSAllocationFileID;
 559
 560         cfork.cf_size    = SWAP_BE64 (vhp->allocationFile.logicalSize);
 561         cfork.cf_clump   = SWAP_BE32 (vhp->allocationFile.clumpSize);
 562         cfork.cf_blocks  = SWAP_BE32 (vhp->allocationFile.totalBlocks);
 563         cfork.cf_vblocks = 0;
 564         cnattr.ca_blocks = cfork.cf_blocks;
 565         for (i = 0; i < kHFSPlusExtentDensity; i++) {
 566                 cfork.cf_extents[i].startBlock =
 567                                 SWAP_BE32 (vhp->allocationFile.extents[i].startBlock);
 568                 cfork.cf_extents[i].blockCount =
 569                                 SWAP_BE32 (vhp->allocationFile.extents[i].blockCount);
 570         }
 571         retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
 572                                  &hfsmp->hfs_allocation_vp, &newvnode_flags);
 573         if (retval) {
 574                 if (HFS_MOUNT_DEBUG) {
 575                         printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting bitmap\n", retval);
 576                 }
 577                 goto ErrorExit;
 578         }
 579         hfsmp->hfs_allocation_cp = VTOC(hfsmp->hfs_allocation_vp);
 580         hfs_unlock(hfsmp->hfs_allocation_cp);
 581
 582         /*
 583          * Set up Attribute B-tree vnode
 584          */
 585         if (vhp->attributesFile.totalBlocks != 0) {
 586                 cndesc.cd_nameptr = hfs_attrname;
 587                 cndesc.cd_namelen = strlen((char *)hfs_attrname);
 588                 cndesc.cd_cnid = cnattr.ca_fileid = kHFSAttributesFileID;
 589
 590                 cfork.cf_size    = SWAP_BE64 (vhp->attributesFile.logicalSize);
 591                 cfork.cf_clump   = SWAP_BE32 (vhp->attributesFile.clumpSize);
 592                 cfork.cf_blocks  = SWAP_BE32 (vhp->attributesFile.totalBlocks);
 593                 cfork.cf_vblocks = 0;
 594                 cnattr.ca_blocks = cfork.cf_blocks;
 595                 for (i = 0; i < kHFSPlusExtentDensity; i++) {
 596                         cfork.cf_extents[i].startBlock =
 597                                         SWAP_BE32 (vhp->attributesFile.extents[i].startBlock);
 598                         cfork.cf_extents[i].blockCount =
 599                                         SWAP_BE32 (vhp->attributesFile.extents[i].blockCount);
 600                 }
 601                 retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
 602                                          &hfsmp->hfs_attribute_vp, &newvnode_flags);
 603                 if (retval) {
 604                         if (HFS_MOUNT_DEBUG) {
 605                                 printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting EA BT\n", retval);
 606                         }
 607                         goto ErrorExit;
 608                 }
 609                 hfsmp->hfs_attribute_cp = VTOC(hfsmp->hfs_attribute_vp);
 610                 hfs_unlock(hfsmp->hfs_attribute_cp);
 611                 retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_attribute_vp),
 612                                                   (KeyCompareProcPtr) hfs_attrkeycompare));
 613                 if (retval) {
 614                         if (HFS_MOUNT_DEBUG) {
 615                                 printf("hfs_mounthfsplus: BTOpenPath returned (%d) getting EA BT\n", retval);
 616                         }
 617                         goto ErrorExit;
 618                 }
 619
 620                 /* Initialize vnode for virtual attribute data file that spans the
 621                  * entire file system space for performing I/O to attribute btree
 622                  * We hold iocount on the attrdata vnode for the entire duration
 623                  * of mount (similar to btree vnodes)
 624                  */
 625                 retval = init_attrdata_vnode(hfsmp);
 626                 if (retval) {
 627                         if (HFS_MOUNT_DEBUG) {
 628                                 printf("hfs_mounthfsplus: init_attrdata_vnode returned (%d) for virtual EA file\n", retval);
 629                         }
 630                         goto ErrorExit;
 631                 }
 632         }
 633
 634         /*
 635          * Set up Startup file vnode
 636          */
 637         if (vhp->startupFile.totalBlocks != 0) {
 638                 cndesc.cd_nameptr = hfs_startupname;
 639                 cndesc.cd_namelen = strlen((char *)hfs_startupname);
 640                 cndesc.cd_cnid = cnattr.ca_fileid = kHFSStartupFileID;
 641
 642                 cfork.cf_size    = SWAP_BE64 (vhp->startupFile.logicalSize);
 643                 cfork.cf_clump   = SWAP_BE32 (vhp->startupFile.clumpSize);
 644                 cfork.cf_blocks  = SWAP_BE32 (vhp->startupFile.totalBlocks);
 645                 cfork.cf_vblocks = 0;
 646                 cnattr.ca_blocks = cfork.cf_blocks;
 647                 for (i = 0; i < kHFSPlusExtentDensity; i++) {
 648                         cfork.cf_extents[i].startBlock =
 649                                         SWAP_BE32 (vhp->startupFile.extents[i].startBlock);
 650                         cfork.cf_extents[i].blockCount =
 651                                         SWAP_BE32 (vhp->startupFile.extents[i].blockCount);
 652                 }
 653                 retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
 654                                          &hfsmp->hfs_startup_vp, &newvnode_flags);
 655                 if (retval) {
 656                         if (HFS_MOUNT_DEBUG) {
 657                                 printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting startup file\n", retval);
 658                         }
 659                         goto ErrorExit;
 660                 }
 661                 hfsmp->hfs_startup_cp = VTOC(hfsmp->hfs_startup_vp);
 662                 hfs_unlock(hfsmp->hfs_startup_cp);
 663         }
 664
 665         /* Pick up volume name and create date */
 666         retval = cat_idlookup(hfsmp, kHFSRootFolderID, 0, 0, &cndesc, &cnattr, NULL);
 667         if (retval) {
 668                 if (HFS_MOUNT_DEBUG) {
 669                         printf("hfs_mounthfsplus: cat_idlookup returned (%d) getting rootfolder \n", retval);
 670                 }
 671                 goto ErrorExit;
 672         }
 673         vcb->hfs_itime = cnattr.ca_itime;
 674         vcb->volumeNameEncodingHint = cndesc.cd_encoding;
 675         bcopy(cndesc.cd_nameptr, vcb->vcbVN, min(255, cndesc.cd_namelen));
 676         volname_length = strlen ((const char*)vcb->vcbVN);
 677         cat_releasedesc(&cndesc);
 678
 679 #define DKIOCCSSETLVNAME _IOW('d', 198, char[256])
 680
 681
 682         /* Send the volume name down to CoreStorage if necessary */
 683         retval = utf8_normalizestr(vcb->vcbVN, volname_length, (u_int8_t*)converted_volname, &conv_volname_length, 256, UTF_PRECOMPOSED);
 684         if (retval == 0) {
 685                 (void) VNOP_IOCTL (hfsmp->hfs_devvp, DKIOCCSSETLVNAME, converted_volname, 0, vfs_context_current());
 686         }
 687
 688         /* reset retval == 0. we don't care about errors in volname conversion */
 689         retval = 0;
 690
 691         /* mark the volume dirty (clear clean unmount bit) */
 692         vcb->vcbAtrb &= ~kHFSVolumeUnmountedMask;
 693         if (hfsmp->jnl && (hfsmp->hfs_flags & HFS_READ_ONLY) == 0) {
 694                 hfs_flushvolumeheader(hfsmp, TRUE, 0);
 695         }
 696
 697         /* kHFSHasFolderCount is only supported/updated on HFSX volumes */
 698         if ((hfsmp->hfs_flags & HFS_X) != 0) {
 699                 hfsmp->hfs_flags |= HFS_FOLDERCOUNT;
 700         }
 701
 702         //
 703         // Check if we need to do late journal initialization.  This only
 704         // happens if a previous version of MacOS X (or 9) touched the disk.
 705         // In that case hfs_late_journal_init() will go re-locate the journal
 706         // and journal_info_block files and validate that they're still kosher.
 707         //
 708         if (   (vcb->vcbAtrb & kHFSVolumeJournaledMask)
 709                 && (SWAP_BE32(vhp->lastMountedVersion) != kHFSJMountVersion)
 710                 && (hfsmp->jnl == NULL)) {
 711
 712                 retval = hfs_late_journal_init(hfsmp, vhp, args);
 713                 if (retval != 0) {
 714                         if (retval == EROFS) {
 715                                 // EROFS is a special error code that means the volume has an external
 716                                 // journal which we couldn't find.  in that case we do not want to
 717                                 // rewrite the volume header - we'll just refuse to mount the volume.
 718                                 if (HFS_MOUNT_DEBUG) {
 719                                         printf("hfs_mounthfsplus: hfs_late_journal_init returned (%d), maybe an external jnl?\n", retval);
 720                                 }
 721                                 retval = EINVAL;
 722                                 goto ErrorExit;
 723                         }
 724
 725                         hfsmp->jnl = NULL;
 726
 727                         // if the journal failed to open, then set the lastMountedVersion
 728                         // to be "FSK!" which fsck_hfs will see and force the fsck instead
 729                         // of just bailing out because the volume is journaled.
 730                         if (!(hfsmp->hfs_flags & HFS_READ_ONLY)) {
 731                                 HFSPlusVolumeHeader *jvhp;
 732                                 daddr64_t mdb_offset;
 733                                 struct buf *bp = NULL;
 734
 735                                 hfsmp->hfs_flags |= HFS_NEED_JNL_RESET;
 736
 737                                 mdb_offset = (daddr64_t)((embeddedOffset / blockSize) + HFS_PRI_SECTOR(blockSize));
 738
 739                                 bp = NULL;
 740                                 retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
 741                                                 HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
 742                                                 hfsmp->hfs_physical_block_size, cred, &bp);
 743                                 if (retval == 0) {
 744                                         jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));
 745
 746                                         if (SWAP_BE16(jvhp->signature) == kHFSPlusSigWord || SWAP_BE16(jvhp->signature) == kHFSXSigWord) {
 747                                                 printf ("hfs(3): Journal replay fail.  Writing lastMountVersion as FSK!\n");
 748                                                 jvhp->lastMountedVersion = SWAP_BE32(kFSKMountVersion);
 749                                                 buf_bwrite(bp);
 750                                         } else {
 751                                                 buf_brelse(bp);
 752                                         }
 753                                         bp = NULL;
 754                                 } else if (bp) {
 755                                         buf_brelse(bp);
 756                                         // clear this so the error exit path won't try to use it
 757                                         bp = NULL;
 758                             }
 759                         }
 760
 761                         if (HFS_MOUNT_DEBUG) {
 762                                 printf("hfs_mounthfsplus: hfs_late_journal_init returned (%d)\n", retval);
 763                         }
 764                         retval = EINVAL;
 765                         goto ErrorExit;
 766                 } else if (hfsmp->jnl) {
 767                         vfs_setflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
 768                 }
 769         } else if (hfsmp->jnl || ((vcb->vcbAtrb & kHFSVolumeJournaledMask) && (hfsmp->hfs_flags & HFS_READ_ONLY))) {
 770                 struct cat_attr jinfo_attr, jnl_attr;
 771
 772                 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
 773                     vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
 774                 }
 775
 776                 // if we're here we need to fill in the fileid's for the
 777                 // journal and journal_info_block.
 778                 hfsmp->hfs_jnlinfoblkid = GetFileInfo(vcb, kRootDirID, ".journal_info_block", &jinfo_attr, NULL);
 779                 hfsmp->hfs_jnlfileid    = GetFileInfo(vcb, kRootDirID, ".journal", &jnl_attr, NULL);
 780                 if (hfsmp->hfs_jnlinfoblkid == 0 || hfsmp->hfs_jnlfileid == 0) {
 781                         printf("hfs: danger! couldn't find the file-id's for the journal or journal_info_block\n");
 782                         printf("hfs: jnlfileid %d, jnlinfoblkid %d\n", hfsmp->hfs_jnlfileid, hfsmp->hfs_jnlinfoblkid);
 783                 }
 784
 785                 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
 786                     vcb->vcbAtrb |= kHFSVolumeJournaledMask;
 787                 }
 788
 789                 if (hfsmp->jnl == NULL) {
 790                     vfs_clearflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
 791                 }
 792         }
 793
 794         /*
 795          * Establish a metadata allocation zone.
 796          */
 797         hfs_metadatazone_init(hfsmp, false);
 798
 799         /*
 800          * Make any metadata zone adjustments.
 801          */
 802         if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
 803                 /* Keep the roving allocator out of the metadata zone. */
 804                 if (vcb->nextAllocation >= hfsmp->hfs_metazone_start &&
 805                     vcb->nextAllocation <= hfsmp->hfs_metazone_end) {
 806                         HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_end + 1);
 807                 }
 808         } else {
 809                 if (vcb->nextAllocation <= 1) {
 810                         vcb->nextAllocation = hfsmp->hfs_min_alloc_start;
 811                 }
 812         }
 813         vcb->sparseAllocation = hfsmp->hfs_min_alloc_start;
 814
 815         /* Setup private/hidden directories for hardlinks. */
 816         hfs_privatedir_init(hfsmp, FILE_HARDLINKS);
 817         hfs_privatedir_init(hfsmp, DIR_HARDLINKS);
 818
 819         if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
 820                 hfs_remove_orphans(hfsmp);
 821
 822         /* See if we need to erase unused Catalog nodes due to <rdar://problem/6947811>. */
 823         if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
 824         {
 825                 retval = hfs_erase_unused_nodes(hfsmp);
 826                 if (retval) {
 827                         if (HFS_MOUNT_DEBUG) {
 828                                 printf("hfs_mounthfsplus: hfs_erase_unused_nodes returned (%d) for %s \n", retval, hfsmp->vcbVN);
 829                         }
 830
 831                         goto ErrorExit;
 832                 }
 833         }
 834
 835         if ( !(vcb->vcbAtrb & kHFSVolumeHardwareLockMask) )     // if the disk is not write protected
 836         {
 837                 MarkVCBDirty( vcb );    // mark VCB dirty so it will be written
 838         }
 839
 840         /*
 841          * Allow hot file clustering if conditions allow.
 842          */
 843         if ((hfsmp->hfs_flags & HFS_METADATA_ZONE)  &&
 844             ((hfsmp->hfs_flags & (HFS_READ_ONLY | HFS_SSD)) == 0)) {
 845                 (void) hfs_recording_init(hfsmp);
 846         }
 847
 848         /* Force ACLs on HFS+ file systems. */
 849         vfs_setextendedsecurity(HFSTOVFS(hfsmp));
 850
 851         /* Enable extent-based extended attributes by default */
 852         hfsmp->hfs_flags |= HFS_XATTR_EXTENTS;
 853
 854         /* See if this volume should have per-file content protection enabled */
 855         if (vcb->vcbAtrb & kHFSContentProtectionMask) {
 856                 vfs_setflags (hfsmp->hfs_mp, MNT_CPROTECT);
 857         }
 858
 859         return (0);
 860
 861 ErrorExit:
 862         /*
 863          * A fatal error occurred and the volume cannot be mounted, so
 864          * release any resources that we acquired...
 865          */
 866         hfsUnmount(hfsmp, NULL);
 867
 868         if (HFS_MOUNT_DEBUG) {
 869                 printf("hfs_mounthfsplus: encountered errorr (%d)\n", retval);
 870         }
 871         return (retval);
 872 }
 873
 874
 875 /*
 876  * ReleaseMetaFileVNode
 877  *
 878  * vp   L - -
 879  */
 880 static void ReleaseMetaFileVNode(struct vnode *vp)
 881 {
 882         struct filefork *fp;
 883
 884         if (vp && (fp = VTOF(vp))) {
 885                 if (fp->fcbBTCBPtr != NULL) {
 886                         (void)hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
 887                         (void) BTClosePath(fp);
 888                         hfs_unlock(VTOC(vp));
 889                 }
 890
 891                 /* release the node even if BTClosePath fails */
 892                 vnode_recycle(vp);
 893                 vnode_put(vp);
 894         }
 895 }
 896
 897
 898 /*************************************************************
 899 *
 900 * Unmounts a hfs volume.
 901 *       At this point vflush() has been called (to dump all non-metadata files)
 902 *
 903 *************************************************************/
 904
 905 int
 906 hfsUnmount( register struct hfsmount *hfsmp, __unused struct proc *p)
 907 {
 908         /* Get rid of our attribute data vnode (if any).  This is done
 909          * after the vflush() during mount, so we don't need to worry
 910          * about any locks.
 911          */
 912         if (hfsmp->hfs_attrdata_vp) {
 913                 ReleaseMetaFileVNode(hfsmp->hfs_attrdata_vp);
 914                 hfsmp->hfs_attrdata_vp = NULLVP;
 915         }
 916
 917         if (hfsmp->hfs_startup_vp) {
 918                 ReleaseMetaFileVNode(hfsmp->hfs_startup_vp);
 919                 hfsmp->hfs_startup_cp = NULL;
 920                 hfsmp->hfs_startup_vp = NULL;
 921         }
 922
 923         if (hfsmp->hfs_attribute_vp) {
 924                 ReleaseMetaFileVNode(hfsmp->hfs_attribute_vp);
 925                 hfsmp->hfs_attribute_cp = NULL;
 926                 hfsmp->hfs_attribute_vp = NULL;
 927         }
 928
 929         if (hfsmp->hfs_catalog_vp) {
 930                 ReleaseMetaFileVNode(hfsmp->hfs_catalog_vp);
 931                 hfsmp->hfs_catalog_cp = NULL;
 932                 hfsmp->hfs_catalog_vp = NULL;
 933         }
 934
 935         if (hfsmp->hfs_extents_vp) {
 936                 ReleaseMetaFileVNode(hfsmp->hfs_extents_vp);
 937                 hfsmp->hfs_extents_cp = NULL;
 938                 hfsmp->hfs_extents_vp = NULL;
 939         }
 940
 941         if (hfsmp->hfs_allocation_vp) {
 942                 ReleaseMetaFileVNode(hfsmp->hfs_allocation_vp);
 943                 hfsmp->hfs_allocation_cp = NULL;
 944                 hfsmp->hfs_allocation_vp = NULL;
 945         }
 946
 947         return (0);
 948 }
 949
 950
 951 /*
 952  * Test if fork has overflow extents.
 953  */
 954 __private_extern__
 955 int
 956 overflow_extents(struct filefork *fp)
 957 {
 958         u_int32_t blocks;
 959
 960         //
 961         // If the vnode pointer is NULL then we're being called
 962         // from hfs_remove_orphans() with a faked-up filefork
 963         // and therefore it has to be an HFS+ volume.  Otherwise
 964         // we check through the volume header to see what type
 965         // of volume we're on.
 966         //
 967         if (FTOV(fp) == NULL || VTOVCB(FTOV(fp))->vcbSigWord == kHFSPlusSigWord) {
 968                 if (fp->ff_extents[7].blockCount == 0)
 969                         return (0);
 970
 971                 blocks = fp->ff_extents[0].blockCount +
 972                          fp->ff_extents[1].blockCount +
 973                          fp->ff_extents[2].blockCount +
 974                          fp->ff_extents[3].blockCount +
 975                          fp->ff_extents[4].blockCount +
 976                          fp->ff_extents[5].blockCount +
 977                          fp->ff_extents[6].blockCount +
 978                          fp->ff_extents[7].blockCount;
 979         } else {
 980                 if (fp->ff_extents[2].blockCount == 0)
 981                         return false;
 982
 983                 blocks = fp->ff_extents[0].blockCount +
 984                          fp->ff_extents[1].blockCount +
 985                          fp->ff_extents[2].blockCount;
 986           }
 987
 988         return (fp->ff_blocks > blocks);
 989 }
 990
 991 /*
 992  * Lock the HFS global journal lock
 993  */
 994 int
 995 hfs_lock_global (struct hfsmount *hfsmp, enum hfslocktype locktype) {
 996
 997         void *thread = current_thread();
 998
 999         if (hfsmp->hfs_global_lockowner == thread) {
1000                 panic ("hfs_lock_global: locking against myself!");
1001         }
1002
1003     /* HFS_SHARED_LOCK */
1004         if (locktype == HFS_SHARED_LOCK) {
1005                 lck_rw_lock_shared (&hfsmp->hfs_global_lock);
1006                 hfsmp->hfs_global_lockowner = HFS_SHARED_OWNER;
1007         }
1008     /* HFS_EXCLUSIVE_LOCK */
1009         else {
1010                 lck_rw_lock_exclusive (&hfsmp->hfs_global_lock);
1011                 hfsmp->hfs_global_lockowner = thread;
1012         }
1013
1014         return 0;
1015 }
1016
1017
1018 /*
1019  * Unlock the HFS global journal lock
1020  */
1021 void
1022 hfs_unlock_global (struct hfsmount *hfsmp) {
1023
1024         void *thread = current_thread();
1025
1026     /* HFS_LOCK_EXCLUSIVE */
1027         if (hfsmp->hfs_global_lockowner == thread) {
1028                 hfsmp->hfs_global_lockowner = NULL;
1029                 lck_rw_unlock_exclusive (&hfsmp->hfs_global_lock);
1030         }
1031     /* HFS_LOCK_SHARED */
1032         else {
1033                 lck_rw_unlock_shared (&hfsmp->hfs_global_lock);
1034         }
1035 }
1036
1037
1038 /*
1039  * Lock HFS system file(s).
1040  */
1041 int
1042 hfs_systemfile_lock(struct hfsmount *hfsmp, int flags, enum hfslocktype locktype)
1043 {
1044         /*
1045          * Locking order is Catalog file, Attributes file, Startup file, Bitmap file, Extents file
1046          */
1047         if (flags & SFL_CATALOG) {
1048
1049 #ifdef HFS_CHECK_LOCK_ORDER
1050                 if (hfsmp->hfs_attribute_cp && hfsmp->hfs_attribute_cp->c_lockowner == current_thread()) {
1051                         panic("hfs_systemfile_lock: bad lock order (Attributes before Catalog)");
1052                 }
1053                 if (hfsmp->hfs_startup_cp && hfsmp->hfs_startup_cp->c_lockowner == current_thread()) {
1054                         panic("hfs_systemfile_lock: bad lock order (Startup before Catalog)");
1055                 }
1056                 if (hfsmp-> hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) {
1057                         panic("hfs_systemfile_lock: bad lock order (Extents before Catalog)");
1058                 }
1059 #endif /* HFS_CHECK_LOCK_ORDER */
1060
1061                 if (hfsmp->hfs_catalog_cp) {
1062                         (void) hfs_lock(hfsmp->hfs_catalog_cp, locktype);
1063                 } else {
1064                         flags &= ~SFL_CATALOG;
1065                 }
1066
1067                 /*
1068                  * When the catalog file has overflow extents then
1069                  * also acquire the extents b-tree lock if its not
1070                  * already requested.
1071                  */
1072                 if ((flags & SFL_EXTENTS) == 0 &&
1073                     overflow_extents(VTOF(hfsmp->hfs_catalog_vp))) {
1074                         flags |= SFL_EXTENTS;
1075                 }
1076         }
1077         if (flags & SFL_ATTRIBUTE) {
1078
1079 #ifdef HFS_CHECK_LOCK_ORDER
1080                 if (hfsmp->hfs_startup_cp && hfsmp->hfs_startup_cp->c_lockowner == current_thread()) {
1081                         panic("hfs_systemfile_lock: bad lock order (Startup before Attributes)");
1082                 }
1083                 if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) {
1084                         panic("hfs_systemfile_lock: bad lock order (Extents before Attributes)");
1085                 }
1086 #endif /* HFS_CHECK_LOCK_ORDER */
1087
1088                 if (hfsmp->hfs_attribute_cp) {
1089                         (void) hfs_lock(hfsmp->hfs_attribute_cp, locktype);
1090                         /*
1091                          * When the attribute file has overflow extents then
1092                          * also acquire the extents b-tree lock if its not
1093                          * already requested.
1094                          */
1095                         if ((flags & SFL_EXTENTS) == 0 &&
1096                             overflow_extents(VTOF(hfsmp->hfs_attribute_vp))) {
1097                                 flags |= SFL_EXTENTS;
1098                         }
1099                 } else {
1100                         flags &= ~SFL_ATTRIBUTE;
1101                 }
1102         }
1103         if (flags & SFL_STARTUP) {
1104 #ifdef HFS_CHECK_LOCK_ORDER
1105                 if (hfsmp-> hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) {
1106                         panic("hfs_systemfile_lock: bad lock order (Extents before Startup)");
1107                 }
1108 #endif /* HFS_CHECK_LOCK_ORDER */
1109
1110                 if (hfsmp->hfs_startup_cp) {
1111                         (void) hfs_lock(hfsmp->hfs_startup_cp, locktype);
1112                 } else {
1113                         flags &= ~SFL_STARTUP;
1114                 }
1115
1116                 /*
1117                  * When the startup file has overflow extents then
1118                  * also acquire the extents b-tree lock if its not
1119                  * already requested.
1120                  */
1121                 if ((flags & SFL_EXTENTS) == 0 &&
1122                     overflow_extents(VTOF(hfsmp->hfs_startup_vp))) {
1123                         flags |= SFL_EXTENTS;
1124                 }
1125         }
1126         /*
1127          * To prevent locks being taken in the wrong order, the extent lock
1128          * gets a bitmap lock as well.
1129          */
1130         if (flags & (SFL_BITMAP | SFL_EXTENTS)) {
1131                 /*
1132                  * If there's no bitmap cnode, ignore the bitmap lock.
1133                  */
1134                 if (hfsmp->hfs_allocation_cp == NULL) {
1135                         flags &= ~SFL_BITMAP;
1136                 } else {
1137                         (void) hfs_lock(hfsmp->hfs_allocation_cp, HFS_EXCLUSIVE_LOCK);
1138                         /*
1139                          * The bitmap lock is also grabbed when only extent lock
1140                          * was requested. Set the bitmap lock bit in the lock
1141                          * flags which callers will use during unlock.
1142                          */
1143                         flags |= SFL_BITMAP;
1144                 }
1145         }
1146         if (flags & SFL_EXTENTS) {
1147                 /*
1148                  * Since the extents btree lock is recursive we always
1149                  * need exclusive access.
1150                  */
1151                 if (hfsmp->hfs_extents_cp) {
1152                         (void) hfs_lock(hfsmp->hfs_extents_cp, HFS_EXCLUSIVE_LOCK);
1153                 } else {
1154                         flags &= ~SFL_EXTENTS;
1155                 }
1156         }
1157         return (flags);
1158 }
1159
1160 /*
1161  * unlock HFS system file(s).
1162  */
1163 void
1164 hfs_systemfile_unlock(struct hfsmount *hfsmp, int flags)
1165 {
1166         struct timeval tv;
1167         u_int32_t lastfsync;
1168         int numOfLockedBuffs;
1169
1170         if (hfsmp->jnl == NULL) {
1171                 microuptime(&tv);
1172                 lastfsync = tv.tv_sec;
1173         }
1174         if (flags & SFL_STARTUP && hfsmp->hfs_startup_cp) {
1175                 hfs_unlock(hfsmp->hfs_startup_cp);
1176         }
1177         if (flags & SFL_ATTRIBUTE && hfsmp->hfs_attribute_cp) {
1178                 if (hfsmp->jnl == NULL) {
1179                         BTGetLastSync((FCB*)VTOF(hfsmp->hfs_attribute_vp), &lastfsync);
1180                         numOfLockedBuffs = count_lock_queue();
1181                         if ((numOfLockedBuffs > kMaxLockedMetaBuffers) ||
1182                             ((numOfLockedBuffs > 1) && ((tv.tv_sec - lastfsync) >
1183                               kMaxSecsForFsync))) {
1184                                 hfs_btsync(hfsmp->hfs_attribute_vp, HFS_SYNCTRANS);
1185                         }
1186                 }
1187                 hfs_unlock(hfsmp->hfs_attribute_cp);
1188         }
1189         if (flags & SFL_CATALOG && hfsmp->hfs_catalog_cp) {
1190                 if (hfsmp->jnl == NULL) {
1191                         BTGetLastSync((FCB*)VTOF(hfsmp->hfs_catalog_vp), &lastfsync);
1192                         numOfLockedBuffs = count_lock_queue();
1193                         if ((numOfLockedBuffs > kMaxLockedMetaBuffers) ||
1194                             ((numOfLockedBuffs > 1) && ((tv.tv_sec - lastfsync) >
1195                               kMaxSecsForFsync))) {
1196                                 hfs_btsync(hfsmp->hfs_catalog_vp, HFS_SYNCTRANS);
1197                         }
1198                 }
1199                 hfs_unlock(hfsmp->hfs_catalog_cp);
1200         }
1201         if (flags & SFL_BITMAP && hfsmp->hfs_allocation_cp) {
1202                 hfs_unlock(hfsmp->hfs_allocation_cp);
1203         }
1204         if (flags & SFL_EXTENTS && hfsmp->hfs_extents_cp) {
1205                 if (hfsmp->jnl == NULL) {
1206                         BTGetLastSync((FCB*)VTOF(hfsmp->hfs_extents_vp), &lastfsync);
1207                         numOfLockedBuffs = count_lock_queue();
1208                         if ((numOfLockedBuffs > kMaxLockedMetaBuffers) ||
1209                             ((numOfLockedBuffs > 1) && ((tv.tv_sec - lastfsync) >
1210                               kMaxSecsForFsync))) {
1211                                 hfs_btsync(hfsmp->hfs_extents_vp, HFS_SYNCTRANS);
1212                         }
1213                 }
1214                 hfs_unlock(hfsmp->hfs_extents_cp);
1215         }
1216 }
1217
1218
/*
 * RequireFileLock
 *
 * Check to see if a vnode is locked in the current context.
 * This is to be used for debugging purposes only!!
 *
 * vp        - vnode of the file to check.
 * shareable - non-zero if the caller accepts a shared lock; forced to
 *             zero for the extents btree and the allocation bitmap,
 *             which are always locked exclusively.
 *
 * "Locked" here means the cnode's c_lockowner is the current thread.
 * When the file is not locked and a shared lock is not acceptable, the
 * function panics with a message naming the system file involved.  File
 * ids other than the five system files handled below are ignored.
 */
#if HFS_DIAGNOSTIC
void RequireFileLock(FileReference vp, int shareable)
{
        int locked;

        /* The extents btree and allocation bitmap are always exclusive. */
        if (VTOC(vp)->c_fileid == kHFSExtentsFileID ||
            VTOC(vp)->c_fileid == kHFSAllocationFileID) {
                shareable = 0;
        }

        locked = VTOC(vp)->c_lockowner == (void *)current_thread();

        if (!locked && !shareable) {
                /* The vnode is printed with %p so it is not truncated on LP64. */
                switch (VTOC(vp)->c_fileid) {
                case kHFSExtentsFileID:
                        panic("hfs: extents btree not locked! v: %p\n #\n", (void *)vp);
                        break;
                case kHFSCatalogFileID:
                        panic("hfs: catalog btree not locked! v: %p\n #\n", (void *)vp);
                        break;
                case kHFSAllocationFileID:
                        /* The allocation file can hide behind the journal lock. */
                        if (VTOHFS(vp)->jnl == NULL)
                                panic("hfs: allocation file not locked! v: %p\n #\n", (void *)vp);
                        break;
                case kHFSStartupFileID:
                        panic("hfs: startup file not locked! v: %p\n #\n", (void *)vp);
                        break;
                case kHFSAttributesFileID:
                        panic("hfs: attributes btree not locked! v: %p\n #\n", (void *)vp);
                        break;
                default:
                        /* Not a system file this check knows about. */
                        break;
                }
        }
}
#endif
1260
1261
1262 /*
1263  * There are three ways to qualify for ownership rights on an object:
1264  *
1265  * 1. (a) Your UID matches the cnode's UID.
1266  *    (b) The object in question is owned by "unknown"
1267  * 2. (a) Permissions on the filesystem are being ignored and
1268  *        your UID matches the replacement UID.
1269  *    (b) Permissions on the filesystem are being ignored and
1270  *        the replacement UID is "unknown".
1271  * 3. You are root.
1272  *
1273  */
1274 int
1275 hfs_owner_rights(struct hfsmount *hfsmp, uid_t cnode_uid, kauth_cred_t cred,
1276                 __unused struct proc *p, int invokesuperuserstatus)
1277 {
1278         if ((kauth_cred_getuid(cred) == cnode_uid) ||                                    /* [1a] */
1279             (cnode_uid == UNKNOWNUID) ||                                                                          /* [1b] */
1280             ((((unsigned int)vfs_flags(HFSTOVFS(hfsmp))) & MNT_UNKNOWNPERMISSIONS) &&          /* [2] */
1281               ((kauth_cred_getuid(cred) == hfsmp->hfs_uid) ||                            /* [2a] */
1282                 (hfsmp->hfs_uid == UNKNOWNUID))) ||                           /* [2b] */
1283             (invokesuperuserstatus && (suser(cred, 0) == 0))) {    /* [3] */
1284                 return (0);
1285         } else {
1286                 return (EPERM);
1287         }
1288 }
1289
1290
/*
 * BestBlockSizeFit
 *
 * Choose a logical block size for a volume with the given allocation
 * block size.
 *
 * allocationBlockSize - size of one allocation block, in bytes.
 * blockSizeLimit      - largest block size the caller will accept.
 * baseMultiple        - the result must be a multiple of this value.
 *
 * Returns:
 *   - 512 if baseMultiple is zero or allocationBlockSize is not a
 *     multiple of baseMultiple (no even division can then be a multiple
 *     either; "hope for the best");
 *   - PAGE_SIZE if allocationBlockSize is a multiple of PAGE_SIZE; this
 *     shortcut is taken before blockSizeLimit is consulted;
 *   - otherwise the largest multiple of baseMultiple that divides
 *     allocationBlockSize evenly and does not exceed blockSizeLimit;
 *   - 512 if no such size exists (e.g. baseMultiple > blockSizeLimit).
 */
u_int32_t BestBlockSizeFit(u_int32_t allocationBlockSize,
                               u_int32_t blockSizeLimit,
                               u_int32_t baseMultiple) {
    u_int32_t baseBlockCount, blockCount;
    u_int32_t trialBlockSize;

    /* A zero base would make the modulo and division below undefined. */
    if (baseMultiple == 0) {
        return 512;
    }

    if (allocationBlockSize % baseMultiple != 0) {
        /*
           Whoops: the allocation blocks aren't even multiples of the specified base:
           no amount of dividing them into even parts will be a multiple, either then!
        */
        return 512;             /* Hope for the best */
    }

    /* Try the obvious winner first, to prevent 12K allocation blocks, for instance,
       from being handled as two 6K logical blocks instead of 3 4K logical blocks.
       Even though the former (the result of the loop below) is the larger allocation
       block size, the latter is more efficient: */
    if (allocationBlockSize % PAGE_SIZE == 0) return PAGE_SIZE;

    /* No clear winner exists: pick the largest even fraction <= blockSizeLimit: */
    baseBlockCount = allocationBlockSize / baseMultiple;        /* Exact: checked above */

    for (blockCount = baseBlockCount; blockCount > 0; --blockCount) {
        /* Always a multiple of baseMultiple by construction, and never
           larger than allocationBlockSize, so it cannot overflow. */
        trialBlockSize = blockCount * baseMultiple;
        if ((trialBlockSize <= blockSizeLimit) &&
            (allocationBlockSize % trialBlockSize == 0)) {
            return trialBlockSize;
        }
    }

    /* Reached when even a single baseMultiple is larger than blockSizeLimit. */
    return 512;
}
1332
1333
1334 u_int32_t
1335 GetFileInfo(ExtendedVCB *vcb, __unused u_int32_t dirid, const char *name,
1336                         struct cat_attr *fattr, struct cat_fork *forkinfo)
1337 {
1338         struct hfsmount * hfsmp;
1339         struct cat_desc jdesc;
1340         int lockflags;
1341         int error;
1342
1343         if (vcb->vcbSigWord != kHFSPlusSigWord)
1344                 return (0);
1345
1346         hfsmp = VCBTOHFS(vcb);
1347
1348         memset(&jdesc, 0, sizeof(struct cat_desc));
1349         jdesc.cd_parentcnid = kRootDirID;
1350         jdesc.cd_nameptr = (const u_int8_t *)name;
1351         jdesc.cd_namelen = strlen(name);
1352
1353         lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
1354         error = cat_lookup(hfsmp, &jdesc, 0, NULL, fattr, forkinfo, NULL);
1355         hfs_systemfile_unlock(hfsmp, lockflags);
1356
1357         if (error == 0) {
1358                 return (fattr->ca_fileid);
1359         } else if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1360                 return (0);
1361         }
1362
1363         return (0);     /* XXX what callers expect on an error */
1364 }
1365
1366
/*
 * On HFS Plus volumes, there can be orphaned files or directories.
 * These are files or directories that were unlinked while busy.
 * If the volume was not cleanly unmounted then some of these may
 * have persisted and need to be removed.
 */
1373 void
1374 hfs_remove_orphans(struct hfsmount * hfsmp)
1375 {
1376         struct BTreeIterator * iterator = NULL;
1377         struct FSBufferDescriptor btdata;
1378         struct HFSPlusCatalogFile filerec;
1379         struct HFSPlusCatalogKey * keyp;
1380         struct proc *p = current_proc();
1381         FCB *fcb;
1382         ExtendedVCB *vcb;
1383         char filename[32];
1384         char tempname[32];
1385         size_t namelen;
1386         cat_cookie_t cookie;
1387         int catlock = 0;
1388         int catreserve = 0;
1389         int started_tr = 0;
1390         int lockflags;
1391         int result;
1392         int orphaned_files = 0;
1393         int orphaned_dirs = 0;
1394
1395         bzero(&cookie, sizeof(cookie));
1396
1397         if (hfsmp->hfs_flags & HFS_CLEANED_ORPHANS)
1398                 return;
1399
1400         vcb = HFSTOVCB(hfsmp);
1401         fcb = VTOF(hfsmp->hfs_catalog_vp);
1402
1403         btdata.bufferAddress = &filerec;
1404         btdata.itemSize = sizeof(filerec);
1405         btdata.itemCount = 1;
1406
1407         MALLOC(iterator, struct BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK);
1408         bzero(iterator, sizeof(*iterator));
1409
1410         /* Build a key to "temp" */
1411         keyp = (HFSPlusCatalogKey*)&iterator->key;
1412         keyp->parentID = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid;
1413         keyp->nodeName.length = 4;  /* "temp" */
1414         keyp->keyLength = kHFSPlusCatalogKeyMinimumLength + keyp->nodeName.length * 2;
1415         keyp->nodeName.unicode[0] = 't';
1416         keyp->nodeName.unicode[1] = 'e';
1417         keyp->nodeName.unicode[2] = 'm';
1418         keyp->nodeName.unicode[3] = 'p';
1419
1420         /*
1421          * Position the iterator just before the first real temp file/dir.
1422          */
1423         lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
1424         (void) BTSearchRecord(fcb, iterator, NULL, NULL, iterator);
1425         hfs_systemfile_unlock(hfsmp, lockflags);
1426
1427         /* Visit all the temp files/dirs in the HFS+ private directory. */
1428         for (;;) {
1429                 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
1430                 result = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL);
1431                 hfs_systemfile_unlock(hfsmp, lockflags);
1432                 if (result)
1433                         break;
1434                 if (keyp->parentID != hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid)
1435                         break;
1436
1437                 (void) utf8_encodestr(keyp->nodeName.unicode, keyp->nodeName.length * 2,
1438                                       (u_int8_t *)filename, &namelen, sizeof(filename), 0, 0);
1439
1440                 (void) snprintf(tempname, sizeof(tempname), "%s%d",
1441                                 HFS_DELETE_PREFIX, filerec.fileID);
1442
1443                 /*
1444                  * Delete all files (and directories) named "tempxxx",
1445                  * where xxx is the file's cnid in decimal.
1446                  *
1447                  */
1448                 if (bcmp(tempname, filename, namelen) == 0) {
1449                         struct filefork dfork;
1450                 struct filefork rfork;
1451                         struct cnode cnode;
1452                         int mode = 0;
1453
1454                         bzero(&dfork, sizeof(dfork));
1455                         bzero(&rfork, sizeof(rfork));
1456                         bzero(&cnode, sizeof(cnode));
1457
1458                         /* Delete any attributes, ignore errors */
1459                         (void) hfs_removeallattr(hfsmp, filerec.fileID);
1460
1461                         if (hfs_start_transaction(hfsmp) != 0) {
1462                             printf("hfs_remove_orphans: failed to start transaction\n");
1463                             goto exit;
1464                         }
1465                         started_tr = 1;
1466
1467                         /*
1468                          * Reserve some space in the Catalog file.
1469                          */
1470                         if (cat_preflight(hfsmp, CAT_DELETE, &cookie, p) != 0) {
1471                             printf("hfs_remove_orphans: cat_preflight failed\n");
1472                                 goto exit;
1473                         }
1474                         catreserve = 1;
1475
1476                         lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
1477                         catlock = 1;
1478
1479                         /* Build a fake cnode */
1480                         cat_convertattr(hfsmp, (CatalogRecord *)&filerec, &cnode.c_attr,
1481                                         &dfork.ff_data, &rfork.ff_data);
1482                         cnode.c_desc.cd_parentcnid = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid;
1483                         cnode.c_desc.cd_nameptr = (const u_int8_t *)filename;
1484                         cnode.c_desc.cd_namelen = namelen;
1485                         cnode.c_desc.cd_cnid = cnode.c_attr.ca_fileid;
1486                         cnode.c_blocks = dfork.ff_blocks + rfork.ff_blocks;
1487
1488                         /* Position iterator at previous entry */
1489                         if (BTIterateRecord(fcb, kBTreePrevRecord, iterator,
1490                             NULL, NULL) != 0) {
1491                                 break;
1492                         }
1493
1494                         /* Truncate the file to zero (both forks) */
1495                         if (dfork.ff_blocks > 0) {
1496                                 u_int64_t fsize;
1497
1498                                 dfork.ff_cp = &cnode;
1499                                 cnode.c_datafork = &dfork;
1500                                 cnode.c_rsrcfork = NULL;
1501                                 fsize = (u_int64_t)dfork.ff_blocks * (u_int64_t)HFSTOVCB(hfsmp)->blockSize;
1502                                 while (fsize > 0) {
1503                                     if (fsize > HFS_BIGFILE_SIZE && overflow_extents(&dfork)) {
1504                                                 fsize -= HFS_BIGFILE_SIZE;
1505                                         } else {
1506                                                 fsize = 0;
1507                                         }
1508
1509                                         if (TruncateFileC(vcb, (FCB*)&dfork, fsize, 1, 0,
1510                                                                           cnode.c_attr.ca_fileid, false) != 0) {
1511                                                 printf("hfs: error truncating data fork!\n");
1512
1513                                                 break;
1514                                         }
1515
1516                                         //
1517                                         // if we're iteratively truncating this file down,
1518                                         // then end the transaction and start a new one so
1519                                         // that no one transaction gets too big.
1520                                         //
1521                                         if (fsize > 0 && started_tr) {
1522                                                 /* Drop system file locks before starting
1523                                                  * another transaction to preserve lock order.
1524                                                  */
1525                                                 hfs_systemfile_unlock(hfsmp, lockflags);
1526                                                 catlock = 0;
1527                                                 hfs_end_transaction(hfsmp);
1528
1529                                                 if (hfs_start_transaction(hfsmp) != 0) {
1530                                                         started_tr = 0;
1531                                                         break;
1532                                                 }
1533                                                 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
1534                                                 catlock = 1;
1535                                         }
1536                                 }
1537                         }
1538
1539                         if (rfork.ff_blocks > 0) {
1540                                 rfork.ff_cp = &cnode;
1541                                 cnode.c_datafork = NULL;
1542                                 cnode.c_rsrcfork = &rfork;
1543                                 if (TruncateFileC(vcb, (FCB*)&rfork, 0, 1, 1, cnode.c_attr.ca_fileid, false) != 0) {
1544                                         printf("hfs: error truncating rsrc fork!\n");
1545                                         break;
1546                                 }
1547                         }
1548
1549                         /* Remove the file or folder record from the Catalog */
1550                         if (cat_delete(hfsmp, &cnode.c_desc, &cnode.c_attr) != 0) {
1551                                 printf("hfs_remove_orphans: error deleting cat rec for id %d!\n", cnode.c_desc.cd_cnid);
1552                                 hfs_systemfile_unlock(hfsmp, lockflags);
1553                                 catlock = 0;
1554                                 hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1555                                 break;
1556                         }
1557
1558                         mode = cnode.c_attr.ca_mode & S_IFMT;
1559
1560                         if (mode == S_IFDIR) {
1561                                 orphaned_dirs++;
1562                         }
1563                         else {
1564                                 orphaned_files++;
1565                         }
1566
1567                         /* Update parent and volume counts */
1568                         hfsmp->hfs_private_attr[FILE_HARDLINKS].ca_entries--;
1569                         if (mode == S_IFDIR) {
1570                                 DEC_FOLDERCOUNT(hfsmp, hfsmp->hfs_private_attr[FILE_HARDLINKS]);
1571                         }
1572
1573                         (void)cat_update(hfsmp, &hfsmp->hfs_private_desc[FILE_HARDLINKS],
1574                                          &hfsmp->hfs_private_attr[FILE_HARDLINKS], NULL, NULL);
1575
1576                         /* Drop locks and end the transaction */
1577                         hfs_systemfile_unlock(hfsmp, lockflags);
1578                         cat_postflight(hfsmp, &cookie, p);
1579                         catlock = catreserve = 0;
1580
1581                         /*
1582                            Now that Catalog is unlocked, update the volume info, making
1583                            sure to differentiate between files and directories
1584                         */
1585                         if (mode == S_IFDIR) {
1586                                 hfs_volupdate(hfsmp, VOL_RMDIR, 0);
1587                         }
1588                         else{
1589                                 hfs_volupdate(hfsmp, VOL_RMFILE, 0);
1590                         }
1591
1592                         if (started_tr) {
1593                                 hfs_end_transaction(hfsmp);
1594                                 started_tr = 0;
1595                         }
1596
1597                 } /* end if */
1598         } /* end for */
1599         if (orphaned_files > 0 || orphaned_dirs > 0)
1600                 printf("hfs: Removed %d orphaned / unlinked files and %d directories \n", orphaned_files, orphaned_dirs);
1601 exit:
1602         if (catlock) {
1603                 hfs_systemfile_unlock(hfsmp, lockflags);
1604         }
1605         if (catreserve) {
1606                 cat_postflight(hfsmp, &cookie, p);
1607         }
1608         if (started_tr) {
1609                 hfs_end_transaction(hfsmp);
1610         }
1611
1612         FREE(iterator, M_TEMP);
1613         hfsmp->hfs_flags |= HFS_CLEANED_ORPHANS;
1614 }
1615
1616
1617 /*
1618  * This will return the correct logical block size for a given vnode.
1619  * For most files, it is the allocation block size, for meta data like
1620  * BTrees, this is kept as part of the BTree private nodeSize
1621  */
1622 u_int32_t
1623 GetLogicalBlockSize(struct vnode *vp)
1624 {
1625 u_int32_t logBlockSize;
1626
1627         DBG_ASSERT(vp != NULL);
1628
1629         /* start with default */
1630         logBlockSize = VTOHFS(vp)->hfs_logBlockSize;
1631
1632         if (vnode_issystem(vp)) {
1633                 if (VTOF(vp)->fcbBTCBPtr != NULL) {
1634                         BTreeInfoRec                    bTreeInfo;
1635
1636                         /*
1637                          * We do not lock the BTrees, because if we are getting block..then the tree
1638                          * should be locked in the first place.
1639                          * We just want the nodeSize wich will NEVER change..so even if the world
1640                          * is changing..the nodeSize should remain the same. Which argues why lock
1641                          * it in the first place??
1642                          */
1643
1644                         (void) BTGetInformation (VTOF(vp), kBTreeInfoVersion, &bTreeInfo);
1645
1646                         logBlockSize = bTreeInfo.nodeSize;
1647
1648                 } else if (VTOC(vp)->c_fileid == kHFSAllocationFileID) {
1649                                 logBlockSize = VTOVCB(vp)->vcbVBMIOSize;
1650                 }
1651         }
1652
1653         DBG_ASSERT(logBlockSize > 0);
1654
1655         return logBlockSize;
1656 }
1657
1658 u_int32_t
1659 hfs_freeblks(struct hfsmount * hfsmp, int wantreserve)
1660 {
1661         u_int32_t freeblks;
1662         u_int32_t rsrvblks;
1663         u_int32_t loanblks;
1664
1665         /*
1666          * We don't bother taking the mount lock
1667          * to look at these values since the values
1668          * themselves are each updated atomically
1669          * on aligned addresses.
1670          */
1671         freeblks = hfsmp->freeBlocks;
1672         rsrvblks = hfsmp->reserveBlocks;
1673         loanblks = hfsmp->loanedBlocks;
1674         if (wantreserve) {
1675                 if (freeblks > rsrvblks)
1676                         freeblks -= rsrvblks;
1677                 else
1678                         freeblks = 0;
1679         }
1680         if (freeblks > loanblks)
1681                 freeblks -= loanblks;
1682         else
1683                 freeblks = 0;
1684
1685 #if HFS_SPARSE_DEV
1686         /*
1687          * When the underlying device is sparse, check the
1688          * available space on the backing store volume.
1689          */
1690         if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) && hfsmp->hfs_backingfs_rootvp) {
1691                 struct vfsstatfs *vfsp;  /* 272 bytes */
1692                 u_int64_t vfreeblks;
1693                 u_int32_t loanedblks;
1694                 struct mount * backingfs_mp;
1695                 struct timeval now;
1696
1697                 backingfs_mp = vnode_mount(hfsmp->hfs_backingfs_rootvp);
1698
1699                 microtime(&now);
1700                 if ((now.tv_sec - hfsmp->hfs_last_backingstatfs) >= 1) {
1701                     vfs_update_vfsstat(backingfs_mp, vfs_context_kernel(), VFS_KERNEL_EVENT);
1702                     hfsmp->hfs_last_backingstatfs = now.tv_sec;
1703                 }
1704
1705                 if ((vfsp = vfs_statfs(backingfs_mp))) {
1706                         HFS_MOUNT_LOCK(hfsmp, TRUE);
1707                         vfreeblks = vfsp->f_bavail;
1708                         /* Normalize block count if needed. */
1709                         if (vfsp->f_bsize != hfsmp->blockSize) {
1710                                 vfreeblks = ((u_int64_t)vfreeblks * (u_int64_t)(vfsp->f_bsize)) / hfsmp->blockSize;
1711                         }
1712                         if (vfreeblks > (unsigned int)hfsmp->hfs_sparsebandblks)
1713                                 vfreeblks -= hfsmp->hfs_sparsebandblks;
1714                         else
1715                                 vfreeblks = 0;
1716
1717                         /* Take into account any delayed allocations. */
1718                         loanedblks = 2 * hfsmp->loanedBlocks;
1719                         if (vfreeblks > loanedblks)
1720                                 vfreeblks -= loanedblks;
1721                         else
1722                                 vfreeblks = 0;
1723
1724                         if (hfsmp->hfs_backingfs_maxblocks) {
1725                                 vfreeblks = MIN(vfreeblks, hfsmp->hfs_backingfs_maxblocks);
1726                         }
1727                         freeblks = MIN(vfreeblks, freeblks);
1728                         HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1729                 }
1730         }
1731 #endif /* HFS_SPARSE_DEV */
1732
1733         return (freeblks);
1734 }
1735
1736 /*
1737  * Map HFS Common errors (negative) to BSD error codes (positive).
1738  * Positive errors (ie BSD errors) are passed through unchanged.
1739  */
1740 short MacToVFSError(OSErr err)
1741 {
1742         if (err >= 0)
1743                 return err;
1744
1745         switch (err) {
1746         case dskFulErr:                 /*    -34 */
1747         case btNoSpaceAvail:            /* -32733 */
1748                 return ENOSPC;
1749         case fxOvFlErr:                 /* -32750 */
1750                 return EOVERFLOW;
1751
1752         case btBadNode:                 /* -32731 */
1753                 return EIO;
1754
1755         case memFullErr:                /*  -108 */
1756                 return ENOMEM;          /*   +12 */
1757
1758         case cmExists:                  /* -32718 */
1759         case btExists:                  /* -32734 */
1760                 return EEXIST;          /*    +17 */
1761
1762         case cmNotFound:                /* -32719 */
1763         case btNotFound:                /* -32735 */
1764                 return ENOENT;          /*     28 */
1765
1766         case cmNotEmpty:                /* -32717 */
1767                 return ENOTEMPTY;       /*     66 */
1768
1769         case cmFThdDirErr:              /* -32714 */
1770                 return EISDIR;          /*     21 */
1771
1772         case fxRangeErr:                /* -32751 */
1773                 return ERANGE;
1774
1775         case bdNamErr:                  /*   -37 */
1776                 return ENAMETOOLONG;    /*    63 */
1777
1778         case paramErr:                  /*   -50 */
1779         case fileBoundsErr:             /* -1309 */
1780                 return EINVAL;          /*   +22 */
1781
1782         case fsBTBadNodeSize:
1783                 return ENXIO;
1784
1785         default:
1786                 return EIO;             /*   +5 */
1787         }
1788 }
1789
1790
1791 /*
1792  * Find the current thread's directory hint for a given index.
1793  *
1794  * Requires an exclusive lock on directory cnode.
1795  *
1796  * Use detach if the cnode lock must be dropped while the hint is still active.
1797  */
1798 __private_extern__
1799 directoryhint_t *
1800 hfs_getdirhint(struct cnode *dcp, int index, int detach)
1801 {
1802         struct timeval tv;
1803         directoryhint_t *hint;
1804         boolean_t need_remove, need_init;
1805         const u_int8_t * name;
1806
1807         microuptime(&tv);
1808
1809         /*
1810          *  Look for an existing hint first.  If not found, create a new one (when
1811          *  the list is not full) or recycle the oldest hint.  Since new hints are
1812          *  always added to the head of the list, the last hint is always the
1813          *  oldest.
1814          */
1815         TAILQ_FOREACH(hint, &dcp->c_hintlist, dh_link) {
1816                 if (hint->dh_index == index)
1817                         break;
1818         }
1819         if (hint != NULL) { /* found an existing hint */
1820                 need_init = false;
1821                 need_remove = true;
1822         } else { /* cannot find an existing hint */
1823                 need_init = true;
1824                 if (dcp->c_dirhintcnt < HFS_MAXDIRHINTS) { /* we don't need recycling */
1825                         /* Create a default directory hint */
1826                         MALLOC_ZONE(hint, directoryhint_t *, sizeof(directoryhint_t), M_HFSDIRHINT, M_WAITOK);
1827                         ++dcp->c_dirhintcnt;
1828                         need_remove = false;
1829                 } else {                                /* recycle the last (i.e., the oldest) hint */
1830                         hint = TAILQ_LAST(&dcp->c_hintlist, hfs_hinthead);
1831                         if ((hint->dh_desc.cd_flags & CD_HASBUF) &&
1832                             (name = hint->dh_desc.cd_nameptr)) {
1833                                 hint->dh_desc.cd_nameptr = NULL;
1834                                 hint->dh_desc.cd_namelen = 0;
1835                                 hint->dh_desc.cd_flags &= ~CD_HASBUF;
1836                                 vfs_removename((const char *)name);
1837                         }
1838                         need_remove = true;
1839                 }
1840         }
1841
1842         if (need_remove)
1843                 TAILQ_REMOVE(&dcp->c_hintlist, hint, dh_link);
1844
1845         if (detach)
1846                 --dcp->c_dirhintcnt;
1847         else
1848                 TAILQ_INSERT_HEAD(&dcp->c_hintlist, hint, dh_link);
1849
1850         if (need_init) {
1851                 hint->dh_index = index;
1852                 hint->dh_desc.cd_flags = 0;
1853                 hint->dh_desc.cd_encoding = 0;
1854                 hint->dh_desc.cd_namelen = 0;
1855                 hint->dh_desc.cd_nameptr = NULL;
1856                 hint->dh_desc.cd_parentcnid = dcp->c_fileid;
1857                 hint->dh_desc.cd_hint = dcp->c_childhint;
1858                 hint->dh_desc.cd_cnid = 0;
1859         }
1860         hint->dh_time = tv.tv_sec;
1861         return (hint);
1862 }
1863
1864 /*
1865  * Release a single directory hint.
1866  *
1867  * Requires an exclusive lock on directory cnode.
1868  */
1869 __private_extern__
1870 void
1871 hfs_reldirhint(struct cnode *dcp, directoryhint_t * relhint)
1872 {
1873         const u_int8_t * name;
1874         directoryhint_t *hint;
1875
1876         /* Check if item is on list (could be detached) */
1877         TAILQ_FOREACH(hint, &dcp->c_hintlist, dh_link) {
1878                 if (hint == relhint) {
1879                         TAILQ_REMOVE(&dcp->c_hintlist, relhint, dh_link);
1880                         --dcp->c_dirhintcnt;
1881                         break;
1882                 }
1883         }
1884         name = relhint->dh_desc.cd_nameptr;
1885         if ((relhint->dh_desc.cd_flags & CD_HASBUF) && (name != NULL)) {
1886                 relhint->dh_desc.cd_nameptr = NULL;
1887                 relhint->dh_desc.cd_namelen = 0;
1888                 relhint->dh_desc.cd_flags &= ~CD_HASBUF;
1889                 vfs_removename((const char *)name);
1890         }
1891         FREE_ZONE(relhint, sizeof(directoryhint_t), M_HFSDIRHINT);
1892 }
1893
1894 /*
1895  * Release directory hints for given directory
1896  *
1897  * Requires an exclusive lock on directory cnode.
1898  */
1899 __private_extern__
1900 void
1901 hfs_reldirhints(struct cnode *dcp, int stale_hints_only)
1902 {
1903         struct timeval tv;
1904         directoryhint_t *hint, *prev;
1905         const u_int8_t * name;
1906
1907         if (stale_hints_only)
1908                 microuptime(&tv);
1909
1910         /* searching from the oldest to the newest, so we can stop early when releasing stale hints only */
1911         for (hint = TAILQ_LAST(&dcp->c_hintlist, hfs_hinthead); hint != NULL; hint = prev) {
1912                 if (stale_hints_only && (tv.tv_sec - hint->dh_time) < HFS_DIRHINT_TTL)
1913                         break;  /* stop here if this entry is too new */
1914                 name = hint->dh_desc.cd_nameptr;
1915                 if ((hint->dh_desc.cd_flags & CD_HASBUF) && (name != NULL)) {
1916                         hint->dh_desc.cd_nameptr = NULL;
1917                         hint->dh_desc.cd_namelen = 0;
1918                         hint->dh_desc.cd_flags &= ~CD_HASBUF;
1919                         vfs_removename((const char *)name);
1920                 }
1921                 prev = TAILQ_PREV(hint, hfs_hinthead, dh_link); /* must save this pointer before calling FREE_ZONE on this node */
1922                 TAILQ_REMOVE(&dcp->c_hintlist, hint, dh_link);
1923                 FREE_ZONE(hint, sizeof(directoryhint_t), M_HFSDIRHINT);
1924                 --dcp->c_dirhintcnt;
1925         }
1926 }
1927
1928 /*
1929  * Insert a detached directory hint back into the list of dirhints.
1930  *
1931  * Requires an exclusive lock on directory cnode.
1932  */
1933 __private_extern__
1934 void
1935 hfs_insertdirhint(struct cnode *dcp, directoryhint_t * hint)
1936 {
1937         directoryhint_t *test;
1938
1939         TAILQ_FOREACH(test, &dcp->c_hintlist, dh_link) {
1940                 if (test == hint)
1941                         panic("hfs_insertdirhint: hint %p already on list!", hint);
1942         }
1943
1944         TAILQ_INSERT_HEAD(&dcp->c_hintlist, hint, dh_link);
1945         ++dcp->c_dirhintcnt;
1946 }
1947
1948 /*
1949  * Perform a case-insensitive compare of two UTF-8 filenames.
1950  *
1951  * Returns 0 if the strings match.
1952  */
1953 __private_extern__
1954 int
1955 hfs_namecmp(const u_int8_t *str1, size_t len1, const u_int8_t *str2, size_t len2)
1956 {
1957         u_int16_t *ustr1, *ustr2;
1958         size_t ulen1, ulen2;
1959         size_t maxbytes;
1960         int cmp = -1;
1961
1962         if (len1 != len2)
1963                 return (cmp);
1964
1965         maxbytes = kHFSPlusMaxFileNameChars << 1;
1966         MALLOC(ustr1, u_int16_t *, maxbytes << 1, M_TEMP, M_WAITOK);
1967         ustr2 = ustr1 + (maxbytes >> 1);
1968
1969         if (utf8_decodestr(str1, len1, ustr1, &ulen1, maxbytes, ':', 0) != 0)
1970                 goto out;
1971         if (utf8_decodestr(str2, len2, ustr2, &ulen2, maxbytes, ':', 0) != 0)
1972                 goto out;
1973
1974         cmp = FastUnicodeCompare(ustr1, ulen1>>1, ustr2, ulen2>>1);
1975 out:
1976         FREE(ustr1, M_TEMP);
1977         return (cmp);
1978 }
1979
1980
/*
 * State shared between open_journal_dev() and journal_open_cb() while
 * searching for an external journal device.
 */
typedef struct jopen_cb_info {
	/* Journal size, passed through to journal_is_clean(). */
	off_t   jsize;
	/*
	 * UUID of the wanted journal device.  When this points at an empty
	 * string any device is accepted and the UUID of the device that was
	 * picked is copied back into it.
	 */
	char   *desired_uuid;
	/* Vnode of the opened journal device; NULL while none has been found. */
	struct  vnode *jvp;
	/* Block size, passed through to journal_is_clean(). */
	size_t  blksize;
	/* Nonzero to run the journal_is_clean() check when no UUID was requested. */
	int     need_clean;
	/* Set to 1 by the callback when journal_is_clean() returns EINVAL. */
	int     need_init;
} jopen_cb_info;
1989
1990 static int
1991 journal_open_cb(const char *bsd_dev_name, const char *uuid_str, void *arg)
1992 {
1993         struct nameidata nd;
1994         jopen_cb_info *ji = (jopen_cb_info *)arg;
1995         char bsd_name[256];
1996         int error;
1997
1998         strlcpy(&bsd_name[0], "/dev/", sizeof(bsd_name));
1999         strlcpy(&bsd_name[5], bsd_dev_name, sizeof(bsd_name)-5);
2000
2001         if (ji->desired_uuid && ji->desired_uuid[0] && strcmp(uuid_str, ji->desired_uuid) != 0) {
2002                 return 1;   // keep iterating
2003         }
2004
2005         // if we're here, either the desired uuid matched or there was no
2006         // desired uuid so let's try to open the device for writing and
2007         // see if it works.  if it does, we'll use it.
2008
2009         NDINIT(&nd, LOOKUP, OP_LOOKUP, LOCKLEAF, UIO_SYSSPACE32, CAST_USER_ADDR_T(bsd_name), vfs_context_kernel());
2010         if ((error = namei(&nd))) {
2011                 printf("hfs: journal open cb: error %d looking up device %s (dev uuid %s)\n", error, bsd_name, uuid_str);
2012                 return 1;   // keep iterating
2013         }
2014
2015         ji->jvp = nd.ni_vp;
2016         nameidone(&nd);
2017
2018         if (ji->jvp == NULL) {
2019                 printf("hfs: journal open cb: did not find %s (error %d)\n", bsd_name, error);
2020         } else {
2021                 error = VNOP_OPEN(ji->jvp, FREAD|FWRITE, vfs_context_kernel());
2022                 if (error == 0) {
2023                         // if the journal is dirty and we didn't specify a desired
2024                         // journal device uuid, then do not use the journal.  but
2025                         // if the journal is just invalid (e.g. it hasn't been
2026                         // initialized) then just set the need_init flag.
2027                         if (ji->need_clean && ji->desired_uuid && ji->desired_uuid[0] == '\0') {
2028                                 error = journal_is_clean(ji->jvp, 0, ji->jsize, (void *)1, ji->blksize);
2029                                 if (error == EBUSY) {
2030                                         VNOP_CLOSE(ji->jvp, FREAD|FWRITE, vfs_context_kernel());
2031                                         vnode_put(ji->jvp);
2032                                         ji->jvp = NULL;
2033                                         return 1;    // keep iterating
2034                                 } else if (error == EINVAL) {
2035                                         ji->need_init = 1;
2036                                 }
2037                         }
2038
2039                         if (ji->desired_uuid && ji->desired_uuid[0] == '\0') {
2040                                 strlcpy(ji->desired_uuid, uuid_str, 128);
2041                         }
2042                         vnode_setmountedon(ji->jvp);
2043                         // printf("hfs: journal open cb: got device %s (%s)\n", bsd_name, uuid_str);
2044                         return 0;   // stop iterating
2045                 } else {
2046                         vnode_put(ji->jvp);
2047                         ji->jvp = NULL;
2048                 }
2049         }
2050
2051         return 1;   // keep iterating
2052 }
2053
2054 extern dev_t IOBSDGetMediaWithUUID(const char *uuid_cstring, char *bsd_name, int bsd_name_len, int timeout);
2055 extern void IOBSDIterateMediaWithContent(const char *uuid_cstring, int (*func)(const char *bsd_dev_name, const char *uuid_str, void *arg), void *arg);
2056 kern_return_t IOBSDGetPlatformSerialNumber(char *serial_number_str, u_int32_t len);
2057
2058
2059 static vnode_t
2060 open_journal_dev(const char *vol_device,
2061                  int need_clean,
2062                  char *uuid_str,
2063                  char *machine_serial_num,
2064                  off_t jsize,
2065                  size_t blksize,
2066                  int *need_init)
2067 {
2068     int retry_counter=0;
2069     jopen_cb_info ji;
2070
2071     ji.jsize        = jsize;
2072     ji.desired_uuid = uuid_str;
2073     ji.jvp          = NULL;
2074     ji.blksize      = blksize;
2075     ji.need_clean   = need_clean;
2076     ji.need_init    = 0;
2077
2078 //    if (uuid_str[0] == '\0') {
2079 //          printf("hfs: open journal dev: %s: locating any available non-dirty external journal partition\n", vol_device);
2080 //    } else {
2081 //          printf("hfs: open journal dev: %s: trying to find the external journal partition w/uuid %s\n", vol_device, uuid_str);
2082 //    }
2083     while (ji.jvp == NULL && retry_counter++ < 4) {
2084             if (retry_counter > 1) {
2085                     if (uuid_str[0]) {
2086                             printf("hfs: open_journal_dev: uuid %s not found.  waiting 10sec.\n", uuid_str);
2087                     } else {
2088                             printf("hfs: open_journal_dev: no available external journal partition found.  waiting 10sec.\n");
2089                     }
2090                     delay_for_interval(10* 1000000, NSEC_PER_USEC);    // wait for ten seconds and then try again
2091             }
2092
2093             IOBSDIterateMediaWithContent(EXTJNL_CONTENT_TYPE_UUID, journal_open_cb, &ji);
2094     }
2095
2096     if (ji.jvp == NULL) {
2097             printf("hfs: volume: %s: did not find jnl device uuid: %s from machine serial number: %s\n",
2098                    vol_device, uuid_str, machine_serial_num);
2099     }
2100
2101     *need_init = ji.need_init;
2102
2103     return ji.jvp;
2104 }
2105
2106
2107 int
2108 hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
2109                                            void *_args, off_t embeddedOffset, daddr64_t mdb_offset,
2110                                            HFSMasterDirectoryBlock *mdbp, kauth_cred_t cred)
2111 {
2112         JournalInfoBlock *jibp;
2113         struct buf       *jinfo_bp, *bp;
2114         int               sectors_per_fsblock, arg_flags=0, arg_tbufsz=0;
2115         int               retval, write_jibp = 0;
2116         uint32_t                  blksize = hfsmp->hfs_logical_block_size;
2117         struct vnode     *devvp;
2118         struct hfs_mount_args *args = _args;
2119         u_int32_t         jib_flags;
2120         u_int64_t         jib_offset;
2121         u_int64_t         jib_size;
2122         const char *dev_name;
2123
2124         devvp = hfsmp->hfs_devvp;
2125         dev_name = vnode_name(devvp);
2126         if (dev_name == NULL) {
2127                 dev_name = "unknown-dev";
2128         }
2129
2130         if (args != NULL && (args->flags & HFSFSMNT_EXTENDED_ARGS)) {
2131                 arg_flags  = args->journal_flags;
2132                 arg_tbufsz = args->journal_tbuffer_size;
2133         }
2134
2135         sectors_per_fsblock = SWAP_BE32(vhp->blockSize) / blksize;
2136
2137         jinfo_bp = NULL;
2138         retval = (int)buf_meta_bread(devvp,
2139                                                 (daddr64_t)((embeddedOffset/blksize) +
2140                                                 ((u_int64_t)SWAP_BE32(vhp->journalInfoBlock)*sectors_per_fsblock)),
2141                                                 hfsmp->hfs_physical_block_size, cred, &jinfo_bp);
2142         if (retval) {
2143                 if (jinfo_bp) {
2144                         buf_brelse(jinfo_bp);
2145                 }
2146                 return retval;
2147         }
2148
2149         jibp = (JournalInfoBlock *)buf_dataptr(jinfo_bp);
2150         jib_flags  = SWAP_BE32(jibp->flags);
2151         jib_size   = SWAP_BE64(jibp->size);
2152
2153         if (jib_flags & kJIJournalInFSMask) {
2154                 hfsmp->jvp = hfsmp->hfs_devvp;
2155                 jib_offset = SWAP_BE64(jibp->offset);
2156         } else {
2157             int need_init=0;
2158
2159             // if the volume was unmounted cleanly then we'll pick any
2160             // available external journal partition
2161             //
2162             if (SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) {
2163                     *((char *)&jibp->ext_jnl_uuid[0]) = '\0';
2164             }
2165
2166             hfsmp->jvp = open_journal_dev(dev_name,
2167                                           !(jib_flags & kJIJournalNeedInitMask),
2168                                           (char *)&jibp->ext_jnl_uuid[0],
2169                                           (char *)&jibp->machine_serial_num[0],
2170                                           jib_size,
2171                                           hfsmp->hfs_logical_block_size,
2172                                           &need_init);
2173             if (hfsmp->jvp == NULL) {
2174                 buf_brelse(jinfo_bp);
2175                 return EROFS;
2176             } else {
2177                     if (IOBSDGetPlatformSerialNumber(&jibp->machine_serial_num[0], sizeof(jibp->machine_serial_num)) != KERN_SUCCESS) {
2178                             strlcpy(&jibp->machine_serial_num[0], "unknown-machine-uuid", sizeof(jibp->machine_serial_num));
2179                     }
2180             }
2181
2182             jib_offset = 0;
2183             write_jibp = 1;
2184             if (need_init) {
2185                     jib_flags |= kJIJournalNeedInitMask;
2186             }
2187         }
2188
2189         // save this off for the hack-y check in hfs_remove()
2190         hfsmp->jnl_start = jib_offset / SWAP_BE32(vhp->blockSize);
2191         hfsmp->jnl_size  = jib_size;
2192
2193         if ((hfsmp->hfs_flags & HFS_READ_ONLY) && (vfs_flags(hfsmp->hfs_mp) & MNT_ROOTFS) == 0) {
2194             // if the file system is read-only, check if the journal is empty.
2195             // if it is, then we can allow the mount.  otherwise we have to
2196             // return failure.
2197             retval = journal_is_clean(hfsmp->jvp,
2198                                       jib_offset + embeddedOffset,
2199                                       jib_size,
2200                                       devvp,
2201                                       hfsmp->hfs_logical_block_size);
2202
2203             hfsmp->jnl = NULL;
2204
2205             buf_brelse(jinfo_bp);
2206
2207             if (retval) {
2208                 const char *name = vnode_getname(devvp);
2209               printf("hfs: early journal init: volume on %s is read-only and journal is dirty.  Can not mount volume.\n",
2210                      name ? name : "");
2211                 if (name)
2212                         vnode_putname(name);
2213             }
2214
2215             return retval;
2216         }
2217
2218         if (jib_flags & kJIJournalNeedInitMask) {
2219                 printf("hfs: Initializing the journal (joffset 0x%llx sz 0x%llx)...\n",
2220                            jib_offset + embeddedOffset, jib_size);
2221                 hfsmp->jnl = journal_create(hfsmp->jvp,
2222                                                                         jib_offset + embeddedOffset,
2223                                                                         jib_size,
2224                                                                         devvp,
2225                                                                         blksize,
2226                                                                         arg_flags,
2227                                                                         arg_tbufsz,
2228                                                                         hfs_sync_metadata, hfsmp->hfs_mp);
2229                 if (hfsmp->jnl)
2230                         journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);
2231
2232                 // no need to start a transaction here... if this were to fail
2233                 // we'd just re-init it on the next mount.
2234                 jib_flags &= ~kJIJournalNeedInitMask;
2235                 jibp->flags  = SWAP_BE32(jib_flags);
2236                 buf_bwrite(jinfo_bp);
2237                 jinfo_bp = NULL;
2238                 jibp     = NULL;
2239         } else {
2240                 //printf("hfs: Opening the journal (joffset 0x%llx sz 0x%llx vhp_blksize %d)...\n",
2241                 //         jib_offset + embeddedOffset,
2242                 //         jib_size, SWAP_BE32(vhp->blockSize));
2243
2244                 hfsmp->jnl = journal_open(hfsmp->jvp,
2245                                                                   jib_offset + embeddedOffset,
2246                                                                   jib_size,
2247                                                                   devvp,
2248                                                                   blksize,
2249                                                                   arg_flags,
2250                                                                   arg_tbufsz,
2251                                                                   hfs_sync_metadata, hfsmp->hfs_mp);
2252                 if (hfsmp->jnl)
2253                         journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);
2254
2255                 if (write_jibp) {
2256                         buf_bwrite(jinfo_bp);
2257                 } else {
2258                         buf_brelse(jinfo_bp);
2259                 }
2260                 jinfo_bp = NULL;
2261                 jibp     = NULL;
2262
2263                 if (hfsmp->jnl && mdbp) {
2264                         // reload the mdb because it could have changed
2265                         // if the journal had to be replayed.
2266                         if (mdb_offset == 0) {
2267                                 mdb_offset = (daddr64_t)((embeddedOffset / blksize) + HFS_PRI_SECTOR(blksize));
2268                         }
2269                         bp = NULL;
2270                         retval = (int)buf_meta_bread(devvp,
2271                                         HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
2272                                         hfsmp->hfs_physical_block_size, cred, &bp);
2273                         if (retval) {
2274                                 if (bp) {
2275                                         buf_brelse(bp);
2276                                 }
2277                                 printf("hfs: failed to reload the mdb after opening the journal (retval %d)!\n",
2278                                            retval);
2279                                 return retval;
2280                         }
2281                         bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size), mdbp, 512);
2282                         buf_brelse(bp);
2283                         bp = NULL;
2284                 }
2285         }
2286
2287
2288         //printf("journal @ 0x%x\n", hfsmp->jnl);
2289
2290         // if we expected the journal to be there and we couldn't
2291         // create it or open it then we have to bail out.
2292         if (hfsmp->jnl == NULL) {
2293                 printf("hfs: early jnl init: failed to open/create the journal (retval %d).\n", retval);
2294                 return EINVAL;
2295         }
2296
2297         return 0;
2298 }
2299
2300
2301 //
2302 // This function will go and re-locate the .journal_info_block and
2303 // the .journal files in case they moved (which can happen if you
2304 // run Norton SpeedDisk).  If we fail to find either file we just
2305 // disable journaling for this volume and return.  We turn off the
2306 // journaling bit in the vcb and assume it will get written to disk
2307 // later (if it doesn't on the next mount we'd do the same thing
2308 // again which is harmless).  If we disable journaling we don't
2309 // return an error so that the volume is still mountable.
2310 //
2311 // If the info we find for the .journal_info_block and .journal files
2312 // isn't what we had stored, we re-set our cached info and proceed
2313 // with opening the journal normally.
2314 //
2315 static int
2316 hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_args)
2317 {
2318         JournalInfoBlock *jibp;
2319         struct buf       *jinfo_bp;
2320         int               sectors_per_fsblock, arg_flags=0, arg_tbufsz=0;
2321         int               retval, write_jibp = 0, recreate_journal = 0;
2322         struct vnode     *devvp;
2323         struct cat_attr   jib_attr, jattr;
2324         struct cat_fork   jib_fork, jfork;
2325         ExtendedVCB      *vcb;
2326         u_int32_t            fid;
2327         struct hfs_mount_args *args = _args;
2328         u_int32_t         jib_flags;
2329         u_int64_t         jib_offset;
2330         u_int64_t         jib_size;
2331
2332         devvp = hfsmp->hfs_devvp;
2333         vcb = HFSTOVCB(hfsmp);
2334
2335         if (args != NULL && (args->flags & HFSFSMNT_EXTENDED_ARGS)) {
2336                 if (args->journal_disable) {
2337                         return 0;
2338                 }
2339
2340                 arg_flags  = args->journal_flags;
2341                 arg_tbufsz = args->journal_tbuffer_size;
2342         }
2343
2344         fid = GetFileInfo(vcb, kRootDirID, ".journal_info_block", &jib_attr, &jib_fork);
2345         if (fid == 0 || jib_fork.cf_extents[0].startBlock == 0 || jib_fork.cf_size == 0) {
2346                 printf("hfs: can't find the .journal_info_block! disabling journaling (start: %d).\n",
2347                            jib_fork.cf_extents[0].startBlock);
2348                 vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
2349                 return 0;
2350         }
2351         hfsmp->hfs_jnlinfoblkid = fid;
2352
2353         // make sure the journal_info_block begins where we think it should.
2354         if (SWAP_BE32(vhp->journalInfoBlock) != jib_fork.cf_extents[0].startBlock) {
2355                 printf("hfs: The journal_info_block moved (was: %d; is: %d).  Fixing up\n",
2356                            SWAP_BE32(vhp->journalInfoBlock), jib_fork.cf_extents[0].startBlock);
2357
2358                 vcb->vcbJinfoBlock    = jib_fork.cf_extents[0].startBlock;
2359                 vhp->journalInfoBlock = SWAP_BE32(jib_fork.cf_extents[0].startBlock);
2360                 recreate_journal = 1;
2361         }
2362
2363
2364         sectors_per_fsblock = SWAP_BE32(vhp->blockSize) / hfsmp->hfs_logical_block_size;
2365         jinfo_bp = NULL;
2366         retval = (int)buf_meta_bread(devvp,
2367                                                 (vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size +
2368                                                 ((u_int64_t)SWAP_BE32(vhp->journalInfoBlock)*sectors_per_fsblock)),
2369                                                 hfsmp->hfs_physical_block_size, NOCRED, &jinfo_bp);
2370         if (retval) {
2371                 if (jinfo_bp) {
2372                         buf_brelse(jinfo_bp);
2373                 }
2374                 printf("hfs: can't read journal info block. disabling journaling.\n");
2375                 vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
2376                 return 0;
2377         }
2378
2379         jibp = (JournalInfoBlock *)buf_dataptr(jinfo_bp);
2380         jib_flags  = SWAP_BE32(jibp->flags);
2381         jib_offset = SWAP_BE64(jibp->offset);
2382         jib_size   = SWAP_BE64(jibp->size);
2383
2384         fid = GetFileInfo(vcb, kRootDirID, ".journal", &jattr, &jfork);
2385         if (fid == 0 || jfork.cf_extents[0].startBlock == 0 || jfork.cf_size == 0) {
2386                 printf("hfs: can't find the journal file! disabling journaling (start: %d)\n",
2387                            jfork.cf_extents[0].startBlock);
2388                 buf_brelse(jinfo_bp);
2389                 vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
2390                 return 0;
2391         }
2392         hfsmp->hfs_jnlfileid = fid;
2393
2394         // make sure the journal file begins where we think it should.
2395         if ((jib_flags & kJIJournalInFSMask) && (jib_offset / (u_int64_t)vcb->blockSize) != jfork.cf_extents[0].startBlock) {
2396                 printf("hfs: The journal file moved (was: %lld; is: %d).  Fixing up\n",
2397                            (jib_offset / (u_int64_t)vcb->blockSize), jfork.cf_extents[0].startBlock);
2398
2399                 jib_offset = (u_int64_t)jfork.cf_extents[0].startBlock * (u_int64_t)vcb->blockSize;
2400                 write_jibp   = 1;
2401                 recreate_journal = 1;
2402         }
2403
2404         // check the size of the journal file.
2405         if (jib_size != (u_int64_t)jfork.cf_extents[0].blockCount*vcb->blockSize) {
2406                 printf("hfs: The journal file changed size! (was %lld; is %lld).  Fixing up.\n",
2407                            jib_size, (u_int64_t)jfork.cf_extents[0].blockCount*vcb->blockSize);
2408
2409                 jib_size = (u_int64_t)jfork.cf_extents[0].blockCount * vcb->blockSize;
2410                 write_jibp = 1;
2411                 recreate_journal = 1;
2412         }
2413
2414         if (jib_flags & kJIJournalInFSMask) {
2415                 hfsmp->jvp = hfsmp->hfs_devvp;
2416                 jib_offset += (off_t)vcb->hfsPlusIOPosOffset;
2417         } else {
2418             const char *dev_name;
2419             int need_init = 0;
2420
2421             dev_name = vnode_name(devvp);
2422             if (dev_name == NULL) {
2423                     dev_name = "unknown-dev";
2424             }
2425
2426             // since the journal is empty, just use any available external journal
2427             *((char *)&jibp->ext_jnl_uuid[0]) = '\0';
2428
2429             // this fills in the uuid of the device we actually get
2430             hfsmp->jvp = open_journal_dev(dev_name,
2431                                           !(jib_flags & kJIJournalNeedInitMask),
2432                                           (char *)&jibp->ext_jnl_uuid[0],
2433                                           (char *)&jibp->machine_serial_num[0],
2434                                           jib_size,
2435                                           hfsmp->hfs_logical_block_size,
2436                                           &need_init);
2437             if (hfsmp->jvp == NULL) {
2438                 buf_brelse(jinfo_bp);
2439                 return EROFS;
2440             } else {
2441                     if (IOBSDGetPlatformSerialNumber(&jibp->machine_serial_num[0], sizeof(jibp->machine_serial_num)) != KERN_SUCCESS) {
2442                             strlcpy(&jibp->machine_serial_num[0], "unknown-machine-serial-num", sizeof(jibp->machine_serial_num));
2443                     }
2444             }
2445             jib_offset = 0;
2446             recreate_journal = 1;
2447             write_jibp = 1;
2448             if (need_init) {
2449                     jib_flags |= kJIJournalNeedInitMask;
2450             }
2451         }
2452
2453         // save this off for the hack-y check in hfs_remove()
2454         hfsmp->jnl_start = jib_offset / SWAP_BE32(vhp->blockSize);
2455         hfsmp->jnl_size  = jib_size;
2456
2457         if ((hfsmp->hfs_flags & HFS_READ_ONLY) && (vfs_flags(hfsmp->hfs_mp) & MNT_ROOTFS) == 0) {
2458             // if the file system is read-only, check if the journal is empty.
2459             // if it is, then we can allow the mount.  otherwise we have to
2460             // return failure.
2461             retval = journal_is_clean(hfsmp->jvp,
2462                                       jib_offset,
2463                                       jib_size,
2464                                       devvp,
2465                                       hfsmp->hfs_logical_block_size);
2466
2467             hfsmp->jnl = NULL;
2468
2469             buf_brelse(jinfo_bp);
2470
2471             if (retval) {
2472                 const char *name = vnode_getname(devvp);
2473               printf("hfs: late journal init: volume on %s is read-only and journal is dirty.  Can not mount volume.\n",
2474                      name ? name : "");
2475                 if (name)
2476                         vnode_putname(name);
2477             }
2478
2479             return retval;
2480         }
2481
2482         if ((jib_flags & kJIJournalNeedInitMask) || recreate_journal) {
2483                 printf("hfs: Initializing the journal (joffset 0x%llx sz 0x%llx)...\n",
2484                            jib_offset, jib_size);
2485                 hfsmp->jnl = journal_create(hfsmp->jvp,
2486                                                                         jib_offset,
2487                                                                         jib_size,
2488                                                                         devvp,
2489                                                                         hfsmp->hfs_logical_block_size,
2490                                                                         arg_flags,
2491                                                                         arg_tbufsz,
2492                                                                         hfs_sync_metadata, hfsmp->hfs_mp);
2493                 if (hfsmp->jnl)
2494                         journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);
2495
2496                 // no need to start a transaction here... if this were to fail
2497                 // we'd just re-init it on the next mount.
2498                 jib_flags &= ~kJIJournalNeedInitMask;
2499                 write_jibp   = 1;
2500
2501         } else {
2502                 //
2503                 // if we weren't the last person to mount this volume
2504                 // then we need to throw away the journal because it
2505                 // is likely that someone else mucked with the disk.
2506                 // if the journal is empty this is no big deal.  if the
2507                 // disk is dirty this prevents us from replaying the
2508                 // journal over top of changes that someone else made.
2509                 //
2510                 arg_flags |= JOURNAL_RESET;
2511
2512                 //printf("hfs: Opening the journal (joffset 0x%llx sz 0x%llx vhp_blksize %d)...\n",
2513                 //         jib_offset,
2514                 //         jib_size, SWAP_BE32(vhp->blockSize));
2515
2516                 hfsmp->jnl = journal_open(hfsmp->jvp,
2517                                                                   jib_offset,
2518                                                                   jib_size,
2519                                                                   devvp,
2520                                                                   hfsmp->hfs_logical_block_size,
2521                                                                   arg_flags,
2522                                                                   arg_tbufsz,
2523                                                                   hfs_sync_metadata, hfsmp->hfs_mp);
2524                 if (hfsmp->jnl)
2525                         journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);
2526         }
2527
2528
2529         if (write_jibp) {
2530                 jibp->flags  = SWAP_BE32(jib_flags);
2531                 jibp->offset = SWAP_BE64(jib_offset);
2532                 jibp->size   = SWAP_BE64(jib_size);
2533
2534                 buf_bwrite(jinfo_bp);
2535         } else {
2536                 buf_brelse(jinfo_bp);
2537         }
2538         jinfo_bp = NULL;
2539         jibp     = NULL;
2540
2541         //printf("hfs: journal @ 0x%x\n", hfsmp->jnl);
2542
2543         // if we expected the journal to be there and we couldn't
2544         // create it or open it then we have to bail out.
2545         if (hfsmp->jnl == NULL) {
2546                 printf("hfs: late jnl init: failed to open/create the journal (retval %d).\n", retval);
2547                 return EINVAL;
2548         }
2549
2550         return 0;
2551 }
2552
2553 /*
2554  * Calculate the allocation zone for metadata.
2555  *
2556  * This zone includes the following:
2557  *      Allocation Bitmap file
2558  *      Overflow Extents file
2559  *      Journal file
2560  *      Quota files
2561  *      Clustered Hot files
2562  *      Catalog file
2563  *
2564  *                          METADATA ALLOCATION ZONE
2565  * ____________________________________________________________________________
2566  * |    |    |     |               |                              |           |
2567  * | BM | JF | OEF |    CATALOG    |--->                          | HOT FILES |
2568  * |____|____|_____|_______________|______________________________|___________|
2569  *
2570  * <------------------------------- N * 128 MB ------------------------------->
2571  *
2572  */
/*
 * Byte-size constants for the metadata zone calculation.
 *
 * GIGABYTE is 64-bit and fully parenthesized so that it expands as one
 * operand in any expression (including "sizeof GIGABYTE").
 */
#define GIGABYTE  ((u_int64_t)1024 * 1024 * 1024)

#define OVERFLOW_DEFAULT_SIZE (4*1024*1024)
#define OVERFLOW_MAXIMUM_SIZE (128*1024*1024)
#define JOURNAL_DEFAULT_SIZE  (8*1024*1024)
#define JOURNAL_MAXIMUM_SIZE  (512*1024*1024)
/* Lower and upper bounds applied to the hot file band size. */
#define HOTBAND_MINIMUM_SIZE  (10*1024*1024)
#define HOTBAND_MAXIMUM_SIZE  (512*1024*1024)
2581
2582 /* Initialize the metadata zone.
2583  *
2584  * If the size of  the volume is less than the minimum size for
2585  * metadata zone, metadata zone is disabled.
2586  *
2587  * If disable is true, disable metadata zone unconditionally.
2588  */
2589 void
2590 hfs_metadatazone_init(struct hfsmount *hfsmp, int disable)
2591 {
2592         ExtendedVCB  *vcb;
2593         u_int64_t  fs_size;
2594         u_int64_t  zonesize;
2595         u_int64_t  temp;
2596         u_int64_t  filesize;
2597         u_int32_t  blk;
2598         int  items, really_do_it=1;
2599
2600         vcb = HFSTOVCB(hfsmp);
2601         fs_size = (u_int64_t)vcb->blockSize * (u_int64_t)vcb->allocLimit;
2602
2603         /*
2604          * For volumes less than 10 GB, don't bother.
2605          */
2606         if (fs_size < ((u_int64_t)10 * GIGABYTE)) {
2607                 really_do_it = 0;
2608         }
2609
2610         /*
2611          * Skip non-journaled volumes as well.
2612          */
2613         if (hfsmp->jnl == NULL) {
2614                 really_do_it = 0;
2615         }
2616
2617         /* If caller wants to disable metadata zone, do it */
2618         if (disable == true) {
2619                 really_do_it = 0;
2620         }
2621
2622         /*
2623          * Start with space for the boot blocks and Volume Header.
2624          * 1536 = byte offset from start of volume to end of volume header:
2625          * 1024 bytes is the offset from the start of the volume to the
2626          * start of the volume header (defined by the volume format)
2627          * + 512 bytes (the size of the volume header).
2628          */
2629         zonesize = roundup(1536, hfsmp->blockSize);
2630
2631         /*
2632          * Add the on-disk size of allocation bitmap.
2633          */
2634         zonesize += hfsmp->hfs_allocation_cp->c_datafork->ff_blocks * hfsmp->blockSize;
2635
2636         /*
2637          * Add space for the Journal Info Block and Journal (if they're in
2638          * this file system).
2639          */
2640         if (hfsmp->jnl && hfsmp->jvp == hfsmp->hfs_devvp) {
2641                 zonesize += hfsmp->blockSize + hfsmp->jnl_size;
2642         }
2643
2644         /*
2645          * Add the existing size of the Extents Overflow B-tree.
2646          * (It rarely grows, so don't bother reserving additional room for it.)
2647          */
2648         zonesize += hfsmp->hfs_extents_cp->c_datafork->ff_blocks * hfsmp->blockSize;
2649
2650         /*
2651          * If there is an Attributes B-tree, leave room for 11 clumps worth.
2652          * newfs_hfs allocates one clump, and leaves a gap of 10 clumps.
2653          * When installing a full OS install onto a 20GB volume, we use
2654          * 7 to 8 clumps worth of space (depending on packages), so that leaves
2655          * us with another 3 or 4 clumps worth before we need another extent.
2656          */
2657         if (hfsmp->hfs_attribute_cp) {
2658                 zonesize += 11 * hfsmp->hfs_attribute_cp->c_datafork->ff_clumpsize;
2659         }
2660
2661         /*
2662          * Leave room for 11 clumps of the Catalog B-tree.
2663          * Again, newfs_hfs allocates one clump plus a gap of 10 clumps.
2664          * When installing a full OS install onto a 20GB volume, we use
2665          * 7 to 8 clumps worth of space (depending on packages), so that leaves
2666          * us with another 3 or 4 clumps worth before we need another extent.
2667          */
2668         zonesize += 11 * hfsmp->hfs_catalog_cp->c_datafork->ff_clumpsize;
2669
2670         /*
2671          * Add space for hot file region.
2672          *
2673          * ...for now, use 5 MB per 1 GB (0.5 %)
2674          */
2675         filesize = (fs_size / 1024) * 5;
2676         if (filesize > HOTBAND_MAXIMUM_SIZE)
2677                 filesize = HOTBAND_MAXIMUM_SIZE;
2678         else if (filesize < HOTBAND_MINIMUM_SIZE)
2679                 filesize = HOTBAND_MINIMUM_SIZE;
2680         /*
2681          * Calculate user quota file requirements.
2682          */
2683         if (hfsmp->hfs_flags & HFS_QUOTAS) {
2684                 items = QF_USERS_PER_GB * (fs_size / GIGABYTE);
2685                 if (items < QF_MIN_USERS)
2686                         items = QF_MIN_USERS;
2687                 else if (items > QF_MAX_USERS)
2688                         items = QF_MAX_USERS;
2689                 if (!powerof2(items)) {
2690                         int x = items;
2691                         items = 4;
2692                         while (x>>1 != 1) {
2693                                 x = x >> 1;
2694                                 items = items << 1;
2695                         }
2696                 }
2697                 filesize += (items + 1) * sizeof(struct dqblk);
2698                 /*
2699                  * Calculate group quota file requirements.
2700                  *
2701                  */
2702                 items = QF_GROUPS_PER_GB * (fs_size / GIGABYTE);
2703                 if (items < QF_MIN_GROUPS)
2704                         items = QF_MIN_GROUPS;
2705                 else if (items > QF_MAX_GROUPS)
2706                         items = QF_MAX_GROUPS;
2707                 if (!powerof2(items)) {
2708                         int x = items;
2709                         items = 4;
2710                         while (x>>1 != 1) {
2711                                 x = x >> 1;
2712                                 items = items << 1;
2713                         }
2714                 }
2715                 filesize += (items + 1) * sizeof(struct dqblk);
2716         }
2717         zonesize += filesize;
2718
2719         /*
2720          * Round up entire zone to a bitmap block's worth.
2721          * The extra space goes to the catalog file and hot file area.
2722          */
2723         temp = zonesize;
2724         zonesize = roundup(zonesize, (u_int64_t)vcb->vcbVBMIOSize * 8 * vcb->blockSize);
2725         hfsmp->hfs_min_alloc_start = zonesize / vcb->blockSize;
2726         /*
2727          * If doing the round up for hfs_min_alloc_start would push us past
2728          * allocLimit, then just reset it back to 0.  Though using a value
2729          * bigger than allocLimit would not cause damage in the block allocator
2730          * code, this value could get stored in the volume header and make it out
2731          * to disk, making the volume header technically corrupt.
2732          */
2733         if (hfsmp->hfs_min_alloc_start >= hfsmp->allocLimit) {
2734                 hfsmp->hfs_min_alloc_start = 0;
2735         }
2736
2737         if (really_do_it == 0) {
2738                 /* If metadata zone needs to be disabled because the
2739                  * volume was truncated, clear the bit and zero out
2740                  * the values that are no longer needed.
2741                  */
2742                 if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
2743                         /* Disable metadata zone */
2744                         hfsmp->hfs_flags &= ~HFS_METADATA_ZONE;
2745
2746                         /* Zero out mount point values that are not required */
2747                         hfsmp->hfs_catalog_maxblks = 0;
2748                         hfsmp->hfs_hotfile_maxblks = 0;
2749                         hfsmp->hfs_hotfile_start = 0;
2750                         hfsmp->hfs_hotfile_end = 0;
2751                         hfsmp->hfs_hotfile_freeblks = 0;
2752                         hfsmp->hfs_metazone_start = 0;
2753                         hfsmp->hfs_metazone_end = 0;
2754                 }
2755
2756                 return;
2757         }
2758
2759         temp = zonesize - temp;  /* temp has extra space */
2760         filesize += temp / 3;
2761         hfsmp->hfs_catalog_maxblks += (temp - (temp / 3)) / vcb->blockSize;
2762
2763         hfsmp->hfs_hotfile_maxblks = filesize / vcb->blockSize;
2764
2765         /* Convert to allocation blocks. */
2766         blk = zonesize / vcb->blockSize;
2767
2768         /* The default metadata zone location is at the start of volume. */
2769         hfsmp->hfs_metazone_start = 1;
2770         hfsmp->hfs_metazone_end = blk - 1;
2771
2772         /* The default hotfile area is at the end of the zone. */
2773         hfsmp->hfs_hotfile_start = blk - (filesize / vcb->blockSize);
2774         hfsmp->hfs_hotfile_end = hfsmp->hfs_metazone_end;
2775         hfsmp->hfs_hotfile_freeblks = hfs_hotfile_freeblocks(hfsmp);
2776 #if 0
2777         printf("hfs: metadata zone is %d to %d\n", hfsmp->hfs_metazone_start, hfsmp->hfs_metazone_end);
2778         printf("hfs: hot file band is %d to %d\n", hfsmp->hfs_hotfile_start, hfsmp->hfs_hotfile_end);
2779         printf("hfs: hot file band free blocks = %d\n", hfsmp->hfs_hotfile_freeblks);
2780 #endif
2781         hfsmp->hfs_flags |= HFS_METADATA_ZONE;
2782 }
2783
2784
2785 static u_int32_t
2786 hfs_hotfile_freeblocks(struct hfsmount *hfsmp)
2787 {
2788         ExtendedVCB  *vcb = HFSTOVCB(hfsmp);
2789         int  lockflags;
2790         int  freeblocks;
2791
2792         lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
2793         freeblocks = MetaZoneFreeBlocks(vcb);
2794         hfs_systemfile_unlock(hfsmp, lockflags);
2795
2796         /* Minus Extents overflow file reserve. */
2797         freeblocks -=
2798                 hfsmp->hfs_overflow_maxblks - VTOF(hfsmp->hfs_extents_vp)->ff_blocks;
2799         /* Minus catalog file reserve. */
2800         freeblocks -=
2801                 hfsmp->hfs_catalog_maxblks - VTOF(hfsmp->hfs_catalog_vp)->ff_blocks;
2802         if (freeblocks < 0)
2803                 freeblocks = 0;
2804
2805         return MIN(freeblocks, hfsmp->hfs_hotfile_maxblks);
2806 }
2807
2808 /*
2809  * Determine if a file is a "virtual" metadata file.
2810  * This includes journal and quota files.
2811  */
2812 int
2813 hfs_virtualmetafile(struct cnode *cp)
2814 {
2815         const char * filename;
2816
2817
2818         if (cp->c_parentcnid != kHFSRootFolderID)
2819                 return (0);
2820
2821         filename = (const char *)cp->c_desc.cd_nameptr;
2822         if (filename == NULL)
2823                 return (0);
2824
2825         if ((strncmp(filename, ".journal", sizeof(".journal")) == 0) ||
2826             (strncmp(filename, ".journal_info_block", sizeof(".journal_info_block")) == 0) ||
2827             (strncmp(filename, ".quota.user", sizeof(".quota.user")) == 0) ||
2828             (strncmp(filename, ".quota.group", sizeof(".quota.group")) == 0) ||
2829             (strncmp(filename, ".hotfiles.btree", sizeof(".hotfiles.btree")) == 0))
2830                 return (1);
2831
2832         return (0);
2833 }
2834
2835
2836 //
2837 // Fire off a timed callback to sync the disk if the
2838 // volume is on ejectable media.
2839 //
2840  __private_extern__
2841 void
2842 hfs_sync_ejectable(struct hfsmount *hfsmp)
2843 {
2844         if (hfsmp->hfs_syncer)  {
2845                 clock_sec_t secs;
2846                 clock_usec_t usecs;
2847                 uint64_t now;
2848
2849                 clock_get_calendar_microtime(&secs, &usecs);
2850                 now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs;
2851
2852                 if (hfsmp->hfs_sync_incomplete && hfsmp->hfs_mp->mnt_pending_write_size >= hfsmp->hfs_max_pending_io) {
2853                         // if we have a sync scheduled but i/o is starting to pile up,
2854                         // don't call thread_call_enter_delayed() again because that
2855                         // will defer the sync.
2856                         return;
2857                 }
2858
2859                 if (hfsmp->hfs_sync_scheduled == 0) {
2860                         uint64_t deadline;
2861
2862                         hfsmp->hfs_last_sync_request_time = now;
2863
2864                         clock_interval_to_deadline(HFS_META_DELAY, HFS_MILLISEC_SCALE, &deadline);
2865
2866                         /*
2867                          * Increment hfs_sync_scheduled on the assumption that we're the
2868                          * first thread to schedule the timer.  If some other thread beat
2869                          * us, then we'll decrement it.  If we *were* the first to
2870                          * schedule the timer, then we need to keep track that the
2871                          * callback is waiting to complete.
2872                          */
2873                         OSIncrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_scheduled);
2874                         if (thread_call_enter_delayed(hfsmp->hfs_syncer, deadline))
2875                                 OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_scheduled);
2876                         else
2877                                 OSIncrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_incomplete);
2878                 }
2879         }
2880 }
2881
2882
2883 int
2884 hfs_start_transaction(struct hfsmount *hfsmp)
2885 {
2886         int ret, unlock_on_err=0;
2887         void * thread = current_thread();
2888
2889 #ifdef HFS_CHECK_LOCK_ORDER
2890         /*
2891          * You cannot start a transaction while holding a system
2892          * file lock. (unless the transaction is nested.)
2893          */
2894         if (hfsmp->jnl && journal_owner(hfsmp->jnl) != thread) {
2895                 if (hfsmp->hfs_catalog_cp && hfsmp->hfs_catalog_cp->c_lockowner == thread) {
2896                         panic("hfs_start_transaction: bad lock order (cat before jnl)\n");
2897                 }
2898                 if (hfsmp->hfs_attribute_cp && hfsmp->hfs_attribute_cp->c_lockowner == thread) {
2899                         panic("hfs_start_transaction: bad lock order (attr before jnl)\n");
2900                 }
2901                 if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == thread) {
2902                         panic("hfs_start_transaction: bad lock order (ext before jnl)\n");
2903                 }
2904         }
2905 #endif /* HFS_CHECK_LOCK_ORDER */
2906
2907         if (hfsmp->jnl == NULL || journal_owner(hfsmp->jnl) != thread) {
2908                 hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
2909                 OSAddAtomic(1, (SInt32 *)&hfsmp->hfs_active_threads);
2910                 unlock_on_err = 1;
2911         }
2912
2913         /* If a downgrade to read-only mount is in progress, no other
2914          * process than the downgrade process is allowed to modify
2915          * the file system.
2916          */
2917         if ((hfsmp->hfs_flags & HFS_RDONLY_DOWNGRADE) &&
2918                         (hfsmp->hfs_downgrading_proc != thread)) {
2919                 ret = EROFS;
2920                 goto out;
2921         }
2922
2923         if (hfsmp->jnl) {
2924                 ret = journal_start_transaction(hfsmp->jnl);
2925                 if (ret == 0) {
2926                         OSAddAtomic(1, &hfsmp->hfs_global_lock_nesting);
2927                 }
2928         } else {
2929                 ret = 0;
2930         }
2931
2932 out:
2933         if (ret != 0 && unlock_on_err) {
2934                 hfs_unlock_global (hfsmp);
2935                 OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_active_threads);
2936         }
2937
2938     return ret;
2939 }
2940
2941 int
2942 hfs_end_transaction(struct hfsmount *hfsmp)
2943 {
2944     int need_unlock=0, ret;
2945
2946     if ((hfsmp->jnl == NULL) || ( journal_owner(hfsmp->jnl) == current_thread()
2947             && (OSAddAtomic(-1, &hfsmp->hfs_global_lock_nesting) == 1)) ) {
2948             need_unlock = 1;
2949     }
2950
2951         if (hfsmp->jnl) {
2952                 ret = journal_end_transaction(hfsmp->jnl);
2953         } else {
2954                 ret = 0;
2955         }
2956
2957         if (need_unlock) {
2958                 OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_active_threads);
2959                 hfs_unlock_global (hfsmp);
2960                 hfs_sync_ejectable(hfsmp);
2961         }
2962
2963     return ret;
2964 }
2965
2966
2967 /*
2968  * Flush the contents of the journal to the disk.
2969  *
2970  *  Input:
2971  *      wait_for_IO -
2972  *      If TRUE, wait to write in-memory journal to the disk
2973  *      consistently, and also wait to write all asynchronous
2974  *      metadata blocks to its corresponding locations
2975  *      consistently on the disk.  This means that the journal
2976  *      is empty at this point and does not contain any
2977  *      transactions.  This is overkill in normal scenarios
2978  *      but is useful whenever the metadata blocks are required
2979  *      to be consistent on-disk instead of just the journal
2980  *      being consistent; like before live verification
2981  *      and live volume resizing.
2982  *
2983  *      If FALSE, only wait to write in-memory journal to the
2984  *      disk consistently.  This means that the journal still
2985  *      contains uncommitted transactions and the file system
2986  *      metadata blocks in the journal transactions might be
2987  *      written asynchronously to the disk.  But there is no
2988  *      guarantee that they are written to the disk before
2989  *      returning to the caller.  Note that this option is
2990  *      sufficient for file system data integrity as it
2991  *      guarantees consistent journal content on the disk.
2992  */
2993 int
2994 hfs_journal_flush(struct hfsmount *hfsmp, boolean_t wait_for_IO)
2995 {
2996         int ret;
2997
2998         /* Only peek at hfsmp->jnl while holding the global lock */
2999         hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
3000         if (hfsmp->jnl) {
3001                 ret = journal_flush(hfsmp->jnl, wait_for_IO);
3002         } else {
3003                 ret = 0;
3004         }
3005         hfs_unlock_global (hfsmp);
3006
3007         return ret;
3008 }
3009
3010
3011 /*
3012  * hfs_erase_unused_nodes
3013  *
3014  * Check wheter a volume may suffer from unused Catalog B-tree nodes that
3015  * are not zeroed (due to <rdar://problem/6947811>).  If so, just write
3016  * zeroes to the unused nodes.
3017  *
3018  * How do we detect when a volume needs this repair?  We can't always be
3019  * certain.  If a volume was created after a certain date, then it may have
3020  * been created with the faulty newfs_hfs.  Since newfs_hfs only created one
3021  * clump, we can assume that if a Catalog B-tree is larger than its clump size,
3022  * that means that the entire first clump must have been written to, which means
3023  * there shouldn't be unused and unwritten nodes in that first clump, and this
3024  * repair is not needed.
3025  *
3026  * We have defined a bit in the Volume Header's attributes to indicate when the
3027  * unused nodes have been repaired.  A newer newfs_hfs will set this bit.
3028  * As will fsck_hfs when it repairs the unused nodes.
3029  */
3030 int hfs_erase_unused_nodes(struct hfsmount *hfsmp)
3031 {
3032         int result;
3033         struct filefork *catalog;
3034         int lockflags;
3035
3036         if (hfsmp->vcbAtrb & kHFSUnusedNodeFixMask)
3037         {
3038                 /* This volume has already been checked and repaired. */
3039                 return 0;
3040         }
3041
3042         if ((hfsmp->localCreateDate < kHFSUnusedNodesFixDate))
3043         {
3044                 /* This volume is too old to have had the problem. */
3045                 hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask;
3046                 return 0;
3047         }
3048
3049         catalog = hfsmp->hfs_catalog_cp->c_datafork;
3050         if (catalog->ff_size > catalog->ff_clumpsize)
3051         {
3052                 /* The entire first clump must have been in use at some point. */
3053                 hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask;
3054                 return 0;
3055         }
3056
3057         /*
3058          * If we get here, we need to zero out those unused nodes.
3059          *
3060          * We start a transaction and lock the catalog since we're going to be
3061          * making on-disk changes.  But note that BTZeroUnusedNodes doens't actually
3062          * do its writing via the journal, because that would be too much I/O
3063          * to fit in a transaction, and it's a pain to break it up into multiple
3064          * transactions.  (It behaves more like growing a B-tree would.)
3065          */
3066         printf("hfs_erase_unused_nodes: updating volume %s.\n", hfsmp->vcbVN);
3067         result = hfs_start_transaction(hfsmp);
3068         if (result)
3069                 goto done;
3070         lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
3071         result = BTZeroUnusedNodes(catalog);
3072         vnode_waitforwrites(hfsmp->hfs_catalog_vp, 0, 0, 0, "hfs_erase_unused_nodes");
3073         hfs_systemfile_unlock(hfsmp, lockflags);
3074         hfs_end_transaction(hfsmp);
3075         if (result == 0)
3076                 hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask;
3077         printf("hfs_erase_unused_nodes: done updating volume %s.\n", hfsmp->vcbVN);
3078
3079 done:
3080         return result;
3081 }
3082
3083
3084 extern time_t snapshot_timestamp;
3085
3086 int
3087 check_for_tracked_file(struct vnode *vp, time_t ctime, uint64_t op_type, void *arg)
3088 {
3089         int tracked_error = 0, snapshot_error = 0;
3090
3091         if (vp == NULL) {
3092                 return 0;
3093         }
3094
3095         if (VTOC(vp)->c_bsdflags & UF_TRACKED) {
3096                 // the file has the tracked bit set, so send an event to the tracked-file handler
3097                 int error;
3098
3099                 // printf("hfs: tracked-file: encountered a file with the tracked bit set! (vp %p)\n", vp);
3100                 error = resolve_nspace_item(vp, op_type | NAMESPACE_HANDLER_TRACK_EVENT);
3101                 if (error) {
3102                         if (error == EAGAIN) {
3103                                 printf("hfs: tracked-file: timed out waiting for namespace handler...\n");
3104
3105                         } else if (error == EINTR) {
3106                                 // printf("hfs: tracked-file: got a signal while waiting for namespace handler...\n");
3107                                 tracked_error = EINTR;
3108                         }
3109                 }
3110         }
3111
3112         if (ctime != 0 && snapshot_timestamp != 0 && (ctime <= snapshot_timestamp || vnode_needssnapshots(vp))) {
3113                 // the change time is within this epoch
3114                 int error;
3115
3116                 error = resolve_nspace_item_ext(vp, op_type | NAMESPACE_HANDLER_SNAPSHOT_EVENT, arg);
3117                 if (error == EDEADLK) {
3118                         snapshot_error = 0;
3119                 } else if (error) {
3120                         if (error == EAGAIN) {
3121                                 printf("hfs: cow-snapshot: timed out waiting for namespace handler...\n");
3122                         } else if (error == EINTR) {
3123                                 // printf("hfs: cow-snapshot: got a signal while waiting for namespace handler...\n");
3124                                 snapshot_error = EINTR;
3125                         }
3126                 }
3127         }
3128
3129         if (tracked_error) return tracked_error;
3130         if (snapshot_error) return snapshot_error;
3131
3132         return 0;
3133 }
3134
3135 int
3136 check_for_dataless_file(struct vnode *vp, uint64_t op_type)
3137 {
3138         int error;
3139
3140         if (vp == NULL || (VTOC(vp)->c_bsdflags & UF_COMPRESSED) == 0 || VTOCMP(vp) == NULL || VTOCMP(vp)->cmp_type != DATALESS_CMPFS_TYPE) {
3141                 // there's nothing to do, it's not dataless
3142                 return 0;
3143         }
3144
3145         // printf("hfs: dataless: encountered a file with the dataless bit set! (vp %p)\n", vp);
3146         error = resolve_nspace_item(vp, op_type | NAMESPACE_HANDLER_NSPACE_EVENT);
3147         if (error == EDEADLK && op_type == NAMESPACE_HANDLER_WRITE_OP) {
3148                 error = 0;
3149         } else if (error) {
3150                 if (error == EAGAIN) {
3151                         printf("hfs: dataless: timed out waiting for namespace handler...\n");
3152                         // XXXdbg - return the fabled ENOTPRESENT (i.e. EJUKEBOX)?
3153                         return 0;
3154                 } else if (error == EINTR) {
3155                         // printf("hfs: dataless: got a signal while waiting for namespace handler...\n");
3156                         return EINTR;
3157                 }
3158         } else if (VTOC(vp)->c_bsdflags & UF_COMPRESSED) {
3159                 //
3160                 // if we're here, the dataless bit is still set on the file
3161                 // which means it didn't get handled.  we return an error
3162                 // but it's presently ignored by all callers of this function.
3163                 //
3164                 // XXXdbg - EDATANOTPRESENT is what we really need...
3165                 //
3166                 return EBADF;
3167         }
3168
3169         return error;
3170 }