livefiles_hfs_plugin/lf_hfs_vfsops.c

   1 /*  Copyright © 2017-2018 Apple Inc. All rights reserved.
   2  *
   3  *  lf_hfs_vfsops.c
   4  *  livefiles_hfs
   5  *
   6  *  Created by Or Haimovich on 18/3/18.
   7  */
   8
   9 #include "lf_hfs_common.h"
  10 #include <CommonCrypto/CommonDigest.h>
  11 #include <stdatomic.h>
  12 #include <sys/ioctl.h>
  13 #include <sys/mount.h>
  14 #include <sys/disk.h>
  15 #include <sys/stat.h>
  16 #include <stdlib.h>
  17 #include "lf_hfs_logger.h"
  18 #include "lf_hfs_mount.h"
  19 #include "lf_hfs.h"
  20 #include "lf_hfs_catalog.h"
  21 #include "lf_hfs_cnode.h"
  22 #include "lf_hfs_chash.h"
  23 #include "lf_hfs_format.h"
  24 #include "lf_hfs_locks.h"
  25 #include "lf_hfs_endian.h"
  26 #include "lf_hfs_locks.h"
  27 #include "lf_hfs_utils.h"
  28 #include "lf_hfs_raw_read_write.h"
  29 #include "lf_hfs_vfsutils.h"
  30 #include "lf_hfs_vfsops.h"
  31 #include "lf_hfs_volume_allocation.h"
  32 #include "lf_hfs_catalog.h"
  33 #include "lf_hfs_link.h"
  34 #include "lf_hfs_vnops.h"
  35 #include "lf_hfs_generic_buf.h"
  36 #include "lf_hfs_fsops_handler.h"
  37 #include "lf_hfs_journal.h"
  38 #include "lf_hfs_fileops_handler.h"
  39
  40 #include <spawn.h>
  41
  42 static void hfs_locks_destroy(struct hfsmount *hfsmp);
  43 static int  hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args);
  44
  45
  46 static int
  47 setup_posix_file_action_for_fsck(posix_spawn_file_actions_t *file_actions, int fd)
  48 {
  49     int error;
  50
  51     if (file_actions == NULL || fd < 0)
  52     {
  53         return EINVAL;
  54     }
  55
  56     error = posix_spawn_file_actions_init(file_actions);
  57     if (error)
  58     {
  59         goto out;
  60     }
  61
  62     error = posix_spawn_file_actions_addinherit_np(file_actions, 0);
  63     if (error)
  64     {
  65         goto out;
  66     }
  67
  68     error = posix_spawn_file_actions_addinherit_np(file_actions, 1);
  69     if (error)
  70     {
  71         goto out;
  72     }
  73
  74     error = posix_spawn_file_actions_addinherit_np(file_actions, 2);
  75     if (error)
  76     {
  77         goto out;
  78     }
  79
  80     error = posix_spawn_file_actions_addinherit_np(file_actions, fd);
  81
  82 out:
  83     return error;
  84 }
  85
  86 static int
  87 setup_spawnattr_for_fsck(posix_spawnattr_t *spawn_attr)
  88 {
  89     int error;
  90
  91     error = posix_spawnattr_init(spawn_attr);
  92     if (error)
  93     {
  94         goto out;
  95     }
  96     error = posix_spawnattr_setflags(spawn_attr, POSIX_SPAWN_CLOEXEC_DEFAULT);
  97
  98 out:
  99     return error;
 100 }
 101
 102
 103 // fsck_mount_and_replay: executed on fsck_hfs -quick
 104 // Try to mount, and if a journaled volume, play the journal.
 105 // Returned values:
 106 // OK if:
 107 // 1) On journaled volumes, the journal has been replayed and the dirty bit cleared.
 108 // 2) On non-journalled volumes, the dirty is cleared.
 109 // EINVAL if:
 110 // 1) On non-journalled volumes the dirty bit is set. Please run fsck_hfs to fix.
 111 // 2) On journalled volume, the replay failed. Try fsck_hfs.
 112 int fsck_mount_and_replay(int iFd) {
 113     int iErr = 0;
 114
 115     LFHFS_LOG(LEVEL_DEBUG, "fsck_mount_and_replay %d", iFd);
 116
 117     UVFSFileNode sRootNode;
 118
 119     iErr = LFHFS_Taste(iFd);
 120     if (iErr) {
 121         LFHFS_LOG(LEVEL_DEBUG, "LFHFS_Taste returned %d", iErr);
 122         return iErr;
 123     }
 124
 125     UVFSScanVolsRequest sScanVolsReq = {0};
 126     UVFSScanVolsReply sScanVolsReply = {0};
 127     iErr = LFHFS_ScanVols(iFd, &sScanVolsReq, &sScanVolsReply);
 128     if (iErr) {
 129         LFHFS_LOG(LEVEL_DEBUG, "LFHFS_ScanVol returned %d", iErr);
 130         return iErr;
 131     }
 132
 133     // Mount and replay journal if possible
 134     iErr = LFHFS_Mount(iFd, 0, 0, NULL, &sRootNode); // On journaled volumes, this replays the journal.
 135                                          // Non-journaled volumes fails to mount if dirty (Unmounted == 0).
 136     if (iErr) {
 137         LFHFS_LOG(LEVEL_DEBUG, "fsck_mount_and_replay: LFHFS_Mount returned %d", iErr);
 138         return EINVAL;
 139     }
 140
 141     LFHFS_Unmount (sRootNode, UVFSUnmountHintNone);
 142
 143     return iErr;
 144 }
 145
 146 #define PATH_TO_FSCK "/System/Library/Filesystems/hfs.fs/Contents/Resources/fsck_hfs"
 147
 148 int
 149 fsck_hfs(int fd, check_flags_t how)
 150 {
 151     pid_t child;
 152     pid_t child_found;
 153     int child_status;
 154     extern char **environ;
 155     char fdescfs_path[24];
 156     posix_spawn_file_actions_t file_actions;
 157     int result;
 158     posix_spawnattr_t spawn_attr;
 159
 160     /*
 161      * XXXJRT There are dragons related to how the journal is replayed in
 162      * fsck_hfs.  Until we can sort out the mess, disable running fsck_hfs.
 163      * <rdar://problem/47262605> USB: Re-enable Detonator fsck_hfs
 164      */
 165     if (how == QUICK_CHECK) {
 166         if (fsck_mount_and_replay(fd) == 0) {
 167             return(0);
 168         }
 169     }
 170
 171     LFHFS_LOG(LEVEL_DEFAULT, "fsck_hfs - fsck start for %d", fd);
 172     snprintf(fdescfs_path, sizeof(fdescfs_path), "/dev/fd/%d", fd);
 173     const char * argv[] = {"fsck_hfs", "-q", fdescfs_path, NULL};
 174
 175     switch (how)
 176     {
 177         case QUICK_CHECK:
 178             /* Do nothing, already setup for this */
 179             break;
 180         case CHECK:
 181             argv[1] = "-n";
 182             break;
 183         case CHECK_AND_REPAIR:
 184             argv[1] = "-y";
 185             break;
 186         default:
 187             LFHFS_LOG(LEVEL_ERROR, "Invalid how flags for the check, ignoring; %d", how);
 188             break;
 189     }
 190
 191     LFHFS_LOG(LEVEL_DEBUG, "fsck_hfs params: %s %s %s", argv[1], argv[2], argv[3]);
 192     result = setup_posix_file_action_for_fsck(&file_actions, fd);
 193     if (result)
 194     {
 195         goto out;
 196     }
 197
 198     result = setup_spawnattr_for_fsck(&spawn_attr);
 199     if (result)
 200     {
 201         posix_spawn_file_actions_destroy(&file_actions);
 202         goto out;
 203     }
 204
 205     result = posix_spawn(&child,
 206                          PATH_TO_FSCK,
 207                          &file_actions,
 208                          &spawn_attr,
 209                          (char * const *)argv,
 210                          environ);
 211
 212     posix_spawn_file_actions_destroy(&file_actions);
 213     posix_spawnattr_destroy(&spawn_attr);
 214     if (result)
 215     {
 216         LFHFS_LOG(LEVEL_ERROR, "posix_spawn fsck_hfs: error=%d", result);
 217         goto out;
 218     }
 219
 220     // Wait for child to finish, XXXab: revisit, need sensible timeout?
 221     do {
 222         child_found = waitpid(child, &child_status, 0);
 223     } while (child_found == -1 && errno == EINTR);
 224
 225     if (child_found == -1)
 226     {
 227         result = errno;
 228         LFHFS_LOG(LEVEL_ERROR, "waitpid fsck_hfs: errno=%d", result);
 229         goto out;
 230     }
 231
 232     if (WIFEXITED(child_status))
 233     {
 234         result = WEXITSTATUS(child_status);
 235         if (result)
 236         {
 237             LFHFS_LOG(LEVEL_ERROR, "fsck_hfs: exited with status %d", result);
 238             result = EILSEQ;
 239         } else {
 240             LFHFS_LOG(LEVEL_ERROR, "fsck_hfs: exited with status %d", result);
 241         }
 242     }
 243     else
 244     {
 245         result = WTERMSIG(child_status);
 246         LFHFS_LOG(LEVEL_ERROR, "fsck_hfs: terminated by signal %d", result);
 247         result = EINTR;
 248     }
 249
 250 out:
 251     LFHFS_LOG(LEVEL_DEFAULT, "fsck_hfs - fsck finish for %d with err %d", fd, result);
 252     return result;
 253 }
 254
 255 int
 256 hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data)
 257 {
 258     struct hfsmount *hfsmp  = NULL;
 259     int retval              = 0;
 260     if ( devvp == NULL )
 261     {
 262         retval = EINVAL;
 263         goto fail;
 264     }
 265
 266     retval = hfs_mountfs(devvp, mp, NULL);
 267     if (retval)
 268     {
 269         // ENOTSUP is for regular HFS -> just fail
 270         if (retval != ENOTSUP)
 271         {
 272             //Failed during mount, try to run fsck to fix and try mount again
 273             retval = fsck_hfs(devvp->psFSRecord->iFD, CHECK_AND_REPAIR);
 274
 275             // fsck succeeded, try to mount
 276             if (!retval) {
 277                 retval = hfs_mountfs(devvp, mp, NULL);
 278                 if (!retval)
 279                     goto mount_passed;
 280             }
 281         }
 282
 283         LFHFS_LOG(LEVEL_ERROR, "hfs_mount: hfs_mountfs returned error=%d\n", retval);
 284         goto fail;
 285     }
 286 mount_passed:
 287     /* After hfs_mountfs succeeds, we should have valid hfsmp */
 288     hfsmp = VFSTOHFS(mp);
 289
 290     /* Set up the maximum defrag file size */
 291     hfsmp->hfs_defrag_max = HFS_INITIAL_DEFRAG_SIZE;
 292
 293     if (!data)
 294     {
 295         // Root mount
 296         hfsmp->hfs_uid          = UNKNOWNUID;
 297         hfsmp->hfs_gid          = UNKNOWNGID;
 298         hfsmp->hfs_dir_mask     = (S_IRWXU | S_IRGRP|S_IXGRP | S_IROTH|S_IXOTH); /* 0755 */
 299         hfsmp->hfs_file_mask    = (S_IRWXU | S_IRGRP|S_IXGRP | S_IROTH|S_IXOTH); /* 0755 */
 300
 301         /* Establish the free block reserve. */
 302         hfsmp->reserveBlocks = (uint32_t) ((u_int64_t)hfsmp->totalBlocks * HFS_MINFREE) / 100;
 303         hfsmp->reserveBlocks = MIN(hfsmp->reserveBlocks, HFS_MAXRESERVE / hfsmp->blockSize);
 304     }
 305
 306 fail:
 307     return (retval);
 308 }
 309
 310 static int hfs_InitialMount(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, HFSPlusVolumeHeader **vhp, off_t *embeddedOffset, struct hfsmount **hfsmp, bool bFailForDirty)
 311 {
 312     int retval                      = 0;
 313     HFSMasterDirectoryBlock *mdbp   = NULL;
 314     void* pvBuffer                  = NULL;
 315     int mntwrapper;
 316     u_int64_t disksize;
 317     u_int64_t log_blkcnt;
 318     u_int32_t log_blksize;
 319     u_int32_t phys_blksize;
 320     u_int32_t minblksize;
 321     u_int32_t iswritable;
 322     u_int64_t mdb_offset;
 323     u_int32_t device_features = 0;
 324
 325     mntwrapper = 0;
 326     minblksize = kHFSBlockSize;
 327     *hfsmp = NULL;
 328
 329     /* Get the logical block size (treated as physical block size everywhere) */
 330     if (ioctl(devvp->psFSRecord->iFD, DKIOCGETBLOCKSIZE, &log_blksize))
 331     {
 332         LFHFS_LOG(LEVEL_DEBUG, "hfs_mountfs: DKIOCGETBLOCKSIZE failed\n");
 333         retval = ENXIO;
 334         goto error_exit;
 335     }
 336     if (log_blksize == 0 || log_blksize > 1024*1024*1024)
 337     {
 338         LFHFS_LOG(LEVEL_ERROR, "hfs_mountfs: logical block size 0x%x looks bad.  Not mounting.\n", log_blksize);
 339         retval = ENXIO;
 340         goto error_exit;
 341     }
 342
 343     /* Get the physical block size. */
 344     if (ioctl(devvp->psFSRecord->iFD, DKIOCGETPHYSICALBLOCKSIZE, &phys_blksize))
 345     {
 346         if ((retval != ENOTSUP) && (retval != ENOTTY))
 347         {
 348             LFHFS_LOG(LEVEL_DEBUG, "hfs_mountfs: DKIOCGETPHYSICALBLOCKSIZE failed\n");
 349             retval = ENXIO;
 350             goto error_exit;
 351         }
 352         /* If device does not support this ioctl, assume that physical
 353          * block size is same as logical block size
 354          */
 355         phys_blksize = log_blksize;
 356     }
 357
 358     if (phys_blksize == 0 || phys_blksize > MAXBSIZE)
 359     {
 360         LFHFS_LOG(LEVEL_ERROR, "hfs_mountfs: physical block size 0x%x looks bad.  Not mounting.\n", phys_blksize);
 361         retval = ENXIO;
 362         goto error_exit;
 363     }
 364
 365         /* Don't let phys_blksize be smaller than the logical  */
 366         if (phys_blksize < log_blksize) {
 367                 /*
 368                  * In the off chance that the phys_blksize is SMALLER than the logical
 369                  * then don't let that happen.  Pretend that the PHYSICALBLOCKSIZE
 370                  * ioctl was not supported.
 371                  */
 372                  phys_blksize = log_blksize;
 373         }
 374
 375     /* Get the number of physical blocks. */
 376     if (ioctl(devvp->psFSRecord->iFD, DKIOCGETBLOCKCOUNT, &log_blkcnt))
 377     {
 378         LFHFS_LOG(LEVEL_DEBUG, "hfs_mountfs: DKIOCGETBLOCKCOUNT failed\n");
 379         retval = ENXIO;
 380         goto error_exit;
 381     }
 382
 383     /* Compute an accurate disk size (i.e. within 512 bytes) */
 384     disksize = (u_int64_t)log_blkcnt * (u_int64_t)log_blksize;
 385
 386     /*
 387      * At this point:
 388      *   minblksize is the minimum physical block size
 389      *   log_blksize has our preferred physical block size
 390      *   log_blkcnt has the total number of physical blocks
 391      */
 392     mdbp = hfs_mallocz(kMDBSize);
 393     if (mdbp == NULL)
 394     {
 395         retval = ENOMEM;
 396         goto error_exit;
 397     }
 398
 399     pvBuffer = hfs_malloc(phys_blksize);
 400     if (pvBuffer == NULL)
 401     {
 402         retval = ENOMEM;
 403         goto error_exit;
 404     }
 405
 406     mdb_offset = (uint64_t) HFS_PRI_SECTOR(log_blksize);
 407     retval = raw_readwrite_read_mount( devvp, HFS_PHYSBLK_ROUNDDOWN(mdb_offset, (phys_blksize/log_blksize)), phys_blksize, pvBuffer, phys_blksize, NULL, NULL);
 408     if (retval)
 409     {
 410         LFHFS_LOG(LEVEL_DEBUG, "hfs_mountfs: raw_readwrite_read_mount failed with %d\n", retval);
 411         goto error_exit;
 412     }
 413
 414     bcopy(pvBuffer + HFS_PRI_OFFSET(phys_blksize), mdbp, kMDBSize);
 415     hfs_free(pvBuffer);
 416     pvBuffer = NULL;
 417
 418     *hfsmp = hfs_malloc(sizeof(struct hfsmount));
 419     if (*hfsmp == NULL)
 420     {
 421         retval = ENOMEM;
 422         goto error_exit;
 423     }
 424     memset( *hfsmp, 0, sizeof(struct hfsmount) );
 425
 426     //Copy read only flag
 427     if (mp->mnt_flag == MNT_RDONLY) (*hfsmp)->hfs_flags = HFS_READ_ONLY;
 428
 429     hfs_chashinit_finish(*hfsmp);
 430
 431     /* Init the ID lookup hashtable */
 432     hfs_idhash_init (*hfsmp);
 433
 434     /*
 435      * See if the disk supports unmap (trim).
 436      *
 437      * NOTE: vfs_init_io_attributes has not been called yet, so we can't use the io_flags field
 438      * returned by vfs_ioattr.  We need to call VNOP_IOCTL ourselves.
 439      */
 440     if (ioctl(devvp->psFSRecord->iFD, DKIOCGETFEATURES, &device_features) == 0)
 441     {
 442         if (device_features & DK_FEATURE_UNMAP)
 443         {
 444             (*hfsmp)->hfs_flags |= HFS_UNMAP;
 445         }
 446
 447         if(device_features & DK_FEATURE_BARRIER)
 448         {
 449             (*hfsmp)->hfs_flags |= HFS_FEATURE_BARRIER;
 450         }
 451     }
 452
 453     /*
 454      *  Init the volume information structure
 455      */
 456     lf_lck_mtx_init(&(*hfsmp)->hfs_mutex);
 457     lf_lck_mtx_init(&(*hfsmp)->sync_mutex);
 458     lf_lck_rw_init(&(*hfsmp)->hfs_global_lock);
 459     lf_lck_spin_init(&(*hfsmp)->vcbFreeExtLock);
 460
 461     if (mp)
 462     {
 463         mp->psHfsmount = (*hfsmp);
 464     }
 465
 466     (*hfsmp)->hfs_mp = mp;            /* Make VFSTOHFS work */
 467     (*hfsmp)->hfs_raw_dev = 0; //vnode_specrdev(devvp);
 468     (*hfsmp)->hfs_devvp = devvp;
 469     (*hfsmp)->hfs_logical_block_size = log_blksize;
 470     (*hfsmp)->hfs_logical_block_count = log_blkcnt;
 471     (*hfsmp)->hfs_logical_bytes = (uint64_t) log_blksize * (uint64_t) log_blkcnt;
 472     (*hfsmp)->hfs_physical_block_size = phys_blksize;
 473     (*hfsmp)->hfs_log_per_phys = (phys_blksize / log_blksize);
 474     (*hfsmp)->hfs_flags |= HFS_WRITEABLE_MEDIA;
 475
 476     if (mp && (mp->mnt_flag & MNT_UNKNOWNPERMISSIONS))
 477     {
 478         (*hfsmp)->hfs_flags |= HFS_UNKNOWN_PERMS;
 479     }
 480
 481     /* MNT_UNKNOWNPERMISSIONS requires setting up uid, gid, and mask: */
 482     if (mp && (mp->mnt_flag & MNT_UNKNOWNPERMISSIONS))
 483     {
 484         (*hfsmp)->hfs_uid = UNKNOWNUID;
 485         (*hfsmp)->hfs_gid = UNKNOWNGID;
 486         //        vfs_setowner(mp, hfsmp->hfs_uid, hfsmp->hfs_gid);            /* tell the VFS */
 487         (*hfsmp)->hfs_dir_mask = UNKNOWNPERMISSIONS & ALLPERMS;        /* 0777: rwx---rwx */
 488         (*hfsmp)->hfs_file_mask = UNKNOWNPERMISSIONS & DEFFILEMODE;    /* 0666: no --x by default? */
 489     }
 490
 491     /* Find out if disk media is writable. */
 492     if (ioctl(devvp->psFSRecord->iFD, DKIOCISWRITABLE, &iswritable) == 0)
 493     {
 494         if (iswritable)
 495         {
 496             (*hfsmp)->hfs_flags |= HFS_WRITEABLE_MEDIA;
 497         }
 498         else
 499         {
 500             (*hfsmp)->hfs_flags &= ~HFS_WRITEABLE_MEDIA;
 501         }
 502     }
 503
 504     // Reservations
 505     rl_init(&(*hfsmp)->hfs_reserved_ranges[0]);
 506     rl_init(&(*hfsmp)->hfs_reserved_ranges[1]);
 507
 508     // record the current time at which we're mounting this volume
 509     struct timeval tv;
 510     microuptime(&tv);
 511     (*hfsmp)->hfs_mount_time = tv.tv_sec;
 512
 513     /* Mount an HFS Plus disk */
 514     int   jnl_disable = 0;
 515
 516     /* Mount a standard HFS disk */
 517     if ((SWAP_BE16(mdbp->drSigWord) == kHFSSigWord) && (mntwrapper || (SWAP_BE16(mdbp->drEmbedSigWord) != kHFSPlusSigWord)))
 518     {
 519         LFHFS_LOG(LEVEL_DEBUG, "hfs_mountfs: Not supporting standard HFS\n");
 520         retval = ENOTSUP;
 521         goto error_exit;
 522     }
 523     /* Get the embedded Volume Header */
 524     else if (SWAP_BE16(mdbp->drEmbedSigWord) == kHFSPlusSigWord)
 525     {
 526         *embeddedOffset = SWAP_BE16(mdbp->drAlBlSt) * kHFSBlockSize;
 527         *embeddedOffset += (u_int64_t)SWAP_BE16(mdbp->drEmbedExtent.startBlock) * (u_int64_t)SWAP_BE32(mdbp->drAlBlkSiz);
 528
 529         /*
 530          * If the embedded volume doesn't start on a block
 531          * boundary, then switch the device to a 512-byte
 532          * block size so everything will line up on a block
 533          * boundary.
 534          */
 535         if ((*embeddedOffset % log_blksize) != 0)
 536         {
 537             // LF not support DKIOCSETBLOCKSIZE, return error.
 538             LFHFS_LOG(LEVEL_DEFAULT, "hfs_mountfs: embedded volume offset not a multiple of physical block size (%d); switching to 512\n", log_blksize);
 539             retval = ENXIO;
 540             goto error_exit;
 541         }
 542
 543         disksize = (u_int64_t)SWAP_BE16(mdbp->drEmbedExtent.blockCount) * (u_int64_t)SWAP_BE32(mdbp->drAlBlkSiz);
 544
 545         (*hfsmp)->hfs_logical_block_count = disksize / log_blksize;
 546
 547         (*hfsmp)->hfs_logical_bytes = (uint64_t) (*hfsmp)->hfs_logical_block_count * (uint64_t) (*hfsmp)->hfs_logical_block_size;
 548
 549         mdb_offset = (uint64_t)((*embeddedOffset / log_blksize) + HFS_PRI_SECTOR(log_blksize));
 550
 551         pvBuffer = hfs_malloc(phys_blksize);
 552         if (pvBuffer == NULL)
 553         {
 554             retval = ENOMEM;
 555             goto error_exit;
 556         }
 557
 558         retval = raw_readwrite_read_mount( devvp, HFS_PHYSBLK_ROUNDDOWN(mdb_offset, (phys_blksize/log_blksize)), phys_blksize, pvBuffer, phys_blksize, NULL, NULL);
 559         if (retval)
 560         {
 561             LFHFS_LOG(LEVEL_DEBUG, "hfs_mountfs: raw_readwrite_read_mount (2) failed with %d\n", retval);
 562             goto error_exit;
 563         }
 564
 565         bcopy(pvBuffer + HFS_PRI_OFFSET(phys_blksize), mdbp, kMDBSize);
 566         *vhp = (HFSPlusVolumeHeader*) mdbp;
 567         hfs_free(pvBuffer);
 568         pvBuffer = NULL;
 569     }
 570     else
 571     { /* pure HFS+ */
 572         *embeddedOffset = 0;
 573         *vhp = (HFSPlusVolumeHeader*) mdbp;
 574     }
 575
 576     retval = hfs_ValidateHFSPlusVolumeHeader(*hfsmp, *vhp);
 577     if (retval)
 578         goto error_exit;
 579
 580     /*
 581      * If allocation block size is less than the physical block size,
 582      * invalidate the buffer read in using native physical block size
 583      * to ensure data consistency.
 584      *
 585      * HFS Plus reserves one allocation block for the Volume Header.
 586      * If the physical size is larger, then when we read the volume header,
 587      * we will also end up reading in the next allocation block(s).
 588      * If those other allocation block(s) is/are modified, and then the volume
 589      * header is modified, the write of the volume header's buffer will write
 590      * out the old contents of the other allocation blocks.
 591      *
 592      * We assume that the physical block size is same as logical block size.
 593      * The physical block size value is used to round down the offsets for
 594      * reading and writing the primary and alternate volume headers.
 595      *
 596      * The same logic is also in hfs_MountHFSPlusVolume to ensure that
 597      * hfs_mountfs, hfs_MountHFSPlusVolume and later are doing the I/Os
 598      * using same block size.
 599      */
 600     if (SWAP_BE32((*vhp)->blockSize) < (*hfsmp)->hfs_physical_block_size)
 601     {
 602         phys_blksize = (*hfsmp)->hfs_logical_block_size;
 603         (*hfsmp)->hfs_physical_block_size = (*hfsmp)->hfs_logical_block_size;
 604         (*hfsmp)->hfs_log_per_phys = 1;
 605
 606         if (retval)
 607             goto error_exit;
 608     }
 609
 610     /*
 611      * On inconsistent disks, do not allow read-write mount
 612      * unless it is the boot volume being mounted.  We also
 613      * always want to replay the journal if the journal_replay_only
 614      * flag is set because that will (most likely) get the
 615      * disk into a consistent state before fsck_hfs starts
 616      * looking at it.
 617      */
 618     if ( (mp && !(mp->mnt_flag & MNT_ROOTFS))
 619         && (SWAP_BE32((*vhp)->attributes) & kHFSVolumeInconsistentMask)
 620         && !((*hfsmp)->hfs_flags & HFS_READ_ONLY))
 621     {
 622         LFHFS_LOG(LEVEL_DEBUG, "hfs_mountfs: failed to mount non-root inconsistent disk\n");
 623         retval = EINVAL;
 624         goto error_exit;
 625     }
 626
 627     (*hfsmp)->jnl = NULL;
 628     (*hfsmp)->jvp = NULL;
 629     if (args != NULL && (args->flags & HFSFSMNT_EXTENDED_ARGS) && args->journal_disable)
 630     {
 631         jnl_disable = 1;
 632     }
 633
 634     /*
 635      * We only initialize the journal here if the last person
 636      * to mount this volume was journaling aware.  Otherwise
 637      * we delay journal initialization until later at the end
 638      * of hfs_MountHFSPlusVolume() because the last person who
 639      * mounted it could have messed things up behind our back
 640      * (so we need to go find the .journal file, make sure it's
 641      * the right size, re-sync up if it was moved, etc).
 642      */
 643     uint32_t lastMountedVersion = SWAP_BE32((*vhp)->lastMountedVersion);
 644     uint32_t attributes         = SWAP_BE32((*vhp)->attributes);
 645     if (   (lastMountedVersion == kHFSJMountVersion) &&
 646         (attributes & kHFSVolumeJournaledMask)    &&
 647         !jnl_disable)
 648     {
 649
 650         // if we're able to init the journal, mark the mount
 651         // point as journaled.
 652         if ((retval = hfs_early_journal_init(*hfsmp, *vhp, args, *embeddedOffset, mdb_offset, mdbp)) != 0)
 653         {
 654             if (retval == EROFS)
 655             {
 656                 // EROFS is a special error code that means the volume has an external
 657                 // journal which we couldn't find.  in that case we do not want to
 658                 // rewrite the volume header - we'll just refuse to mount the volume.
 659                 LFHFS_LOG(LEVEL_DEBUG, "hfs_mountfs: hfs_early_journal_init indicated external jnl \n");
 660                 retval = EINVAL;
 661                 goto error_exit;
 662             }
 663
 664             // if the journal failed to open, then set the lastMountedVersion
 665             // to be "FSK!" which fsck_hfs will see and force the fsck instead
 666             // of just bailing out because the volume is journaled.
 667             LFHFS_LOG(LEVEL_DEBUG, "hfs_mountfs: hfs_early_journal_init failed, setting to FSK \n");
 668             HFSPlusVolumeHeader *jvhp;
 669
 670             (*hfsmp)->hfs_flags |= HFS_NEED_JNL_RESET;
 671
 672             if (mdb_offset == 0)
 673             {
 674                 mdb_offset = (uint64_t)((*embeddedOffset / log_blksize) + HFS_PRI_SECTOR(log_blksize));
 675             }
 676
 677             pvBuffer = hfs_malloc(phys_blksize);
 678             if (pvBuffer == NULL)
 679             {
 680                 retval = ENOMEM;
 681                 goto error_exit;
 682             }
 683
 684             retval = raw_readwrite_read_mount( devvp, HFS_PHYSBLK_ROUNDDOWN(mdb_offset, (*hfsmp)->hfs_log_per_phys), phys_blksize, pvBuffer, phys_blksize, NULL, NULL);
 685             if (retval)
 686             {
 687                 LFHFS_LOG(LEVEL_DEBUG, "hfs_mountfs: raw_readwrite_read_mount (3) failed with %d\n", retval);
 688                 goto error_exit;
 689             }
 690
 691             jvhp = (HFSPlusVolumeHeader *)(pvBuffer + HFS_PRI_OFFSET(phys_blksize));
 692
 693             if (SWAP_BE16(jvhp->signature) == kHFSPlusSigWord || SWAP_BE16(jvhp->signature) == kHFSXSigWord)
 694             {
 695                 LFHFS_LOG(LEVEL_DEFAULT, "hfs_mountfs: Journal replay fail.  Writing lastMountVersion as FSK!\n");
 696
 697                 jvhp->lastMountedVersion = SWAP_BE32(kFSKMountVersion);
 698                 retval = raw_readwrite_write_mount( devvp, HFS_PHYSBLK_ROUNDDOWN(mdb_offset, (*hfsmp)->hfs_log_per_phys), phys_blksize, pvBuffer, phys_blksize, NULL, NULL );
 699                 if (retval)
 700                 {
 701                     LFHFS_LOG(LEVEL_DEBUG, "hfs_mountfs: raw_readwrite_write_mount (1) failed with %d\n", retval);
 702                     goto error_exit;
 703                 }
 704                 hfs_free(pvBuffer);
 705                 pvBuffer = NULL;
 706             }
 707
 708             LFHFS_LOG(LEVEL_DEBUG, "hfs_mountfs: hfs_early_journal_init failed, erroring out \n");
 709             retval = EINVAL;
 710             goto error_exit;
 711         }
 712     }
 713
 714     retval = hfs_MountHFSPlusVolume(*hfsmp, *vhp, *embeddedOffset, disksize, bFailForDirty);
 715     /*
 716      * If the backend didn't like our physical blocksize
 717      * then retry with physical blocksize of 512.
 718      */
 719     if ((retval == ENXIO) && (log_blksize > 512) && (log_blksize != minblksize))
 720     {
 721         // LF not support DKIOCSETBLOCKSIZE, return error.
 722         LFHFS_LOG(LEVEL_DEFAULT, "hfs_mountfs: could not use physical block size (%d).\n", log_blksize);
 723         goto error_exit;
 724     }
 725     else if ( retval )
 726     {
 727         LFHFS_LOG(LEVEL_DEBUG, "hfs_mountfs: hfs_MountHFSPlusVolume encountered failure %d \n", retval);
 728         goto error_exit;
 729     }
 730
 731     return (retval);
 732
 733 error_exit:
 734     if (pvBuffer)
 735         hfs_free(pvBuffer);
 736
 737     hfs_free(mdbp);
 738
 739     if (*hfsmp)
 740     {
 741         hfs_locks_destroy(*hfsmp);
 742         hfs_delete_chash(*hfsmp);
 743         hfs_idhash_destroy (*hfsmp);
 744
 745         hfs_free(*hfsmp);
 746         *hfsmp = NULL;
 747     }
 748     return (retval);
 749 }
 750
 751
 752 int hfs_ScanVolGetVolName(int iFd, char* pcVolumeName)
 753 {
 754     int retval = 0;
 755
 756     HFSPlusVolumeHeader *vhp;
 757     off_t embeddedOffset;
 758     struct hfsmount *hfsmp;
 759     struct mount* psMount            = hfs_mallocz(sizeof(struct mount));
 760     struct vnode* psDevVnode         = hfs_mallocz(sizeof(struct vnode));
 761     struct cnode* psDevCnode         = hfs_mallocz(sizeof(struct cnode));
 762     struct filefork* psDevFileFork   = hfs_mallocz(sizeof(struct filefork));
 763     FileSystemRecord_s *psFSRecord   = hfs_mallocz(sizeof(FileSystemRecord_s));
 764
 765     if ( psMount == NULL || psDevVnode == NULL || psDevCnode == NULL || psDevFileFork == NULL || psFSRecord == NULL )
 766     {
 767         retval = ENOMEM;
 768         LFHFS_LOG(LEVEL_ERROR, "hfs_ScanVolGetVolName: failed to malloc initial system files\n");
 769         goto exit;
 770     }
 771
 772     psFSRecord->iFD             = iFd;
 773     psDevVnode->psFSRecord      = psFSRecord;
 774     psDevVnode->sFSParams.vnfs_marksystem = 1;
 775     psDevVnode->bIsMountVnode   = true;
 776
 777     // Initializing inputs for hfs_mount
 778     psDevFileFork->ff_data.cf_blocks                = 3;
 779     psDevFileFork->ff_data.cf_extents[0].blockCount = 1;
 780     psDevFileFork->ff_data.cf_extents[0].startBlock = 0;
 781
 782     psDevVnode->sFSParams.vnfs_fsnode   = psDevCnode;
 783     psDevCnode->c_vp                    = psDevVnode;
 784     psDevVnode->is_rsrc                 = false;
 785     psDevCnode->c_datafork              = psDevFileFork;
 786     psDevVnode->sFSParams.vnfs_mp       = psMount;
 787
 788     retval = hfs_InitialMount(psDevVnode, psMount, 0, &vhp, &embeddedOffset, &hfsmp, false);
 789
 790     if (retval)
 791     {
 792         goto exit;
 793     }
 794     else
 795     {
 796         strlcpy(pcVolumeName, (char*) hfsmp->vcbVN, UVFS_SCANVOLS_VOLNAME_MAX);
 797     }
 798
 799     if (vhp) free(vhp);
 800     if (hfsmp)
 801     {
 802         if (hfsmp->jnl) {
 803             journal_release(hfsmp->jnl);
 804             hfsmp->jnl = NULL;
 805         }
 806
 807         hfsUnmount(hfsmp);
 808
 809         hfs_locks_destroy(hfsmp);
 810         hfs_delete_chash(hfsmp);
 811         hfs_idhash_destroy (hfsmp);
 812
 813         hfs_free(hfsmp);
 814         hfsmp = NULL;
 815     }
 816
 817 exit:
 818     if (retval) {
 819         LFHFS_LOG(LEVEL_ERROR, "hfs_ScanVolGetVolName: failed with error %d, returning empty name and no error\n",retval);
 820         pcVolumeName[0] = '\0';
 821     }
 822
 823     if (psMount) free (psMount);
 824     if (psDevVnode) free (psDevVnode);
 825     if (psDevCnode) free (psDevCnode);
 826     if (psDevFileFork) free (psDevFileFork);
 827     if (psFSRecord) free (psFSRecord);
 828
 829     return 0;
 830 }
 831
 832 /*
 833  * Common code for mount and mountroot
 834  */
 835 static int
 836 hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args)
 837 {
 838     int retval = 0;
 839
 840     HFSPlusVolumeHeader *vhp;
 841     off_t embeddedOffset;
 842     struct hfsmount *hfsmp;
 843     retval = hfs_InitialMount(devvp, mp, args, &vhp, &embeddedOffset, &hfsmp, true);
 844     if ( retval )
 845     {
 846         LFHFS_LOG(LEVEL_DEBUG, "hfs_mountfs: hfs_InitialMount encountered failure %d \n", retval);
 847         //No need to go to error_exit, since everything got reset at the Initial Mount
 848         return retval;
 849     }
 850
 851     retval = hfs_CollectBtreeStats(hfsmp, vhp, embeddedOffset, args);
 852     free(vhp);
 853     vhp = NULL;
 854     if ( retval )
 855     {
 856         LFHFS_LOG(LEVEL_DEBUG, "hfs_mountfs: hfs_CollectBtreeStats encountered failure %d \n", retval);
 857         goto error_exit;
 858     }
 859
 860     // save off a snapshot of the mtime from the previous mount
 861     // (for matador).
 862     hfsmp->hfs_last_mounted_mtime = hfsmp->hfs_mtime;
 863
 864     if ( retval )
 865     {
 866         LFHFS_LOG(LEVEL_DEBUG, "hfs_mountfs: encountered failure %d \n", retval);
 867         goto error_exit;
 868     }
 869
 870     LFHFS_LOG(LEVEL_DEFAULT, "hfs_mountfs: mounted %s on device %s\n", (hfsmp->vcbVN[0] ? (const char*) hfsmp->vcbVN : "unknown"), "unknown device");
 871
 872     hfs_flushvolumeheader(hfsmp, 0);
 873
 874     return (0);
 875
 876 error_exit:
 877     if (vhp) free(vhp);
 878
 879     if (hfsmp)
 880     {
 881         hfsUnmount(hfsmp);
 882
 883         hfs_locks_destroy(hfsmp);
 884         hfs_delete_chash(hfsmp);
 885         hfs_idhash_destroy (hfsmp);
 886
 887         hfs_free(hfsmp);
 888         hfsmp = NULL;
 889     }
 890     return (retval);
 891 }
 892
 893 /*
 894  * Destroy all locks, mutexes and spinlocks in hfsmp on unmount or failed mount
 895  */
 896 static void
 897 hfs_locks_destroy(struct hfsmount *hfsmp)
 898 {
 899
 900     lf_lck_mtx_destroy(&hfsmp->hfs_mutex);
 901     lf_lck_mtx_destroy(&hfsmp->sync_mutex);
 902     lf_lck_rw_destroy(&hfsmp->hfs_global_lock);
 903     lf_lck_spin_destroy(&hfsmp->vcbFreeExtLock);
 904
 905     return;
 906 }
 907
 908
 909 /*
 910  *  Flush any dirty in-memory mount data to the on-disk
 911  *  volume header.
 912  *
 913  *  Note: the on-disk volume signature is intentionally
 914  *  not flushed since the on-disk "H+" and "HX" signatures
 915  *  are always stored in-memory as "H+".
 916  */
 917 int
 918 hfs_flushvolumeheader(struct hfsmount *hfsmp, hfs_flush_volume_header_options_t options)
 919 {
 920     int retval = 0;
 921
 922     ExtendedVCB *vcb = HFSTOVCB(hfsmp);
 923     bool critical = false;
 924     daddr64_t avh_sector;
 925     bool altflush = ISSET(options, HFS_FVH_WRITE_ALT);
 926
 927     void         *pvVolHdrData  = NULL;
 928     GenericLFBuf *psVolHdrBuf   = NULL;
 929     void         *pvVolHdr2Data = NULL;
 930     GenericLFBuf *psVolHdr2Buf  = NULL;
 931     void         *pvAltHdrData  = NULL;
 932     GenericLFBuf *psAltHdrBuf   = NULL;
 933
 934
 935     if (ISSET(options, HFS_FVH_FLUSH_IF_DIRTY) && !hfs_header_needs_flushing(hfsmp)) {
 936         return 0;
 937     }
 938
 939     if (hfsmp->hfs_flags & HFS_READ_ONLY) {
 940         return 0;
 941     }
 942
 943     if (options & HFS_FVH_MARK_UNMOUNT) {
 944         HFSTOVCB(hfsmp)->vcbAtrb |= kHFSVolumeUnmountedMask;
 945     } else {
 946         HFSTOVCB(hfsmp)->vcbAtrb &= ~kHFSVolumeUnmountedMask;
 947     }
 948
 949     daddr64_t priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) + HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size));
 950
 951     if (!(options & HFS_FVH_SKIP_TRANSACTION)) {
 952         if (hfs_start_transaction(hfsmp) != 0) {
 953             return EINVAL;
 954         }
 955     }
 956
 957     psVolHdrBuf = lf_hfs_generic_buf_allocate(hfsmp->hfs_devvp,
 958                                            HFS_PHYSBLK_ROUNDDOWN(priIDSector, hfsmp->hfs_log_per_phys),
 959                                            hfsmp->hfs_physical_block_size, GEN_BUF_PHY_BLOCK);
 960     if (psVolHdrBuf == NULL) {
 961         retval = ENOMEM;
 962         goto err_exit;
 963     }
 964     pvVolHdrData = psVolHdrBuf->pvData;
 965
 966     retval = lf_hfs_generic_buf_read(psVolHdrBuf);
 967     if (retval) {
 968         LFHFS_LOG(LEVEL_ERROR, "hfs_flushvolumeheader: err %d reading VH blk (vol=%s)\n", retval, vcb->vcbVN);
 969         goto err_exit;
 970     }
 971
 972     HFSPlusVolumeHeader* volumeHeader = (HFSPlusVolumeHeader *)(pvVolHdrData + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));
 973
 974     /*
 975      * Sanity check what we just read.  If it's bad, try the alternate instead.
 976      */
 977     u_int16_t signature  = SWAP_BE16 (volumeHeader->signature);
 978     u_int16_t hfsversion = SWAP_BE16 (volumeHeader->version);
 979
 980     if ((signature != kHFSPlusSigWord && signature != kHFSXSigWord) ||
 981         (hfsversion < kHFSPlusVersion) || (hfsversion > 100) ||
 982         (SWAP_BE32 (volumeHeader->blockSize) != vcb->blockSize))
 983     {
 984         LFHFS_LOG(LEVEL_DEFAULT, "hfs_flushvolumeheader: corrupt VH on %s, sig 0x%04x, ver %d, blksize %d\n", vcb->vcbVN, signature, hfsversion, SWAP_BE32 (volumeHeader->blockSize));
 985         hfs_mark_inconsistent(hfsmp, HFS_INCONSISTENCY_DETECTED);
 986
 987         /* Almost always we read AVH relative to the partition size */
 988         avh_sector = hfsmp->hfs_partition_avh_sector;
 989
 990         if (hfsmp->hfs_partition_avh_sector != hfsmp->hfs_fs_avh_sector)
 991         {
 992             /*
 993              * The two altVH offsets do not match --- which means that a smaller file
 994              * system exists in a larger partition.  Verify that we have the correct
 995              * alternate volume header sector as per the current parititon size.
 996              * The GPT device that we are mounted on top could have changed sizes
 997              * without us knowing.
 998              *
 999              * We're in a transaction, so it's safe to modify the partition_avh_sector
1000              * field if necessary.
1001              */
1002
1003             uint64_t sector_count = 0;
1004
1005             /* Get underlying device block count */
1006             retval = ioctl(hfsmp->hfs_devvp->psFSRecord->iFD, DKIOCGETBLOCKCOUNT, &sector_count);
1007             if (retval)
1008             {
1009                 LFHFS_LOG(LEVEL_ERROR, "hfs_flushvolumeheader: err %d getting block count (%s) \n", retval, vcb->vcbVN);
1010                 retval = ENXIO;
1011                 goto err_exit;
1012             }
1013
1014             /* Partition size was changed without our knowledge */
1015             if (sector_count != (uint64_t)hfsmp->hfs_logical_block_count)
1016             {
1017                 hfsmp->hfs_partition_avh_sector = (hfsmp->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) + HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, sector_count);
1018                 /* Note: hfs_fs_avh_sector will remain unchanged */
1019                 LFHFS_LOG(LEVEL_DEFAULT, "hfs_flushvolumeheader: partition size changed, partition_avh_sector=%qu, fs_avh_sector=%qu\n", hfsmp->hfs_partition_avh_sector, hfsmp->hfs_fs_avh_sector);
1020
1021                 /*
1022                  * We just updated the offset for AVH relative to
1023                  * the partition size, so the content of that AVH
1024                  * will be invalid.  But since we are also maintaining
1025                  * a valid AVH relative to the file system size, we
1026                  * can read it since primary VH and partition AVH
1027                  * are not valid.
1028                  */
1029                 avh_sector = hfsmp->hfs_fs_avh_sector;
1030             }
1031         }
1032
1033         LFHFS_LOG(LEVEL_DEFAULT, "hfs_flushvolumeheader: trying alternate (for %s) avh_sector=%qu\n", (avh_sector == hfsmp->hfs_fs_avh_sector) ? "file system" : "partition", avh_sector);
1034
1035         if (avh_sector)
1036         {
1037             psAltHdrBuf = lf_hfs_generic_buf_allocate(hfsmp->hfs_devvp,
1038                                                       HFS_PHYSBLK_ROUNDDOWN(avh_sector, hfsmp->hfs_log_per_phys),
1039                                                       hfsmp->hfs_physical_block_size, GEN_BUF_PHY_BLOCK);
1040             if (psAltHdrBuf == NULL) {
1041                 retval = ENOMEM;
1042                 goto err_exit;
1043             }
1044             pvAltHdrData = psAltHdrBuf->pvData;
1045
1046             retval = lf_hfs_generic_buf_read(psAltHdrBuf);
1047
1048             if (retval)
1049             {
1050                 LFHFS_LOG(LEVEL_ERROR, "hfs_flushvolumeheader: err %d reading alternate VH blk (vol=%s)\n", retval, vcb->vcbVN);
1051                 goto err_exit;
1052             }
1053
1054             HFSPlusVolumeHeader * altVH = (HFSPlusVolumeHeader *)(pvAltHdrData +  HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size));
1055             signature  = SWAP_BE16(altVH->signature);
1056             hfsversion = SWAP_BE16(altVH->version);
1057
1058             if ((signature != kHFSPlusSigWord && signature != kHFSXSigWord) ||
1059                 (hfsversion < kHFSPlusVersion) || (kHFSPlusVersion > 100) ||
1060                 (SWAP_BE32(altVH->blockSize) != vcb->blockSize))
1061             {
1062                 LFHFS_LOG(LEVEL_ERROR, "hfs_flushvolumeheader: corrupt alternate VH on %s, sig 0x%04x, ver %d, blksize %d\n", vcb->vcbVN, signature, hfsversion, SWAP_BE32(altVH->blockSize));
1063                 retval = EIO;
1064                 goto err_exit;
1065             }
1066
1067             /* The alternate is plausible, so use it. */
1068             bcopy(altVH, volumeHeader, kMDBSize);
1069             lf_hfs_generic_buf_release(psAltHdrBuf);
1070             pvAltHdrData = NULL;
1071         }
1072         else
1073         {
1074             /* No alternate VH, nothing more we can do. */
1075             retval = EIO;
1076             goto err_exit;
1077         }
1078     }
1079
1080     if (hfsmp->jnl)
1081     {
1082         journal_modify_block_start(hfsmp->jnl, psVolHdrBuf);
1083     }
1084
1085     /*
1086      * For embedded HFS+ volumes, update create date if it changed
1087      * (ie from a setattrlist call)
1088      */
1089     if ((vcb->hfsPlusIOPosOffset != 0) && (SWAP_BE32 (volumeHeader->createDate) != vcb->localCreateDate))
1090     {
1091         HFSMasterDirectoryBlock    *mdb;
1092
1093         psVolHdr2Buf = lf_hfs_generic_buf_allocate(hfsmp->hfs_devvp,
1094                                                 HFS_PHYSBLK_ROUNDDOWN(HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size), hfsmp->hfs_log_per_phys),
1095                                                 hfsmp->hfs_physical_block_size, GEN_BUF_PHY_BLOCK);
1096         if (psVolHdr2Buf == NULL) {
1097             retval = ENOMEM;
1098             goto err_exit;
1099         }
1100         void *pvVolHdr2Data = psVolHdr2Buf->pvData;
1101
1102         retval = lf_hfs_generic_buf_read(psVolHdr2Buf);
1103
1104         if (retval)
1105         {
1106             lf_hfs_generic_buf_release(psVolHdr2Buf);
1107             LFHFS_LOG(LEVEL_ERROR, "hfs_flushvolumeheader: err %d reading alternate VH blk (vol=%s)\n", retval, vcb->vcbVN);
1108             goto err_exit;
1109         }
1110
1111         mdb = (HFSMasterDirectoryBlock *)(pvVolHdr2Data + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));
1112
1113         if ( SWAP_BE32 (mdb->drCrDate) != vcb->localCreateDate )
1114         {
1115             if (hfsmp->jnl)
1116             {
1117                 journal_modify_block_start(hfsmp->jnl, psVolHdr2Buf);
1118             }
1119             mdb->drCrDate = SWAP_BE32 (vcb->localCreateDate);    /* pick up the new create date */
1120             if (hfsmp->jnl)
1121             {
1122                 journal_modify_block_end(hfsmp->jnl, psVolHdr2Buf, NULL, NULL);
1123             }
1124             else
1125             {
1126                 retval = raw_readwrite_write_mount( hfsmp->hfs_devvp, HFS_PHYSBLK_ROUNDDOWN(HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size), hfsmp->hfs_log_per_phys), hfsmp->hfs_physical_block_size, pvVolHdr2Data, hfsmp->hfs_physical_block_size, NULL, NULL);
1127
1128                 lf_hfs_generic_buf_release(psVolHdr2Buf);
1129                 pvVolHdr2Data = NULL;
1130                 if (retval)
1131                 {
1132                     LFHFS_LOG(LEVEL_ERROR, "hfs_flushvolumeheader: err %d writing VH blk (vol=%s)\n", retval, vcb->vcbVN);
1133                     goto err_exit;
1134                 }
1135             }
1136         }
1137         else
1138         {
1139             lf_hfs_generic_buf_release(psVolHdr2Buf);                        /* just release it */
1140             pvVolHdr2Data = NULL;
1141         }
1142     }
1143
1144     hfs_lock_mount (hfsmp);
1145
1146     /* Note: only update the lower 16 bits worth of attributes */
1147     volumeHeader->attributes       = SWAP_BE32 (vcb->vcbAtrb);
1148     volumeHeader->journalInfoBlock = SWAP_BE32 (vcb->vcbJinfoBlock);
1149     if (hfsmp->jnl)
1150     {
1151         volumeHeader->lastMountedVersion = SWAP_BE32 (kHFSJMountVersion);
1152     }
1153     else
1154     {
1155         volumeHeader->lastMountedVersion = SWAP_BE32 (kHFSPlusMountVersion);
1156     }
1157     volumeHeader->createDate      = SWAP_BE32 (vcb->localCreateDate);  /* volume create date is in local time */
1158     volumeHeader->modifyDate      = SWAP_BE32 (to_hfs_time(vcb->vcbLsMod));
1159     volumeHeader->backupDate      = SWAP_BE32 (to_hfs_time(vcb->vcbVolBkUp));
1160     volumeHeader->fileCount       = SWAP_BE32 (vcb->vcbFilCnt);
1161     volumeHeader->folderCount     = SWAP_BE32 (vcb->vcbDirCnt);
1162     volumeHeader->totalBlocks     = SWAP_BE32 (vcb->totalBlocks);
1163     volumeHeader->freeBlocks      = SWAP_BE32 (vcb->freeBlocks + vcb->reclaimBlocks);
1164     volumeHeader->nextAllocation  = SWAP_BE32 (vcb->nextAllocation);
1165     volumeHeader->rsrcClumpSize   = SWAP_BE32 (vcb->vcbClpSiz);
1166     volumeHeader->dataClumpSize   = SWAP_BE32 (vcb->vcbClpSiz);
1167     volumeHeader->nextCatalogID   = SWAP_BE32 (vcb->vcbNxtCNID);
1168     volumeHeader->writeCount      = SWAP_BE32 (vcb->vcbWrCnt);
1169     volumeHeader->encodingsBitmap = SWAP_BE64 (vcb->encodingsBitmap);
1170
1171     if (bcmp(vcb->vcbFndrInfo, volumeHeader->finderInfo, sizeof(volumeHeader->finderInfo)) != 0)
1172     {
1173         bcopy(vcb->vcbFndrInfo, volumeHeader->finderInfo, sizeof(volumeHeader->finderInfo));
1174         critical = true;
1175     }
1176
1177     if (!altflush && !ISSET(options, HFS_FVH_FLUSH_IF_DIRTY))
1178     {
1179         goto done;
1180     }
1181
1182     /* Sync Extents over-flow file meta data */
1183     struct filefork * fp = VTOF(vcb->extentsRefNum);
1184     if (FTOC(fp)->c_flag & C_MODIFIED)
1185     {
1186         for (int iExtentCounter = 0; iExtentCounter < kHFSPlusExtentDensity; iExtentCounter++)
1187         {
1188             volumeHeader->extentsFile.extents[iExtentCounter].startBlock    = SWAP_BE32 (fp->ff_extents[iExtentCounter].startBlock);
1189             volumeHeader->extentsFile.extents[iExtentCounter].blockCount    = SWAP_BE32 (fp->ff_extents[iExtentCounter].blockCount);
1190         }
1191         volumeHeader->extentsFile.logicalSize = SWAP_BE64 (fp->ff_size);
1192         volumeHeader->extentsFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
1193         volumeHeader->extentsFile.clumpSize   = SWAP_BE32 (fp->ff_clumpsize);
1194         FTOC(fp)->c_flag &= ~C_MODIFIED;
1195         altflush = true;
1196     }
1197
1198     /* Sync Catalog file meta data */
1199     fp = VTOF(vcb->catalogRefNum);
1200     if (FTOC(fp)->c_flag & C_MODIFIED)
1201     {
1202         for (int iExtentCounter = 0; iExtentCounter < kHFSPlusExtentDensity; iExtentCounter++)
1203         {
1204             volumeHeader->catalogFile.extents[iExtentCounter].startBlock    = SWAP_BE32 (fp->ff_extents[iExtentCounter].startBlock);
1205             volumeHeader->catalogFile.extents[iExtentCounter].blockCount    = SWAP_BE32 (fp->ff_extents[iExtentCounter].blockCount);
1206         }
1207         volumeHeader->catalogFile.logicalSize = SWAP_BE64 (fp->ff_size);
1208         volumeHeader->catalogFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
1209         volumeHeader->catalogFile.clumpSize   = SWAP_BE32 (fp->ff_clumpsize);
1210         FTOC(fp)->c_flag &= ~C_MODIFIED;
1211         altflush = true;
1212     }
1213
1214     /* Sync Allocation file meta data */
1215     fp = VTOF(vcb->allocationsRefNum);
1216     if (FTOC(fp)->c_flag & C_MODIFIED)
1217     {
1218         for (int iExtentCounter = 0; iExtentCounter < kHFSPlusExtentDensity; iExtentCounter++)
1219         {
1220             volumeHeader->allocationFile.extents[iExtentCounter].startBlock = SWAP_BE32 (fp->ff_extents[iExtentCounter].startBlock);
1221             volumeHeader->allocationFile.extents[iExtentCounter].blockCount = SWAP_BE32 (fp->ff_extents[iExtentCounter].blockCount);
1222         }
1223         volumeHeader->allocationFile.logicalSize = SWAP_BE64 (fp->ff_size);
1224         volumeHeader->allocationFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
1225         volumeHeader->allocationFile.clumpSize   = SWAP_BE32 (fp->ff_clumpsize);
1226         FTOC(fp)->c_flag &= ~C_MODIFIED;
1227         altflush = true;
1228     }
1229
1230     /* Sync Attribute file meta data */
1231     if (hfsmp->hfs_attribute_vp)
1232     {
1233         fp = VTOF(hfsmp->hfs_attribute_vp);
1234         for (int iExtentCounter = 0; iExtentCounter < kHFSPlusExtentDensity; iExtentCounter++)
1235         {
1236             volumeHeader->attributesFile.extents[iExtentCounter].startBlock = SWAP_BE32 (fp->ff_extents[iExtentCounter].startBlock);
1237             volumeHeader->attributesFile.extents[iExtentCounter].blockCount = SWAP_BE32 (fp->ff_extents[iExtentCounter].blockCount);
1238         }
1239         if (ISSET(FTOC(fp)->c_flag, C_MODIFIED))
1240         {
1241             FTOC(fp)->c_flag &= ~C_MODIFIED;
1242             altflush = true;
1243         }
1244         volumeHeader->attributesFile.logicalSize = SWAP_BE64 (fp->ff_size);
1245         volumeHeader->attributesFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
1246         volumeHeader->attributesFile.clumpSize   = SWAP_BE32 (fp->ff_clumpsize);
1247     }
1248
1249     /* Sync Startup file meta data */
1250     if (hfsmp->hfs_startup_vp)
1251     {
1252         fp = VTOF(hfsmp->hfs_startup_vp);
1253         if (FTOC(fp)->c_flag & C_MODIFIED)
1254         {
1255             for (int iExtentCounter = 0; iExtentCounter < kHFSPlusExtentDensity; iExtentCounter++)
1256             {
1257                 volumeHeader->startupFile.extents[iExtentCounter].startBlock = SWAP_BE32 (fp->ff_extents[iExtentCounter].startBlock);
1258                 volumeHeader->startupFile.extents[iExtentCounter].blockCount = SWAP_BE32 (fp->ff_extents[iExtentCounter].blockCount);
1259             }
1260             volumeHeader->startupFile.logicalSize = SWAP_BE64 (fp->ff_size);
1261             volumeHeader->startupFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
1262             volumeHeader->startupFile.clumpSize   = SWAP_BE32 (fp->ff_clumpsize);
1263             FTOC(fp)->c_flag &= ~C_MODIFIED;
1264             altflush = true;
1265         }
1266     }
1267
1268     if (altflush)
1269         critical = true;
1270
1271 done:
1272     MarkVCBClean(hfsmp);
1273     hfs_unlock_mount (hfsmp);
1274
1275     /* If requested, flush out the alternate volume header */
1276     if (altflush) {
1277         /*
1278          * The two altVH offsets do not match --- which means that a smaller file
1279          * system exists in a larger partition.  Verify that we have the correct
1280          * alternate volume header sector as per the current parititon size.
1281          * The GPT device that we are mounted on top could have changed sizes
1282          * without us knowning.
1283          *
1284          * We're in a transaction, so it's safe to modify the partition_avh_sector
1285          * field if necessary.
1286          */
1287         if (hfsmp->hfs_partition_avh_sector != hfsmp->hfs_fs_avh_sector)
1288         {
1289             uint64_t sector_count;
1290
1291             /* Get underlying device block count */
1292             retval = ioctl(hfsmp->hfs_devvp->psFSRecord->iFD, DKIOCGETBLOCKCOUNT, &sector_count);
1293             if (retval)
1294             {
1295                 LFHFS_LOG(LEVEL_ERROR, "hfs_flushvolumeheader: err %d getting block count (%s) \n", retval, vcb->vcbVN);
1296                 retval = ENXIO;
1297                 goto err_exit;
1298             }
1299
1300             /* Partition size was changed without our knowledge */
1301             if (sector_count != (uint64_t)hfsmp->hfs_logical_block_count)
1302             {
1303                 hfsmp->hfs_partition_avh_sector = (hfsmp->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +  HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, sector_count);
1304                 /* Note: hfs_fs_avh_sector will remain unchanged */
1305                 LFHFS_LOG(LEVEL_DEFAULT, "hfs_flushvolumeheader: altflush: partition size changed, partition_avh_sector=%qu, fs_avh_sector=%qu\n",
1306                         hfsmp->hfs_partition_avh_sector, hfsmp->hfs_fs_avh_sector);
1307             }
1308         }
1309
1310         /*
1311          * First see if we need to write I/O to the "secondary" AVH
1312          * located at FS Size - 1024 bytes, because this one will
1313          * always go into the journal.  We put this AVH into the journal
1314          * because even if the filesystem size has shrunk, this LBA should be
1315          * reachable after the partition-size modification has occurred.
1316          * The one where we need to be careful is partitionsize-1024, since the
1317          * partition size should hopefully shrink.
1318          *
1319          * Most of the time this block will not execute.
1320          */
1321         if ((hfsmp->hfs_fs_avh_sector) && (hfsmp->hfs_partition_avh_sector != hfsmp->hfs_fs_avh_sector))
1322         {
1323             if (pvAltHdrData != NULL)
1324             {
1325                 panic("We shouldn't be here!");
1326                 hfs_assert(0);
1327             }
1328
1329             psAltHdrBuf = lf_hfs_generic_buf_allocate(hfsmp->hfs_devvp,
1330                                             HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_fs_avh_sector, hfsmp->hfs_log_per_phys),
1331                                             hfsmp->hfs_physical_block_size, GEN_BUF_PHY_BLOCK);
1332
1333             if (psAltHdrBuf == NULL) {
1334                 retval = ENOMEM;
1335                 goto err_exit;
1336             }
1337             pvAltHdrData = psAltHdrBuf->pvData;
1338
1339             retval = lf_hfs_generic_buf_read(psAltHdrBuf);
1340             if (retval)
1341             {
1342                 LFHFS_LOG(LEVEL_ERROR, "hfs_flushvolumeheader: err %d reading alternate VH blk (vol=%s)\n", retval, vcb->vcbVN);
1343                 goto err_exit;
1344             }
1345
1346             if (hfsmp->jnl)
1347             {
1348                 journal_modify_block_start(hfsmp->jnl, psAltHdrBuf);
1349             }
1350
1351             bcopy(volumeHeader, pvAltHdrData + HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size), kMDBSize);
1352
1353             if (hfsmp->jnl)
1354             {
1355                 journal_modify_block_end(hfsmp->jnl, psAltHdrBuf, NULL, NULL);
1356             }
1357             else
1358             {
1359                 retval = raw_readwrite_write_mount( hfsmp->hfs_devvp, HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_fs_avh_sector, hfsmp->hfs_log_per_phys), hfsmp->hfs_physical_block_size, pvAltHdrData, hfsmp->hfs_physical_block_size, NULL, NULL);
1360                 if (retval)
1361                 {
1362                     LFHFS_LOG(LEVEL_ERROR, "hfs_flushvolumeheader: err %d writing VH blk (vol=%s)\n", retval, vcb->vcbVN);
1363                     goto err_exit;
1364                 }
1365                 lf_hfs_generic_buf_release(psAltHdrBuf);
1366                 pvAltHdrData = NULL;
1367             }
1368         }
1369
1370         /*
1371          * Flush out alternate volume header located at 1024 bytes before
1372          * end of the partition as part of journal transaction.  In
1373          * most cases, this will be the only alternate volume header
1374          * that we need to worry about because the file system size is
1375          * same as the partition size, therefore hfs_fs_avh_sector is
1376          * same as hfs_partition_avh_sector. This is the "priority" AVH.
1377          *
1378          * However, do not always put this I/O into the journal.  If we skipped the
1379          * FS-Size AVH write above, then we will put this I/O into the journal as
1380          * that indicates the two were in sync.  However, if the FS size is
1381          * not the same as the partition size, we are tracking two.  We don't
1382          * put it in the journal in that case, since if the partition
1383          * size changes between uptimes, and we need to replay the journal,
1384          * this I/O could generate an EIO if during replay it is now trying
1385          * to access blocks beyond the device EOF.
1386          */
1387         if (hfsmp->hfs_partition_avh_sector)
1388         {
1389             if (pvAltHdrData != NULL)
1390             {
1391                 panic("We shouldn't be here!");
1392                 hfs_assert(0);
1393             }
1394
1395             psAltHdrBuf = lf_hfs_generic_buf_allocate(hfsmp->hfs_devvp,
1396                                                       HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_fs_avh_sector, hfsmp->hfs_log_per_phys),
1397                                                       hfsmp->hfs_physical_block_size, GEN_BUF_PHY_BLOCK);
1398             if (psAltHdrBuf == NULL) {
1399                 retval = ENOMEM;
1400                 goto err_exit;
1401             }
1402             pvAltHdrData = psAltHdrBuf->pvData;
1403
1404             retval = lf_hfs_generic_buf_read(psAltHdrBuf);
1405
1406             if (retval)
1407             {
1408                 LFHFS_LOG(LEVEL_ERROR, "hfs_flushvolumeheader: err %d reading alternate VH blk (vol=%s)\n", retval, vcb->vcbVN);
1409                 goto err_exit;
1410             }
1411
1412             /* only one AVH, put this I/O in the journal. */
1413             if ((hfsmp->jnl) && (hfsmp->hfs_partition_avh_sector == hfsmp->hfs_fs_avh_sector)) {
1414                 journal_modify_block_start(hfsmp->jnl, psAltHdrBuf);
1415             }
1416
1417             bcopy(volumeHeader, pvAltHdrData + HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size), kMDBSize);
1418
1419             /* If journaled and we only have one AVH to track */
1420             if ((hfsmp->jnl) && (hfsmp->hfs_partition_avh_sector == hfsmp->hfs_fs_avh_sector)) {
1421                 journal_modify_block_end (hfsmp->jnl, psAltHdrBuf, NULL, NULL);
1422             }
1423             else
1424             {
1425                 /*
1426                  * If we don't have a journal or there are two AVH's at the
1427                  * moment, then this one doesn't go in the journal.  Note that
1428                  * this one may generate I/O errors, since the partition
1429                  * can be resized behind our backs at any moment and this I/O
1430                  * may now appear to be beyond the device EOF.
1431                  */
1432                 retval = raw_readwrite_write_mount( hfsmp->hfs_devvp, HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_fs_avh_sector, hfsmp->hfs_log_per_phys), hfsmp->hfs_physical_block_size, pvAltHdrData, hfsmp->hfs_physical_block_size, NULL, NULL);
1433                 if (retval)
1434                 {
1435                     LFHFS_LOG(LEVEL_ERROR, "hfs_flushvolumeheader: err %d writing VH blk (vol=%s)\n", retval, vcb->vcbVN);
1436                     goto err_exit;
1437                 }
1438                 lf_hfs_generic_buf_release(psAltHdrBuf);
1439                 pvAltHdrData = NULL;
1440                 hfs_flush(hfsmp, HFS_FLUSH_CACHE);
1441             }
1442         }
1443     }
1444
1445     /* Finish modifying the block for the primary VH */
1446     if (hfsmp->jnl) {
1447         journal_modify_block_end(hfsmp->jnl, psVolHdrBuf, NULL, NULL);
1448     }
1449     else
1450     {
1451         retval = raw_readwrite_write_mount( hfsmp->hfs_devvp, HFS_PHYSBLK_ROUNDDOWN(priIDSector, hfsmp->hfs_log_per_phys), hfsmp->hfs_physical_block_size, pvVolHdrData, hfsmp->hfs_physical_block_size, NULL, NULL);
1452         /* When critical data changes, flush the device cache */
1453         if (critical && (retval == 0))
1454         {
1455             hfs_flush(hfsmp, HFS_FLUSH_CACHE);
1456         }
1457
1458         lf_hfs_generic_buf_release(psVolHdrBuf);
1459         pvVolHdrData = NULL;
1460         if (retval)
1461         {
1462             LFHFS_LOG(LEVEL_ERROR, "hfs_flushvolumeheader: err %d reading VH blk (vol=%s)\n", retval, vcb->vcbVN);
1463             goto err_exit;
1464         }
1465     }
1466     if (!(options & HFS_FVH_SKIP_TRANSACTION)) {
1467         hfs_end_transaction(hfsmp);
1468     }
1469
1470     return (retval);
1471
1472 err_exit:
1473     if (pvVolHdrData)
1474         lf_hfs_generic_buf_release(psVolHdrBuf);
1475     if (pvVolHdr2Data)
1476         lf_hfs_generic_buf_release(psVolHdr2Buf);
1477     if (pvAltHdrData)
1478         lf_hfs_generic_buf_release(psAltHdrBuf);
1479
1480     if (!(options & HFS_FVH_SKIP_TRANSACTION)) {
1481         hfs_end_transaction(hfsmp);
1482     }
1483     return retval;
1484 }
1485
1486 /* If a runtime corruption is detected, set the volume inconsistent
1487  * bit in the volume attributes.  The volume inconsistent bit is a persistent
1488  * bit which represents that the volume is corrupt and needs repair.
1489  * The volume inconsistent bit can be set from the kernel when it detects
1490  * runtime corruption or from file system repair utilities like fsck_hfs when
1491  * a repair operation fails.  The bit should be cleared only from file system
1492  * verify/repair utility like fsck_hfs when a verify/repair succeeds.
1493  */
1494 void hfs_mark_inconsistent(struct hfsmount *hfsmp, hfs_inconsistency_reason_t reason)
1495 {
1496     hfs_lock_mount (hfsmp);
1497     if ((hfsmp->vcbAtrb & kHFSVolumeInconsistentMask) == 0)
1498     {
1499         hfsmp->vcbAtrb |= kHFSVolumeInconsistentMask;
1500         MarkVCBDirty(hfsmp);
1501     }
1502     if ((hfsmp->hfs_flags & HFS_READ_ONLY)==0)
1503     {
1504         switch (reason)
1505         {
1506             case HFS_INCONSISTENCY_DETECTED:
1507                 LFHFS_LOG(LEVEL_ERROR, "hfs_mark_inconsistent: Runtime corruption detected on %s, fsck will be forced on next mount.\n",hfsmp->vcbVN);
1508                 break;
1509             case HFS_ROLLBACK_FAILED:
1510                 LFHFS_LOG(LEVEL_ERROR, "hfs_mark_inconsistent: Failed to roll back; volume `%s' might be inconsistent; fsck will be forced on next mount.\n", hfsmp->vcbVN);
1511                 break;
1512             case HFS_OP_INCOMPLETE:
1513                 LFHFS_LOG(LEVEL_ERROR, "hfs_mark_inconsistent: Failed to complete operation; volume `%s' might be inconsistent; fsck will be forced on next mount.\n",hfsmp->vcbVN);
1514                 break;
1515             case HFS_FSCK_FORCED:
1516                 LFHFS_LOG(LEVEL_ERROR, "hfs_mark_inconsistent: fsck requested for `%s'; fsck will be forced on next mount.\n",hfsmp->vcbVN);
1517                 break;
1518         }
1519     }
1520     hfs_unlock_mount (hfsmp);
1521 }
1522
1523 /*
1524  * Creates a UUID from a unique "name" in the HFS UUID Name space.
1525  * See version 3 UUID.
1526  */
1527 void
1528 hfs_getvoluuid(struct hfsmount *hfsmp, uuid_t result_uuid)
1529 {
1530
1531     if (uuid_is_null(hfsmp->hfs_full_uuid)) {
1532         uuid_t result;
1533
1534         CC_MD5_CTX  md5c;
1535         uint8_t  rawUUID[8];
1536
1537         ((uint32_t *)rawUUID)[0] = hfsmp->vcbFndrInfo[6];
1538         ((uint32_t *)rawUUID)[1] = hfsmp->vcbFndrInfo[7];
1539
1540         CC_MD5_Init( &md5c );
1541         CC_MD5_Update( &md5c, HFS_UUID_NAMESPACE_ID, sizeof( uuid_t ) );
1542         CC_MD5_Update( &md5c, rawUUID, sizeof (rawUUID) );
1543         CC_MD5_Final( result, &md5c );
1544
1545         result[6] = 0x30 | ( result[6] & 0x0F );
1546         result[8] = 0x80 | ( result[8] & 0x3F );
1547
1548         uuid_copy(hfsmp->hfs_full_uuid, result);
1549     }
1550     uuid_copy (result_uuid, hfsmp->hfs_full_uuid);
1551
1552 }
1553
1554 /*
1555  * Call into the allocator code and perform a full scan of the bitmap file.
1556  *
1557  * This allows us to TRIM unallocated ranges if needed, and also to build up
1558  * an in-memory summary table of the state of the allocated blocks.
1559  */
1560 void hfs_scan_blocks (struct hfsmount *hfsmp)
1561 {
1562     /*
1563      * Take the allocation file lock.  Journal transactions will block until
1564      * we're done here.
1565      */
1566     int flags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
1567
1568     /*
1569      * We serialize here with the HFS mount lock as we're mounting.
1570      *
1571      * The mount can only proceed once this thread has acquired the bitmap
1572      * lock, since we absolutely do not want someone else racing in and
1573      * getting the bitmap lock, doing a read/write of the bitmap file,
1574      * then us getting the bitmap lock.
1575      *
1576      * To prevent this, the mount thread takes the HFS mount mutex, starts us
1577      * up, then immediately msleeps on the scan_var variable in the mount
1578      * point as a condition variable.  This serialization is safe since
1579      * if we race in and try to proceed while they're still holding the lock,
1580      * we'll block trying to acquire the global lock.  Since the mount thread
1581      * acquires the HFS mutex before starting this function in a new thread,
1582      * any lock acquisition on our part must be linearizably AFTER the mount thread's.
1583      *
1584      * Note that the HFS mount mutex is always taken last, and always for only
1585      * a short time.  In this case, we just take it long enough to mark the
1586      * scan-in-flight bit.
1587      */
1588     (void) hfs_lock_mount (hfsmp);
1589     hfsmp->scan_var |= HFS_ALLOCATOR_SCAN_INFLIGHT;
1590     hfs_unlock_mount (hfsmp);
1591
1592     /* Initialize the summary table */
1593     if (hfs_init_summary (hfsmp))
1594     {
1595         LFHFS_LOG(LEVEL_DEBUG, "hfs_scan_blocks: could not initialize summary table for %s\n", hfsmp->vcbVN);
1596     }
1597
1598     /*
1599      * ScanUnmapBlocks assumes that the bitmap lock is held when you
1600      * call the function. We don't care if there were any errors issuing unmaps.
1601      *
1602      * It will also attempt to build up the summary table for subsequent
1603      * allocator use, as configured.
1604      */
1605     (void) ScanUnmapBlocks(hfsmp);
1606
1607     (void) hfs_lock_mount (hfsmp);
1608     hfsmp->scan_var &= ~HFS_ALLOCATOR_SCAN_INFLIGHT;
1609     hfsmp->scan_var |= HFS_ALLOCATOR_SCAN_COMPLETED;
1610     hfs_unlock_mount (hfsmp);
1611
1612     hfs_systemfile_unlock(hfsmp, flags);
1613 }
1614
1615 /*
1616  * Look up an HFS object by ID.
1617  *
1618  * The object is returned with an iocount reference and the cnode locked.
1619  *
1620  * If the object is a file then it will represent the data fork.
1621  */
1622 int
1623 hfs_vget(struct hfsmount *hfsmp, cnid_t cnid, struct vnode **vpp, int skiplock, int allow_deleted)
1624 {
1625     struct vnode *vp = NULL;
1626     struct cat_desc cndesc;
1627     struct cat_attr cnattr;
1628     struct cat_fork cnfork;
1629
1630     u_int32_t linkref = 0;
1631
1632     int error;
1633
1634     /* Check for cnids that should't be exported. */
1635     if ((cnid < kHFSFirstUserCatalogNodeID) &&
1636         (cnid != kHFSRootFolderID && cnid != kHFSRootParentID)) {
1637         return (ENOENT);
1638     }
1639     /* Don't export our private directories. */
1640     if (cnid == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid ||
1641         cnid == hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) {
1642         return (ENOENT);
1643     }
1644     /*
1645      * Check the hash first
1646      */
1647     vp = hfs_chash_getvnode(hfsmp, cnid, 0, skiplock, allow_deleted);
1648     if (vp) {
1649         *vpp = vp;
1650         return(0);
1651     }
1652
1653     bzero(&cndesc, sizeof(cndesc));
1654     bzero(&cnattr, sizeof(cnattr));
1655     bzero(&cnfork, sizeof(cnfork));
1656
1657     /*
1658      * Not in hash, lookup in catalog
1659      */
1660     if (cnid == kHFSRootParentID) {
1661         static char hfs_rootname[] = "/";
1662
1663         cndesc.cd_nameptr = (const u_int8_t *)&hfs_rootname[0];
1664         cndesc.cd_namelen = 1;
1665         cndesc.cd_parentcnid = kHFSRootParentID;
1666         cndesc.cd_cnid = kHFSRootFolderID;
1667         cndesc.cd_flags = CD_ISDIR;
1668
1669         cnattr.ca_fileid = kHFSRootFolderID;
1670         cnattr.ca_linkcount = 1;
1671         cnattr.ca_entries = 1;
1672         cnattr.ca_dircount = 1;
1673         cnattr.ca_mode = (S_IFDIR | S_IRWXU | S_IRWXG | S_IRWXO);
1674     } else {
1675         int lockflags;
1676         cnid_t pid;
1677         const char *nameptr;
1678
1679         lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
1680         error = cat_idlookup(hfsmp, cnid, 0, 0, &cndesc, &cnattr, &cnfork);
1681         hfs_systemfile_unlock(hfsmp, lockflags);
1682
1683         if (error) {
1684             *vpp = NULL;
1685             return (error);
1686         }
1687
1688         /*
1689          * Check for a raw hardlink inode and save its linkref.
1690          */
1691         pid = cndesc.cd_parentcnid;
1692         nameptr = (const char *)cndesc.cd_nameptr;
1693         if ((pid == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid) &&
1694             cndesc.cd_namelen > HFS_INODE_PREFIX_LEN &&
1695             (bcmp(nameptr, HFS_INODE_PREFIX, HFS_INODE_PREFIX_LEN) == 0)) {
1696             linkref = (uint32_t) strtoul(&nameptr[HFS_INODE_PREFIX_LEN], NULL, 10);
1697
1698         } else if ((pid == hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) &&
1699                    cndesc.cd_namelen > HFS_DIRINODE_PREFIX_LEN &&
1700                    (bcmp(nameptr, HFS_DIRINODE_PREFIX, HFS_DIRINODE_PREFIX_LEN) == 0)) {
1701             linkref = (uint32_t) strtoul(&nameptr[HFS_DIRINODE_PREFIX_LEN], NULL, 10);
1702
1703         } else if ((pid == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid) &&
1704                    cndesc.cd_namelen > HFS_DELETE_PREFIX_LEN &&
1705                    (bcmp(nameptr, HFS_DELETE_PREFIX, HFS_DELETE_PREFIX_LEN) == 0)) {
1706             *vpp = NULL;
1707             cat_releasedesc(&cndesc);
1708             return (ENOENT);  /* open unlinked file */
1709         }
1710     }
1711
1712     /*
1713      * Finish initializing cnode descriptor for hardlinks.
1714      *
1715      * We need a valid name and parent for reverse lookups.
1716      */
1717     if (linkref) {
1718         cnid_t lastid;
1719         struct cat_desc linkdesc;
1720         int linkerr = 0;
1721
1722         cnattr.ca_linkref = linkref;
1723         bzero (&linkdesc, sizeof (linkdesc));
1724
1725         /*
1726          * If the caller supplied the raw inode value, then we don't know exactly
1727          * which hardlink they wanted. It's likely that they acquired the raw inode
1728          * value BEFORE the item became a hardlink, in which case, they probably
1729          * want the oldest link.  So request the oldest link from the catalog.
1730          *
1731          * Unfortunately, this requires that we iterate through all N hardlinks. On the plus
1732          * side, since we know that we want the last linkID, we can also have this one
1733          * call give us back the name of the last ID, since it's going to have it in-hand...
1734          */
1735         linkerr = hfs_lookup_lastlink (hfsmp, linkref, &lastid, &linkdesc);
1736         if ((linkerr == 0) && (lastid != 0)) {
1737             /*
1738              * Release any lingering buffers attached to our local descriptor.
1739              * Then copy the name and other business into the cndesc
1740              */
1741             cat_releasedesc (&cndesc);
1742             bcopy (&linkdesc, &cndesc, sizeof(linkdesc));
1743         }
1744         /* If it failed, the linkref code will just use whatever it had in-hand below. */
1745     }
1746
1747     if (linkref) {
1748         int newvnode_flags = 0;
1749         error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cnfork, &vp, &newvnode_flags);
1750         if (error == 0) {
1751             VTOC(vp)->c_flag |= C_HARDLINK;
1752
1753             //TBD - this set is for vfs -> since we have the C_HARDLINK
1754             //      currently disable this set.
1755             //vnode_setmultipath(vp);
1756         }
1757     }
1758     else
1759     {
1760         int newvnode_flags = 0;
1761
1762         void *buf = hfs_malloc(MAXPATHLEN);
1763
1764         /* Supply hfs_getnewvnode with a component name. */
1765         struct componentname cn = {
1766             .cn_nameiop = LOOKUP,
1767             .cn_flags    = ISLASTCN,
1768             .cn_pnlen    = MAXPATHLEN,
1769             .cn_namelen = cndesc.cd_namelen,
1770             .cn_pnbuf    = buf,
1771             .cn_nameptr = buf
1772         };
1773
1774         bcopy(cndesc.cd_nameptr, cn.cn_nameptr, cndesc.cd_namelen + 1);
1775         error = hfs_getnewvnode(hfsmp, NULL, &cn, &cndesc, 0, &cnattr, &cnfork, &vp, &newvnode_flags);
1776
1777         if (error == 0 && (VTOC(vp)->c_flag & C_HARDLINK)) {
1778             hfs_savelinkorigin(VTOC(vp), cndesc.cd_parentcnid);
1779         }
1780
1781         hfs_free(buf);
1782     }
1783     cat_releasedesc(&cndesc);
1784
1785     *vpp = vp;
1786     if (vp && skiplock) {
1787         hfs_unlock(VTOC(vp));
1788     }
1789     return (error);
1790 }
1791
1792 int
1793 hfs_GetInfoByID(struct hfsmount *hfsmp, cnid_t cnid, UVFSFileAttributes *file_attrs, char pcName[MAX_UTF8_NAME_LENGTH])
1794 {
1795     struct vnode *psVnode = NULL;
1796     int error = hfs_vget(hfsmp, cnid, &psVnode, 0, 0);
1797     if (error || psVnode == NULL) {
1798         if (psVnode != NULL) hfs_unlock(VTOC(psVnode));
1799         hfs_vnop_reclaim(psVnode);
1800         return EFAULT;
1801     } else {
1802         vnode_GetAttrInternal (psVnode, file_attrs);
1803         hfs_unlock(VTOC(psVnode));
1804     }
1805
1806     if (cnid == kHFSRootFolderID)
1807         pcName[0] = 0;
1808     else {
1809         //Make sure we actually have the name in the vnode
1810         if (psVnode->sFSParams.vnfs_cnp && psVnode->sFSParams.vnfs_cnp->cn_nameptr)
1811             strlcpy(pcName, (char*) psVnode->sFSParams.vnfs_cnp->cn_nameptr, MAX_UTF8_NAME_LENGTH);
1812         else
1813             return EINVAL;
1814     }
1815
1816     error = hfs_vnop_reclaim(psVnode);
1817     return (error);
1818 }
1819
1820 /*
1821  * Return the root of a filesystem.
1822  */
1823 int hfs_vfs_root(struct mount *mp, struct vnode **vpp)
1824 {
1825     return hfs_vget(VFSTOHFS(mp), (cnid_t)kHFSRootFolderID, vpp, 1, 0);
1826 }
1827
1828 /*
1829  * unmount system call
1830  */
1831 int hfs_unmount(struct mount *mp)
1832 {
1833     struct hfsmount *hfsmp = VFSTOHFS(mp);
1834     int retval = E_NONE;
1835
1836     if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE)
1837     {
1838         if (hfsmp->hfs_summary_table)
1839         {
1840             int err = 0;
1841             /*
1842              * Take the bitmap lock to serialize against a concurrent bitmap scan still in progress
1843              */
1844             if (hfsmp->hfs_allocation_vp)
1845             {
1846                 err = hfs_lock (VTOC(hfsmp->hfs_allocation_vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
1847             }
1848             hfs_free(hfsmp->hfs_summary_table);
1849             hfsmp->hfs_summary_table = NULL;
1850             hfsmp->hfs_flags &= ~HFS_SUMMARY_TABLE;
1851
1852             if (err == 0 && hfsmp->hfs_allocation_vp)
1853             {
1854                 hfs_unlock (VTOC(hfsmp->hfs_allocation_vp));
1855             }
1856         }
1857     }
1858
1859     /*
1860      *    Invalidate our caches and release metadata vnodes
1861      */
1862     if (hfsmp->jnl) {
1863         journal_release(hfsmp->jnl);
1864         hfsmp->jnl = NULL;
1865     }
1866
1867     hfsUnmount(hfsmp);
1868     int iFD = hfsmp->hfs_devvp->psFSRecord->iFD;
1869     // Remove Buffer cache entries realted to the mount
1870     lf_hfs_generic_buf_cache_clear_by_iFD(iFD);
1871
1872     vnode_rele(hfsmp->hfs_devvp);
1873
1874     hfs_locks_destroy(hfsmp);
1875     hfs_delete_chash(hfsmp);
1876     hfs_idhash_destroy(hfsmp);
1877
1878     hfs_assert(TAILQ_EMPTY(&hfsmp->hfs_reserved_ranges[HFS_TENTATIVE_BLOCKS]) && TAILQ_EMPTY(&hfsmp->hfs_reserved_ranges[HFS_LOCKED_BLOCKS]));
1879     hfs_assert(!hfsmp->lockedBlocks);
1880
1881     hfs_free(hfsmp);
1882
1883     return (retval);
1884 }
1885 /* Update volume encoding bitmap (HFS Plus only)
1886  *
1887  * Mark a legacy text encoding as in-use (as needed)
1888  * in the volume header of this HFS+ filesystem.
1889  */
1890 void
1891 hfs_setencodingbits(struct hfsmount *hfsmp, u_int32_t encoding)
1892 {
1893 #define  kIndexMacUkrainian    48  /* MacUkrainian encoding is 152 */
1894 #define  kIndexMacFarsi        49  /* MacFarsi encoding is 140 */
1895
1896     u_int32_t    index;
1897
1898     switch (encoding)
1899     {
1900         case kTextEncodingMacUkrainian:
1901             index = kIndexMacUkrainian;
1902             break;
1903         case kTextEncodingMacFarsi:
1904             index = kIndexMacFarsi;
1905             break;
1906         default:
1907             index = encoding;
1908             break;
1909     }
1910
1911     /* Only mark the encoding as in-use if it wasn't already set */
1912     if (index < 64 && (hfsmp->encodingsBitmap & (u_int64_t)(1ULL << index)) == 0) {
1913         hfs_lock_mount (hfsmp);
1914         hfsmp->encodingsBitmap |= (u_int64_t)(1ULL << index);
1915         MarkVCBDirty(hfsmp);
1916         hfs_unlock_mount(hfsmp);
1917     }
1918 }
1919
1920 /*
1921  * Update volume stats
1922  *
1923  * On journal volumes this will cause a volume header flush
1924  */
1925 int
1926 hfs_volupdate(struct hfsmount *hfsmp, enum volop op, int inroot)
1927 {
1928     struct timeval tv;
1929     microtime(&tv);
1930     hfs_lock_mount (hfsmp);
1931
1932     MarkVCBDirty(hfsmp);
1933     hfsmp->hfs_mtime = tv.tv_sec;
1934
1935     switch (op) {
1936         case VOL_UPDATE:
1937             break;
1938         case VOL_MKDIR:
1939             if (hfsmp->hfs_dircount != 0xFFFFFFFF)
1940                 ++hfsmp->hfs_dircount;
1941             if (inroot && hfsmp->vcbNmRtDirs != 0xFFFF)
1942                 ++hfsmp->vcbNmRtDirs;
1943             break;
1944         case VOL_RMDIR:
1945             if (hfsmp->hfs_dircount != 0)
1946                 --hfsmp->hfs_dircount;
1947             if (inroot && hfsmp->vcbNmRtDirs != 0xFFFF)
1948                 --hfsmp->vcbNmRtDirs;
1949             break;
1950         case VOL_MKFILE:
1951             if (hfsmp->hfs_filecount != 0xFFFFFFFF)
1952                 ++hfsmp->hfs_filecount;
1953             if (inroot && hfsmp->vcbNmFls != 0xFFFF)
1954                 ++hfsmp->vcbNmFls;
1955             break;
1956         case VOL_RMFILE:
1957             if (hfsmp->hfs_filecount != 0)
1958                 --hfsmp->hfs_filecount;
1959             if (inroot && hfsmp->vcbNmFls != 0xFFFF)
1960                 --hfsmp->vcbNmFls;
1961             break;
1962     }
1963
1964     hfs_unlock_mount (hfsmp);
1965     hfs_flushvolumeheader(hfsmp, 0);
1966
1967     return (0);
1968 }
1969