/*
 * bsd/hfs/hfs_vfsops.c — Apple XNU release xnu-2422.110.17
 * (retrieved from the git.saurik.com mirror of apple/xnu.git)
 */
1 /*
2 * Copyright (c) 1999-2014 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1991, 1993, 1994
30 * The Regents of the University of California. All rights reserved.
31 * (c) UNIX System Laboratories, Inc.
32 * All or some portions of this file are derived from material licensed
33 * to the University of California by American Telephone and Telegraph
34 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35 * the permission of UNIX System Laboratories, Inc.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * hfs_vfsops.c
66 * derived from @(#)ufs_vfsops.c 8.8 (Berkeley) 5/20/95
67 *
68 * (c) Copyright 1997-2002 Apple Computer, Inc. All rights reserved.
69 *
70 * hfs_vfsops.c -- VFS layer for loadable HFS file system.
71 *
72 */
73 #include <sys/param.h>
74 #include <sys/systm.h>
75 #include <sys/kauth.h>
76
77 #include <sys/ubc.h>
78 #include <sys/ubc_internal.h>
79 #include <sys/vnode_internal.h>
80 #include <sys/mount_internal.h>
81 #include <sys/sysctl.h>
82 #include <sys/malloc.h>
83 #include <sys/stat.h>
84 #include <sys/quota.h>
85 #include <sys/disk.h>
86 #include <sys/paths.h>
87 #include <sys/utfconv.h>
88 #include <sys/kdebug.h>
89 #include <sys/fslog.h>
90 #include <sys/ubc.h>
91 #include <sys/buf_internal.h>
92
93 /* for parsing boot-args */
94 #include <pexpert/pexpert.h>
95
96
97 #include <kern/locks.h>
98
99 #include <vfs/vfs_journal.h>
100
101 #include <miscfs/specfs/specdev.h>
102 #include <hfs/hfs_mount.h>
103
104 #include <libkern/crypto/md5.h>
105 #include <uuid/uuid.h>
106
107 #include "hfs.h"
108 #include "hfs_catalog.h"
109 #include "hfs_cnode.h"
110 #include "hfs_dbg.h"
111 #include "hfs_endian.h"
112 #include "hfs_hotfiles.h"
113 #include "hfs_quota.h"
114 #include "hfs_btreeio.h"
115 #include "hfs_kdebug.h"
116
117 #include "hfscommon/headers/FileMgrInternal.h"
118 #include "hfscommon/headers/BTreesInternal.h"
119
120 #if CONFIG_PROTECT
121 #include <sys/cprotect.h>
122 #endif
123
/* Non-zero enables verbose printf() diagnostics in the mount/unmount paths. */
#define HFS_MOUNT_DEBUG 1

#if HFS_DIAGNOSTIC
int hfs_dbg_all = 0;	/* enable all diagnostic output */
int hfs_dbg_err = 0;	/* enable error diagnostic output */
#endif

/* Enable/disable debugging code for live volume resizing */
int hfs_resize_debug = 0;

/* Lock group/attribute state shared by all HFS mounts; set up in hfs_init(). */
lck_grp_attr_t *  hfs_group_attr;
lck_attr_t *  hfs_lock_attr;
lck_grp_t *  hfs_mutex_group;
lck_grp_t *  hfs_rwlock_group;
lck_grp_t *  hfs_spinlock_group;

extern struct vnodeopv_desc hfs_vnodeop_opv_desc;

#if CONFIG_HFS_STD
extern struct vnodeopv_desc hfs_std_vnodeop_opv_desc;
static int hfs_flushMDB(struct hfsmount *hfsmp, int waitfor, int altflush);
#endif

/* not static so we can re-use in hfs_readwrite.c for build_path calls */
int hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context);

/* Forward declarations for the VFS operation implementations below. */
static int hfs_changefs(struct mount *mp, struct hfs_mount_args *args);
static int hfs_fhtovp(struct mount *mp, int fhlen, unsigned char *fhp, struct vnode **vpp, vfs_context_t context);
static int hfs_flushfiles(struct mount *, int, struct proc *);
static int hfs_getmountpoint(struct vnode *vp, struct hfsmount **hfsmpp);
static int hfs_init(struct vfsconf *vfsp);
static void hfs_locks_destroy(struct hfsmount *hfsmp);
static int hfs_vfs_root(struct mount *mp, struct vnode **vpp, vfs_context_t context);
static int hfs_quotactl(struct mount *, int, uid_t, caddr_t, vfs_context_t context);
static int hfs_start(struct mount *mp, int flags, vfs_context_t context);
static int hfs_vptofh(struct vnode *vp, int *fhlenp, unsigned char *fhp, vfs_context_t context);
static int hfs_file_extent_overlaps(struct hfsmount *hfsmp, u_int32_t allocLimit, struct HFSPlusCatalogFile *filerec);
static int hfs_journal_replay(vnode_t devvp, vfs_context_t context);
static int hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t allocLimit, u_int32_t reclaimblks, vfs_context_t context);
static int hfs_extend_journal(struct hfsmount *hfsmp, u_int32_t sector_size, u_int64_t sector_count, vfs_context_t context);

void hfs_initialize_allocator (struct hfsmount *hfsmp);
int hfs_teardown_allocator (struct hfsmount *hfsmp);

/* Entry points referenced from outside this file (e.g. the vfsops table). */
int hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t context);
int hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, int journal_replay_only, vfs_context_t context);
int hfs_reload(struct mount *mp);
int hfs_statfs(struct mount *mp, register struct vfsstatfs *sbp, vfs_context_t context);
int hfs_sync(struct mount *mp, int waitfor, vfs_context_t context);
int hfs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp,
               user_addr_t newp, size_t newlen, vfs_context_t context);
int hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context);
177 /*
178 * Called by vfs_mountroot when mounting HFS Plus as root.
179 */
180
181 int
182 hfs_mountroot(mount_t mp, vnode_t rvp, vfs_context_t context)
183 {
184 struct hfsmount *hfsmp;
185 ExtendedVCB *vcb;
186 struct vfsstatfs *vfsp;
187 int error;
188
189 if ((error = hfs_mountfs(rvp, mp, NULL, 0, context))) {
190 if (HFS_MOUNT_DEBUG) {
191 printf("hfs_mountroot: hfs_mountfs returned %d, rvp (%p) name (%s) \n",
192 error, rvp, (rvp->v_name ? rvp->v_name : "unknown device"));
193 }
194 return (error);
195 }
196
197 /* Init hfsmp */
198 hfsmp = VFSTOHFS(mp);
199
200 hfsmp->hfs_uid = UNKNOWNUID;
201 hfsmp->hfs_gid = UNKNOWNGID;
202 hfsmp->hfs_dir_mask = (S_IRWXU | S_IRGRP|S_IXGRP | S_IROTH|S_IXOTH); /* 0755 */
203 hfsmp->hfs_file_mask = (S_IRWXU | S_IRGRP|S_IXGRP | S_IROTH|S_IXOTH); /* 0755 */
204
205 /* Establish the free block reserve. */
206 vcb = HFSTOVCB(hfsmp);
207 vcb->reserveBlocks = ((u_int64_t)vcb->totalBlocks * HFS_MINFREE) / 100;
208 vcb->reserveBlocks = MIN(vcb->reserveBlocks, HFS_MAXRESERVE / vcb->blockSize);
209
210 vfsp = vfs_statfs(mp);
211 (void)hfs_statfs(mp, vfsp, NULL);
212
213 return (0);
214 }
215
216
/*
 * VFS Operations.
 *
 * mount system call
 */

/*
 * hfs_mount - handle both first-time mounts and MNT_UPDATE remounts.
 *
 * For MNT_UPDATE this covers: reload after fsck (MNT_RELOAD, read-only
 * mounts only), downgrade to read-only, upgrade back to read-write, and
 * mount-parameter changes via hfs_changefs().  For a fresh mount it
 * delegates to hfs_mountfs() and then registers the root vnode with
 * CoreStorage when present.
 *
 * Returns 0 on success or an errno value.
 */
int
hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t context)
{
	struct proc *p = vfs_context_proc(context);
	struct hfsmount *hfsmp = NULL;
	struct hfs_mount_args args;	/* caller-supplied mount arguments (copied in below) */
	int retval = E_NONE;
	u_int32_t cmdflags;

	/* Copy the user-space mount arguments into the kernel. */
	if ((retval = copyin(data, (caddr_t)&args, sizeof(args)))) {
		if (HFS_MOUNT_DEBUG) {
			printf("hfs_mount: copyin returned %d for fs\n", retval);
		}
		return (retval);
	}
	cmdflags = (u_int32_t)vfs_flags(mp) & MNT_CMDFLAGS;
	if (cmdflags & MNT_UPDATE) {
		hfsmp = VFSTOHFS(mp);

		/* Reload incore data after an fsck. */
		if (cmdflags & MNT_RELOAD) {
			if (vfs_isrdonly(mp)) {
				int error = hfs_reload(mp);
				if (error && HFS_MOUNT_DEBUG) {
					printf("hfs_mount: hfs_reload returned %d on %s \n", error, hfsmp->vcbVN);
				}
				return error;
			}
			else {
				/* Reload is only safe when nothing can be writing the volume. */
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: MNT_RELOAD not supported on rdwr filesystem %s\n", hfsmp->vcbVN);
				}
				return (EINVAL);
			}
		}

		/* Change to a read-only file system. */
		if (((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) &&
		    vfs_isrdonly(mp)) {
			int flags;

			/* Set flag to indicate that a downgrade to read-only
			 * is in progress and therefore block any further
			 * modifications to the file system.
			 */
			hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);
			hfsmp->hfs_flags |= HFS_RDONLY_DOWNGRADE;
			hfsmp->hfs_downgrading_proc = current_thread();
			hfs_unlock_global (hfsmp);

			/* use VFS_SYNC to push out System (btree) files */
			retval = VFS_SYNC(mp, MNT_WAIT, context);
			if (retval && ((cmdflags & MNT_FORCE) == 0)) {
				/* Sync failed and not forced: roll back the downgrade state. */
				hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE;
				hfsmp->hfs_downgrading_proc = NULL;
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: VFS_SYNC returned %d during b-tree sync of %s \n", retval, hfsmp->vcbVN);
				}
				goto out;
			}

			flags = WRITECLOSE;
			if (cmdflags & MNT_FORCE)
				flags |= FORCECLOSE;

			if ((retval = hfs_flushfiles(mp, flags, p))) {
				/* Could not flush open files: roll back the downgrade state. */
				hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE;
				hfsmp->hfs_downgrading_proc = NULL;
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: hfs_flushfiles returned %d on %s \n", retval, hfsmp->vcbVN);
				}
				goto out;
			}

			/* mark the volume cleanly unmounted */
			hfsmp->vcbAtrb |= kHFSVolumeUnmountedMask;
			retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
			hfsmp->hfs_flags |= HFS_READ_ONLY;

			/*
			 * Close down the journal.
			 *
			 * NOTE: It is critically important to close down the journal
			 * and have it issue all pending I/O prior to calling VNOP_FSYNC below.
			 * In a journaled environment it is expected that the journal be
			 * the only actor permitted to issue I/O for metadata blocks in HFS.
			 * If we were to call VNOP_FSYNC prior to closing down the journal,
			 * we would inadvertantly issue (and wait for) the I/O we just
			 * initiated above as part of the flushvolumeheader call.
			 *
			 * To avoid this, we follow the same order of operations as in
			 * unmount and issue the journal_close prior to calling VNOP_FSYNC.
			 */

			if (hfsmp->jnl) {
				hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);

				journal_close(hfsmp->jnl);
				hfsmp->jnl = NULL;

				// Note: we explicitly don't want to shutdown
				//       access to the jvp because we may need
				//       it later if we go back to being read-write.

				hfs_unlock_global (hfsmp);
			}


			/*
			 * Write out any pending I/O still outstanding against the device node
			 * now that the journal has been closed.
			 */
			if (retval == 0) {
				vnode_get(hfsmp->hfs_devvp);
				retval = VNOP_FSYNC(hfsmp->hfs_devvp, MNT_WAIT, context);
				vnode_put(hfsmp->hfs_devvp);
			}

			if (retval) {
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: FSYNC on devvp returned %d for fs %s\n", retval, hfsmp->vcbVN);
				}
				/* Downgrade failed: clear the downgrade and read-only state. */
				hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE;
				hfsmp->hfs_downgrading_proc = NULL;
				hfsmp->hfs_flags &= ~HFS_READ_ONLY;
				goto out;
			}

			/* Read-only volumes do not need the in-memory summary table. */
			if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
				if (hfsmp->hfs_summary_table) {
					int err = 0;
					/*
					 * Take the bitmap lock to serialize against a concurrent bitmap scan still in progress
					 */
					if (hfsmp->hfs_allocation_vp) {
						err = hfs_lock (VTOC(hfsmp->hfs_allocation_vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
					}
					FREE (hfsmp->hfs_summary_table, M_TEMP);
					hfsmp->hfs_summary_table = NULL;
					hfsmp->hfs_flags &= ~HFS_SUMMARY_TABLE;
					if (err == 0 && hfsmp->hfs_allocation_vp){
						hfs_unlock (VTOC(hfsmp->hfs_allocation_vp));
					}
				}
			}

			/* Downgrade complete; no longer "in progress". */
			hfsmp->hfs_downgrading_proc = NULL;
		}

		/* Change to a writable file system. */
		if (vfs_iswriteupgrade(mp)) {
			/*
			 * On inconsistent disks, do not allow read-write mount
			 * unless it is the boot volume being mounted.
			 */
			if (!(vfs_flags(mp) & MNT_ROOTFS) &&
			    (hfsmp->vcbAtrb & kHFSVolumeInconsistentMask)) {
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: attempting to mount inconsistent non-root volume %s\n", (hfsmp->vcbVN));
				}
				retval = EINVAL;
				goto out;
			}

			// If the journal was shut-down previously because we were
			// asked to be read-only, let's start it back up again now

			if (   (HFSTOVCB(hfsmp)->vcbAtrb & kHFSVolumeJournaledMask)
			    && hfsmp->jnl == NULL
			    && hfsmp->jvp != NULL) {
				int jflags;

				if (hfsmp->hfs_flags & HFS_NEED_JNL_RESET) {
					jflags = JOURNAL_RESET;
				} else {
					jflags = 0;
				}

				hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);

				/* We provide the mount point twice here: The first is used as
				 * an opaque argument to be passed back when hfs_sync_metadata
				 * is called.  The second is provided to the throttling code to
				 * indicate which mount's device should be used when accounting
				 * for metadata writes.
				 */
				hfsmp->jnl = journal_open(hfsmp->jvp,
						(hfsmp->jnl_start * HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset,
						hfsmp->jnl_size,
						hfsmp->hfs_devvp,
						hfsmp->hfs_logical_block_size,
						jflags,
						0,
						hfs_sync_metadata, hfsmp->hfs_mp,
						hfsmp->hfs_mp);

				/*
				 * Set up the trim callback function so that we can add
				 * recently freed extents to the free extent cache once
				 * the transaction that freed them is written to the
				 * journal on disk.
				 */
				if (hfsmp->jnl)
					journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);

				hfs_unlock_global (hfsmp);

				if (hfsmp->jnl == NULL) {
					if (HFS_MOUNT_DEBUG) {
						printf("hfs_mount: journal_open == NULL; couldn't be opened on %s \n", (hfsmp->vcbVN));
					}
					retval = EINVAL;
					goto out;
				} else {
					hfsmp->hfs_flags &= ~HFS_NEED_JNL_RESET;
				}

			}

			/* See if we need to erase unused Catalog nodes due to <rdar://problem/6947811>. */
			retval = hfs_erase_unused_nodes(hfsmp);
			if (retval != E_NONE) {
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: hfs_erase_unused_nodes returned %d for fs %s\n", retval, hfsmp->vcbVN);
				}
				goto out;
			}

			/* If this mount point was downgraded from read-write
			 * to read-only, clear that information as we are now
			 * moving back to read-write.
			 */
			hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE;
			hfsmp->hfs_downgrading_proc = NULL;

			/* mark the volume dirty (clear clean unmount bit) */
			hfsmp->vcbAtrb &= ~kHFSVolumeUnmountedMask;

			retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
			if (retval != E_NONE) {
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: hfs_flushvolumeheader returned %d for fs %s\n", retval, hfsmp->vcbVN);
				}
				goto out;
			}

			/* Only clear HFS_READ_ONLY after a successful write */
			hfsmp->hfs_flags &= ~HFS_READ_ONLY;


			if (!(hfsmp->hfs_flags & (HFS_READ_ONLY | HFS_STANDARD))) {
				/* Setup private/hidden directories for hardlinks. */
				hfs_privatedir_init(hfsmp, FILE_HARDLINKS);
				hfs_privatedir_init(hfsmp, DIR_HARDLINKS);

				hfs_remove_orphans(hfsmp);

				/*
				 * Allow hot file clustering if conditions allow.
				 */
				if ((hfsmp->hfs_flags & HFS_METADATA_ZONE) &&
				    ((hfsmp->hfs_mp->mnt_kern_flag & MNTK_SSD) == 0)) {
					(void) hfs_recording_init(hfsmp);
				}
				/* Force ACLs on HFS+ file systems. */
				if (vfs_extendedsecurity(HFSTOVFS(hfsmp)) == 0) {
					vfs_setextendedsecurity(HFSTOVFS(hfsmp));
				}
			}
		}

		/* Update file system parameters. */
		retval = hfs_changefs(mp, &args);
		if (retval && HFS_MOUNT_DEBUG) {
			printf("hfs_mount: hfs_changefs returned %d for %s\n", retval, hfsmp->vcbVN);
		}

	} else /* not an update request */ {

		/* Set the mount flag to indicate that we support volfs  */
		vfs_setflags(mp, (u_int64_t)((unsigned int)MNT_DOVOLFS));

		retval = hfs_mountfs(devvp, mp, &args, 0, context);
		if (retval) {
			const char *name = vnode_getname(devvp);
			printf("hfs_mount: hfs_mountfs returned error=%d for device %s\n", retval, (name ? name : "unknown-dev"));
			if (name) {
				vnode_putname(name);
			}
			goto out;
		}

		/* After hfs_mountfs succeeds, we should have valid hfsmp */
		hfsmp = VFSTOHFS(mp);

		/*
		 * Check to see if the file system exists on CoreStorage.
		 *
		 * This must be done after examining the root folder's CP EA since
		 * hfs_vfs_root will create a vnode (which must not occur until after
		 * we've established the CP level of the FS).
		 */
		if (retval == 0) {
			errno_t err;
			vnode_t root_vnode;
			err = hfs_vfs_root(mp, &root_vnode, context);
			if (err == 0) {
				/* Hand the root vnode to CoreStorage; on success keep a ref and flag it. */
				if (VNOP_IOCTL(devvp, _DKIOCCSSETFSVNODE,
				    (caddr_t)&root_vnode, 0, context) == 0) {
					err = vnode_ref(root_vnode);
					if (err == 0) {
						hfsmp->hfs_flags |= HFS_CS;
					}
				}

				err = vnode_put(root_vnode);
				if (err) {
					printf("hfs: could not release io count on root vnode with error: %d\n",
						err);
				}
			} else {
				printf("hfs: could not get root vnode with error: %d\n",
					err);
			}
		}
	}

out:
	if (retval == 0) {
		(void)hfs_statfs(mp, vfs_statfs(mp), context);
	}
	return (retval);
}
556
557
/*
 * Arguments threaded through vnode_iterate() to hfs_changefs_callback().
 */
struct hfs_changefs_cargs {
	struct hfsmount *hfsmp;	/* mount whose parameters are changing */
	int		namefix;	/* non-zero: name encoding converter changed */
	int		permfix;	/* non-zero: default uid/gid/mask changed */
	int		permswitch;	/* non-zero: MNT_UNKNOWNPERMISSIONS toggled */
};
564
/*
 * Per-vnode callback for hfs_changefs(), invoked via vnode_iterate().
 *
 * Re-reads the cnode's catalog record and applies any pending
 * permission (uid/gid/mode) or name-encoding fixups.  Always returns
 * VNODE_RETURNED so the iteration continues over every vnode.
 */
static int
hfs_changefs_callback(struct vnode *vp, void *cargs)
{
	ExtendedVCB *vcb;
	struct cnode *cp;
	struct cat_desc cndesc;
	struct cat_attr cnattr;
	struct hfs_changefs_cargs *args;
	int lockflags;
	int error;

	args = (struct hfs_changefs_cargs *)cargs;

	cp = VTOC(vp);
	vcb = HFSTOVCB(args->hfsmp);

	/* Look up this cnode's current catalog record under the catalog lock. */
	lockflags = hfs_systemfile_lock(args->hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
	error = cat_lookup(args->hfsmp, &cp->c_desc, 0, 0, &cndesc, &cnattr, NULL, NULL);
	hfs_systemfile_unlock(args->hfsmp, lockflags);
	if (error) {
	        /*
	         * If we couldn't find this guy skip to the next one
	         */
		if (args->namefix)
			cache_purge(vp);

		return (VNODE_RETURNED);
	}
	/*
	 * Get the real uid/gid and perm mask from disk.
	 */
	if (args->permswitch || args->permfix) {
		cp->c_uid = cnattr.ca_uid;
		cp->c_gid = cnattr.ca_gid;
		cp->c_mode = cnattr.ca_mode;
	}
	/*
	 * If we're switching name converters then...
	 *   Remove the existing entry from the namei cache.
	 *   Update name to one based on new encoder.
	 */
	if (args->namefix) {
		cache_purge(vp);
		/* replace_desc takes ownership of cndesc; no release needed here. */
		replace_desc(cp, &cndesc);

		if (cndesc.cd_cnid == kHFSRootFolderID) {
			/* Root folder: keep the in-core volume name in sync. */
			strlcpy((char *)vcb->vcbVN, (const char *)cp->c_desc.cd_nameptr, NAME_MAX+1);
			cp->c_desc.cd_encoding = args->hfsmp->hfs_encoding;
		}
	} else {
		/* Descriptor was not consumed by replace_desc; release it. */
		cat_releasedesc(&cndesc);
	}
	return (VNODE_RETURNED);
}
619
/* Change fs mount parameters */
/*
 * hfs_changefs - apply updated mount arguments to an existing mount.
 *
 * Handles: toggling MNT_UNKNOWNPERMISSIONS, timezone changes, default
 * uid/gid/mask changes, and (HFS standard only) switching the name
 * encoding converter.  When any per-vnode state is affected, every
 * active vnode is visited via hfs_changefs_callback().
 *
 * Returns 0 on success or an errno value.
 */
static int
hfs_changefs(struct mount *mp, struct hfs_mount_args *args)
{
	int retval = 0;
	int namefix, permfix, permswitch;
	struct hfsmount *hfsmp;
	ExtendedVCB *vcb;
	struct hfs_changefs_cargs cargs;
	u_int32_t mount_flags;

#if CONFIG_HFS_STD
	u_int32_t old_encoding = 0;
	hfs_to_unicode_func_t	get_unicode_func;
	unicode_to_hfs_func_t	get_hfsname_func;
#endif

	hfsmp = VFSTOHFS(mp);
	vcb = HFSTOVCB(hfsmp);
	mount_flags = (unsigned int)vfs_flags(mp);

	/* Flag that a parameter change is in progress for this mount. */
	hfsmp->hfs_flags |= HFS_IN_CHANGEFS;

	/* True when the MNT_UNKNOWNPERMISSIONS setting is being flipped. */
	permswitch = (((hfsmp->hfs_flags & HFS_UNKNOWN_PERMS) &&
	               ((mount_flags & MNT_UNKNOWNPERMISSIONS) == 0)) ||
	              (((hfsmp->hfs_flags & HFS_UNKNOWN_PERMS) == 0) &&
	               (mount_flags & MNT_UNKNOWNPERMISSIONS)));

	/* The root filesystem must operate with actual permissions: */
	if (permswitch && (mount_flags & MNT_ROOTFS) && (mount_flags & MNT_UNKNOWNPERMISSIONS)) {
		vfs_clearflags(mp, (u_int64_t)((unsigned int)MNT_UNKNOWNPERMISSIONS));	/* Just say "No". */
		retval = EINVAL;
		goto exit;
	}
	if (mount_flags & MNT_UNKNOWNPERMISSIONS)
		hfsmp->hfs_flags |= HFS_UNKNOWN_PERMS;
	else
		hfsmp->hfs_flags &= ~HFS_UNKNOWN_PERMS;

	namefix = permfix = 0;

	/*
	 * Tracking of hot files requires up-to-date access times.  So if
	 * access time updates are disabled, we must also disable hot files.
	 */
	if (mount_flags & MNT_NOATIME) {
		(void) hfs_recording_suspend(hfsmp);
	}

	/* Change the timezone (Note: this affects all hfs volumes and hfs+ volume create dates) */
	if (args->hfs_timezone.tz_minuteswest != VNOVAL) {
		gTimeZone = args->hfs_timezone;
	}

	/* Change the default uid, gid and/or mask */
	if ((args->hfs_uid != (uid_t)VNOVAL) && (hfsmp->hfs_uid != args->hfs_uid)) {
		hfsmp->hfs_uid = args->hfs_uid;
		if (vcb->vcbSigWord == kHFSPlusSigWord)
			++permfix;
	}
	if ((args->hfs_gid != (gid_t)VNOVAL) && (hfsmp->hfs_gid != args->hfs_gid)) {
		hfsmp->hfs_gid = args->hfs_gid;
		if (vcb->vcbSigWord == kHFSPlusSigWord)
			++permfix;
	}
	if (args->hfs_mask != (mode_t)VNOVAL) {
		if (hfsmp->hfs_dir_mask != (args->hfs_mask & ALLPERMS)) {
			hfsmp->hfs_dir_mask = args->hfs_mask & ALLPERMS;
			hfsmp->hfs_file_mask = args->hfs_mask & ALLPERMS;
			/* HFSFSMNT_NOXONFILES strips execute bits from plain files. */
			if ((args->flags != VNOVAL) && (args->flags & HFSFSMNT_NOXONFILES))
				hfsmp->hfs_file_mask = (args->hfs_mask & DEFFILEMODE);
			if (vcb->vcbSigWord == kHFSPlusSigWord)
				++permfix;
		}
	}

#if CONFIG_HFS_STD
	/* Change the hfs encoding value (hfs only) */
	if ((vcb->vcbSigWord == kHFSSigWord)	&&
	    (args->hfs_encoding != (u_int32_t)VNOVAL)	&&
	    (hfsmp->hfs_encoding != args->hfs_encoding)) {

		retval = hfs_getconverter(args->hfs_encoding, &get_unicode_func, &get_hfsname_func);
		if (retval)
			goto exit;

		/*
		 * Connect the new hfs_get_unicode converter but leave
		 * the old hfs_get_hfsname converter in place so that
		 * we can lookup existing vnodes to get their correctly
		 * encoded names.
		 *
		 * When we're all finished, we can then connect the new
		 * hfs_get_hfsname converter and release our interest
		 * in the old converters.
		 */
		hfsmp->hfs_get_unicode = get_unicode_func;
		old_encoding = hfsmp->hfs_encoding;
		hfsmp->hfs_encoding = args->hfs_encoding;
		++namefix;
	}
#endif

	/* Nothing changed that affects per-vnode state; done. */
	if (!(namefix || permfix || permswitch))
		goto exit;

	/* XXX 3762912 hack to support HFS filesystem 'owner' */
	if (permfix)
		vfs_setowner(mp,
		    hfsmp->hfs_uid == UNKNOWNUID ? KAUTH_UID_NONE : hfsmp->hfs_uid,
		    hfsmp->hfs_gid == UNKNOWNGID ? KAUTH_GID_NONE : hfsmp->hfs_gid);

	/*
	 * For each active vnode fix things that changed
	 *
	 * Note that we can visit a vnode more than once
	 * and we can race with fsync.
	 *
	 * hfs_changefs_callback will be called for each vnode
	 * hung off of this mount point
	 *
	 * The vnode will be properly referenced and unreferenced
	 * around the callback
	 */
	cargs.hfsmp = hfsmp;
	cargs.namefix = namefix;
	cargs.permfix = permfix;
	cargs.permswitch = permswitch;

	vnode_iterate(mp, 0, hfs_changefs_callback, (void *)&cargs);

#if CONFIG_HFS_STD
	/*
	 * If we're switching name converters we can now
	 * connect the new hfs_get_hfsname converter and
	 * release our interest in the old converters.
	 */
	if (namefix) {
		/* HFS standard only */
		hfsmp->hfs_get_hfsname = get_hfsname_func;
		vcb->volumeNameEncodingHint = args->hfs_encoding;
		(void) hfs_relconverter(old_encoding);
	}
#endif

exit:
	hfsmp->hfs_flags &= ~HFS_IN_CHANGEFS;
	return (retval);
}
769
770
/*
 * Arguments threaded through vnode_iterate() to hfs_reload_callback().
 */
struct hfs_reload_cargs {
	struct hfsmount *hfsmp;	/* mount being reloaded */
	int		error;	/* first error seen by the callback (0 if none) */
};
775
/*
 * Per-vnode callback for hfs_reload(), invoked via vnode_iterate().
 *
 * Invalidates cached buffers and directory hints, then re-reads catalog
 * data for regular (non-system, non-resource-fork) cnodes.  On a catalog
 * lookup failure the error is stored in args->error and the iteration is
 * stopped by returning VNODE_RETURNED_DONE.
 */
static int
hfs_reload_callback(struct vnode *vp, void *cargs)
{
	struct cnode *cp;
	struct hfs_reload_cargs *args;
	int lockflags;

	args = (struct hfs_reload_cargs *)cargs;
	/*
	 * flush all the buffers associated with this node
	 */
	(void) buf_invalidateblks(vp, 0, 0, 0);

	cp = VTOC(vp);
	/*
	 * Remove any directory hints
	 */
	if (vnode_isdir(vp))
		hfs_reldirhints(cp, 0);

	/*
	 * Re-read cnode data for all active vnodes (non-metadata files).
	 */
	if (!vnode_issystem(vp) && !VNODE_IS_RSRC(vp) && (cp->c_fileid >= kHFSFirstUserCatalogNodeID)) {
		struct cat_fork *datafork;
		struct cat_desc desc;

		datafork = cp->c_datafork ? &cp->c_datafork->ff_data : NULL;

		/* lookup by fileID since name could have changed */
		lockflags = hfs_systemfile_lock(args->hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
		args->error = cat_idlookup(args->hfsmp, cp->c_fileid, 0, 0, &desc, &cp->c_attr, datafork);
		hfs_systemfile_unlock(args->hfsmp, lockflags);
		if (args->error) {
			return (VNODE_RETURNED_DONE);
		}

		/* update cnode's catalog descriptor */
		(void) replace_desc(cp, &desc);
	}
	return (VNODE_RETURNED);
}
818
/*
 * Reload all incore data for a filesystem (used after running fsck on
 * the root filesystem and finding things to fix). The filesystem must
 * be mounted read-only.
 *
 * Things to do to update the mount:
 *	invalidate all cached meta-data.
 *	invalidate all inactive vnodes.
 *	invalidate all cached file data.
 *	re-read volume header from disk.
 *	re-load meta-file info (extents, file size).
 *	re-load B-tree header data.
 *	re-read cnode data for all active vnodes.
 *
 * Returns 0 on success or an errno value; EINVAL for HFS standard
 * volumes, EIO if the re-read volume header fails its sanity check.
 */
int
hfs_reload(struct mount *mountp)
{
	register struct vnode *devvp;
	struct buf *bp;
	int error, i;
	struct hfsmount *hfsmp;
	struct HFSPlusVolumeHeader *vhp;
	ExtendedVCB *vcb;
	struct filefork *forkp;
	struct cat_desc cndesc;
	struct hfs_reload_cargs args;
	daddr64_t priIDSector;

	hfsmp = VFSTOHFS(mountp);
	vcb = HFSTOVCB(hfsmp);

	if (vcb->vcbSigWord == kHFSSigWord)
		return (EINVAL);	/* rooting from HFS is not supported! */

	/*
	 * Invalidate all cached meta-data.
	 */
	devvp = hfsmp->hfs_devvp;
	if (buf_invalidateblks(devvp, 0, 0, 0))
		panic("hfs_reload: dirty1");

	args.hfsmp = hfsmp;
	args.error = 0;
	/*
	 * hfs_reload_callback will be called for each vnode
	 * hung off of this mount point that can't be recycled...
	 * vnode_iterate will recycle those that it can (the VNODE_RELOAD option)
	 * the vnode will be in an 'unbusy' state (VNODE_WAIT) and
	 * properly referenced and unreferenced around the callback
	 */
	vnode_iterate(mountp, VNODE_RELOAD | VNODE_WAIT, hfs_reload_callback, (void *)&args);

	if (args.error)
		return (args.error);

	/*
	 * Re-read VolumeHeader from disk.
	 */
	priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
			HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size));

	error = (int)buf_meta_bread(hfsmp->hfs_devvp,
			HFS_PHYSBLK_ROUNDDOWN(priIDSector, hfsmp->hfs_log_per_phys),
			hfsmp->hfs_physical_block_size, NOCRED, &bp);
	if (error) {
		if (bp != NULL)
			buf_brelse(bp);
		return (error);
	}

	vhp = (HFSPlusVolumeHeader *) (buf_dataptr(bp) + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));

	/* Do a quick sanity check: signature, version and block size must match. */
	if ((SWAP_BE16(vhp->signature) != kHFSPlusSigWord &&
	     SWAP_BE16(vhp->signature) != kHFSXSigWord) ||
	    (SWAP_BE16(vhp->version) != kHFSPlusVersion &&
	     SWAP_BE16(vhp->version) != kHFSXVersion) ||
	    SWAP_BE32(vhp->blockSize) != vcb->blockSize) {
		buf_brelse(bp);
		return (EIO);
	}

	/* Copy the (big-endian) on-disk header fields into the in-core VCB. */
	vcb->vcbLsMod		= to_bsd_time(SWAP_BE32(vhp->modifyDate));
	vcb->vcbAtrb		= SWAP_BE32 (vhp->attributes);
	vcb->vcbJinfoBlock  = SWAP_BE32(vhp->journalInfoBlock);
	vcb->vcbClpSiz		= SWAP_BE32 (vhp->rsrcClumpSize);
	vcb->vcbNxtCNID		= SWAP_BE32 (vhp->nextCatalogID);
	vcb->vcbVolBkUp		= to_bsd_time(SWAP_BE32(vhp->backupDate));
	vcb->vcbWrCnt		= SWAP_BE32 (vhp->writeCount);
	vcb->vcbFilCnt		= SWAP_BE32 (vhp->fileCount);
	vcb->vcbDirCnt		= SWAP_BE32 (vhp->folderCount);
	HFS_UPDATE_NEXT_ALLOCATION(vcb, SWAP_BE32 (vhp->nextAllocation));
	vcb->totalBlocks	= SWAP_BE32 (vhp->totalBlocks);
	vcb->freeBlocks		= SWAP_BE32 (vhp->freeBlocks);
	vcb->encodingsBitmap	= SWAP_BE64 (vhp->encodingsBitmap);
	bcopy(vhp->finderInfo, vcb->vcbFndrInfo, sizeof(vhp->finderInfo));
	vcb->localCreateDate	= SWAP_BE32 (vhp->createDate);	/* hfs+ create date is in local time */

	/*
	 * Re-load meta-file vnode data (extent info, file size, etc).
	 */
	forkp = VTOF((struct vnode *)vcb->extentsRefNum);
	for (i = 0; i < kHFSPlusExtentDensity; i++) {
		forkp->ff_extents[i].startBlock =
			SWAP_BE32 (vhp->extentsFile.extents[i].startBlock);
		forkp->ff_extents[i].blockCount =
			SWAP_BE32 (vhp->extentsFile.extents[i].blockCount);
	}
	forkp->ff_size      = SWAP_BE64 (vhp->extentsFile.logicalSize);
	forkp->ff_blocks    = SWAP_BE32 (vhp->extentsFile.totalBlocks);
	forkp->ff_clumpsize = SWAP_BE32 (vhp->extentsFile.clumpSize);


	forkp = VTOF((struct vnode *)vcb->catalogRefNum);
	for (i = 0; i < kHFSPlusExtentDensity; i++) {
		forkp->ff_extents[i].startBlock	=
			SWAP_BE32 (vhp->catalogFile.extents[i].startBlock);
		forkp->ff_extents[i].blockCount	=
			SWAP_BE32 (vhp->catalogFile.extents[i].blockCount);
	}
	forkp->ff_size      = SWAP_BE64 (vhp->catalogFile.logicalSize);
	forkp->ff_blocks    = SWAP_BE32 (vhp->catalogFile.totalBlocks);
	forkp->ff_clumpsize = SWAP_BE32 (vhp->catalogFile.clumpSize);

	/* The attributes B-tree is optional; only reload it if present. */
	if (hfsmp->hfs_attribute_vp) {
		forkp = VTOF(hfsmp->hfs_attribute_vp);
		for (i = 0; i < kHFSPlusExtentDensity; i++) {
			forkp->ff_extents[i].startBlock	=
				SWAP_BE32 (vhp->attributesFile.extents[i].startBlock);
			forkp->ff_extents[i].blockCount	=
				SWAP_BE32 (vhp->attributesFile.extents[i].blockCount);
		}
		forkp->ff_size      = SWAP_BE64 (vhp->attributesFile.logicalSize);
		forkp->ff_blocks    = SWAP_BE32 (vhp->attributesFile.totalBlocks);
		forkp->ff_clumpsize = SWAP_BE32 (vhp->attributesFile.clumpSize);
	}

	forkp = VTOF((struct vnode *)vcb->allocationsRefNum);
	for (i = 0; i < kHFSPlusExtentDensity; i++) {
		forkp->ff_extents[i].startBlock	=
			SWAP_BE32 (vhp->allocationFile.extents[i].startBlock);
		forkp->ff_extents[i].blockCount	=
			SWAP_BE32 (vhp->allocationFile.extents[i].blockCount);
	}
	forkp->ff_size      = SWAP_BE64 (vhp->allocationFile.logicalSize);
	forkp->ff_blocks    = SWAP_BE32 (vhp->allocationFile.totalBlocks);
	forkp->ff_clumpsize = SWAP_BE32 (vhp->allocationFile.clumpSize);

	buf_brelse(bp);
	vhp = NULL;	/* header pointed into bp's data; invalid after release */

	/*
	 * Re-load B-tree header data
	 */
	forkp = VTOF((struct vnode *)vcb->extentsRefNum);
	if ( (error = MacToVFSError( BTReloadData((FCB*)forkp) )) )
		return (error);

	forkp = VTOF((struct vnode *)vcb->catalogRefNum);
	if ( (error = MacToVFSError( BTReloadData((FCB*)forkp) )) )
		return (error);

	if (hfsmp->hfs_attribute_vp) {
		forkp = VTOF(hfsmp->hfs_attribute_vp);
		if ( (error = MacToVFSError( BTReloadData((FCB*)forkp) )) )
			return (error);
	}

	/* Reload the volume name */
	if ((error = cat_idlookup(hfsmp, kHFSRootFolderID, 0, 0, &cndesc, NULL, NULL)))
		return (error);
	vcb->volumeNameEncodingHint = cndesc.cd_encoding;
	bcopy(cndesc.cd_nameptr, vcb->vcbVN, min(255, cndesc.cd_namelen));
	cat_releasedesc(&cndesc);

	/* Re-establish private/hidden directories. */
	hfs_privatedir_init(hfsmp, FILE_HARDLINKS);
	hfs_privatedir_init(hfsmp, DIR_HARDLINKS);

	/* In case any volume information changed to trigger a notification */
	hfs_generate_volume_notifications(hfsmp);

	return (0);
}
1003
1004 __unused
static uint64_t tv_to_usecs(struct timeval *tv)
{
	/* Fold the seconds and microseconds fields of a timeval into a
	   single whole-microsecond count. */
	uint64_t total = (uint64_t)tv->tv_sec * 1000000ULL;
	total += (uint64_t)tv->tv_usec;
	return total;
}
1009
1010 // Returns TRUE if b - a >= usecs
1011 static boolean_t hfs_has_elapsed (const struct timeval *a,
1012 const struct timeval *b,
1013 uint64_t usecs)
1014 {
1015 struct timeval diff;
1016 timersub(b, a, &diff);
1017 return diff.tv_sec * 1000000ULL + diff.tv_usec >= usecs;
1018 }
1019
/*
 * hfs_syncer - thread_call callback that flushes dirty HFS metadata.
 *
 * arg0:   the struct hfsmount of the volume to sync.
 * unused: second thread_call parameter (ignored).
 *
 * Scheduled via hfs_syncer_queue() after sync requests are recorded in
 * hfsmp->hfs_sync_req_oldest.  Decides whether enough time has passed to
 * flush now; if not, it re-arms itself and returns.  When it does flush,
 * it performs a synchronous journal flush (journaled volumes) or a full
 * hfs_sync (non-journaled volumes) inside a transaction.
 *
 * Shutdown protocol: hfs_unmount clears hfsmp->hfs_syncer and then waits
 * on hfs_sync_incomplete.  Every exit path here must therefore either
 * re-queue the syncer or clear hfs_sync_incomplete and call
 * hfs_syncer_wakeup().
 */
static void
hfs_syncer(void *arg0, void *unused)
{
#pragma unused(unused)

	struct hfsmount *hfsmp = arg0;
	struct timeval now;

	microuptime(&now);

	KERNEL_DEBUG_CONSTANT(HFSDBG_SYNCER | DBG_FUNC_START, hfsmp,
	                      tv_to_usecs(&now),
	                      tv_to_usecs(&hfsmp->hfs_mp->mnt_last_write_completed_timestamp),
	                      hfsmp->hfs_mp->mnt_pending_write_size, 0);

	hfs_syncer_lock(hfsmp);

	if (!hfsmp->hfs_syncer) {
		// hfs_unmount cleared hfs_syncer and is waiting for us: leave
		// now and let it do the sync itself.
		hfsmp->hfs_sync_incomplete = FALSE;
		hfs_syncer_unlock(hfsmp);
		hfs_syncer_wakeup(hfsmp);
		return;
	}

	/* Check to see whether we should flush now: either the oldest is
	   > HFS_MAX_META_DELAY or HFS_META_DELAY has elapsed since the
	   request and there are no pending writes. */

	boolean_t flush_now = FALSE;

	if (hfs_has_elapsed(&hfsmp->hfs_sync_req_oldest, &now, HFS_MAX_META_DELAY))
		flush_now = TRUE;
	else if (!hfsmp->hfs_mp->mnt_pending_write_size) {
		/* N.B. accessing mnt_last_write_completed_timestamp is not thread safe, but
		   it won't matter for what we're using it for. */
		if (hfs_has_elapsed(&hfsmp->hfs_mp->mnt_last_write_completed_timestamp,
		                    &now,
		                    HFS_META_DELAY)) {
			flush_now = TRUE;
		}
	}

	if (!flush_now) {
		// Too early to flush: re-arm and check again later.  Capture
		// the thread_call pointer while the syncer lock is still held.
		thread_call_t syncer = hfsmp->hfs_syncer;

		hfs_syncer_unlock(hfsmp);

		hfs_syncer_queue(syncer);

		return;
	}

	// Committed to flushing: clear the oldest-request timestamp so new
	// sync requests start a fresh delay window.
	timerclear(&hfsmp->hfs_sync_req_oldest);

	hfs_syncer_unlock(hfsmp);

	KERNEL_DEBUG_CONSTANT(HFSDBG_SYNCER_TIMED | DBG_FUNC_START,
	                      tv_to_usecs(&now),
	                      tv_to_usecs(&hfsmp->hfs_mp->mnt_last_write_completed_timestamp),
	                      tv_to_usecs(&hfsmp->hfs_mp->mnt_last_write_issued_timestamp),
	                      hfsmp->hfs_mp->mnt_pending_write_size, 0);

	// Defensive: only one syncer instance should ever be flushing.
	// NOTE(review): this early return leaves hfs_sync_incomplete set and
	// performs no hfs_syncer_wakeup, which would stall an hfs_unmount
	// waiter -- presumably this path is unreachable in practice; verify.
	if (hfsmp->hfs_syncer_thread) {
		printf("hfs: syncer already running!");
		return;
	}

	hfsmp->hfs_syncer_thread = current_thread();

	hfs_start_transaction(hfsmp);    // so we hold off any new writes

	/*
	 * We intentionally do a synchronous flush (of the journal or entire volume) here.
	 * For journaled volumes, this means we wait until the metadata blocks are written
	 * to both the journal and their final locations (in the B-trees, etc.).
	 *
	 * This tends to avoid interleaving the metadata writes with other writes (for
	 * example, user data, or to the journal when a later transaction notices that
	 * an earlier transaction has finished its async writes, and then updates the
	 * journal start in the journal header).  Avoiding interleaving of writes is
	 * very good for performance on simple flash devices like SD cards, thumb drives;
	 * and on devices like floppies.  Since removable devices tend to be this kind of
	 * simple device, doing a synchronous flush actually improves performance in
	 * practice.
	 *
	 * NOTE: For non-journaled volumes, the call to hfs_sync will also cause dirty
	 * user data to be written.
	 */
	if (hfsmp->jnl) {
		hfs_journal_flush(hfsmp, TRUE);
	} else {
		hfs_sync(hfsmp->hfs_mp, MNT_WAIT, vfs_context_kernel());
	}

	KERNEL_DEBUG_CONSTANT(HFSDBG_SYNCER_TIMED | DBG_FUNC_END,
	                      (microuptime(&now), tv_to_usecs(&now)),    // re-sample 'now' after the flush
	                      tv_to_usecs(&hfsmp->hfs_mp->mnt_last_write_completed_timestamp),
	                      tv_to_usecs(&hfsmp->hfs_mp->mnt_last_write_issued_timestamp),
	                      hfsmp->hfs_mp->mnt_pending_write_size, 0);

	hfs_end_transaction(hfsmp);

	hfsmp->hfs_syncer_thread = NULL;

	hfs_syncer_lock(hfsmp);

	// If hfs_unmount lets us and we missed a sync, schedule again
	if (hfsmp->hfs_syncer && timerisset(&hfsmp->hfs_sync_req_oldest)) {
		thread_call_t syncer = hfsmp->hfs_syncer;

		hfs_syncer_unlock(hfsmp);

		hfs_syncer_queue(syncer);
	} else {
		// Either unmount is in progress or no sync was missed: signal
		// completion so any hfs_unmount waiter can proceed.
		hfsmp->hfs_sync_incomplete = FALSE;
		hfs_syncer_unlock(hfsmp);
		hfs_syncer_wakeup(hfsmp);
	}

	/* BE CAREFUL WHAT YOU ADD HERE: at this point hfs_unmount is free
	   to continue and therefore hfsmp might be invalid. */

	KERNEL_DEBUG_CONSTANT(HFSDBG_SYNCER | DBG_FUNC_END, 0, 0, 0, 0, 0);
}
1145
1146
1147 extern int IOBSDIsMediaEjectable( const char *cdev_name );
1148
1149 /*
1150 * Call into the allocator code and perform a full scan of the bitmap file.
1151 *
1152 * This allows us to TRIM unallocated ranges if needed, and also to build up
1153 * an in-memory summary table of the state of the allocated blocks.
1154 */
1155 void hfs_scan_blocks (struct hfsmount *hfsmp) {
1156 /*
1157 * Take the allocation file lock. Journal transactions will block until
1158 * we're done here.
1159 */
1160
1161 int flags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
1162
1163 /*
1164 * We serialize here with the HFS mount lock as we're mounting.
1165 *
1166 * The mount can only proceed once this thread has acquired the bitmap
1167 * lock, since we absolutely do not want someone else racing in and
1168 * getting the bitmap lock, doing a read/write of the bitmap file,
1169 * then us getting the bitmap lock.
1170 *
1171 * To prevent this, the mount thread takes the HFS mount mutex, starts us
1172 * up, then immediately msleeps on the scan_var variable in the mount
1173 * point as a condition variable. This serialization is safe since
1174 * if we race in and try to proceed while they're still holding the lock,
1175 * we'll block trying to acquire the global lock. Since the mount thread
1176 * acquires the HFS mutex before starting this function in a new thread,
1177 * any lock acquisition on our part must be linearizably AFTER the mount thread's.
1178 *
1179 * Note that the HFS mount mutex is always taken last, and always for only
1180 * a short time. In this case, we just take it long enough to mark the
1181 * scan-in-flight bit.
1182 */
1183 (void) hfs_lock_mount (hfsmp);
1184 hfsmp->scan_var |= HFS_ALLOCATOR_SCAN_INFLIGHT;
1185 wakeup((caddr_t) &hfsmp->scan_var);
1186 hfs_unlock_mount (hfsmp);
1187
1188 /* Initialize the summary table */
1189 if (hfs_init_summary (hfsmp)) {
1190 printf("hfs: could not initialize summary table for %s\n", hfsmp->vcbVN);
1191 }
1192
1193 /*
1194 * ScanUnmapBlocks assumes that the bitmap lock is held when you
1195 * call the function. We don't care if there were any errors issuing unmaps.
1196 *
1197 * It will also attempt to build up the summary table for subsequent
1198 * allocator use, as configured.
1199 */
1200 (void) ScanUnmapBlocks(hfsmp);
1201
1202 hfs_systemfile_unlock(hfsmp, flags);
1203 }
1204
/* Set during hfs_mountfs for the root volume: non-zero when the HFS+
 * volume header's kHFSVolumeUnmountedMask attribute bit was set (i.e.
 * the root filesystem was unmounted cleanly last time). */
static int hfs_root_unmounted_cleanly = 0;

SYSCTL_DECL(_vfs_generic);
/* Exported read-only as vfs.generic.root_unmounted_cleanly. */
SYSCTL_INT(_vfs_generic, OID_AUTO, root_unmounted_cleanly, CTLFLAG_RD, &hfs_root_unmounted_cleanly, 0, "Root filesystem was unmounted cleanly");
1209
1210 /*
1211 * Common code for mount and mountroot
1212 */
1213 int
1214 hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
1215 int journal_replay_only, vfs_context_t context)
1216 {
1217 struct proc *p = vfs_context_proc(context);
1218 int retval = E_NONE;
1219 struct hfsmount *hfsmp = NULL;
1220 struct buf *bp;
1221 dev_t dev;
1222 HFSMasterDirectoryBlock *mdbp = NULL;
1223 int ronly;
1224 #if QUOTA
1225 int i;
1226 #endif
1227 int mntwrapper;
1228 kauth_cred_t cred;
1229 u_int64_t disksize;
1230 daddr64_t log_blkcnt;
1231 u_int32_t log_blksize;
1232 u_int32_t phys_blksize;
1233 u_int32_t minblksize;
1234 u_int32_t iswritable;
1235 daddr64_t mdb_offset;
1236 int isvirtual = 0;
1237 int isroot = 0;
1238 u_int32_t device_features = 0;
1239 int isssd;
1240
1241 if (args == NULL) {
1242 /* only hfs_mountroot passes us NULL as the 'args' argument */
1243 isroot = 1;
1244 }
1245
1246 ronly = vfs_isrdonly(mp);
1247 dev = vnode_specrdev(devvp);
1248 cred = p ? vfs_context_ucred(context) : NOCRED;
1249 mntwrapper = 0;
1250
1251 bp = NULL;
1252 hfsmp = NULL;
1253 mdbp = NULL;
1254 minblksize = kHFSBlockSize;
1255
1256 /* Advisory locking should be handled at the VFS layer */
1257 vfs_setlocklocal(mp);
1258
1259 /* Get the logical block size (treated as physical block size everywhere) */
1260 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&log_blksize, 0, context)) {
1261 if (HFS_MOUNT_DEBUG) {
1262 printf("hfs_mountfs: DKIOCGETBLOCKSIZE failed\n");
1263 }
1264 retval = ENXIO;
1265 goto error_exit;
1266 }
1267 if (log_blksize == 0 || log_blksize > 1024*1024*1024) {
1268 printf("hfs: logical block size 0x%x looks bad. Not mounting.\n", log_blksize);
1269 retval = ENXIO;
1270 goto error_exit;
1271 }
1272
1273 /* Get the physical block size. */
1274 retval = VNOP_IOCTL(devvp, DKIOCGETPHYSICALBLOCKSIZE, (caddr_t)&phys_blksize, 0, context);
1275 if (retval) {
1276 if ((retval != ENOTSUP) && (retval != ENOTTY)) {
1277 if (HFS_MOUNT_DEBUG) {
1278 printf("hfs_mountfs: DKIOCGETPHYSICALBLOCKSIZE failed\n");
1279 }
1280 retval = ENXIO;
1281 goto error_exit;
1282 }
1283 /* If device does not support this ioctl, assume that physical
1284 * block size is same as logical block size
1285 */
1286 phys_blksize = log_blksize;
1287 }
1288 if (phys_blksize == 0 || phys_blksize > MAXBSIZE) {
1289 printf("hfs: physical block size 0x%x looks bad. Not mounting.\n", phys_blksize);
1290 retval = ENXIO;
1291 goto error_exit;
1292 }
1293
1294 /* Switch to 512 byte sectors (temporarily) */
1295 if (log_blksize > 512) {
1296 u_int32_t size512 = 512;
1297
1298 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&size512, FWRITE, context)) {
1299 if (HFS_MOUNT_DEBUG) {
1300 printf("hfs_mountfs: DKIOCSETBLOCKSIZE failed \n");
1301 }
1302 retval = ENXIO;
1303 goto error_exit;
1304 }
1305 }
1306 /* Get the number of 512 byte physical blocks. */
1307 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) {
1308 /* resetting block size may fail if getting block count did */
1309 (void)VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context);
1310 if (HFS_MOUNT_DEBUG) {
1311 printf("hfs_mountfs: DKIOCGETBLOCKCOUNT failed\n");
1312 }
1313 retval = ENXIO;
1314 goto error_exit;
1315 }
1316 /* Compute an accurate disk size (i.e. within 512 bytes) */
1317 disksize = (u_int64_t)log_blkcnt * (u_int64_t)512;
1318
1319 /*
1320 * On Tiger it is not necessary to switch the device
1321 * block size to be 4k if there are more than 31-bits
1322 * worth of blocks but to insure compatibility with
1323 * pre-Tiger systems we have to do it.
1324 *
1325 * If the device size is not a multiple of 4K (8 * 512), then
1326 * switching the logical block size isn't going to help because
1327 * we will be unable to write the alternate volume header.
1328 * In this case, just leave the logical block size unchanged.
1329 */
1330 if (log_blkcnt > 0x000000007fffffff && (log_blkcnt & 7) == 0) {
1331 minblksize = log_blksize = 4096;
1332 if (phys_blksize < log_blksize)
1333 phys_blksize = log_blksize;
1334 }
1335
1336 /*
1337 * The cluster layer is not currently prepared to deal with a logical
1338 * block size larger than the system's page size. (It can handle
1339 * blocks per page, but not multiple pages per block.) So limit the
1340 * logical block size to the page size.
1341 */
1342 if (log_blksize > PAGE_SIZE) {
1343 log_blksize = PAGE_SIZE;
1344 }
1345
1346 /* Now switch to our preferred physical block size. */
1347 if (log_blksize > 512) {
1348 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context)) {
1349 if (HFS_MOUNT_DEBUG) {
1350 printf("hfs_mountfs: DKIOCSETBLOCKSIZE (2) failed\n");
1351 }
1352 retval = ENXIO;
1353 goto error_exit;
1354 }
1355 /* Get the count of physical blocks. */
1356 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) {
1357 if (HFS_MOUNT_DEBUG) {
1358 printf("hfs_mountfs: DKIOCGETBLOCKCOUNT (2) failed\n");
1359 }
1360 retval = ENXIO;
1361 goto error_exit;
1362 }
1363 }
1364 /*
1365 * At this point:
1366 * minblksize is the minimum physical block size
1367 * log_blksize has our preferred physical block size
1368 * log_blkcnt has the total number of physical blocks
1369 */
1370
1371 mdb_offset = (daddr64_t)HFS_PRI_SECTOR(log_blksize);
1372 if ((retval = (int)buf_meta_bread(devvp,
1373 HFS_PHYSBLK_ROUNDDOWN(mdb_offset, (phys_blksize/log_blksize)),
1374 phys_blksize, cred, &bp))) {
1375 if (HFS_MOUNT_DEBUG) {
1376 printf("hfs_mountfs: buf_meta_bread failed with %d\n", retval);
1377 }
1378 goto error_exit;
1379 }
1380 MALLOC(mdbp, HFSMasterDirectoryBlock *, kMDBSize, M_TEMP, M_WAITOK);
1381 if (mdbp == NULL) {
1382 retval = ENOMEM;
1383 if (HFS_MOUNT_DEBUG) {
1384 printf("hfs_mountfs: MALLOC failed\n");
1385 }
1386 goto error_exit;
1387 }
1388 bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize), mdbp, kMDBSize);
1389 buf_brelse(bp);
1390 bp = NULL;
1391
1392 MALLOC(hfsmp, struct hfsmount *, sizeof(struct hfsmount), M_HFSMNT, M_WAITOK);
1393 if (hfsmp == NULL) {
1394 if (HFS_MOUNT_DEBUG) {
1395 printf("hfs_mountfs: MALLOC (2) failed\n");
1396 }
1397 retval = ENOMEM;
1398 goto error_exit;
1399 }
1400 bzero(hfsmp, sizeof(struct hfsmount));
1401
1402 hfs_chashinit_finish(hfsmp);
1403
1404 /* Init the ID lookup hashtable */
1405 hfs_idhash_init (hfsmp);
1406
1407 /*
1408 * See if the disk supports unmap (trim).
1409 *
1410 * NOTE: vfs_init_io_attributes has not been called yet, so we can't use the io_flags field
1411 * returned by vfs_ioattr. We need to call VNOP_IOCTL ourselves.
1412 */
1413 if (VNOP_IOCTL(devvp, DKIOCGETFEATURES, (caddr_t)&device_features, 0, context) == 0) {
1414 if (device_features & DK_FEATURE_UNMAP) {
1415 hfsmp->hfs_flags |= HFS_UNMAP;
1416 }
1417 }
1418
1419 /*
1420 * See if the disk is a solid state device, too. We need this to decide what to do about
1421 * hotfiles.
1422 */
1423 if (VNOP_IOCTL(devvp, DKIOCISSOLIDSTATE, (caddr_t)&isssd, 0, context) == 0) {
1424 if (isssd) {
1425 hfsmp->hfs_flags |= HFS_SSD;
1426 }
1427 }
1428
1429
1430 /*
1431 * Init the volume information structure
1432 */
1433
1434 lck_mtx_init(&hfsmp->hfs_mutex, hfs_mutex_group, hfs_lock_attr);
1435 lck_mtx_init(&hfsmp->hfc_mutex, hfs_mutex_group, hfs_lock_attr);
1436 lck_rw_init(&hfsmp->hfs_global_lock, hfs_rwlock_group, hfs_lock_attr);
1437 lck_rw_init(&hfsmp->hfs_insync, hfs_rwlock_group, hfs_lock_attr);
1438 lck_spin_init(&hfsmp->vcbFreeExtLock, hfs_spinlock_group, hfs_lock_attr);
1439
1440 vfs_setfsprivate(mp, hfsmp);
1441 hfsmp->hfs_mp = mp; /* Make VFSTOHFS work */
1442 hfsmp->hfs_raw_dev = vnode_specrdev(devvp);
1443 hfsmp->hfs_devvp = devvp;
1444 vnode_ref(devvp); /* Hold a ref on the device, dropped when hfsmp is freed. */
1445 hfsmp->hfs_logical_block_size = log_blksize;
1446 hfsmp->hfs_logical_block_count = log_blkcnt;
1447 hfsmp->hfs_logical_bytes = (uint64_t) log_blksize * (uint64_t) log_blkcnt;
1448 hfsmp->hfs_physical_block_size = phys_blksize;
1449 hfsmp->hfs_log_per_phys = (phys_blksize / log_blksize);
1450 hfsmp->hfs_flags |= HFS_WRITEABLE_MEDIA;
1451 if (ronly)
1452 hfsmp->hfs_flags |= HFS_READ_ONLY;
1453 if (((unsigned int)vfs_flags(mp)) & MNT_UNKNOWNPERMISSIONS)
1454 hfsmp->hfs_flags |= HFS_UNKNOWN_PERMS;
1455
1456 #if QUOTA
1457 for (i = 0; i < MAXQUOTAS; i++)
1458 dqfileinit(&hfsmp->hfs_qfiles[i]);
1459 #endif
1460
1461 if (args) {
1462 hfsmp->hfs_uid = (args->hfs_uid == (uid_t)VNOVAL) ? UNKNOWNUID : args->hfs_uid;
1463 if (hfsmp->hfs_uid == 0xfffffffd) hfsmp->hfs_uid = UNKNOWNUID;
1464 hfsmp->hfs_gid = (args->hfs_gid == (gid_t)VNOVAL) ? UNKNOWNGID : args->hfs_gid;
1465 if (hfsmp->hfs_gid == 0xfffffffd) hfsmp->hfs_gid = UNKNOWNGID;
1466 vfs_setowner(mp, hfsmp->hfs_uid, hfsmp->hfs_gid); /* tell the VFS */
1467 if (args->hfs_mask != (mode_t)VNOVAL) {
1468 hfsmp->hfs_dir_mask = args->hfs_mask & ALLPERMS;
1469 if (args->flags & HFSFSMNT_NOXONFILES) {
1470 hfsmp->hfs_file_mask = (args->hfs_mask & DEFFILEMODE);
1471 } else {
1472 hfsmp->hfs_file_mask = args->hfs_mask & ALLPERMS;
1473 }
1474 } else {
1475 hfsmp->hfs_dir_mask = UNKNOWNPERMISSIONS & ALLPERMS; /* 0777: rwx---rwx */
1476 hfsmp->hfs_file_mask = UNKNOWNPERMISSIONS & DEFFILEMODE; /* 0666: no --x by default? */
1477 }
1478 if ((args->flags != (int)VNOVAL) && (args->flags & HFSFSMNT_WRAPPER))
1479 mntwrapper = 1;
1480 } else {
1481 /* Even w/o explicit mount arguments, MNT_UNKNOWNPERMISSIONS requires setting up uid, gid, and mask: */
1482 if (((unsigned int)vfs_flags(mp)) & MNT_UNKNOWNPERMISSIONS) {
1483 hfsmp->hfs_uid = UNKNOWNUID;
1484 hfsmp->hfs_gid = UNKNOWNGID;
1485 vfs_setowner(mp, hfsmp->hfs_uid, hfsmp->hfs_gid); /* tell the VFS */
1486 hfsmp->hfs_dir_mask = UNKNOWNPERMISSIONS & ALLPERMS; /* 0777: rwx---rwx */
1487 hfsmp->hfs_file_mask = UNKNOWNPERMISSIONS & DEFFILEMODE; /* 0666: no --x by default? */
1488 }
1489 }
1490
1491 /* Find out if disk media is writable. */
1492 if (VNOP_IOCTL(devvp, DKIOCISWRITABLE, (caddr_t)&iswritable, 0, context) == 0) {
1493 if (iswritable)
1494 hfsmp->hfs_flags |= HFS_WRITEABLE_MEDIA;
1495 else
1496 hfsmp->hfs_flags &= ~HFS_WRITEABLE_MEDIA;
1497 }
1498
1499 // record the current time at which we're mounting this volume
1500 struct timeval tv;
1501 microtime(&tv);
1502 hfsmp->hfs_mount_time = tv.tv_sec;
1503
1504 /* Mount a standard HFS disk */
1505 if ((SWAP_BE16(mdbp->drSigWord) == kHFSSigWord) &&
1506 (mntwrapper || (SWAP_BE16(mdbp->drEmbedSigWord) != kHFSPlusSigWord))) {
1507 #if CONFIG_HFS_STD
1508 /* On 10.6 and beyond, non read-only mounts for HFS standard vols get rejected */
1509 if (vfs_isrdwr(mp)) {
1510 retval = EROFS;
1511 goto error_exit;
1512 }
1513
1514 printf("hfs_mountfs: Mounting HFS Standard volumes was deprecated in Mac OS 10.7 \n");
1515
1516 /* Treat it as if it's read-only and not writeable */
1517 hfsmp->hfs_flags |= HFS_READ_ONLY;
1518 hfsmp->hfs_flags &= ~HFS_WRITEABLE_MEDIA;
1519
1520 /* If only journal replay is requested, exit immediately */
1521 if (journal_replay_only) {
1522 retval = 0;
1523 goto error_exit;
1524 }
1525
1526 if ((vfs_flags(mp) & MNT_ROOTFS)) {
1527 retval = EINVAL; /* Cannot root from HFS standard disks */
1528 goto error_exit;
1529 }
1530 /* HFS disks can only use 512 byte physical blocks */
1531 if (log_blksize > kHFSBlockSize) {
1532 log_blksize = kHFSBlockSize;
1533 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context)) {
1534 retval = ENXIO;
1535 goto error_exit;
1536 }
1537 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) {
1538 retval = ENXIO;
1539 goto error_exit;
1540 }
1541 hfsmp->hfs_logical_block_size = log_blksize;
1542 hfsmp->hfs_logical_block_count = log_blkcnt;
1543 hfsmp->hfs_logical_bytes = (uint64_t) log_blksize * (uint64_t) log_blkcnt;
1544 hfsmp->hfs_physical_block_size = log_blksize;
1545 hfsmp->hfs_log_per_phys = 1;
1546 }
1547 if (args) {
1548 hfsmp->hfs_encoding = args->hfs_encoding;
1549 HFSTOVCB(hfsmp)->volumeNameEncodingHint = args->hfs_encoding;
1550
1551 /* establish the timezone */
1552 gTimeZone = args->hfs_timezone;
1553 }
1554
1555 retval = hfs_getconverter(hfsmp->hfs_encoding, &hfsmp->hfs_get_unicode,
1556 &hfsmp->hfs_get_hfsname);
1557 if (retval)
1558 goto error_exit;
1559
1560 retval = hfs_MountHFSVolume(hfsmp, mdbp, p);
1561 if (retval)
1562 (void) hfs_relconverter(hfsmp->hfs_encoding);
1563 #else
1564 /* On platforms where HFS Standard is not supported, deny the mount altogether */
1565 retval = EINVAL;
1566 goto error_exit;
1567 #endif
1568
1569 }
1570 else { /* Mount an HFS Plus disk */
1571 HFSPlusVolumeHeader *vhp;
1572 off_t embeddedOffset;
1573 int jnl_disable = 0;
1574
1575 /* Get the embedded Volume Header */
1576 if (SWAP_BE16(mdbp->drEmbedSigWord) == kHFSPlusSigWord) {
1577 embeddedOffset = SWAP_BE16(mdbp->drAlBlSt) * kHFSBlockSize;
1578 embeddedOffset += (u_int64_t)SWAP_BE16(mdbp->drEmbedExtent.startBlock) *
1579 (u_int64_t)SWAP_BE32(mdbp->drAlBlkSiz);
1580
1581 /*
1582 * If the embedded volume doesn't start on a block
1583 * boundary, then switch the device to a 512-byte
1584 * block size so everything will line up on a block
1585 * boundary.
1586 */
1587 if ((embeddedOffset % log_blksize) != 0) {
1588 printf("hfs_mountfs: embedded volume offset not"
1589 " a multiple of physical block size (%d);"
1590 " switching to 512\n", log_blksize);
1591 log_blksize = 512;
1592 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE,
1593 (caddr_t)&log_blksize, FWRITE, context)) {
1594
1595 if (HFS_MOUNT_DEBUG) {
1596 printf("hfs_mountfs: DKIOCSETBLOCKSIZE (3) failed\n");
1597 }
1598 retval = ENXIO;
1599 goto error_exit;
1600 }
1601 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT,
1602 (caddr_t)&log_blkcnt, 0, context)) {
1603 if (HFS_MOUNT_DEBUG) {
1604 printf("hfs_mountfs: DKIOCGETBLOCKCOUNT (3) failed\n");
1605 }
1606 retval = ENXIO;
1607 goto error_exit;
1608 }
1609 /* Note: relative block count adjustment */
1610 hfsmp->hfs_logical_block_count *=
1611 hfsmp->hfs_logical_block_size / log_blksize;
1612
1613 /* Update logical /physical block size */
1614 hfsmp->hfs_logical_block_size = log_blksize;
1615 hfsmp->hfs_physical_block_size = log_blksize;
1616
1617 phys_blksize = log_blksize;
1618 hfsmp->hfs_log_per_phys = 1;
1619 }
1620
1621 disksize = (u_int64_t)SWAP_BE16(mdbp->drEmbedExtent.blockCount) *
1622 (u_int64_t)SWAP_BE32(mdbp->drAlBlkSiz);
1623
1624 hfsmp->hfs_logical_block_count = disksize / log_blksize;
1625
1626 hfsmp->hfs_logical_bytes = (uint64_t) hfsmp->hfs_logical_block_count * (uint64_t) hfsmp->hfs_logical_block_size;
1627
1628 mdb_offset = (daddr64_t)((embeddedOffset / log_blksize) + HFS_PRI_SECTOR(log_blksize));
1629 retval = (int)buf_meta_bread(devvp, HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
1630 phys_blksize, cred, &bp);
1631 if (retval) {
1632 if (HFS_MOUNT_DEBUG) {
1633 printf("hfs_mountfs: buf_meta_bread (2) failed with %d\n", retval);
1634 }
1635 goto error_exit;
1636 }
1637 bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize), mdbp, 512);
1638 buf_brelse(bp);
1639 bp = NULL;
1640 vhp = (HFSPlusVolumeHeader*) mdbp;
1641
1642 }
1643 else { /* pure HFS+ */
1644 embeddedOffset = 0;
1645 vhp = (HFSPlusVolumeHeader*) mdbp;
1646 }
1647
1648 if (isroot) {
1649 hfs_root_unmounted_cleanly = ((SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) != 0);
1650 }
1651
1652 /*
1653 * On inconsistent disks, do not allow read-write mount
1654 * unless it is the boot volume being mounted. We also
1655 * always want to replay the journal if the journal_replay_only
1656 * flag is set because that will (most likely) get the
1657 * disk into a consistent state before fsck_hfs starts
1658 * looking at it.
1659 */
1660 if ( !(vfs_flags(mp) & MNT_ROOTFS)
1661 && (SWAP_BE32(vhp->attributes) & kHFSVolumeInconsistentMask)
1662 && !journal_replay_only
1663 && !(hfsmp->hfs_flags & HFS_READ_ONLY)) {
1664
1665 if (HFS_MOUNT_DEBUG) {
1666 printf("hfs_mountfs: failed to mount non-root inconsistent disk\n");
1667 }
1668 retval = EINVAL;
1669 goto error_exit;
1670 }
1671
1672
1673 // XXXdbg
1674 //
1675 hfsmp->jnl = NULL;
1676 hfsmp->jvp = NULL;
1677 if (args != NULL && (args->flags & HFSFSMNT_EXTENDED_ARGS) &&
1678 args->journal_disable) {
1679 jnl_disable = 1;
1680 }
1681
1682 //
1683 // We only initialize the journal here if the last person
1684 // to mount this volume was journaling aware. Otherwise
1685 // we delay journal initialization until later at the end
1686 // of hfs_MountHFSPlusVolume() because the last person who
1687 // mounted it could have messed things up behind our back
1688 // (so we need to go find the .journal file, make sure it's
1689 // the right size, re-sync up if it was moved, etc).
1690 //
1691 if ( (SWAP_BE32(vhp->lastMountedVersion) == kHFSJMountVersion)
1692 && (SWAP_BE32(vhp->attributes) & kHFSVolumeJournaledMask)
1693 && !jnl_disable) {
1694
1695 // if we're able to init the journal, mark the mount
1696 // point as journaled.
1697 //
1698 if ((retval = hfs_early_journal_init(hfsmp, vhp, args, embeddedOffset, mdb_offset, mdbp, cred)) == 0) {
1699 vfs_setflags(mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
1700 } else {
1701 if (retval == EROFS) {
1702 // EROFS is a special error code that means the volume has an external
1703 // journal which we couldn't find. in that case we do not want to
1704 // rewrite the volume header - we'll just refuse to mount the volume.
1705 if (HFS_MOUNT_DEBUG) {
1706 printf("hfs_mountfs: hfs_early_journal_init indicated external jnl \n");
1707 }
1708 retval = EINVAL;
1709 goto error_exit;
1710 }
1711
1712 // if the journal failed to open, then set the lastMountedVersion
1713 // to be "FSK!" which fsck_hfs will see and force the fsck instead
1714 // of just bailing out because the volume is journaled.
1715 if (!ronly) {
1716 if (HFS_MOUNT_DEBUG) {
1717 printf("hfs_mountfs: hfs_early_journal_init failed, setting to FSK \n");
1718 }
1719
1720 HFSPlusVolumeHeader *jvhp;
1721
1722 hfsmp->hfs_flags |= HFS_NEED_JNL_RESET;
1723
1724 if (mdb_offset == 0) {
1725 mdb_offset = (daddr64_t)((embeddedOffset / log_blksize) + HFS_PRI_SECTOR(log_blksize));
1726 }
1727
1728 bp = NULL;
1729 retval = (int)buf_meta_bread(devvp,
1730 HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
1731 phys_blksize, cred, &bp);
1732 if (retval == 0) {
1733 jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize));
1734
1735 if (SWAP_BE16(jvhp->signature) == kHFSPlusSigWord || SWAP_BE16(jvhp->signature) == kHFSXSigWord) {
1736 printf ("hfs(1): Journal replay fail. Writing lastMountVersion as FSK!\n");
1737 jvhp->lastMountedVersion = SWAP_BE32(kFSKMountVersion);
1738 buf_bwrite(bp);
1739 } else {
1740 buf_brelse(bp);
1741 }
1742 bp = NULL;
1743 } else if (bp) {
1744 buf_brelse(bp);
1745 // clear this so the error exit path won't try to use it
1746 bp = NULL;
1747 }
1748 }
1749
1750 // if this isn't the root device just bail out.
1751 // If it is the root device we just continue on
1752 // in the hopes that fsck_hfs will be able to
1753 // fix any damage that exists on the volume.
1754 if ( !(vfs_flags(mp) & MNT_ROOTFS)) {
1755 if (HFS_MOUNT_DEBUG) {
1756 printf("hfs_mountfs: hfs_early_journal_init failed, erroring out \n");
1757 }
1758 retval = EINVAL;
1759 goto error_exit;
1760 }
1761 }
1762 }
1763 // XXXdbg
1764
1765 /* Either the journal is replayed successfully, or there
1766 * was nothing to replay, or no journal exists. In any case,
1767 * return success.
1768 */
1769 if (journal_replay_only) {
1770 retval = 0;
1771 goto error_exit;
1772 }
1773
1774 (void) hfs_getconverter(0, &hfsmp->hfs_get_unicode, &hfsmp->hfs_get_hfsname);
1775
1776 retval = hfs_MountHFSPlusVolume(hfsmp, vhp, embeddedOffset, disksize, p, args, cred);
1777 /*
1778 * If the backend didn't like our physical blocksize
1779 * then retry with physical blocksize of 512.
1780 */
1781 if ((retval == ENXIO) && (log_blksize > 512) && (log_blksize != minblksize)) {
1782 printf("hfs_mountfs: could not use physical block size "
1783 "(%d) switching to 512\n", log_blksize);
1784 log_blksize = 512;
1785 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context)) {
1786 if (HFS_MOUNT_DEBUG) {
1787 printf("hfs_mountfs: DKIOCSETBLOCKSIZE (4) failed \n");
1788 }
1789 retval = ENXIO;
1790 goto error_exit;
1791 }
1792 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) {
1793 if (HFS_MOUNT_DEBUG) {
1794 printf("hfs_mountfs: DKIOCGETBLOCKCOUNT (4) failed \n");
1795 }
1796 retval = ENXIO;
1797 goto error_exit;
1798 }
1799 devvp->v_specsize = log_blksize;
1800 /* Note: relative block count adjustment (in case this is an embedded volume). */
1801 hfsmp->hfs_logical_block_count *= hfsmp->hfs_logical_block_size / log_blksize;
1802 hfsmp->hfs_logical_block_size = log_blksize;
1803 hfsmp->hfs_log_per_phys = hfsmp->hfs_physical_block_size / log_blksize;
1804
1805 hfsmp->hfs_logical_bytes = (uint64_t) hfsmp->hfs_logical_block_count * (uint64_t) hfsmp->hfs_logical_block_size;
1806
1807 if (hfsmp->jnl && hfsmp->jvp == devvp) {
1808 // close and re-open this with the new block size
1809 journal_close(hfsmp->jnl);
1810 hfsmp->jnl = NULL;
1811 if (hfs_early_journal_init(hfsmp, vhp, args, embeddedOffset, mdb_offset, mdbp, cred) == 0) {
1812 vfs_setflags(mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
1813 } else {
1814 // if the journal failed to open, then set the lastMountedVersion
1815 // to be "FSK!" which fsck_hfs will see and force the fsck instead
1816 // of just bailing out because the volume is journaled.
1817 if (!ronly) {
1818 if (HFS_MOUNT_DEBUG) {
1819 printf("hfs_mountfs: hfs_early_journal_init (2) resetting.. \n");
1820 }
1821 HFSPlusVolumeHeader *jvhp;
1822
1823 hfsmp->hfs_flags |= HFS_NEED_JNL_RESET;
1824
1825 if (mdb_offset == 0) {
1826 mdb_offset = (daddr64_t)((embeddedOffset / log_blksize) + HFS_PRI_SECTOR(log_blksize));
1827 }
1828
1829 bp = NULL;
1830 retval = (int)buf_meta_bread(devvp, HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
1831 phys_blksize, cred, &bp);
1832 if (retval == 0) {
1833 jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize));
1834
1835 if (SWAP_BE16(jvhp->signature) == kHFSPlusSigWord || SWAP_BE16(jvhp->signature) == kHFSXSigWord) {
1836 printf ("hfs(2): Journal replay fail. Writing lastMountVersion as FSK!\n");
1837 jvhp->lastMountedVersion = SWAP_BE32(kFSKMountVersion);
1838 buf_bwrite(bp);
1839 } else {
1840 buf_brelse(bp);
1841 }
1842 bp = NULL;
1843 } else if (bp) {
1844 buf_brelse(bp);
1845 // clear this so the error exit path won't try to use it
1846 bp = NULL;
1847 }
1848 }
1849
1850 // if this isn't the root device just bail out.
1851 // If it is the root device we just continue on
1852 // in the hopes that fsck_hfs will be able to
1853 // fix any damage that exists on the volume.
1854 if ( !(vfs_flags(mp) & MNT_ROOTFS)) {
1855 if (HFS_MOUNT_DEBUG) {
1856 printf("hfs_mountfs: hfs_early_journal_init (2) failed \n");
1857 }
1858 retval = EINVAL;
1859 goto error_exit;
1860 }
1861 }
1862 }
1863
1864 /* Try again with a smaller block size... */
1865 retval = hfs_MountHFSPlusVolume(hfsmp, vhp, embeddedOffset, disksize, p, args, cred);
1866 if (retval && HFS_MOUNT_DEBUG) {
1867 printf("hfs_MountHFSPlusVolume (late) returned %d\n",retval);
1868 }
1869 }
1870 if (retval)
1871 (void) hfs_relconverter(0);
1872 }
1873
1874 // save off a snapshot of the mtime from the previous mount
1875 // (for matador).
1876 hfsmp->hfs_last_mounted_mtime = hfsmp->hfs_mtime;
1877
1878 if ( retval ) {
1879 if (HFS_MOUNT_DEBUG) {
1880 printf("hfs_mountfs: encountered failure %d \n", retval);
1881 }
1882 goto error_exit;
1883 }
1884
1885 mp->mnt_vfsstat.f_fsid.val[0] = dev;
1886 mp->mnt_vfsstat.f_fsid.val[1] = vfs_typenum(mp);
1887 vfs_setmaxsymlen(mp, 0);
1888
1889 mp->mnt_vtable->vfc_vfsflags |= VFC_VFSNATIVEXATTR;
1890 #if NAMEDSTREAMS
1891 mp->mnt_kern_flag |= MNTK_NAMED_STREAMS;
1892 #endif
1893 if ((hfsmp->hfs_flags & HFS_STANDARD) == 0 ) {
1894 /* Tell VFS that we support directory hard links. */
1895 mp->mnt_vtable->vfc_vfsflags |= VFC_VFSDIRLINKS;
1896 }
1897 #if CONFIG_HFS_STD
1898 else {
1899 /* HFS standard doesn't support extended readdir! */
1900 mount_set_noreaddirext (mp);
1901 }
1902 #endif
1903
1904 if (args) {
1905 /*
1906 * Set the free space warning levels for a non-root volume:
1907 *
1908 * Set the "danger" limit to 1% of the volume size or 100MB, whichever
1909 * is less. Set the "warning" limit to 2% of the volume size or 150MB,
1910 * whichever is less. And last, set the "desired" freespace level to
1911 * to 3% of the volume size or 200MB, whichever is less.
1912 */
1913 hfsmp->hfs_freespace_notify_dangerlimit =
1914 MIN(HFS_VERYLOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize,
1915 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_VERYLOWDISKTRIGGERFRACTION);
1916 hfsmp->hfs_freespace_notify_warninglimit =
1917 MIN(HFS_LOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize,
1918 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_LOWDISKTRIGGERFRACTION);
1919 hfsmp->hfs_freespace_notify_desiredlevel =
1920 MIN(HFS_LOWDISKSHUTOFFLEVEL / HFSTOVCB(hfsmp)->blockSize,
1921 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_LOWDISKSHUTOFFFRACTION);
1922 } else {
1923 /*
1924 * Set the free space warning levels for the root volume:
1925 *
1926 * Set the "danger" limit to 5% of the volume size or 512MB, whichever
1927 * is less. Set the "warning" limit to 10% of the volume size or 1GB,
1928 * whichever is less. And last, set the "desired" freespace level to
1929 * to 11% of the volume size or 1.25GB, whichever is less.
1930 */
1931 hfsmp->hfs_freespace_notify_dangerlimit =
1932 MIN(HFS_ROOTVERYLOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize,
1933 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_ROOTVERYLOWDISKTRIGGERFRACTION);
1934 hfsmp->hfs_freespace_notify_warninglimit =
1935 MIN(HFS_ROOTLOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize,
1936 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_ROOTLOWDISKTRIGGERFRACTION);
1937 hfsmp->hfs_freespace_notify_desiredlevel =
1938 MIN(HFS_ROOTLOWDISKSHUTOFFLEVEL / HFSTOVCB(hfsmp)->blockSize,
1939 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_ROOTLOWDISKSHUTOFFFRACTION);
1940 };
1941
1942 /* Check if the file system exists on virtual device, like disk image */
1943 if (VNOP_IOCTL(devvp, DKIOCISVIRTUAL, (caddr_t)&isvirtual, 0, context) == 0) {
1944 if (isvirtual) {
1945 hfsmp->hfs_flags |= HFS_VIRTUAL_DEVICE;
1946 }
1947 }
1948
1949 /* do not allow ejectability checks on the root device */
1950 if (isroot == 0) {
1951 if ((hfsmp->hfs_flags & HFS_VIRTUAL_DEVICE) == 0 &&
1952 IOBSDIsMediaEjectable(mp->mnt_vfsstat.f_mntfromname)) {
1953 hfsmp->hfs_syncer = thread_call_allocate(hfs_syncer, hfsmp);
1954 if (hfsmp->hfs_syncer == NULL) {
1955 printf("hfs: failed to allocate syncer thread callback for %s (%s)\n",
1956 mp->mnt_vfsstat.f_mntfromname, mp->mnt_vfsstat.f_mntonname);
1957 }
1958 }
1959 }
1960
1961 printf("hfs: mounted %s on device %s\n", (hfsmp->vcbVN ? (const char*) hfsmp->vcbVN : "unknown"),
1962 (devvp->v_name ? devvp->v_name : (isroot ? "root_device": "unknown device")));
1963
1964 /*
1965 * Start looking for free space to drop below this level and generate a
1966 * warning immediately if needed:
1967 */
1968 hfsmp->hfs_notification_conditions = 0;
1969 hfs_generate_volume_notifications(hfsmp);
1970
1971 if (ronly == 0) {
1972 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
1973 }
1974 FREE(mdbp, M_TEMP);
1975 return (0);
1976
1977 error_exit:
1978 if (bp)
1979 buf_brelse(bp);
1980 if (mdbp)
1981 FREE(mdbp, M_TEMP);
1982
1983 if (hfsmp && hfsmp->jvp && hfsmp->jvp != hfsmp->hfs_devvp) {
1984 vnode_clearmountedon(hfsmp->jvp);
1985 (void)VNOP_CLOSE(hfsmp->jvp, ronly ? FREAD : FREAD|FWRITE, vfs_context_kernel());
1986 hfsmp->jvp = NULL;
1987 }
1988 if (hfsmp) {
1989 if (hfsmp->hfs_devvp) {
1990 vnode_rele(hfsmp->hfs_devvp);
1991 }
1992 hfs_locks_destroy(hfsmp);
1993 hfs_delete_chash(hfsmp);
1994 hfs_idhash_destroy (hfsmp);
1995
1996 FREE(hfsmp, M_HFSMNT);
1997 vfs_setfsprivate(mp, NULL);
1998 }
1999 return (retval);
2000 }
2001
2002
2003 /*
2004 * Make a filesystem operational.
2005 * Nothing to do at the moment.
2006 */
2007 /* ARGSUSED */
2008 static int
2009 hfs_start(__unused struct mount *mp, __unused int flags, __unused vfs_context_t context)
2010 {
2011 return (0);
2012 }
2013
2014
/*
 * unmount system call
 *
 * Tears down an HFS mount: flushes all files, stops the syncer thread,
 * pushes the b-trees / bitmap / volume header to disk inside a transaction,
 * flushes and closes the journal, and finally releases the device vnode and
 * frees the hfsmount.  With MNT_FORCE, most intermediate errors are ignored
 * so that the unmount proceeds regardless.
 */
int
hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context)
{
	struct proc *p = vfs_context_proc(context);
	struct hfsmount *hfsmp = VFSTOHFS(mp);
	int retval = E_NONE;
	int flags;		/* flags handed to hfs_flushfiles() */
	int force;		/* non-zero when MNT_FORCE was requested */
	int started_tr = 0;	/* non-zero once hfs_start_transaction() succeeded */

	flags = 0;
	force = 0;
	if (mntflags & MNT_FORCE) {
		flags |= FORCECLOSE;
		force = 1;
	}

	printf("hfs: unmount initiated on %s on device %s\n",
		(hfsmp->vcbVN ? (const char*) hfsmp->vcbVN : "unknown"),
		(hfsmp->hfs_devvp ? ((hfsmp->hfs_devvp->v_name ? hfsmp->hfs_devvp->v_name : "unknown device")) : "unknown device"));

	/* A flush failure aborts the unmount unless it is forced. */
	if ((retval = hfs_flushfiles(mp, flags, p)) && !force)
 		return (retval);

	if (hfsmp->hfs_flags & HFS_METADATA_ZONE)
		(void) hfs_recording_suspend(hfsmp);

	// Tidy up the syncer
	if (hfsmp->hfs_syncer)
	{
		hfs_syncer_lock(hfsmp);

		/* First, make sure everything else knows we don't want any more
		   requests queued. */
		thread_call_t syncer = hfsmp->hfs_syncer;
		hfsmp->hfs_syncer = NULL;

		hfs_syncer_unlock(hfsmp);

		// Now deal with requests that are outstanding
		if (hfsmp->hfs_sync_incomplete) {
			if (thread_call_cancel(syncer)) {
				// We managed to cancel the timer so we're done
				hfsmp->hfs_sync_incomplete = FALSE;
			} else {
				// Syncer must be running right now so we have to wait
				hfs_syncer_lock(hfsmp);
				while (hfsmp->hfs_sync_incomplete)
					hfs_syncer_wait(hfsmp);
				hfs_syncer_unlock(hfsmp);
			}
		}

		// Now we're safe to free the syncer
		thread_call_free(syncer);
	}

	/* Release the in-memory summary table, if one was built. */
	if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
		if (hfsmp->hfs_summary_table) {
			int err = 0;
			/*
			 * Take the bitmap lock to serialize against a concurrent bitmap scan still in progress
			 */
			if (hfsmp->hfs_allocation_vp) {
				err = hfs_lock (VTOC(hfsmp->hfs_allocation_vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
			}
			FREE (hfsmp->hfs_summary_table, M_TEMP);
			hfsmp->hfs_summary_table = NULL;
			hfsmp->hfs_flags &= ~HFS_SUMMARY_TABLE;

			if (err == 0 && hfsmp->hfs_allocation_vp){
				hfs_unlock (VTOC(hfsmp->hfs_allocation_vp));
			}

		}
	}

	/*
	 * Flush out the b-trees, volume bitmap and Volume Header
	 */
	if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) {
		retval = hfs_start_transaction(hfsmp);
		if (retval == 0) {
			started_tr = 1;
		} else if (!force) {
			goto err_exit;
		}

		/* Each system file is fsync'd under its cnode lock; any failure
		 * aborts the unmount unless forced. */
		if (hfsmp->hfs_startup_vp) {
			(void) hfs_lock(VTOC(hfsmp->hfs_startup_vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
			retval = hfs_fsync(hfsmp->hfs_startup_vp, MNT_WAIT, 0, p);
			hfs_unlock(VTOC(hfsmp->hfs_startup_vp));
			if (retval && !force)
				goto err_exit;
		}

		if (hfsmp->hfs_attribute_vp) {
			(void) hfs_lock(VTOC(hfsmp->hfs_attribute_vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
			retval = hfs_fsync(hfsmp->hfs_attribute_vp, MNT_WAIT, 0, p);
			hfs_unlock(VTOC(hfsmp->hfs_attribute_vp));
			if (retval && !force)
				goto err_exit;
		}

		(void) hfs_lock(VTOC(hfsmp->hfs_catalog_vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
		retval = hfs_fsync(hfsmp->hfs_catalog_vp, MNT_WAIT, 0, p);
		hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
		if (retval && !force)
			goto err_exit;

		(void) hfs_lock(VTOC(hfsmp->hfs_extents_vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
		retval = hfs_fsync(hfsmp->hfs_extents_vp, MNT_WAIT, 0, p);
		hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
		if (retval && !force)
			goto err_exit;

		if (hfsmp->hfs_allocation_vp) {
			(void) hfs_lock(VTOC(hfsmp->hfs_allocation_vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
			retval = hfs_fsync(hfsmp->hfs_allocation_vp, MNT_WAIT, 0, p);
			hfs_unlock(VTOC(hfsmp->hfs_allocation_vp));
			if (retval && !force)
				goto err_exit;
		}

		if (hfsmp->hfc_filevp && vnode_issystem(hfsmp->hfc_filevp)) {
			retval = hfs_fsync(hfsmp->hfc_filevp, MNT_WAIT, 0, p);
			if (retval && !force)
				goto err_exit;
		}

		/* If runtime corruption was detected, indicate that the volume
		 * was not unmounted cleanly.
		 */
		if (hfsmp->vcbAtrb & kHFSVolumeInconsistentMask) {
			HFSTOVCB(hfsmp)->vcbAtrb &= ~kHFSVolumeUnmountedMask;
		} else {
			HFSTOVCB(hfsmp)->vcbAtrb |= kHFSVolumeUnmountedMask;
		}

		if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
			int i;
			u_int32_t min_start = hfsmp->totalBlocks;

			// set the nextAllocation pointer to the smallest free block number
			// we've seen so on the next mount we won't rescan unnecessarily
			lck_spin_lock(&hfsmp->vcbFreeExtLock);
			for(i=0; i < (int)hfsmp->vcbFreeExtCnt; i++) {
				if (hfsmp->vcbFreeExt[i].startBlock < min_start) {
					min_start = hfsmp->vcbFreeExt[i].startBlock;
				}
			}
			lck_spin_unlock(&hfsmp->vcbFreeExtLock);
			if (min_start < hfsmp->nextAllocation) {
				hfsmp->nextAllocation = min_start;
			}
		}

		retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
		if (retval) {
			/* Could not write the header: clear the clean-unmount bit again. */
			HFSTOVCB(hfsmp)->vcbAtrb &= ~kHFSVolumeUnmountedMask;
			if (!force)
				goto err_exit;	/* could not flush everything */
		}

		if (started_tr) {
			hfs_end_transaction(hfsmp);
			started_tr = 0;
		}
	}

	if (hfsmp->jnl) {
		hfs_journal_flush(hfsmp, FALSE);
	}

	/*
	 *	Invalidate our caches and release metadata vnodes
	 */
	(void) hfsUnmount(hfsmp, p);

#if CONFIG_HFS_STD
	if (HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord) {
		(void) hfs_relconverter(hfsmp->hfs_encoding);
	}
#endif

	// XXXdbg
	if (hfsmp->jnl) {
		journal_close(hfsmp->jnl);
		hfsmp->jnl = NULL;
	}

	VNOP_FSYNC(hfsmp->hfs_devvp, MNT_WAIT, context);

	/* Close the separate journal device, if one was in use. */
	if (hfsmp->jvp && hfsmp->jvp != hfsmp->hfs_devvp) {
		vnode_clearmountedon(hfsmp->jvp);
		retval = VNOP_CLOSE(hfsmp->jvp,
		                    hfsmp->hfs_flags & HFS_READ_ONLY ? FREAD : FREAD|FWRITE,
		                    vfs_context_kernel());
		vnode_put(hfsmp->jvp);
		hfsmp->jvp = NULL;
	}
	// XXXdbg

	/*
	 * Last chance to dump unreferenced system files.
	 */
	(void) vflush(mp, NULLVP, FORCECLOSE);

#if HFS_SPARSE_DEV
	/* Drop our reference on the backing fs (if any). */
	if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) && hfsmp->hfs_backingfs_rootvp) {
		struct vnode * tmpvp;

		hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
		tmpvp = hfsmp->hfs_backingfs_rootvp;
		hfsmp->hfs_backingfs_rootvp = NULLVP;
		vnode_rele(tmpvp);
	}
#endif /* HFS_SPARSE_DEV */

	vnode_rele(hfsmp->hfs_devvp);

	/* Final teardown of locks, hash tables and the mount structure itself. */
	hfs_locks_destroy(hfsmp);
	hfs_delete_chash(hfsmp);
	hfs_idhash_destroy(hfsmp);
	FREE(hfsmp, M_HFSMNT);

	return (0);

err_exit:
	if (started_tr) {
		hfs_end_transaction(hfsmp);
	}
	return retval;
}
2253
2254
2255 /*
2256 * Return the root of a filesystem.
2257 */
2258 static int
2259 hfs_vfs_root(struct mount *mp, struct vnode **vpp, __unused vfs_context_t context)
2260 {
2261 return hfs_vget(VFSTOHFS(mp), (cnid_t)kHFSRootFolderID, vpp, 1, 0);
2262 }
2263
2264
/*
 * Do operations associated with quotas
 *
 * When the kernel is built without QUOTA support this is a stub that
 * reports ENOTSUP.  Otherwise it decodes the composite command word
 * (command in the high bits, quota type in the low bits), performs a
 * privilege check for the privileged sub-commands, and dispatches to
 * the matching hfs_quota* helper.
 */
#if !QUOTA
static int
hfs_quotactl(__unused struct mount *mp, __unused int cmds, __unused uid_t uid, __unused caddr_t datap, __unused vfs_context_t context)
{
	return (ENOTSUP);
}
#else
static int
hfs_quotactl(struct mount *mp, int cmds, uid_t uid, caddr_t datap, vfs_context_t context)
{
	struct proc *p = vfs_context_proc(context);
	int cmd, type, error;

	/* ~0U means "the calling user" — substitute the caller's uid. */
	if (uid == ~0U)
		uid = kauth_cred_getuid(vfs_context_ucred(context));
	cmd = cmds >> SUBCMDSHIFT;

	/*
	 * Privilege screening: Q_SYNC and Q_QUOTASTAT are unprivileged, and
	 * a user may query (Q_GETQUOTA) their own quota; everything else
	 * requires superuser.
	 */
	switch (cmd) {
	case Q_SYNC:
	case Q_QUOTASTAT:
		break;
	case Q_GETQUOTA:
		if (uid == kauth_cred_getuid(vfs_context_ucred(context)))
			break;
		/* fall through */
	default:
		if ( (error = vfs_context_suser(context)) )
			return (error);
	}

	type = cmds & SUBCMDMASK;
	if ((u_int)type >= MAXQUOTAS)
		return (EINVAL);
	/* NOTE(review): a busy mount makes this silently return success (0)
	 * rather than EBUSY — this mirrors the classic BSD quotactl shape;
	 * confirm before changing. */
	if (vfs_busy(mp, LK_NOWAIT))
		return (0);

	/* Dispatch to the individual quota operations. */
	switch (cmd) {

	case Q_QUOTAON:
		error = hfs_quotaon(p, mp, type, datap);
		break;

	case Q_QUOTAOFF:
		error = hfs_quotaoff(p, mp, type);
		break;

	case Q_SETQUOTA:
		error = hfs_setquota(mp, uid, type, datap);
		break;

	case Q_SETUSE:
		error = hfs_setuse(mp, uid, type, datap);
		break;

	case Q_GETQUOTA:
		error = hfs_getquota(mp, uid, type, datap);
		break;

	case Q_SYNC:
		error = hfs_qsync(mp);
		break;

	case Q_QUOTASTAT:
		error = hfs_quotastat(mp, type, datap);
		break;

	default:
		error = EINVAL;
		break;
	}
	vfs_unbusy(mp);

	return (error);
}
#endif /* QUOTA */
2343
2344 /* Subtype is composite of bits */
2345 #define HFS_SUBTYPE_JOURNALED 0x01
2346 #define HFS_SUBTYPE_CASESENSITIVE 0x02
2347 /* bits 2 - 6 reserved */
2348 #define HFS_SUBTYPE_STANDARDHFS 0x80
2349
2350 /*
2351 * Get file system statistics.
2352 */
2353 int
2354 hfs_statfs(struct mount *mp, register struct vfsstatfs *sbp, __unused vfs_context_t context)
2355 {
2356 ExtendedVCB *vcb = VFSTOVCB(mp);
2357 struct hfsmount *hfsmp = VFSTOHFS(mp);
2358 u_int32_t freeCNIDs;
2359 u_int16_t subtype = 0;
2360
2361 freeCNIDs = (u_int32_t)0xFFFFFFFF - (u_int32_t)vcb->vcbNxtCNID;
2362
2363 sbp->f_bsize = (u_int32_t)vcb->blockSize;
2364 sbp->f_iosize = (size_t)cluster_max_io_size(mp, 0);
2365 sbp->f_blocks = (u_int64_t)((u_int32_t)vcb->totalBlocks);
2366 sbp->f_bfree = (u_int64_t)((u_int32_t )hfs_freeblks(hfsmp, 0));
2367 sbp->f_bavail = (u_int64_t)((u_int32_t )hfs_freeblks(hfsmp, 1));
2368 sbp->f_files = (u_int64_t)((u_int32_t )(vcb->totalBlocks - 2)); /* max files is constrained by total blocks */
2369 sbp->f_ffree = (u_int64_t)((u_int32_t )(MIN(freeCNIDs, sbp->f_bavail)));
2370
2371 /*
2372 * Subtypes (flavors) for HFS
2373 * 0: Mac OS Extended
2374 * 1: Mac OS Extended (Journaled)
2375 * 2: Mac OS Extended (Case Sensitive)
2376 * 3: Mac OS Extended (Case Sensitive, Journaled)
2377 * 4 - 127: Reserved
2378 * 128: Mac OS Standard
2379 *
2380 */
2381 if ((hfsmp->hfs_flags & HFS_STANDARD) == 0) {
2382 /* HFS+ & variants */
2383 if (hfsmp->jnl) {
2384 subtype |= HFS_SUBTYPE_JOURNALED;
2385 }
2386 if (hfsmp->hfs_flags & HFS_CASE_SENSITIVE) {
2387 subtype |= HFS_SUBTYPE_CASESENSITIVE;
2388 }
2389 }
2390 #if CONFIG_HFS_STD
2391 else {
2392 /* HFS standard */
2393 subtype = HFS_SUBTYPE_STANDARDHFS;
2394 }
2395 #endif
2396 sbp->f_fssubtype = subtype;
2397
2398 return (0);
2399 }
2400
2401
//
// XXXdbg -- this is a callback to be used by the journal to
//           get meta data blocks flushed out to disk.
//
// XXXdbg -- be smarter and don't flush *every* block on each
//           call.  try to only flush some so we don't wind up
//           being too synchronous.
//
/*
 * Push the primary and alternate volume headers to disk if their
 * buffers are sitting dirty (delayed-write) in the buffer cache.
 * Called with the mount point as the opaque journal-callback argument.
 */
__private_extern__
void
hfs_sync_metadata(void *arg)
{
	struct mount *mp = (struct mount *)arg;
	struct hfsmount *hfsmp;
	ExtendedVCB *vcb;
	buf_t bp;
	int retval;
	daddr64_t priIDSector;	/* logical block of the primary volume header */
	hfsmp = VFSTOHFS(mp);
	vcb = HFSTOVCB(hfsmp);

	// now make sure the super block is flushed
	priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
				  HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size));

	retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
			HFS_PHYSBLK_ROUNDDOWN(priIDSector, hfsmp->hfs_log_per_phys),
			hfsmp->hfs_physical_block_size, NOCRED, &bp);
	/* ENXIO (device gone) is expected during forced ejects; only log
	 * other read failures. */
	if ((retval != 0 ) && (retval != ENXIO)) {
		printf("hfs_sync_metadata: can't read volume header at %d! (retval 0x%x)\n",
		       (int)priIDSector, retval);
	}

	/* Write the buffer only if it is dirty (B_DELWRI) and not locked;
	 * otherwise just release it back to the cache. */
	if (retval == 0 && ((buf_flags(bp) & (B_DELWRI | B_LOCKED)) == B_DELWRI)) {
		buf_bwrite(bp);
	} else if (bp) {
		buf_brelse(bp);
	}

	// the alternate super block...
	// XXXdbg - we probably don't need to do this each and every time.
	//          hfs_btreeio.c:FlushAlternate() should flag when it was
	//          written...
	if (hfsmp->hfs_alt_id_sector) {
		retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
				HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys),
				hfsmp->hfs_physical_block_size, NOCRED, &bp);
		if (retval == 0 && ((buf_flags(bp) & (B_DELWRI | B_LOCKED)) == B_DELWRI)) {
			buf_bwrite(bp);
		} else if (bp) {
			buf_brelse(bp);
		}
	}
}
2456
2457
/*
 * Per-call context handed to hfs_sync_callback() via vnode_iterate()
 * during hfs_sync().
 */
struct hfs_sync_cargs {
	kauth_cred_t cred;	/* caller's credential (set from kauth_cred_get()) */
	struct proc *p;		/* calling process; passed through to hfs_fsync() */
	int waitfor;		/* sync mode (e.g. MNT_WAIT) forwarded to hfs_fsync() */
	int error;		/* last non-zero error reported by the callback */
};
2464
2465
2466 static int
2467 hfs_sync_callback(struct vnode *vp, void *cargs)
2468 {
2469 struct cnode *cp;
2470 struct hfs_sync_cargs *args;
2471 int error;
2472
2473 args = (struct hfs_sync_cargs *)cargs;
2474
2475 if (hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) != 0) {
2476 return (VNODE_RETURNED);
2477 }
2478 cp = VTOC(vp);
2479
2480 if ((cp->c_flag & C_MODIFIED) ||
2481 (cp->c_touch_acctime | cp->c_touch_chgtime | cp->c_touch_modtime) ||
2482 vnode_hasdirtyblks(vp)) {
2483 error = hfs_fsync(vp, args->waitfor, 0, args->p);
2484
2485 if (error)
2486 args->error = error;
2487 }
2488 hfs_unlock(cp);
2489 return (VNODE_RETURNED);
2490 }
2491
2492
2493
2494 /*
2495 * Go through the disk queues to initiate sandbagged IO;
2496 * go through the inodes to write those that have been modified;
2497 * initiate the writing of the super block if it has been modified.
2498 *
2499 * Note: we are always called with the filesystem marked `MPBUSY'.
2500 */
2501 int
2502 hfs_sync(struct mount *mp, int waitfor, vfs_context_t context)
2503 {
2504 struct proc *p = vfs_context_proc(context);
2505 struct cnode *cp;
2506 struct hfsmount *hfsmp;
2507 ExtendedVCB *vcb;
2508 struct vnode *meta_vp[4];
2509 int i;
2510 int error, allerror = 0;
2511 struct hfs_sync_cargs args;
2512
2513 hfsmp = VFSTOHFS(mp);
2514
2515 /*
2516 * hfs_changefs might be manipulating vnodes so back off
2517 */
2518 if (hfsmp->hfs_flags & HFS_IN_CHANGEFS)
2519 return (0);
2520
2521 if (hfsmp->hfs_flags & HFS_READ_ONLY)
2522 return (EROFS);
2523
2524 /* skip over frozen volumes */
2525 if (!lck_rw_try_lock_shared(&hfsmp->hfs_insync))
2526 return 0;
2527
2528 args.cred = kauth_cred_get();
2529 args.waitfor = waitfor;
2530 args.p = p;
2531 args.error = 0;
2532 /*
2533 * hfs_sync_callback will be called for each vnode
2534 * hung off of this mount point... the vnode will be
2535 * properly referenced and unreferenced around the callback
2536 */
2537 vnode_iterate(mp, 0, hfs_sync_callback, (void *)&args);
2538
2539 if (args.error)
2540 allerror = args.error;
2541
2542 vcb = HFSTOVCB(hfsmp);
2543
2544 meta_vp[0] = vcb->extentsRefNum;
2545 meta_vp[1] = vcb->catalogRefNum;
2546 meta_vp[2] = vcb->allocationsRefNum; /* This is NULL for standard HFS */
2547 meta_vp[3] = hfsmp->hfs_attribute_vp; /* Optional file */
2548
2549 /* Now sync our three metadata files */
2550 for (i = 0; i < 4; ++i) {
2551 struct vnode *btvp;
2552
2553 btvp = meta_vp[i];;
2554 if ((btvp==0) || (vnode_mount(btvp) != mp))
2555 continue;
2556
2557 /* XXX use hfs_systemfile_lock instead ? */
2558 (void) hfs_lock(VTOC(btvp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2559 cp = VTOC(btvp);
2560
2561 if (((cp->c_flag & C_MODIFIED) == 0) &&
2562 (cp->c_touch_acctime == 0) &&
2563 (cp->c_touch_chgtime == 0) &&
2564 (cp->c_touch_modtime == 0) &&
2565 vnode_hasdirtyblks(btvp) == 0) {
2566 hfs_unlock(VTOC(btvp));
2567 continue;
2568 }
2569 error = vnode_get(btvp);
2570 if (error) {
2571 hfs_unlock(VTOC(btvp));
2572 continue;
2573 }
2574 if ((error = hfs_fsync(btvp, waitfor, 0, p)))
2575 allerror = error;
2576
2577 hfs_unlock(cp);
2578 vnode_put(btvp);
2579 };
2580
2581
2582 #if CONFIG_HFS_STD
2583 /*
2584 * Force stale file system control information to be flushed.
2585 */
2586 if (vcb->vcbSigWord == kHFSSigWord) {
2587 if ((error = VNOP_FSYNC(hfsmp->hfs_devvp, waitfor, context))) {
2588 allerror = error;
2589 }
2590 }
2591 #endif
2592
2593 #if QUOTA
2594 hfs_qsync(mp);
2595 #endif /* QUOTA */
2596
2597 hfs_hotfilesync(hfsmp, vfs_context_kernel());
2598
2599 /*
2600 * Write back modified superblock.
2601 */
2602 if (IsVCBDirty(vcb)) {
2603 error = hfs_flushvolumeheader(hfsmp, waitfor, 0);
2604 if (error)
2605 allerror = error;
2606 }
2607
2608 if (hfsmp->jnl) {
2609 hfs_journal_flush(hfsmp, FALSE);
2610 }
2611
2612 lck_rw_unlock_shared(&hfsmp->hfs_insync);
2613 return (allerror);
2614 }
2615
2616
2617 /*
2618 * File handle to vnode
2619 *
2620 * Have to be really careful about stale file handles:
2621 * - check that the cnode id is valid
2622 * - call hfs_vget() to get the locked cnode
2623 * - check for an unallocated cnode (i_mode == 0)
2624 * - check that the given client host has export rights and return
2625 * those rights via. exflagsp and credanonp
2626 */
2627 static int
2628 hfs_fhtovp(struct mount *mp, int fhlen, unsigned char *fhp, struct vnode **vpp, __unused vfs_context_t context)
2629 {
2630 struct hfsfid *hfsfhp;
2631 struct vnode *nvp;
2632 int result;
2633
2634 *vpp = NULL;
2635 hfsfhp = (struct hfsfid *)fhp;
2636
2637 if (fhlen < (int)sizeof(struct hfsfid))
2638 return (EINVAL);
2639
2640 result = hfs_vget(VFSTOHFS(mp), ntohl(hfsfhp->hfsfid_cnid), &nvp, 0, 0);
2641 if (result) {
2642 if (result == ENOENT)
2643 result = ESTALE;
2644 return result;
2645 }
2646
2647 /*
2648 * We used to use the create time as the gen id of the file handle,
2649 * but it is not static enough because it can change at any point
2650 * via system calls. We still don't have another volume ID or other
2651 * unique identifier to use for a generation ID across reboots that
2652 * persists until the file is removed. Using only the CNID exposes
2653 * us to the potential wrap-around case, but as of 2/2008, it would take
2654 * over 2 months to wrap around if the machine did nothing but allocate
2655 * CNIDs. Using some kind of wrap counter would only be effective if
2656 * each file had the wrap counter associated with it. For now,
2657 * we use only the CNID to identify the file as it's good enough.
2658 */
2659
2660 *vpp = nvp;
2661
2662 hfs_unlock(VTOC(nvp));
2663 return (0);
2664 }
2665
2666
2667 /*
2668 * Vnode pointer to File handle
2669 */
2670 /* ARGSUSED */
2671 static int
2672 hfs_vptofh(struct vnode *vp, int *fhlenp, unsigned char *fhp, __unused vfs_context_t context)
2673 {
2674 struct cnode *cp;
2675 struct hfsfid *hfsfhp;
2676
2677 if (ISHFS(VTOVCB(vp)))
2678 return (ENOTSUP); /* hfs standard is not exportable */
2679
2680 if (*fhlenp < (int)sizeof(struct hfsfid))
2681 return (EOVERFLOW);
2682
2683 cp = VTOC(vp);
2684 hfsfhp = (struct hfsfid *)fhp;
2685 /* only the CNID is used to identify the file now */
2686 hfsfhp->hfsfid_cnid = htonl(cp->c_fileid);
2687 hfsfhp->hfsfid_gen = htonl(cp->c_fileid);
2688 *fhlenp = sizeof(struct hfsfid);
2689
2690 return (0);
2691 }
2692
2693
2694 /*
2695 * Initialize HFS filesystems, done only once per boot.
2696 *
2697 * HFS is not a kext-based file system. This makes it difficult to find
2698 * out when the last HFS file system was unmounted and call hfs_uninit()
2699 * to deallocate data structures allocated in hfs_init(). Therefore we
2700 * never deallocate memory allocated by lock attribute and group initializations
2701 * in this function.
2702 */
2703 static int
2704 hfs_init(__unused struct vfsconf *vfsp)
2705 {
2706 static int done = 0;
2707
2708 if (done)
2709 return (0);
2710 done = 1;
2711 hfs_chashinit();
2712 hfs_converterinit();
2713
2714 BTReserveSetup();
2715
2716 hfs_lock_attr = lck_attr_alloc_init();
2717 hfs_group_attr = lck_grp_attr_alloc_init();
2718 hfs_mutex_group = lck_grp_alloc_init("hfs-mutex", hfs_group_attr);
2719 hfs_rwlock_group = lck_grp_alloc_init("hfs-rwlock", hfs_group_attr);
2720 hfs_spinlock_group = lck_grp_alloc_init("hfs-spinlock", hfs_group_attr);
2721
2722 #if HFS_COMPRESSION
2723 decmpfs_init();
2724 #endif
2725
2726 return (0);
2727 }
2728
2729
2730 /*
2731 * Destroy all locks, mutexes and spinlocks in hfsmp on unmount or failed mount
2732 */
2733 static void
2734 hfs_locks_destroy(struct hfsmount *hfsmp)
2735 {
2736
2737 lck_mtx_destroy(&hfsmp->hfs_mutex, hfs_mutex_group);
2738 lck_mtx_destroy(&hfsmp->hfc_mutex, hfs_mutex_group);
2739 lck_rw_destroy(&hfsmp->hfs_global_lock, hfs_rwlock_group);
2740 lck_rw_destroy(&hfsmp->hfs_insync, hfs_rwlock_group);
2741 lck_spin_destroy(&hfsmp->vcbFreeExtLock, hfs_spinlock_group);
2742
2743 return;
2744 }
2745
2746
2747 static int
2748 hfs_getmountpoint(struct vnode *vp, struct hfsmount **hfsmpp)
2749 {
2750 struct hfsmount * hfsmp;
2751 char fstypename[MFSNAMELEN];
2752
2753 if (vp == NULL)
2754 return (EINVAL);
2755
2756 if (!vnode_isvroot(vp))
2757 return (EINVAL);
2758
2759 vnode_vfsname(vp, fstypename);
2760 if (strncmp(fstypename, "hfs", sizeof(fstypename)) != 0)
2761 return (EINVAL);
2762
2763 hfsmp = VTOHFS(vp);
2764
2765 if (HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord)
2766 return (EINVAL);
2767
2768 *hfsmpp = hfsmp;
2769
2770 return (0);
2771 }
2772
2773 // XXXdbg
2774 #include <sys/filedesc.h>
2775
2776 /*
2777 * HFS filesystem related variables.
2778 */
2779 int
2780 hfs_sysctl(int *name, __unused u_int namelen, user_addr_t oldp, size_t *oldlenp,
2781 user_addr_t newp, size_t newlen, vfs_context_t context)
2782 {
2783 struct proc *p = vfs_context_proc(context);
2784 int error;
2785 struct hfsmount *hfsmp;
2786
2787 /* all sysctl names at this level are terminal */
2788
2789 if (name[0] == HFS_ENCODINGBIAS) {
2790 int bias;
2791
2792 bias = hfs_getencodingbias();
2793 error = sysctl_int(oldp, oldlenp, newp, newlen, &bias);
2794 if (error == 0 && newp)
2795 hfs_setencodingbias(bias);
2796 return (error);
2797
2798 } else if (name[0] == HFS_EXTEND_FS) {
2799 u_int64_t newsize;
2800 vnode_t vp = vfs_context_cwd(context);
2801
2802 if (newp == USER_ADDR_NULL || vp == NULLVP)
2803 return (EINVAL);
2804 if ((error = hfs_getmountpoint(vp, &hfsmp)))
2805 return (error);
2806 error = sysctl_quad(oldp, oldlenp, newp, newlen, (quad_t *)&newsize);
2807 if (error)
2808 return (error);
2809
2810 error = hfs_extendfs(hfsmp, newsize, context);
2811 return (error);
2812
2813 } else if (name[0] == HFS_ENCODINGHINT) {
2814 size_t bufsize;
2815 size_t bytes;
2816 u_int32_t hint;
2817 u_int16_t *unicode_name = NULL;
2818 char *filename = NULL;
2819
2820 if ((newlen <= 0) || (newlen > MAXPATHLEN))
2821 return (EINVAL);
2822
2823 bufsize = MAX(newlen * 3, MAXPATHLEN);
2824 MALLOC(filename, char *, newlen, M_TEMP, M_WAITOK);
2825 if (filename == NULL) {
2826 error = ENOMEM;
2827 goto encodinghint_exit;
2828 }
2829 MALLOC(unicode_name, u_int16_t *, bufsize, M_TEMP, M_WAITOK);
2830 if (filename == NULL) {
2831 error = ENOMEM;
2832 goto encodinghint_exit;
2833 }
2834
2835 error = copyin(newp, (caddr_t)filename, newlen);
2836 if (error == 0) {
2837 error = utf8_decodestr((u_int8_t *)filename, newlen - 1, unicode_name,
2838 &bytes, bufsize, 0, UTF_DECOMPOSED);
2839 if (error == 0) {
2840 hint = hfs_pickencoding(unicode_name, bytes / 2);
2841 error = sysctl_int(oldp, oldlenp, USER_ADDR_NULL, 0, (int32_t *)&hint);
2842 }
2843 }
2844
2845 encodinghint_exit:
2846 if (unicode_name)
2847 FREE(unicode_name, M_TEMP);
2848 if (filename)
2849 FREE(filename, M_TEMP);
2850 return (error);
2851
2852 } else if (name[0] == HFS_ENABLE_JOURNALING) {
2853 // make the file system journaled...
2854 vnode_t vp = vfs_context_cwd(context);
2855 vnode_t jvp;
2856 ExtendedVCB *vcb;
2857 struct cat_attr jnl_attr;
2858 struct cat_attr jinfo_attr;
2859 struct cat_fork jnl_fork;
2860 struct cat_fork jinfo_fork;
2861 buf_t jib_buf;
2862 uint64_t jib_blkno;
2863 uint32_t tmpblkno;
2864 uint64_t journal_byte_offset;
2865 uint64_t journal_size;
2866 vnode_t jib_vp = NULLVP;
2867 struct JournalInfoBlock local_jib;
2868 int err = 0;
2869 void *jnl = NULL;
2870 int lockflags;
2871
2872 /* Only root can enable journaling */
2873 if (!kauth_cred_issuser(kauth_cred_get())) {
2874 return (EPERM);
2875 }
2876 if (vp == NULLVP)
2877 return EINVAL;
2878
2879 hfsmp = VTOHFS(vp);
2880 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2881 return EROFS;
2882 }
2883 if (HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord) {
2884 printf("hfs: can't make a plain hfs volume journaled.\n");
2885 return EINVAL;
2886 }
2887
2888 if (hfsmp->jnl) {
2889 printf("hfs: volume @ mp %p is already journaled!\n", vnode_mount(vp));
2890 return EAGAIN;
2891 }
2892 vcb = HFSTOVCB(hfsmp);
2893
2894 /* Set up local copies of the initialization info */
2895 tmpblkno = (uint32_t) name[1];
2896 jib_blkno = (uint64_t) tmpblkno;
2897 journal_byte_offset = (uint64_t) name[2];
2898 journal_byte_offset *= hfsmp->blockSize;
2899 journal_byte_offset += hfsmp->hfsPlusIOPosOffset;
2900 journal_size = (uint64_t)((unsigned)name[3]);
2901
2902 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_EXTENTS, HFS_EXCLUSIVE_LOCK);
2903 if (BTHasContiguousNodes(VTOF(vcb->catalogRefNum)) == 0 ||
2904 BTHasContiguousNodes(VTOF(vcb->extentsRefNum)) == 0) {
2905
2906 printf("hfs: volume has a btree w/non-contiguous nodes. can not enable journaling.\n");
2907 hfs_systemfile_unlock(hfsmp, lockflags);
2908 return EINVAL;
2909 }
2910 hfs_systemfile_unlock(hfsmp, lockflags);
2911
2912 // make sure these both exist!
2913 if ( GetFileInfo(vcb, kHFSRootFolderID, ".journal_info_block", &jinfo_attr, &jinfo_fork) == 0
2914 || GetFileInfo(vcb, kHFSRootFolderID, ".journal", &jnl_attr, &jnl_fork) == 0) {
2915
2916 return EINVAL;
2917 }
2918
2919 /*
2920 * At this point, we have a copy of the metadata that lives in the catalog for the
2921 * journal info block. Compare that the journal info block's single extent matches
2922 * that which was passed into this sysctl.
2923 *
2924 * If it is different, deny the journal enable call.
2925 */
2926 if (jinfo_fork.cf_blocks > 1) {
2927 /* too many blocks */
2928 return EINVAL;
2929 }
2930
2931 if (jinfo_fork.cf_extents[0].startBlock != jib_blkno) {
2932 /* Wrong block */
2933 return EINVAL;
2934 }
2935
2936 /*
2937 * We want to immediately purge the vnode for the JIB.
2938 *
2939 * Because it was written to from userland, there's probably
2940 * a vnode somewhere in the vnode cache (possibly with UBC backed blocks).
2941 * So we bring the vnode into core, then immediately do whatever
2942 * we can to flush/vclean it out. This is because those blocks will be
2943 * interpreted as user data, which may be treated separately on some platforms
2944 * than metadata. If the vnode is gone, then there cannot be backing blocks
2945 * in the UBC.
2946 */
2947 if (hfs_vget (hfsmp, jinfo_attr.ca_fileid, &jib_vp, 1, 0)) {
2948 return EINVAL;
2949 }
2950 /*
2951 * Now we have a vnode for the JIB. recycle it. Because we hold an iocount
2952 * on the vnode, we'll just mark it for termination when the last iocount
2953 * (hopefully ours), is dropped.
2954 */
2955 vnode_recycle (jib_vp);
2956 err = vnode_put (jib_vp);
2957 if (err) {
2958 return EINVAL;
2959 }
2960
2961 /* Initialize the local copy of the JIB (just like hfs.util) */
2962 memset (&local_jib, 'Z', sizeof(struct JournalInfoBlock));
2963 local_jib.flags = SWAP_BE32(kJIJournalInFSMask);
2964 /* Note that the JIB's offset is in bytes */
2965 local_jib.offset = SWAP_BE64(journal_byte_offset);
2966 local_jib.size = SWAP_BE64(journal_size);
2967
2968 /*
2969 * Now write out the local JIB. This essentially overwrites the userland
2970 * copy of the JIB. Read it as BLK_META to treat it as a metadata read/write.
2971 */
2972 jib_buf = buf_getblk (hfsmp->hfs_devvp,
2973 jib_blkno * (hfsmp->blockSize / hfsmp->hfs_logical_block_size),
2974 hfsmp->blockSize, 0, 0, BLK_META);
2975 char* buf_ptr = (char*) buf_dataptr (jib_buf);
2976
2977 /* Zero out the portion of the block that won't contain JIB data */
2978 memset (buf_ptr, 0, hfsmp->blockSize);
2979
2980 bcopy(&local_jib, buf_ptr, sizeof(local_jib));
2981 if (buf_bwrite (jib_buf)) {
2982 return EIO;
2983 }
2984
2985 /* Force a flush track cache */
2986 (void) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
2987
2988
2989 /* Now proceed with full volume sync */
2990 hfs_sync(hfsmp->hfs_mp, MNT_WAIT, context);
2991
2992 printf("hfs: Initializing the journal (joffset 0x%llx sz 0x%llx)...\n",
2993 (off_t)name[2], (off_t)name[3]);
2994
2995 //
2996 // XXXdbg - note that currently (Sept, 08) hfs_util does not support
2997 // enabling the journal on a separate device so it is safe
2998 // to just copy hfs_devvp here. If hfs_util gets the ability
2999 // to dynamically enable the journal on a separate device then
3000 // we will have to do the same thing as hfs_early_journal_init()
3001 // to locate and open the journal device.
3002 //
3003 jvp = hfsmp->hfs_devvp;
3004 jnl = journal_create(jvp, journal_byte_offset, journal_size,
3005 hfsmp->hfs_devvp,
3006 hfsmp->hfs_logical_block_size,
3007 0,
3008 0,
3009 hfs_sync_metadata, hfsmp->hfs_mp,
3010 hfsmp->hfs_mp);
3011
3012 /*
3013 * Set up the trim callback function so that we can add
3014 * recently freed extents to the free extent cache once
3015 * the transaction that freed them is written to the
3016 * journal on disk.
3017 */
3018 if (jnl)
3019 journal_trim_set_callback(jnl, hfs_trim_callback, hfsmp);
3020
3021 if (jnl == NULL) {
3022 printf("hfs: FAILED to create the journal!\n");
3023 if (jvp && jvp != hfsmp->hfs_devvp) {
3024 vnode_clearmountedon(jvp);
3025 VNOP_CLOSE(jvp, hfsmp->hfs_flags & HFS_READ_ONLY ? FREAD : FREAD|FWRITE, vfs_context_kernel());
3026 }
3027 jvp = NULL;
3028
3029 return EINVAL;
3030 }
3031
3032 hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);
3033
3034 /*
3035 * Flush all dirty metadata buffers.
3036 */
3037 buf_flushdirtyblks(hfsmp->hfs_devvp, TRUE, 0, "hfs_sysctl");
3038 buf_flushdirtyblks(hfsmp->hfs_extents_vp, TRUE, 0, "hfs_sysctl");
3039 buf_flushdirtyblks(hfsmp->hfs_catalog_vp, TRUE, 0, "hfs_sysctl");
3040 buf_flushdirtyblks(hfsmp->hfs_allocation_vp, TRUE, 0, "hfs_sysctl");
3041 if (hfsmp->hfs_attribute_vp)
3042 buf_flushdirtyblks(hfsmp->hfs_attribute_vp, TRUE, 0, "hfs_sysctl");
3043
3044 HFSTOVCB(hfsmp)->vcbJinfoBlock = name[1];
3045 HFSTOVCB(hfsmp)->vcbAtrb |= kHFSVolumeJournaledMask;
3046 hfsmp->jvp = jvp;
3047 hfsmp->jnl = jnl;
3048
3049 // save this off for the hack-y check in hfs_remove()
3050 hfsmp->jnl_start = (u_int32_t)name[2];
3051 hfsmp->jnl_size = (off_t)((unsigned)name[3]);
3052 hfsmp->hfs_jnlinfoblkid = jinfo_attr.ca_fileid;
3053 hfsmp->hfs_jnlfileid = jnl_attr.ca_fileid;
3054
3055 vfs_setflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
3056
3057 hfs_unlock_global (hfsmp);
3058 hfs_flushvolumeheader(hfsmp, MNT_WAIT, 1);
3059
3060 {
3061 fsid_t fsid;
3062
3063 fsid.val[0] = (int32_t)hfsmp->hfs_raw_dev;
3064 fsid.val[1] = (int32_t)vfs_typenum(HFSTOVFS(hfsmp));
3065 vfs_event_signal(&fsid, VQ_UPDATE, (intptr_t)NULL);
3066 }
3067 return 0;
3068 } else if (name[0] == HFS_DISABLE_JOURNALING) {
3069 // clear the journaling bit
3070 vnode_t vp = vfs_context_cwd(context);
3071
3072 /* Only root can disable journaling */
3073 if (!kauth_cred_issuser(kauth_cred_get())) {
3074 return (EPERM);
3075 }
3076 if (vp == NULLVP)
3077 return EINVAL;
3078
3079 hfsmp = VTOHFS(vp);
3080
3081 /*
3082 * Disabling journaling is disallowed on volumes with directory hard links
3083 * because we have not tested the relevant code path.
3084 */
3085 if (hfsmp->hfs_private_attr[DIR_HARDLINKS].ca_entries != 0){
3086 printf("hfs: cannot disable journaling on volumes with directory hardlinks\n");
3087 return EPERM;
3088 }
3089
3090 printf("hfs: disabling journaling for mount @ %p\n", vnode_mount(vp));
3091
3092 hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);
3093
3094 // Lights out for you buddy!
3095 journal_close(hfsmp->jnl);
3096 hfsmp->jnl = NULL;
3097
3098 if (hfsmp->jvp && hfsmp->jvp != hfsmp->hfs_devvp) {
3099 vnode_clearmountedon(hfsmp->jvp);
3100 VNOP_CLOSE(hfsmp->jvp, hfsmp->hfs_flags & HFS_READ_ONLY ? FREAD : FREAD|FWRITE, vfs_context_kernel());
3101 vnode_put(hfsmp->jvp);
3102 }
3103 hfsmp->jvp = NULL;
3104 vfs_clearflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
3105 hfsmp->jnl_start = 0;
3106 hfsmp->hfs_jnlinfoblkid = 0;
3107 hfsmp->hfs_jnlfileid = 0;
3108
3109 HFSTOVCB(hfsmp)->vcbAtrb &= ~kHFSVolumeJournaledMask;
3110
3111 hfs_unlock_global (hfsmp);
3112
3113 hfs_flushvolumeheader(hfsmp, MNT_WAIT, 1);
3114
3115 {
3116 fsid_t fsid;
3117
3118 fsid.val[0] = (int32_t)hfsmp->hfs_raw_dev;
3119 fsid.val[1] = (int32_t)vfs_typenum(HFSTOVFS(hfsmp));
3120 vfs_event_signal(&fsid, VQ_UPDATE, (intptr_t)NULL);
3121 }
3122 return 0;
3123 } else if (name[0] == HFS_GET_JOURNAL_INFO) {
3124 vnode_t vp = vfs_context_cwd(context);
3125 off_t jnl_start, jnl_size;
3126
3127 if (vp == NULLVP)
3128 return EINVAL;
3129
3130 /* 64-bit processes won't work with this sysctl -- can't fit a pointer into an int! */
3131 if (proc_is64bit(current_proc()))
3132 return EINVAL;
3133
3134 hfsmp = VTOHFS(vp);
3135 if (hfsmp->jnl == NULL) {
3136 jnl_start = 0;
3137 jnl_size = 0;
3138 } else {
3139 jnl_start = (off_t)(hfsmp->jnl_start * HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset;
3140 jnl_size = (off_t)hfsmp->jnl_size;
3141 }
3142
3143 if ((error = copyout((caddr_t)&jnl_start, CAST_USER_ADDR_T(name[1]), sizeof(off_t))) != 0) {
3144 return error;
3145 }
3146 if ((error = copyout((caddr_t)&jnl_size, CAST_USER_ADDR_T(name[2]), sizeof(off_t))) != 0) {
3147 return error;
3148 }
3149
3150 return 0;
3151 } else if (name[0] == HFS_SET_PKG_EXTENSIONS) {
3152
3153 return set_package_extensions_table((user_addr_t)((unsigned)name[1]), name[2], name[3]);
3154
3155 } else if (name[0] == VFS_CTL_QUERY) {
3156 struct sysctl_req *req;
3157 union union_vfsidctl vc;
3158 struct mount *mp;
3159 struct vfsquery vq;
3160
3161 req = CAST_DOWN(struct sysctl_req *, oldp); /* we're new style vfs sysctl. */
3162
3163 error = SYSCTL_IN(req, &vc, proc_is64bit(p)? sizeof(vc.vc64):sizeof(vc.vc32));
3164 if (error) return (error);
3165
3166 mp = vfs_getvfs(&vc.vc32.vc_fsid); /* works for 32 and 64 */
3167 if (mp == NULL) return (ENOENT);
3168
3169 hfsmp = VFSTOHFS(mp);
3170 bzero(&vq, sizeof(vq));
3171 vq.vq_flags = hfsmp->hfs_notification_conditions;
3172 return SYSCTL_OUT(req, &vq, sizeof(vq));;
3173 } else if (name[0] == HFS_REPLAY_JOURNAL) {
3174 vnode_t devvp = NULL;
3175 int device_fd;
3176 if (namelen != 2) {
3177 return (EINVAL);
3178 }
3179 device_fd = name[1];
3180 error = file_vnode(device_fd, &devvp);
3181 if (error) {
3182 return error;
3183 }
3184 error = vnode_getwithref(devvp);
3185 if (error) {
3186 file_drop(device_fd);
3187 return error;
3188 }
3189 error = hfs_journal_replay(devvp, context);
3190 file_drop(device_fd);
3191 vnode_put(devvp);
3192 return error;
3193 } else if (name[0] == HFS_ENABLE_RESIZE_DEBUG) {
3194 hfs_resize_debug = 1;
3195 printf ("hfs_sysctl: Enabled volume resize debugging.\n");
3196 return 0;
3197 }
3198
3199 return (ENOTSUP);
3200 }
3201
3202 /*
3203 * hfs_vfs_vget is not static since it is used in hfs_readwrite.c to support
3204 * the build_path ioctl. We use it to leverage the code below that updates
3205 * the origin list cache if necessary
3206 */
3207
3208 int
3209 hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, __unused vfs_context_t context)
3210 {
3211 int error;
3212 int lockflags;
3213 struct hfsmount *hfsmp;
3214
3215 hfsmp = VFSTOHFS(mp);
3216
3217 error = hfs_vget(hfsmp, (cnid_t)ino, vpp, 1, 0);
3218 if (error)
3219 return (error);
3220
3221 /*
3222 * ADLs may need to have their origin state updated
3223 * since build_path needs a valid parent. The same is true
3224 * for hardlinked files as well. There isn't a race window here
3225 * in re-acquiring the cnode lock since we aren't pulling any data
3226 * out of the cnode; instead, we're going to the catalog.
3227 */
3228 if ((VTOC(*vpp)->c_flag & C_HARDLINK) &&
3229 (hfs_lock(VTOC(*vpp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) == 0)) {
3230 cnode_t *cp = VTOC(*vpp);
3231 struct cat_desc cdesc;
3232
3233 if (!hfs_haslinkorigin(cp)) {
3234 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
3235 error = cat_findname(hfsmp, (cnid_t)ino, &cdesc);
3236 hfs_systemfile_unlock(hfsmp, lockflags);
3237 if (error == 0) {
3238 if ((cdesc.cd_parentcnid != hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) &&
3239 (cdesc.cd_parentcnid != hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid)) {
3240 hfs_savelinkorigin(cp, cdesc.cd_parentcnid);
3241 }
3242 cat_releasedesc(&cdesc);
3243 }
3244 }
3245 hfs_unlock(cp);
3246 }
3247 return (0);
3248 }
3249
3250
3251 /*
3252 * Look up an HFS object by ID.
3253 *
3254 * The object is returned with an iocount reference and the cnode locked.
3255 *
3256 * If the object is a file then it will represent the data fork.
3257 */
int
hfs_vget(struct hfsmount *hfsmp, cnid_t cnid, struct vnode **vpp, int skiplock, int allow_deleted)
{
	struct vnode *vp = NULLVP;
	struct cat_desc cndesc;
	struct cat_attr cnattr;
	struct cat_fork cnfork;
	u_int32_t linkref = 0;
	int error;

	/* Check for cnids that shouldn't be exported. */
	if ((cnid < kHFSFirstUserCatalogNodeID) &&
	    (cnid != kHFSRootFolderID && cnid != kHFSRootParentID)) {
		return (ENOENT);
	}
	/* Don't export our private directories. */
	if (cnid == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid ||
	    cnid == hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) {
		return (ENOENT);
	}
	/*
	 * Check the cnode hash first: if the cnode is already in core we get
	 * it back with an iocount (and locked, unless skiplock was passed).
	 */
	vp = hfs_chash_getvnode(hfsmp, cnid, 0, skiplock, allow_deleted);
	if (vp) {
		*vpp = vp;
		return(0);
	}

	bzero(&cndesc, sizeof(cndesc));
	bzero(&cnattr, sizeof(cnattr));
	bzero(&cnfork, sizeof(cnfork));

	/*
	 * Not in hash, lookup in catalog
	 */
	if (cnid == kHFSRootParentID) {
		/*
		 * The root's parent is synthetic; fabricate a descriptor and
		 * attributes that alias it to the root folder itself.
		 */
		static char hfs_rootname[] = "/";

		cndesc.cd_nameptr = (const u_int8_t *)&hfs_rootname[0];
		cndesc.cd_namelen = 1;
		cndesc.cd_parentcnid = kHFSRootParentID;
		cndesc.cd_cnid = kHFSRootFolderID;
		cndesc.cd_flags = CD_ISDIR;

		cnattr.ca_fileid = kHFSRootFolderID;
		cnattr.ca_linkcount = 1;
		cnattr.ca_entries = 1;
		cnattr.ca_dircount = 1;
		cnattr.ca_mode = (S_IFDIR | S_IRWXU | S_IRWXG | S_IRWXO);
	} else {
		int lockflags;
		cnid_t pid;
		const char *nameptr;

		/* Look the ID up in the catalog under a shared catalog lock. */
		lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
		error = cat_idlookup(hfsmp, cnid, 0, 0, &cndesc, &cnattr, &cnfork);
		hfs_systemfile_unlock(hfsmp, lockflags);

		if (error) {
			*vpp = NULL;
			return (error);
		}

		/*
		 * Check for a raw hardlink inode and save its linkref.
		 * Raw inodes live in the private metadata directories and are
		 * named "<prefix><linkref>", so the numeric suffix is parsed
		 * out of the catalog name.
		 */
		pid = cndesc.cd_parentcnid;
		nameptr = (const char *)cndesc.cd_nameptr;

		if ((pid == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid) &&
		    (bcmp(nameptr, HFS_INODE_PREFIX, HFS_INODE_PREFIX_LEN) == 0)) {
			linkref = strtoul(&nameptr[HFS_INODE_PREFIX_LEN], NULL, 10);

		} else if ((pid == hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) &&
		           (bcmp(nameptr, HFS_DIRINODE_PREFIX, HFS_DIRINODE_PREFIX_LEN) == 0)) {
			linkref = strtoul(&nameptr[HFS_DIRINODE_PREFIX_LEN], NULL, 10);

		} else if ((pid == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid) &&
		           (bcmp(nameptr, HFS_DELETE_PREFIX, HFS_DELETE_PREFIX_LEN) == 0)) {
			*vpp = NULL;
			cat_releasedesc(&cndesc);
			return (ENOENT);	/* open unlinked file */
		}
	}

	/*
	 * Finish initializing cnode descriptor for hardlinks.
	 *
	 * We need a valid name and parent for reverse lookups.
	 */
	if (linkref) {
		cnid_t lastid;
		struct cat_desc linkdesc;
		int linkerr = 0;

		cnattr.ca_linkref = linkref;
		bzero (&linkdesc, sizeof (linkdesc));

		/*
		 * If the caller supplied the raw inode value, then we don't know exactly
		 * which hardlink they wanted. It's likely that they acquired the raw inode
		 * value BEFORE the item became a hardlink, in which case, they probably
		 * want the oldest link. So request the oldest link from the catalog.
		 *
		 * Unfortunately, this requires that we iterate through all N hardlinks. On the plus
		 * side, since we know that we want the last linkID, we can also have this one
		 * call give us back the name of the last ID, since it's going to have it in-hand...
		 */
		linkerr = hfs_lookup_lastlink (hfsmp, linkref, &lastid, &linkdesc);
		if ((linkerr == 0) && (lastid != 0)) {
			/*
			 * Release any lingering buffers attached to our local descriptor.
			 * Then copy the name and other business into the cndesc
			 */
			cat_releasedesc (&cndesc);
			bcopy (&linkdesc, &cndesc, sizeof(linkdesc));
		}
		/* If it failed, the linkref code will just use whatever it had in-hand below. */
	}

	if (linkref) {
		int newvnode_flags = 0;

		/* Build the vnode for a hardlink member (no componentname). */
		error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr,
								&cnfork, &vp, &newvnode_flags);
		if (error == 0) {
			VTOC(vp)->c_flag |= C_HARDLINK;
			vnode_setmultipath(vp);
		}
	} else {
		struct componentname cn;
		int newvnode_flags = 0;

		/* Supply hfs_getnewvnode with a component name. */
		MALLOC_ZONE(cn.cn_pnbuf, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK);
		cn.cn_nameiop = LOOKUP;
		cn.cn_flags = ISLASTCN | HASBUF;
		cn.cn_context = NULL;
		cn.cn_pnlen = MAXPATHLEN;
		cn.cn_nameptr = cn.cn_pnbuf;
		cn.cn_namelen = cndesc.cd_namelen;
		cn.cn_hash = 0;
		cn.cn_consume = 0;
		/* +1 copies the catalog name's NUL terminator as well. */
		bcopy(cndesc.cd_nameptr, cn.cn_nameptr, cndesc.cd_namelen + 1);

		error = hfs_getnewvnode(hfsmp, NULLVP, &cn, &cndesc, 0, &cnattr,
								&cnfork, &vp, &newvnode_flags);

		if (error == 0 && (VTOC(vp)->c_flag & C_HARDLINK)) {
			hfs_savelinkorigin(VTOC(vp), cndesc.cd_parentcnid);
		}
		FREE_ZONE(cn.cn_pnbuf, cn.cn_pnlen, M_NAMEI);
	}
	cat_releasedesc(&cndesc);

	/* On success the vnode carries an iocount; honor skiplock on the way out. */
	*vpp = vp;
	if (vp && skiplock) {
		hfs_unlock(VTOC(vp));
	}
	return (error);
}
3420
3421
3422 /*
3423 * Flush out all the files in a filesystem.
3424 */
3425 static int
3426 #if QUOTA
3427 hfs_flushfiles(struct mount *mp, int flags, struct proc *p)
3428 #else
3429 hfs_flushfiles(struct mount *mp, int flags, __unused struct proc *p)
3430 #endif /* QUOTA */
3431 {
3432 struct hfsmount *hfsmp;
3433 struct vnode *skipvp = NULLVP;
3434 int error;
3435 int accounted_root_usecounts;
3436 #if QUOTA
3437 int i;
3438 #endif
3439
3440 hfsmp = VFSTOHFS(mp);
3441
3442 accounted_root_usecounts = 0;
3443 #if QUOTA
3444 /*
3445 * The open quota files have an indirect reference on
3446 * the root directory vnode. We must account for this
3447 * extra reference when doing the intial vflush.
3448 */
3449 if (((unsigned int)vfs_flags(mp)) & MNT_QUOTA) {
3450 /* Find out how many quota files we have open. */
3451 for (i = 0; i < MAXQUOTAS; i++) {
3452 if (hfsmp->hfs_qfiles[i].qf_vp != NULLVP)
3453 ++accounted_root_usecounts;
3454 }
3455 }
3456 #endif /* QUOTA */
3457 if (hfsmp->hfs_flags & HFS_CS) {
3458 ++accounted_root_usecounts;
3459 }
3460
3461 if (accounted_root_usecounts > 0) {
3462 /* Obtain the root vnode so we can skip over it. */
3463 skipvp = hfs_chash_getvnode(hfsmp, kHFSRootFolderID, 0, 0, 0);
3464 }
3465
3466 error = vflush(mp, skipvp, SKIPSYSTEM | SKIPSWAP | flags);
3467 if (error != 0)
3468 return(error);
3469
3470 error = vflush(mp, skipvp, SKIPSYSTEM | flags);
3471
3472 if (skipvp) {
3473 /*
3474 * See if there are additional references on the
3475 * root vp besides the ones obtained from the open
3476 * quota files and CoreStorage.
3477 */
3478 if ((error == 0) &&
3479 (vnode_isinuse(skipvp, accounted_root_usecounts))) {
3480 error = EBUSY; /* root directory is still open */
3481 }
3482 hfs_unlock(VTOC(skipvp));
3483 /* release the iocount from the hfs_chash_getvnode call above. */
3484 vnode_put(skipvp);
3485 }
3486 if (error && (flags & FORCECLOSE) == 0)
3487 return (error);
3488
3489 #if QUOTA
3490 if (((unsigned int)vfs_flags(mp)) & MNT_QUOTA) {
3491 for (i = 0; i < MAXQUOTAS; i++) {
3492 if (hfsmp->hfs_qfiles[i].qf_vp == NULLVP)
3493 continue;
3494 hfs_quotaoff(p, mp, i);
3495 }
3496 }
3497 #endif /* QUOTA */
3498 if (hfsmp->hfs_flags & HFS_CS) {
3499 error = VNOP_IOCTL(hfsmp->hfs_devvp, _DKIOCCSSETFSVNODE,
3500 (caddr_t)NULL, 0, vfs_context_kernel());
3501 vnode_rele(skipvp);
3502 printf("hfs_flushfiles: VNOP_IOCTL(_DKIOCCSSETFSVNODE) failed with error code %d\n",
3503 error);
3504
3505 /* ignore the CS error and proceed with the unmount. */
3506 error = 0;
3507 }
3508 if (skipvp) {
3509 error = vflush(mp, NULLVP, SKIPSYSTEM | flags);
3510 }
3511
3512 return (error);
3513 }
3514
3515 /*
3516 * Update volume encoding bitmap (HFS Plus only)
3517 *
3518 * Mark a legacy text encoding as in-use (as needed)
3519 * in the volume header of this HFS+ filesystem.
3520 */
3521 __private_extern__
3522 void
3523 hfs_setencodingbits(struct hfsmount *hfsmp, u_int32_t encoding)
3524 {
3525 #define kIndexMacUkrainian 48 /* MacUkrainian encoding is 152 */
3526 #define kIndexMacFarsi 49 /* MacFarsi encoding is 140 */
3527
3528 u_int32_t index;
3529
3530 switch (encoding) {
3531 case kTextEncodingMacUkrainian:
3532 index = kIndexMacUkrainian;
3533 break;
3534 case kTextEncodingMacFarsi:
3535 index = kIndexMacFarsi;
3536 break;
3537 default:
3538 index = encoding;
3539 break;
3540 }
3541
3542 /* Only mark the encoding as in-use if it wasn't already set */
3543 if (index < 64 && (hfsmp->encodingsBitmap & (u_int64_t)(1ULL << index)) == 0) {
3544 hfs_lock_mount (hfsmp);
3545 hfsmp->encodingsBitmap |= (u_int64_t)(1ULL << index);
3546 MarkVCBDirty(hfsmp);
3547 hfs_unlock_mount(hfsmp);
3548 }
3549 }
3550
3551 /*
3552 * Update volume stats
3553 *
3554 * On journal volumes this will cause a volume header flush
3555 */
3556 int
3557 hfs_volupdate(struct hfsmount *hfsmp, enum volop op, int inroot)
3558 {
3559 struct timeval tv;
3560
3561 microtime(&tv);
3562
3563 hfs_lock_mount (hfsmp);
3564
3565 MarkVCBDirty(hfsmp);
3566 hfsmp->hfs_mtime = tv.tv_sec;
3567
3568 switch (op) {
3569 case VOL_UPDATE:
3570 break;
3571 case VOL_MKDIR:
3572 if (hfsmp->hfs_dircount != 0xFFFFFFFF)
3573 ++hfsmp->hfs_dircount;
3574 if (inroot && hfsmp->vcbNmRtDirs != 0xFFFF)
3575 ++hfsmp->vcbNmRtDirs;
3576 break;
3577 case VOL_RMDIR:
3578 if (hfsmp->hfs_dircount != 0)
3579 --hfsmp->hfs_dircount;
3580 if (inroot && hfsmp->vcbNmRtDirs != 0xFFFF)
3581 --hfsmp->vcbNmRtDirs;
3582 break;
3583 case VOL_MKFILE:
3584 if (hfsmp->hfs_filecount != 0xFFFFFFFF)
3585 ++hfsmp->hfs_filecount;
3586 if (inroot && hfsmp->vcbNmFls != 0xFFFF)
3587 ++hfsmp->vcbNmFls;
3588 break;
3589 case VOL_RMFILE:
3590 if (hfsmp->hfs_filecount != 0)
3591 --hfsmp->hfs_filecount;
3592 if (inroot && hfsmp->vcbNmFls != 0xFFFF)
3593 --hfsmp->vcbNmFls;
3594 break;
3595 }
3596
3597 hfs_unlock_mount (hfsmp);
3598
3599 if (hfsmp->jnl) {
3600 hfs_flushvolumeheader(hfsmp, 0, 0);
3601 }
3602
3603 return (0);
3604 }
3605
3606
#if CONFIG_HFS_STD
/*
 * Flush the in-memory VCB state of an HFS Standard volume out to its
 * on-disk Master Directory Block (big-endian, local-time dates), and
 * optionally to the alternate MDB as well.
 */
static int
hfs_flushMDB(struct hfsmount *hfsmp, int waitfor, int altflush)
{
	ExtendedVCB *vcb = HFSTOVCB(hfsmp);
	struct filefork *fp;
	HFSMasterDirectoryBlock *mdb;
	struct buf *bp = NULL;
	int retval;
	int sector_size;
	ByteCount namelen;

	/* Read the primary MDB sector into a buffer we will modify in place. */
	sector_size = hfsmp->hfs_logical_block_size;
	retval = (int)buf_bread(hfsmp->hfs_devvp, (daddr64_t)HFS_PRI_SECTOR(sector_size), sector_size, NOCRED, &bp);
	if (retval) {
		if (bp)
			buf_brelse(bp);
		return retval;
	}

	/* Hold the mount lock while copying VCB fields so they stay consistent. */
	hfs_lock_mount (hfsmp);

	mdb = (HFSMasterDirectoryBlock *)(buf_dataptr(bp) + HFS_PRI_OFFSET(sector_size));

	/* HFS Standard stores dates in local time, byte-swapped to big-endian. */
	mdb->drCrDate = SWAP_BE32 (UTCToLocal(to_hfs_time(vcb->hfs_itime)));
	mdb->drLsMod = SWAP_BE32 (UTCToLocal(to_hfs_time(vcb->vcbLsMod)));
	mdb->drAtrb = SWAP_BE16 (vcb->vcbAtrb);
	mdb->drNmFls = SWAP_BE16 (vcb->vcbNmFls);
	mdb->drAllocPtr = SWAP_BE16 (vcb->nextAllocation);
	mdb->drClpSiz = SWAP_BE32 (vcb->vcbClpSiz);
	mdb->drNxtCNID = SWAP_BE32 (vcb->vcbNxtCNID);
	mdb->drFreeBks = SWAP_BE16 (vcb->freeBlocks);

	/* Convert the UTF-8 volume name back to its legacy encoding. */
	namelen = strlen((char *)vcb->vcbVN);
	retval = utf8_to_hfs(vcb, namelen, vcb->vcbVN, mdb->drVN);
	/* Retry with MacRoman in case that's how it was exported. */
	if (retval)
		retval = utf8_to_mac_roman(namelen, vcb->vcbVN, mdb->drVN);

	mdb->drVolBkUp = SWAP_BE32 (UTCToLocal(to_hfs_time(vcb->vcbVolBkUp)));
	mdb->drWrCnt = SWAP_BE32 (vcb->vcbWrCnt);
	mdb->drNmRtDirs = SWAP_BE16 (vcb->vcbNmRtDirs);
	mdb->drFilCnt = SWAP_BE32 (vcb->vcbFilCnt);
	mdb->drDirCnt = SWAP_BE32 (vcb->vcbDirCnt);

	bcopy(vcb->vcbFndrInfo, mdb->drFndrInfo, sizeof(mdb->drFndrInfo));

	/* Sync the extents overflow B-tree file's metadata (3 extents max on HFS). */
	fp = VTOF(vcb->extentsRefNum);
	mdb->drXTExtRec[0].startBlock = SWAP_BE16 (fp->ff_extents[0].startBlock);
	mdb->drXTExtRec[0].blockCount = SWAP_BE16 (fp->ff_extents[0].blockCount);
	mdb->drXTExtRec[1].startBlock = SWAP_BE16 (fp->ff_extents[1].startBlock);
	mdb->drXTExtRec[1].blockCount = SWAP_BE16 (fp->ff_extents[1].blockCount);
	mdb->drXTExtRec[2].startBlock = SWAP_BE16 (fp->ff_extents[2].startBlock);
	mdb->drXTExtRec[2].blockCount = SWAP_BE16 (fp->ff_extents[2].blockCount);
	mdb->drXTFlSize = SWAP_BE32 (fp->ff_blocks * vcb->blockSize);
	mdb->drXTClpSiz = SWAP_BE32 (fp->ff_clumpsize);
	FTOC(fp)->c_flag &= ~C_MODIFIED;

	/* Sync the catalog B-tree file's metadata. */
	fp = VTOF(vcb->catalogRefNum);
	mdb->drCTExtRec[0].startBlock = SWAP_BE16 (fp->ff_extents[0].startBlock);
	mdb->drCTExtRec[0].blockCount = SWAP_BE16 (fp->ff_extents[0].blockCount);
	mdb->drCTExtRec[1].startBlock = SWAP_BE16 (fp->ff_extents[1].startBlock);
	mdb->drCTExtRec[1].blockCount = SWAP_BE16 (fp->ff_extents[1].blockCount);
	mdb->drCTExtRec[2].startBlock = SWAP_BE16 (fp->ff_extents[2].startBlock);
	mdb->drCTExtRec[2].blockCount = SWAP_BE16 (fp->ff_extents[2].blockCount);
	mdb->drCTFlSize = SWAP_BE32 (fp->ff_blocks * vcb->blockSize);
	mdb->drCTClpSiz = SWAP_BE32 (fp->ff_clumpsize);
	FTOC(fp)->c_flag &= ~C_MODIFIED;

	MarkVCBClean( vcb );

	hfs_unlock_mount (hfsmp);

	/* If requested, flush out the alternate MDB */
	if (altflush) {
		struct buf *alt_bp = NULL;

		if (buf_meta_bread(hfsmp->hfs_devvp, hfsmp->hfs_alt_id_sector, sector_size, NOCRED, &alt_bp) == 0) {
			bcopy(mdb, (char *)buf_dataptr(alt_bp) + HFS_ALT_OFFSET(sector_size), kMDBSize);

			/* Alternate MDB is always written synchronously; result ignored. */
			(void) VNOP_BWRITE(alt_bp);
		} else if (alt_bp)
			buf_brelse(alt_bp);
	}

	/* Async write unless the caller asked to wait for the I/O. */
	if (waitfor != MNT_WAIT)
		buf_bawrite(bp);
	else
		retval = VNOP_BWRITE(bp);

	return (retval);
}
#endif
3700
3701 /*
3702 * Flush any dirty in-memory mount data to the on-disk
3703 * volume header.
3704 *
3705 * Note: the on-disk volume signature is intentionally
3706 * not flushed since the on-disk "H+" and "HX" signatures
3707 * are always stored in-memory as "H+".
3708 */
3709 int
3710 hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush)
3711 {
3712 ExtendedVCB *vcb = HFSTOVCB(hfsmp);
3713 struct filefork *fp;
3714 HFSPlusVolumeHeader *volumeHeader, *altVH;
3715 int retval;
3716 struct buf *bp, *alt_bp;
3717 int i;
3718 daddr64_t priIDSector;
3719 int critical;
3720 u_int16_t signature;
3721 u_int16_t hfsversion;
3722
3723 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
3724 return(0);
3725 }
3726 #if CONFIG_HFS_STD
3727 if (hfsmp->hfs_flags & HFS_STANDARD) {
3728 return hfs_flushMDB(hfsmp, waitfor, altflush);
3729 }
3730 #endif
3731 critical = altflush;
3732 priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
3733 HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size));
3734
3735 if (hfs_start_transaction(hfsmp) != 0) {
3736 return EINVAL;
3737 }
3738
3739 bp = NULL;
3740 alt_bp = NULL;
3741
3742 retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
3743 HFS_PHYSBLK_ROUNDDOWN(priIDSector, hfsmp->hfs_log_per_phys),
3744 hfsmp->hfs_physical_block_size, NOCRED, &bp);
3745 if (retval) {
3746 printf("hfs: err %d reading VH blk (vol=%s)\n", retval, vcb->vcbVN);
3747 goto err_exit;
3748 }
3749
3750 volumeHeader = (HFSPlusVolumeHeader *)((char *)buf_dataptr(bp) +
3751 HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));
3752
3753 /*
3754 * Sanity check what we just read. If it's bad, try the alternate
3755 * instead.
3756 */
3757 signature = SWAP_BE16 (volumeHeader->signature);
3758 hfsversion = SWAP_BE16 (volumeHeader->version);
3759 if ((signature != kHFSPlusSigWord && signature != kHFSXSigWord) ||
3760 (hfsversion < kHFSPlusVersion) || (hfsversion > 100) ||
3761 (SWAP_BE32 (volumeHeader->blockSize) != vcb->blockSize)) {
3762 printf("hfs: corrupt VH on %s, sig 0x%04x, ver %d, blksize %d%s\n",
3763 vcb->vcbVN, signature, hfsversion,
3764 SWAP_BE32 (volumeHeader->blockSize),
3765 hfsmp->hfs_alt_id_sector ? "; trying alternate" : "");
3766 hfs_mark_volume_inconsistent(hfsmp);
3767
3768 if (hfsmp->hfs_alt_id_sector) {
3769 retval = buf_meta_bread(hfsmp->hfs_devvp,
3770 HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys),
3771 hfsmp->hfs_physical_block_size, NOCRED, &alt_bp);
3772 if (retval) {
3773 printf("hfs: err %d reading alternate VH (%s)\n", retval, vcb->vcbVN);
3774 goto err_exit;
3775 }
3776
3777 altVH = (HFSPlusVolumeHeader *)((char *)buf_dataptr(alt_bp) +
3778 HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size));
3779 signature = SWAP_BE16(altVH->signature);
3780 hfsversion = SWAP_BE16(altVH->version);
3781
3782 if ((signature != kHFSPlusSigWord && signature != kHFSXSigWord) ||
3783 (hfsversion < kHFSPlusVersion) || (kHFSPlusVersion > 100) ||
3784 (SWAP_BE32(altVH->blockSize) != vcb->blockSize)) {
3785 printf("hfs: corrupt alternate VH on %s, sig 0x%04x, ver %d, blksize %d\n",
3786 vcb->vcbVN, signature, hfsversion,
3787 SWAP_BE32(altVH->blockSize));
3788 retval = EIO;
3789 goto err_exit;
3790 }
3791
3792 /* The alternate is plausible, so use it. */
3793 bcopy(altVH, volumeHeader, kMDBSize);
3794 buf_brelse(alt_bp);
3795 alt_bp = NULL;
3796 } else {
3797 /* No alternate VH, nothing more we can do. */
3798 retval = EIO;
3799 goto err_exit;
3800 }
3801 }
3802
3803 if (hfsmp->jnl) {
3804 journal_modify_block_start(hfsmp->jnl, bp);
3805 }
3806
3807 /*
3808 * For embedded HFS+ volumes, update create date if it changed
3809 * (ie from a setattrlist call)
3810 */
3811 if ((vcb->hfsPlusIOPosOffset != 0) &&
3812 (SWAP_BE32 (volumeHeader->createDate) != vcb->localCreateDate)) {
3813 struct buf *bp2;
3814 HFSMasterDirectoryBlock *mdb;
3815
3816 retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
3817 HFS_PHYSBLK_ROUNDDOWN(HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size), hfsmp->hfs_log_per_phys),
3818 hfsmp->hfs_physical_block_size, NOCRED, &bp2);
3819 if (retval) {
3820 if (bp2)
3821 buf_brelse(bp2);
3822 retval = 0;
3823 } else {
3824 mdb = (HFSMasterDirectoryBlock *)(buf_dataptr(bp2) +
3825 HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));
3826
3827 if ( SWAP_BE32 (mdb->drCrDate) != vcb->localCreateDate )
3828 {
3829 if (hfsmp->jnl) {
3830 journal_modify_block_start(hfsmp->jnl, bp2);
3831 }
3832
3833 mdb->drCrDate = SWAP_BE32 (vcb->localCreateDate); /* pick up the new create date */
3834
3835 if (hfsmp->jnl) {
3836 journal_modify_block_end(hfsmp->jnl, bp2, NULL, NULL);
3837 } else {
3838 (void) VNOP_BWRITE(bp2); /* write out the changes */
3839 }
3840 }
3841 else
3842 {
3843 buf_brelse(bp2); /* just release it */
3844 }
3845 }
3846 }
3847
3848 hfs_lock_mount (hfsmp);
3849
3850 /* Note: only update the lower 16 bits worth of attributes */
3851 volumeHeader->attributes = SWAP_BE32 (vcb->vcbAtrb);
3852 volumeHeader->journalInfoBlock = SWAP_BE32 (vcb->vcbJinfoBlock);
3853 if (hfsmp->jnl) {
3854 volumeHeader->lastMountedVersion = SWAP_BE32 (kHFSJMountVersion);
3855 } else {
3856 volumeHeader->lastMountedVersion = SWAP_BE32 (kHFSPlusMountVersion);
3857 }
3858 volumeHeader->createDate = SWAP_BE32 (vcb->localCreateDate); /* volume create date is in local time */
3859 volumeHeader->modifyDate = SWAP_BE32 (to_hfs_time(vcb->vcbLsMod));
3860 volumeHeader->backupDate = SWAP_BE32 (to_hfs_time(vcb->vcbVolBkUp));
3861 volumeHeader->fileCount = SWAP_BE32 (vcb->vcbFilCnt);
3862 volumeHeader->folderCount = SWAP_BE32 (vcb->vcbDirCnt);
3863 volumeHeader->totalBlocks = SWAP_BE32 (vcb->totalBlocks);
3864 volumeHeader->freeBlocks = SWAP_BE32 (vcb->freeBlocks);
3865 volumeHeader->nextAllocation = SWAP_BE32 (vcb->nextAllocation);
3866 volumeHeader->rsrcClumpSize = SWAP_BE32 (vcb->vcbClpSiz);
3867 volumeHeader->dataClumpSize = SWAP_BE32 (vcb->vcbClpSiz);
3868 volumeHeader->nextCatalogID = SWAP_BE32 (vcb->vcbNxtCNID);
3869 volumeHeader->writeCount = SWAP_BE32 (vcb->vcbWrCnt);
3870 volumeHeader->encodingsBitmap = SWAP_BE64 (vcb->encodingsBitmap);
3871
3872 if (bcmp(vcb->vcbFndrInfo, volumeHeader->finderInfo, sizeof(volumeHeader->finderInfo)) != 0) {
3873 bcopy(vcb->vcbFndrInfo, volumeHeader->finderInfo, sizeof(volumeHeader->finderInfo));
3874 critical = 1;
3875 }
3876
3877 /*
3878 * System files are only dirty when altflush is set.
3879 */
3880 if (altflush == 0) {
3881 goto done;
3882 }
3883
3884 /* Sync Extents over-flow file meta data */
3885 fp = VTOF(vcb->extentsRefNum);
3886 if (FTOC(fp)->c_flag & C_MODIFIED) {
3887 for (i = 0; i < kHFSPlusExtentDensity; i++) {
3888 volumeHeader->extentsFile.extents[i].startBlock =
3889 SWAP_BE32 (fp->ff_extents[i].startBlock);
3890 volumeHeader->extentsFile.extents[i].blockCount =
3891 SWAP_BE32 (fp->ff_extents[i].blockCount);
3892 }
3893 volumeHeader->extentsFile.logicalSize = SWAP_BE64 (fp->ff_size);
3894 volumeHeader->extentsFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
3895 volumeHeader->extentsFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
3896 FTOC(fp)->c_flag &= ~C_MODIFIED;
3897 }
3898
3899 /* Sync Catalog file meta data */
3900 fp = VTOF(vcb->catalogRefNum);
3901 if (FTOC(fp)->c_flag & C_MODIFIED) {
3902 for (i = 0; i < kHFSPlusExtentDensity; i++) {
3903 volumeHeader->catalogFile.extents[i].startBlock =
3904 SWAP_BE32 (fp->ff_extents[i].startBlock);
3905 volumeHeader->catalogFile.extents[i].blockCount =
3906 SWAP_BE32 (fp->ff_extents[i].blockCount);
3907 }
3908 volumeHeader->catalogFile.logicalSize = SWAP_BE64 (fp->ff_size);
3909 volumeHeader->catalogFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
3910 volumeHeader->catalogFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
3911 FTOC(fp)->c_flag &= ~C_MODIFIED;
3912 }
3913
3914 /* Sync Allocation file meta data */
3915 fp = VTOF(vcb->allocationsRefNum);
3916 if (FTOC(fp)->c_flag & C_MODIFIED) {
3917 for (i = 0; i < kHFSPlusExtentDensity; i++) {
3918 volumeHeader->allocationFile.extents[i].startBlock =
3919 SWAP_BE32 (fp->ff_extents[i].startBlock);
3920 volumeHeader->allocationFile.extents[i].blockCount =
3921 SWAP_BE32 (fp->ff_extents[i].blockCount);
3922 }
3923 volumeHeader->allocationFile.logicalSize = SWAP_BE64 (fp->ff_size);
3924 volumeHeader->allocationFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
3925 volumeHeader->allocationFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
3926 FTOC(fp)->c_flag &= ~C_MODIFIED;
3927 }
3928
3929 /* Sync Attribute file meta data */
3930 if (hfsmp->hfs_attribute_vp) {
3931 fp = VTOF(hfsmp->hfs_attribute_vp);
3932 for (i = 0; i < kHFSPlusExtentDensity; i++) {
3933 volumeHeader->attributesFile.extents[i].startBlock =
3934 SWAP_BE32 (fp->ff_extents[i].startBlock);
3935 volumeHeader->attributesFile.extents[i].blockCount =
3936 SWAP_BE32 (fp->ff_extents[i].blockCount);
3937 }
3938 FTOC(fp)->c_flag &= ~C_MODIFIED;
3939 volumeHeader->attributesFile.logicalSize = SWAP_BE64 (fp->ff_size);
3940 volumeHeader->attributesFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
3941 volumeHeader->attributesFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
3942 }
3943
3944 /* Sync Startup file meta data */
3945 if (hfsmp->hfs_startup_vp) {
3946 fp = VTOF(hfsmp->hfs_startup_vp);
3947 if (FTOC(fp)->c_flag & C_MODIFIED) {
3948 for (i = 0; i < kHFSPlusExtentDensity; i++) {
3949 volumeHeader->startupFile.extents[i].startBlock =
3950 SWAP_BE32 (fp->ff_extents[i].startBlock);
3951 volumeHeader->startupFile.extents[i].blockCount =
3952 SWAP_BE32 (fp->ff_extents[i].blockCount);
3953 }
3954 volumeHeader->startupFile.logicalSize = SWAP_BE64 (fp->ff_size);
3955 volumeHeader->startupFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
3956 volumeHeader->startupFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
3957 FTOC(fp)->c_flag &= ~C_MODIFIED;
3958 }
3959 }
3960
3961 done:
3962 MarkVCBClean(hfsmp);
3963 hfs_unlock_mount (hfsmp);
3964
3965 /* If requested, flush out the alternate volume header */
3966 if (altflush && hfsmp->hfs_alt_id_sector) {
3967 if (buf_meta_bread(hfsmp->hfs_devvp,
3968 HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys),
3969 hfsmp->hfs_physical_block_size, NOCRED, &alt_bp) == 0) {
3970 if (hfsmp->jnl) {
3971 journal_modify_block_start(hfsmp->jnl, alt_bp);
3972 }
3973
3974 bcopy(volumeHeader, (char *)buf_dataptr(alt_bp) +
3975 HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size),
3976 kMDBSize);
3977
3978 if (hfsmp->jnl) {
3979 journal_modify_block_end(hfsmp->jnl, alt_bp, NULL, NULL);
3980 } else {
3981 (void) VNOP_BWRITE(alt_bp);
3982 }
3983 } else if (alt_bp)
3984 buf_brelse(alt_bp);
3985 }
3986
3987 if (hfsmp->jnl) {
3988 journal_modify_block_end(hfsmp->jnl, bp, NULL, NULL);
3989 } else {
3990 if (waitfor != MNT_WAIT)
3991 buf_bawrite(bp);
3992 else {
3993 retval = VNOP_BWRITE(bp);
3994 /* When critical data changes, flush the device cache */
3995 if (critical && (retval == 0)) {
3996 (void) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE,
3997 NULL, FWRITE, NULL);
3998 }
3999 }
4000 }
4001 hfs_end_transaction(hfsmp);
4002
4003 return (retval);
4004
4005 err_exit:
4006 if (alt_bp)
4007 buf_brelse(alt_bp);
4008 if (bp)
4009 buf_brelse(bp);
4010 hfs_end_transaction(hfsmp);
4011 return retval;
4012 }
4013
4014
4015 /*
4016 * Extend a file system.
4017 */
4018 int
4019 hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
4020 {
4021 struct proc *p = vfs_context_proc(context);
4022 kauth_cred_t cred = vfs_context_ucred(context);
4023 struct vnode *vp;
4024 struct vnode *devvp;
4025 struct buf *bp;
4026 struct filefork *fp = NULL;
4027 ExtendedVCB *vcb;
4028 struct cat_fork forkdata;
4029 u_int64_t oldsize;
4030 u_int64_t newblkcnt;
4031 u_int64_t prev_phys_block_count;
4032 u_int32_t addblks;
4033 u_int64_t sector_count;
4034 u_int32_t sector_size;
4035 u_int32_t phys_sector_size;
4036 u_int32_t overage_blocks;
4037 daddr64_t prev_alt_sector;
4038 daddr_t bitmapblks;
4039 int lockflags = 0;
4040 int error;
4041 int64_t oldBitmapSize;
4042 Boolean usedExtendFileC = false;
4043 int transaction_begun = 0;
4044
4045 devvp = hfsmp->hfs_devvp;
4046 vcb = HFSTOVCB(hfsmp);
4047
4048 /*
4049 * - HFS Plus file systems only.
4050 * - Journaling must be enabled.
4051 * - No embedded volumes.
4052 */
4053 if ((vcb->vcbSigWord == kHFSSigWord) ||
4054 (hfsmp->jnl == NULL) ||
4055 (vcb->hfsPlusIOPosOffset != 0)) {
4056 return (EPERM);
4057 }
4058 /*
4059 * If extending file system by non-root, then verify
4060 * ownership and check permissions.
4061 */
4062 if (suser(cred, NULL)) {
4063 error = hfs_vget(hfsmp, kHFSRootFolderID, &vp, 0, 0);
4064
4065 if (error)
4066 return (error);
4067 error = hfs_owner_rights(hfsmp, VTOC(vp)->c_uid, cred, p, 0);
4068 if (error == 0) {
4069 error = hfs_write_access(vp, cred, p, false);
4070 }
4071 hfs_unlock(VTOC(vp));
4072 vnode_put(vp);
4073 if (error)
4074 return (error);
4075
4076 error = vnode_authorize(devvp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, context);
4077 if (error)
4078 return (error);
4079 }
4080 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&sector_size, 0, context)) {
4081 return (ENXIO);
4082 }
4083 if (sector_size != hfsmp->hfs_logical_block_size) {
4084 return (ENXIO);
4085 }
4086 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&sector_count, 0, context)) {
4087 return (ENXIO);
4088 }
4089 if ((sector_size * sector_count) < newsize) {
4090 printf("hfs_extendfs: not enough space on device (vol=%s)\n", hfsmp->vcbVN);
4091 return (ENOSPC);
4092 }
4093 error = VNOP_IOCTL(devvp, DKIOCGETPHYSICALBLOCKSIZE, (caddr_t)&phys_sector_size, 0, context);
4094 if (error) {
4095 if ((error != ENOTSUP) && (error != ENOTTY)) {
4096 return (ENXIO);
4097 }
4098 /* If ioctl is not supported, force physical and logical sector size to be same */
4099 phys_sector_size = sector_size;
4100 }
4101 oldsize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
4102
4103 /*
4104 * Validate new size.
4105 */
4106 if ((newsize <= oldsize) || (newsize % sector_size) || (newsize % phys_sector_size)) {
4107 printf("hfs_extendfs: invalid size (newsize=%qu, oldsize=%qu)\n", newsize, oldsize);
4108 return (EINVAL);
4109 }
4110 newblkcnt = newsize / vcb->blockSize;
4111 if (newblkcnt > (u_int64_t)0xFFFFFFFF) {
4112 printf ("hfs_extendfs: current blockSize=%u too small for newsize=%qu\n", hfsmp->blockSize, newsize);
4113 return (EOVERFLOW);
4114 }
4115
4116 addblks = newblkcnt - vcb->totalBlocks;
4117
4118 if (hfs_resize_debug) {
4119 printf ("hfs_extendfs: old: size=%qu, blkcnt=%u\n", oldsize, hfsmp->totalBlocks);
4120 printf ("hfs_extendfs: new: size=%qu, blkcnt=%u, addblks=%u\n", newsize, (u_int32_t)newblkcnt, addblks);
4121 }
4122 printf("hfs_extendfs: will extend \"%s\" by %d blocks\n", vcb->vcbVN, addblks);
4123
4124 hfs_lock_mount (hfsmp);
4125 if (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) {
4126 hfs_unlock_mount(hfsmp);
4127 error = EALREADY;
4128 goto out;
4129 }
4130 hfsmp->hfs_flags |= HFS_RESIZE_IN_PROGRESS;
4131 hfs_unlock_mount (hfsmp);
4132
4133 /* Start with a clean journal. */
4134 hfs_journal_flush(hfsmp, TRUE);
4135
4136 /*
4137 * Enclose changes inside a transaction.
4138 */
4139 if (hfs_start_transaction(hfsmp) != 0) {
4140 error = EINVAL;
4141 goto out;
4142 }
4143 transaction_begun = 1;
4144
4145
4146 /* Update the hfsmp fields for the physical information about the device */
4147 prev_phys_block_count = hfsmp->hfs_logical_block_count;
4148 prev_alt_sector = hfsmp->hfs_alt_id_sector;
4149
4150 hfsmp->hfs_logical_block_count = sector_count;
4151 /*
4152 * Note that the new AltVH location must be based on the device's EOF rather than the new
4153 * filesystem's EOF, so we use logical_block_count here rather than newsize.
4154 */
4155 hfsmp->hfs_alt_id_sector = (hfsmp->hfsPlusIOPosOffset / sector_size) +
4156 HFS_ALT_SECTOR(sector_size, hfsmp->hfs_logical_block_count);
4157 hfsmp->hfs_logical_bytes = (uint64_t) sector_count * (uint64_t) sector_size;
4158
4159
4160 /*
4161 * Note: we take the attributes lock in case we have an attribute data vnode
4162 * which needs to change size.
4163 */
4164 lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
4165 vp = vcb->allocationsRefNum;
4166 fp = VTOF(vp);
4167 bcopy(&fp->ff_data, &forkdata, sizeof(forkdata));
4168
4169 /*
4170 * Calculate additional space required (if any) by allocation bitmap.
4171 */
4172 oldBitmapSize = fp->ff_size;
4173 bitmapblks = roundup((newblkcnt+7) / 8, vcb->vcbVBMIOSize) / vcb->blockSize;
4174 if (bitmapblks > (daddr_t)fp->ff_blocks)
4175 bitmapblks -= fp->ff_blocks;
4176 else
4177 bitmapblks = 0;
4178
4179 /*
4180 * The allocation bitmap can contain unused bits that are beyond end of
4181 * current volume's allocation blocks. Usually they are supposed to be
4182 * zero'ed out but there can be cases where they might be marked as used.
4183 * After extending the file system, those bits can represent valid
4184 * allocation blocks, so we mark all the bits from the end of current
4185 * volume to end of allocation bitmap as "free".
4186 *
4187 * Figure out the number of overage blocks before proceeding though,
4188 * so we don't add more bytes to our I/O than necessary.
4189 * First figure out the total number of blocks representable by the
4190 * end of the bitmap file vs. the total number of blocks in the new FS.
4191 * Then subtract away the number of blocks in the current FS. This is how much
4192 * we can mark as free right now without having to grow the bitmap file.
4193 */
4194 overage_blocks = fp->ff_blocks * vcb->blockSize * 8;
4195 overage_blocks = MIN (overage_blocks, newblkcnt);
4196 overage_blocks -= vcb->totalBlocks;
4197
4198 BlockMarkFreeUnused(vcb, vcb->totalBlocks, overage_blocks);
4199
4200 if (bitmapblks > 0) {
4201 daddr64_t blkno;
4202 daddr_t blkcnt;
4203 off_t bytesAdded;
4204
4205 /*
4206 * Get the bitmap's current size (in allocation blocks) so we know
4207 * where to start zero filling once the new space is added. We've
4208 * got to do this before the bitmap is grown.
4209 */
4210 blkno = (daddr64_t)fp->ff_blocks;
4211
4212 /*
4213 * Try to grow the allocation file in the normal way, using allocation
4214 * blocks already existing in the file system. This way, we might be
4215 * able to grow the bitmap contiguously, or at least in the metadata
4216 * zone.
4217 */
4218 error = ExtendFileC(vcb, fp, bitmapblks * vcb->blockSize, 0,
4219 kEFAllMask | kEFNoClumpMask | kEFReserveMask
4220 | kEFMetadataMask | kEFContigMask, &bytesAdded);
4221
4222 if (error == 0) {
4223 usedExtendFileC = true;
4224 } else {
4225 /*
4226 * If the above allocation failed, fall back to allocating the new
4227 * extent of the bitmap from the space we're going to add. Since those
4228 * blocks don't yet belong to the file system, we have to update the
4229 * extent list directly, and manually adjust the file size.
4230 */
4231 bytesAdded = 0;
4232 error = AddFileExtent(vcb, fp, vcb->totalBlocks, bitmapblks);
4233 if (error) {
4234 printf("hfs_extendfs: error %d adding extents\n", error);
4235 goto out;
4236 }
4237 fp->ff_blocks += bitmapblks;
4238 VTOC(vp)->c_blocks = fp->ff_blocks;
4239 VTOC(vp)->c_flag |= C_MODIFIED;
4240 }
4241
4242 /*
4243 * Update the allocation file's size to include the newly allocated
4244 * blocks. Note that ExtendFileC doesn't do this, which is why this
4245 * statement is outside the above "if" statement.
4246 */
4247 fp->ff_size += (u_int64_t)bitmapblks * (u_int64_t)vcb->blockSize;
4248
4249 /*
4250 * Zero out the new bitmap blocks.
4251 */
4252 {
4253
4254 bp = NULL;
4255 blkcnt = bitmapblks;
4256 while (blkcnt > 0) {
4257 error = (int)buf_meta_bread(vp, blkno, vcb->blockSize, NOCRED, &bp);
4258 if (error) {
4259 if (bp) {
4260 buf_brelse(bp);
4261 }
4262 break;
4263 }
4264 bzero((char *)buf_dataptr(bp), vcb->blockSize);
4265 buf_markaged(bp);
4266 error = (int)buf_bwrite(bp);
4267 if (error)
4268 break;
4269 --blkcnt;
4270 ++blkno;
4271 }
4272 }
4273 if (error) {
4274 printf("hfs_extendfs: error %d clearing blocks\n", error);
4275 goto out;
4276 }
4277 /*
4278 * Mark the new bitmap space as allocated.
4279 *
4280 * Note that ExtendFileC will have marked any blocks it allocated, so
4281 * this is only needed if we used AddFileExtent. Also note that this
4282 * has to come *after* the zero filling of new blocks in the case where
4283 * we used AddFileExtent (since the part of the bitmap we're touching
4284 * is in those newly allocated blocks).
4285 */
4286 if (!usedExtendFileC) {
4287 error = BlockMarkAllocated(vcb, vcb->totalBlocks, bitmapblks);
4288 if (error) {
4289 printf("hfs_extendfs: error %d setting bitmap\n", error);
4290 goto out;
4291 }
4292 vcb->freeBlocks -= bitmapblks;
4293 }
4294 }
4295 /*
4296 * Mark the new alternate VH as allocated.
4297 */
4298 if (vcb->blockSize == 512)
4299 error = BlockMarkAllocated(vcb, vcb->totalBlocks + addblks - 2, 2);
4300 else
4301 error = BlockMarkAllocated(vcb, vcb->totalBlocks + addblks - 1, 1);
4302 if (error) {
4303 printf("hfs_extendfs: error %d setting bitmap (VH)\n", error);
4304 goto out;
4305 }
4306 /*
4307 * Mark the old alternate VH as free.
4308 */
4309 if (vcb->blockSize == 512)
4310 (void) BlockMarkFree(vcb, vcb->totalBlocks - 2, 2);
4311 else
4312 (void) BlockMarkFree(vcb, vcb->totalBlocks - 1, 1);
4313 /*
4314 * Adjust file system variables for new space.
4315 */
4316 vcb->totalBlocks += addblks;
4317 vcb->freeBlocks += addblks;
4318 MarkVCBDirty(vcb);
4319 error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
4320 if (error) {
4321 printf("hfs_extendfs: couldn't flush volume headers (%d)", error);
4322 /*
4323 * Restore to old state.
4324 */
4325 if (usedExtendFileC) {
4326 (void) TruncateFileC(vcb, fp, oldBitmapSize, 0, FORK_IS_RSRC(fp),
4327 FTOC(fp)->c_fileid, false);
4328 } else {
4329 fp->ff_blocks -= bitmapblks;
4330 fp->ff_size -= (u_int64_t)bitmapblks * (u_int64_t)vcb->blockSize;
4331 /*
4332 * No need to mark the excess blocks free since those bitmap blocks
4333 * are no longer part of the bitmap. But we do need to undo the
4334 * effect of the "vcb->freeBlocks -= bitmapblks" above.
4335 */
4336 vcb->freeBlocks += bitmapblks;
4337 }
4338 vcb->totalBlocks -= addblks;
4339 vcb->freeBlocks -= addblks;
4340 hfsmp->hfs_logical_block_count = prev_phys_block_count;
4341 hfsmp->hfs_alt_id_sector = prev_alt_sector;
4342 MarkVCBDirty(vcb);
4343 if (vcb->blockSize == 512) {
4344 if (BlockMarkAllocated(vcb, vcb->totalBlocks - 2, 2)) {
4345 hfs_mark_volume_inconsistent(hfsmp);
4346 }
4347 } else {
4348 if (BlockMarkAllocated(vcb, vcb->totalBlocks - 1, 1)) {
4349 hfs_mark_volume_inconsistent(hfsmp);
4350 }
4351 }
4352 goto out;
4353 }
4354 /*
4355 * Invalidate the old alternate volume header.
4356 */
4357 bp = NULL;
4358 if (prev_alt_sector) {
4359 if (buf_meta_bread(hfsmp->hfs_devvp,
4360 HFS_PHYSBLK_ROUNDDOWN(prev_alt_sector, hfsmp->hfs_log_per_phys),
4361 hfsmp->hfs_physical_block_size, NOCRED, &bp) == 0) {
4362 journal_modify_block_start(hfsmp->jnl, bp);
4363
4364 bzero((char *)buf_dataptr(bp) + HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size), kMDBSize);
4365
4366 journal_modify_block_end(hfsmp->jnl, bp, NULL, NULL);
4367 } else if (bp) {
4368 buf_brelse(bp);
4369 }
4370 }
4371
4372 /*
4373 * Update the metadata zone size based on current volume size
4374 */
4375 hfs_metadatazone_init(hfsmp, false);
4376
4377 /*
4378 * Adjust the size of hfsmp->hfs_attrdata_vp
4379 */
4380 if (hfsmp->hfs_attrdata_vp) {
4381 struct cnode *attr_cp;
4382 struct filefork *attr_fp;
4383
4384 if (vnode_get(hfsmp->hfs_attrdata_vp) == 0) {
4385 attr_cp = VTOC(hfsmp->hfs_attrdata_vp);
4386 attr_fp = VTOF(hfsmp->hfs_attrdata_vp);
4387
4388 attr_cp->c_blocks = newblkcnt;
4389 attr_fp->ff_blocks = newblkcnt;
4390 attr_fp->ff_extents[0].blockCount = newblkcnt;
4391 attr_fp->ff_size = (off_t) newblkcnt * hfsmp->blockSize;
4392 ubc_setsize(hfsmp->hfs_attrdata_vp, attr_fp->ff_size);
4393 vnode_put(hfsmp->hfs_attrdata_vp);
4394 }
4395 }
4396
4397 /*
4398 * Update the R/B Tree if necessary. Since we don't have to drop the systemfile
4399 * locks in the middle of these operations like we do in the truncate case
4400 * where we have to relocate files, we can only update the red-black tree
4401 * if there were actual changes made to the bitmap. Also, we can't really scan the
4402 * new portion of the bitmap before it has been allocated. The BlockMarkAllocated
4403 * routines are smart enough to avoid the r/b tree if the portion they are manipulating is
4404 * not currently controlled by the tree.
4405 *
4406 * We only update hfsmp->allocLimit if totalBlocks actually increased.
4407 */
4408 if (error == 0) {
4409 UpdateAllocLimit(hfsmp, hfsmp->totalBlocks);
4410 }
4411
4412 /* Release all locks and sync up journal content before
4413 * checking and extending, if required, the journal
4414 */
4415 if (lockflags) {
4416 hfs_systemfile_unlock(hfsmp, lockflags);
4417 lockflags = 0;
4418 }
4419 if (transaction_begun) {
4420 hfs_end_transaction(hfsmp);
4421 hfs_journal_flush(hfsmp, TRUE);
4422 transaction_begun = 0;
4423 }
4424
4425 /* Increase the journal size, if required. */
4426 error = hfs_extend_journal(hfsmp, sector_size, sector_count, context);
4427 if (error) {
4428 printf ("hfs_extendfs: Could not extend journal size\n");
4429 goto out_noalloc;
4430 }
4431
4432 /* Log successful extending */
4433 printf("hfs_extendfs: extended \"%s\" to %d blocks (was %d blocks)\n",
4434 hfsmp->vcbVN, hfsmp->totalBlocks, (u_int32_t)(oldsize/hfsmp->blockSize));
4435
4436 out:
4437 if (error && fp) {
4438 /* Restore allocation fork. */
4439 bcopy(&forkdata, &fp->ff_data, sizeof(forkdata));
4440 VTOC(vp)->c_blocks = fp->ff_blocks;
4441
4442 }
4443
4444 out_noalloc:
4445 hfs_lock_mount (hfsmp);
4446 hfsmp->hfs_flags &= ~HFS_RESIZE_IN_PROGRESS;
4447 hfs_unlock_mount (hfsmp);
4448 if (lockflags) {
4449 hfs_systemfile_unlock(hfsmp, lockflags);
4450 }
4451 if (transaction_begun) {
4452 hfs_end_transaction(hfsmp);
4453 hfs_journal_flush(hfsmp, FALSE);
4454 /* Just to be sure, sync all data to the disk */
4455 (void) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
4456 }
4457 if (error) {
4458 printf ("hfs_extentfs: failed error=%d on vol=%s\n", MacToVFSError(error), hfsmp->vcbVN);
4459 }
4460
4461 return MacToVFSError(error);
4462 }
4463
/* Smallest volume size (32 MiB) that hfs_truncatefs will shrink down to. */
#define HFS_MIN_SIZE (32LL * 1024LL * 1024LL)
4465
/*
 * Truncate a file system (while still mounted).
 *
 * Shrink the mounted, journaled HFS Plus volume identified by 'hfsmp' to
 * 'newsize' bytes.  Allocated blocks beyond the new end of volume are
 * relocated (via hfs_reclaimspace) before the volume header is rewritten.
 * Returns 0 on success or a VFS errno (EALREADY, EPERM, EINVAL, ENOSPC,
 * EAGAIN, ...) on failure; failure restores the in-memory free block count.
 */
int
hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
{
	struct buf *bp = NULL;
	u_int64_t oldsize;
	u_int32_t newblkcnt;
	u_int32_t reclaimblks = 0;
	int lockflags = 0;
	int transaction_begun = 0;
	Boolean updateFreeBlocks = false;
	Boolean disable_sparse = false;
	int error = 0;

	/* Only one resize may be in flight; also reset resize progress counters. */
	hfs_lock_mount (hfsmp);
	if (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) {
		hfs_unlock_mount (hfsmp);
		return (EALREADY);
	}
	hfsmp->hfs_flags |= HFS_RESIZE_IN_PROGRESS;
	hfsmp->hfs_resize_blocksmoved = 0;
	hfsmp->hfs_resize_totalblocks = 0;
	hfsmp->hfs_resize_progress = 0;
	hfs_unlock_mount (hfsmp);

	/*
	 * - Journaled HFS Plus volumes only.
	 * - No embedded volumes.
	 */
	if ((hfsmp->jnl == NULL) ||
	    (hfsmp->hfsPlusIOPosOffset != 0)) {
		error = EPERM;
		goto out;
	}
	oldsize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
	newblkcnt = newsize / hfsmp->blockSize;
	reclaimblks = hfsmp->totalBlocks - newblkcnt;

	if (hfs_resize_debug) {
		printf ("hfs_truncatefs: old: size=%qu, blkcnt=%u, freeblks=%u\n", oldsize, hfsmp->totalBlocks, hfs_freeblks(hfsmp, 1));
		printf ("hfs_truncatefs: new: size=%qu, blkcnt=%u, reclaimblks=%u\n", newsize, newblkcnt, reclaimblks);
	}

	/* Make sure new size is valid. */
	if ((newsize < HFS_MIN_SIZE) ||
	    (newsize >= oldsize) ||
	    (newsize % hfsmp->hfs_logical_block_size) ||
	    (newsize % hfsmp->hfs_physical_block_size)) {
		printf ("hfs_truncatefs: invalid size (newsize=%qu, oldsize=%qu)\n", newsize, oldsize);
		error = EINVAL;
		goto out;
	}

	/*
	 * Make sure that the file system has enough free blocks reclaim.
	 *
	 * Before resize, the disk is divided into four zones -
	 * 	A. Allocated_Stationary - These are allocated blocks that exist
	 * 	   before the new end of disk.  These blocks will not be
	 * 	   relocated or modified during resize.
	 * 	B. Free_Stationary - These are free blocks that exist before the
	 * 	   new end of disk.  These blocks can be used for any new
	 * 	   allocations during resize, including allocation for relocating
	 * 	   data from the area of disk being reclaimed.
	 * 	C. Allocated_To-Reclaim - These are allocated blocks that exist
	 * 	   beyond the new end of disk.  These blocks need to be reclaimed
	 * 	   during resize by allocating equal number of blocks in Free
	 * 	   Stationary zone and copying the data.
	 * 	D. Free_To-Reclaim - These are free blocks that exist beyond the
	 * 	   new end of disk.  Nothing special needs to be done to reclaim
	 * 	   them.
	 *
	 * Total number of blocks on the disk before resize:
	 * ------------------------------------------------
	 * 	Total Blocks = Allocated_Stationary + Free_Stationary +
	 * 	               Allocated_To-Reclaim + Free_To-Reclaim
	 *
	 * Total number of blocks that need to be reclaimed:
	 * ------------------------------------------------
	 *	Blocks to Reclaim = Allocated_To-Reclaim + Free_To-Reclaim
	 *
	 * Note that the check below also makes sure that we have enough space
	 * to relocate data from Allocated_To-Reclaim to Free_Stationary.
	 * Therefore we do not need to check total number of blocks to relocate
	 * later in the code.
	 *
	 * The condition below gets converted to:
	 *
	 * Allocated To-Reclaim + Free To-Reclaim >= Free Stationary + Free To-Reclaim
	 *
	 * which is equivalent to:
	 *
	 *              Allocated To-Reclaim >= Free Stationary
	 */
	if (reclaimblks >= hfs_freeblks(hfsmp, 1)) {
		printf("hfs_truncatefs: insufficient space (need %u blocks; have %u free blocks)\n", reclaimblks, hfs_freeblks(hfsmp, 1));
		error = ENOSPC;
		goto out;
	}

	/* Start with a clean journal. */
	hfs_journal_flush(hfsmp, TRUE);

	if (hfs_start_transaction(hfsmp) != 0) {
		error = EINVAL;
		goto out;
	}
	transaction_begun = 1;

	/* Take the bitmap lock to update the alloc limit field */
	lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);

	/*
	 * Prevent new allocations from using the part we're trying to truncate.
	 *
	 * NOTE: allocLimit is set to the allocation block number where the new
	 * alternate volume header will be.  That way there will be no files to
	 * interfere with allocating the new alternate volume header, and no files
	 * in the allocation blocks beyond (i.e. the blocks we're trying to
	 * truncate away.
	 *
	 * Also shrink the red-black tree if needed.
	 *
	 * NOTE(review): the UpdateAllocLimit return value stored in 'error' is
	 * not checked here before it is overwritten below — confirm this is
	 * intentional (failure would only be caught indirectly).
	 */
	if (hfsmp->blockSize == 512) {
		error = UpdateAllocLimit (hfsmp, newblkcnt - 2);
	}
	else {
		error = UpdateAllocLimit (hfsmp, newblkcnt - 1);
	}

	/* Sparse devices use first fit allocation which is not ideal
	 * for volume resize which requires best fit allocation.  If a
	 * sparse device is being truncated, disable the sparse device
	 * property temporarily for the duration of resize.  Also reset
	 * the free extent cache so that it is rebuilt as sorted by
	 * totalBlocks instead of startBlock.
	 *
	 * Note that this will affect all allocations on the volume and
	 * ideal fix would be just to modify resize-related allocations,
	 * but it will result in complexity like handling of two free
	 * extent caches sorted differently, etc.  So we stick to this
	 * solution for now.
	 */
	hfs_lock_mount (hfsmp);
	if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
		hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
		ResetVCBFreeExtCache(hfsmp);
		disable_sparse = true;
	}

	/*
	 * Update the volume free block count to reflect the total number
	 * of free blocks that will exist after a successful resize.
	 * Relocation of extents will result in no net change in the total
	 * free space on the disk.  Therefore the code that allocates
	 * space for new extent and deallocates the old extent explicitly
	 * prevents updating the volume free block count.  It will also
	 * prevent false disk full error when the number of blocks in
	 * an extent being relocated is more than the free blocks that
	 * will exist after the volume is resized.
	 */
	hfsmp->freeBlocks -= reclaimblks;
	updateFreeBlocks = true;	/* remember to undo this on any error path */
	hfs_unlock_mount(hfsmp);

	if (lockflags) {
		hfs_systemfile_unlock(hfsmp, lockflags);
		lockflags = 0;
	}

	/*
	 * Update the metadata zone size to match the new volume size,
	 * and if it too less, metadata zone might be disabled.
	 */
	hfs_metadatazone_init(hfsmp, false);

	/*
	 * If some files have blocks at or beyond the location of the
	 * new alternate volume header, recalculate free blocks and
	 * reclaim blocks.  Otherwise just update free blocks count.
	 *
	 * The current allocLimit is set to the location of new alternate
	 * volume header, and reclaimblks are the total number of blocks
	 * that need to be reclaimed.  So the check below is really
	 * ignoring the blocks allocated for old alternate volume header.
	 */
	if (hfs_isallocated(hfsmp, hfsmp->allocLimit, reclaimblks)) {
		/*
		 * hfs_reclaimspace will use separate transactions when
		 * relocating files (so we don't overwhelm the journal).
		 */
		hfs_end_transaction(hfsmp);
		transaction_begun = 0;

		/* Attempt to reclaim some space. */
		error = hfs_reclaimspace(hfsmp, hfsmp->allocLimit, reclaimblks, context);
		if (error != 0) {
			printf("hfs_truncatefs: couldn't reclaim space on %s (error=%d)\n", hfsmp->vcbVN, error);
			error = ENOSPC;
			goto out;
		}
		if (hfs_start_transaction(hfsmp) != 0) {
			error = EINVAL;
			goto out;
		}
		transaction_begun = 1;

		/* Check if we're clear now. */
		error = hfs_isallocated(hfsmp, hfsmp->allocLimit, reclaimblks);
		if (error != 0) {
			printf("hfs_truncatefs: didn't reclaim enough space on %s (error=%d)\n", hfsmp->vcbVN, error);
			error = EAGAIN;	/* tell client to try again */
			goto out;
		}
	}

	/*
	 * Note: we take the attributes lock in case we have an attribute data vnode
	 * which needs to change size.
	 */
	lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);

	/*
	 * Allocate last 1KB for alternate volume header.
	 */
	error = BlockMarkAllocated(hfsmp, hfsmp->allocLimit, (hfsmp->blockSize == 512) ? 2 : 1);
	if (error) {
		printf("hfs_truncatefs: Error %d allocating new alternate volume header\n", error);
		goto out;
	}

	/*
	 * Mark the old alternate volume header as free.
	 * We don't bother shrinking allocation bitmap file.
	 */
	if (hfsmp->blockSize == 512)
		(void) BlockMarkFree(hfsmp, hfsmp->totalBlocks - 2, 2);
	else
		(void) BlockMarkFree(hfsmp, hfsmp->totalBlocks - 1, 1);

	/*
	 * Invalidate the existing alternate volume header.
	 *
	 * Don't include this in a transaction (don't call journal_modify_block)
	 * since this block will be outside of the truncated file system!
	 */
	if (hfsmp->hfs_alt_id_sector) {
		error = buf_meta_bread(hfsmp->hfs_devvp,
				HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys),
				hfsmp->hfs_physical_block_size, NOCRED, &bp);
		if (error == 0) {
			bzero((void*)((char *)buf_dataptr(bp) + HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size)), kMDBSize);
			(void) VNOP_BWRITE(bp);
		} else {
			if (bp) {
				buf_brelse(bp);
			}
		}
		bp = NULL;
	}

	/* Log successful shrinking. */
	printf("hfs_truncatefs: shrank \"%s\" to %d blocks (was %d blocks)\n",
	       hfsmp->vcbVN, newblkcnt, hfsmp->totalBlocks);

	/*
	 * Adjust file system variables and flush them to disk.
	 */
	hfsmp->totalBlocks = newblkcnt;
	hfsmp->hfs_logical_block_count = newsize / hfsmp->hfs_logical_block_size;
	hfsmp->hfs_logical_bytes = (uint64_t) hfsmp->hfs_logical_block_count * (uint64_t) hfsmp->hfs_logical_block_size;

	/*
	 * Note that although the logical block size is updated here, it is only done for
	 * the benefit of the partition management software.  The logical block count change
	 * has not yet actually been propagated to the disk device yet.
	 */

	hfsmp->hfs_alt_id_sector = HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, hfsmp->hfs_logical_block_count);
	MarkVCBDirty(hfsmp);
	error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
	if (error)
		panic("hfs_truncatefs: unexpected error flushing volume header (%d)\n", error);

	/*
	 * Adjust the size of hfsmp->hfs_attrdata_vp
	 */
	if (hfsmp->hfs_attrdata_vp) {
		struct cnode *cp;
		struct filefork *fp;

		if (vnode_get(hfsmp->hfs_attrdata_vp) == 0) {
			cp = VTOC(hfsmp->hfs_attrdata_vp);
			fp = VTOF(hfsmp->hfs_attrdata_vp);

			cp->c_blocks = newblkcnt;
			fp->ff_blocks = newblkcnt;
			fp->ff_extents[0].blockCount = newblkcnt;
			fp->ff_size = (off_t) newblkcnt * hfsmp->blockSize;
			ubc_setsize(hfsmp->hfs_attrdata_vp, fp->ff_size);
			vnode_put(hfsmp->hfs_attrdata_vp);
		}
	}

out:
	/*
	 * Update the allocLimit to acknowledge the last one or two blocks now.
	 * Add it to the tree as well if necessary.
	 */
	UpdateAllocLimit (hfsmp, hfsmp->totalBlocks);

	hfs_lock_mount (hfsmp);
	if (disable_sparse == true) {
		/* Now that resize is completed, set the volume to be sparse
		 * device again so that all further allocations will be first
		 * fit instead of best fit.  Reset free extent cache so that
		 * it is rebuilt.
		 */
		hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
		ResetVCBFreeExtCache(hfsmp);
	}

	/* Undo the speculative freeBlocks reduction made before reclaiming. */
	if (error && (updateFreeBlocks == true)) {
		hfsmp->freeBlocks += reclaimblks;
	}

	if (hfsmp->nextAllocation >= hfsmp->allocLimit) {
		hfsmp->nextAllocation = hfsmp->hfs_metazone_end + 1;
	}
	hfsmp->hfs_flags &= ~HFS_RESIZE_IN_PROGRESS;
	hfs_unlock_mount (hfsmp);

	/* On error, reset the metadata zone for original volume size */
	if (error && (updateFreeBlocks == true)) {
		hfs_metadatazone_init(hfsmp, false);
	}

	if (lockflags) {
		hfs_systemfile_unlock(hfsmp, lockflags);
	}
	if (transaction_begun) {
		hfs_end_transaction(hfsmp);
		hfs_journal_flush(hfsmp, FALSE);
		/* Just to be sure, sync all data to the disk */
		(void) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
	}

	if (error) {
		printf ("hfs_truncatefs: failed error=%d on vol=%s\n", MacToVFSError(error), hfsmp->vcbVN);
	}

	return MacToVFSError(error);
}
4821
4822
/*
 * Invalidate the physical block numbers associated with buffer cache blocks
 * in the given extent of the given vnode.
 */
struct hfs_inval_blk_no {
	daddr64_t sectorStart;	/* first device sector of the extent */
	daddr64_t sectorCount;	/* number of device sectors in the extent */
};
4831 static int
4832 hfs_invalidate_block_numbers_callback(buf_t bp, void *args_in)
4833 {
4834 daddr64_t blkno;
4835 struct hfs_inval_blk_no *args;
4836
4837 blkno = buf_blkno(bp);
4838 args = args_in;
4839
4840 if (blkno >= args->sectorStart && blkno < args->sectorStart+args->sectorCount)
4841 buf_setblkno(bp, buf_lblkno(bp));
4842
4843 return BUF_RETURNED;
4844 }
4845 static void
4846 hfs_invalidate_sectors(struct vnode *vp, daddr64_t sectorStart, daddr64_t sectorCount)
4847 {
4848 struct hfs_inval_blk_no args;
4849 args.sectorStart = sectorStart;
4850 args.sectorCount = sectorCount;
4851
4852 buf_iterate(vp, hfs_invalidate_block_numbers_callback, BUF_SCAN_DIRTY|BUF_SCAN_CLEAN, &args);
4853 }
4854
4855
4856 /*
4857 * Copy the contents of an extent to a new location. Also invalidates the
4858 * physical block number of any buffer cache block in the copied extent
4859 * (so that if the block is written, it will go through VNOP_BLOCKMAP to
4860 * determine the new physical block number).
4861 *
4862 * At this point, for regular files, we hold the truncate lock exclusive
4863 * and the cnode lock exclusive.
4864 */
4865 static int
4866 hfs_copy_extent(
4867 struct hfsmount *hfsmp,
4868 struct vnode *vp, /* The file whose extent is being copied. */
4869 u_int32_t oldStart, /* The start of the source extent. */
4870 u_int32_t newStart, /* The start of the destination extent. */
4871 u_int32_t blockCount, /* The number of allocation blocks to copy. */
4872 vfs_context_t context)
4873 {
4874 int err = 0;
4875 size_t bufferSize;
4876 void *buffer = NULL;
4877 struct vfsioattr ioattr;
4878 buf_t bp = NULL;
4879 off_t resid;
4880 size_t ioSize;
4881 u_int32_t ioSizeSectors; /* Device sectors in this I/O */
4882 daddr64_t srcSector, destSector;
4883 u_int32_t sectorsPerBlock = hfsmp->blockSize / hfsmp->hfs_logical_block_size;
4884 #if CONFIG_PROTECT
4885 int cpenabled = 0;
4886 #endif
4887
4888 /*
4889 * Sanity check that we have locked the vnode of the file we're copying.
4890 *
4891 * But since hfs_systemfile_lock() doesn't actually take the lock on
4892 * the allocation file if a journal is active, ignore the check if the
4893 * file being copied is the allocation file.
4894 */
4895 struct cnode *cp = VTOC(vp);
4896 if (cp != hfsmp->hfs_allocation_cp && cp->c_lockowner != current_thread())
4897 panic("hfs_copy_extent: vp=%p (cp=%p) not owned?\n", vp, cp);
4898
4899 #if CONFIG_PROTECT
4900 /*
4901 * Prepare the CP blob and get it ready for use, if necessary.
4902 *
4903 * Note that we specifically *exclude* system vnodes (catalog, bitmap, extents, EAs),
4904 * because they are implicitly protected via the media key on iOS. As such, they
4905 * must not be relocated except with the media key. So it is OK to not pass down
4906 * a special cpentry to the IOMedia/LwVM code for handling.
4907 */
4908 if (!vnode_issystem (vp) && vnode_isreg(vp) && cp_fs_protected (hfsmp->hfs_mp)) {
4909 int cp_err = 0;
4910 /*
4911 * Ideally, the file whose extents we are about to manipulate is using the
4912 * newer offset-based IVs so that we can manipulate it regardless of the
4913 * current lock state. However, we must maintain support for older-style
4914 * EAs.
4915 *
4916 * For the older EA case, the IV was tied to the device LBA for file content.
4917 * This means that encrypted data cannot be moved from one location to another
4918 * in the filesystem without garbling the IV data. As a result, we need to
4919 * access the file's plaintext because we cannot do our AES-symmetry trick
4920 * here. This requires that we attempt a key-unwrap here (via cp_handle_relocate)
4921 * to make forward progress. If the keys are unavailable then we will
4922 * simply stop the resize in its tracks here since we cannot move
4923 * this extent at this time.
4924 */
4925 if ((cp->c_cpentry->cp_flags & CP_OFF_IV_ENABLED) == 0) {
4926 cp_err = cp_handle_relocate(cp, hfsmp);
4927 }
4928
4929 if (cp_err) {
4930 printf ("hfs_copy_extent: cp_handle_relocate failed (%d) \n", cp_err);
4931 return cp_err;
4932 }
4933
4934 cpenabled = 1;
4935 }
4936 #endif
4937
4938
4939 /*
4940 * Determine the I/O size to use
4941 *
4942 * NOTE: Many external drives will result in an ioSize of 128KB.
4943 * TODO: Should we use a larger buffer, doing several consecutive
4944 * reads, then several consecutive writes?
4945 */
4946 vfs_ioattr(hfsmp->hfs_mp, &ioattr);
4947 bufferSize = MIN(ioattr.io_maxreadcnt, ioattr.io_maxwritecnt); /* Largest transfer both read and write paths accept */
4948 if (kmem_alloc(kernel_map, (vm_offset_t*) &buffer, bufferSize))
4949 return ENOMEM;
4950
4951 /* Get a buffer for doing the I/O */
4952 bp = buf_alloc(hfsmp->hfs_devvp);
4953 buf_setdataptr(bp, (uintptr_t)buffer);
4954
4955 resid = (off_t) blockCount * (off_t) hfsmp->blockSize;
4956 srcSector = (daddr64_t) oldStart * hfsmp->blockSize / hfsmp->hfs_logical_block_size;
4957 destSector = (daddr64_t) newStart * hfsmp->blockSize / hfsmp->hfs_logical_block_size;
/*
 * Copy loop: each iteration reads up to bufferSize bytes from the old
 * location into the bounce buffer, then writes those same bytes out to
 * the new location, reusing the single buf_t for both directions.
 */
4958 while (resid > 0) {
4959 ioSize = MIN(bufferSize, (size_t) resid);
4960 ioSizeSectors = ioSize / hfsmp->hfs_logical_block_size;
4961
4962 /* Prepare the buffer for reading */
4963 buf_reset(bp, B_READ);
4964 buf_setsize(bp, ioSize);
4965 buf_setcount(bp, ioSize);
4966 buf_setblkno(bp, srcSector);
4967 buf_setlblkno(bp, srcSector);
4968
4969 /*
4970 * Note that because this is an I/O to the device vp
4971 * it is correct to have lblkno and blkno both point to the
4972 * start sector being read from. If it were being issued against the
4973 * underlying file then that would be different.
4974 */
4975
4976 /* Attach the new CP blob to the buffer if needed */
4977 #if CONFIG_PROTECT
4978 if (cpenabled) {
4979 if (cp->c_cpentry->cp_flags & CP_OFF_IV_ENABLED) {
4980 /* attach the RELOCATION_INFLIGHT flag for the underlying call to VNOP_STRATEGY */
4981 cp->c_cpentry->cp_flags |= CP_RELOCATION_INFLIGHT;
4982 buf_setcpaddr(bp, hfsmp->hfs_resize_cpentry);
4983 }
4984 else {
4985 /*
4986 * Use the cnode's cp key. This file is tied to the
4987 * LBAs of the physical blocks that it occupies.
4988 */
4989 buf_setcpaddr (bp, cp->c_cpentry);
4990 }
4991
4992 /* Initialize the content protection file offset to start at 0 */
4993 buf_setcpoff (bp, 0);
4994 }
4995 #endif
4996
4997 /* Do the read */
4998 err = VNOP_STRATEGY(bp);
4999 if (!err)
5000 err = buf_biowait(bp);
5001 if (err) {
5002 #if CONFIG_PROTECT
5003 /* Turn the flag off in error cases. */
5004 if (cpenabled) {
5005 cp->c_cpentry->cp_flags &= ~CP_RELOCATION_INFLIGHT;
5006 }
5007 #endif
5008 printf("hfs_copy_extent: Error %d from VNOP_STRATEGY (read)\n", err);
5009 break;
5010 }
5011
5012 /* Prepare the buffer for writing */
5013 buf_reset(bp, B_WRITE);
5014 buf_setsize(bp, ioSize);
5015 buf_setcount(bp, ioSize);
5016 buf_setblkno(bp, destSector);
5017 buf_setlblkno(bp, destSector);
5018 if (vnode_issystem(vp) && journal_uses_fua(hfsmp->jnl))
5019 buf_markfua(bp);
5020
5021 #if CONFIG_PROTECT
5022 /* Attach the CP to the buffer if needed */
5023 if (cpenabled) {
5024 if (cp->c_cpentry->cp_flags & CP_OFF_IV_ENABLED) {
5025 buf_setcpaddr(bp, hfsmp->hfs_resize_cpentry);
5026 }
5027 else {
5028 /*
5029 * Use the cnode's CP key. This file is still tied
5030 * to the LBAs of the physical blocks that it occupies.
5031 */
5032 buf_setcpaddr (bp, cp->c_cpentry);
5033 }
5034 /*
5035 * The last STRATEGY call may have updated the cp file offset behind our
5036 * back, so we cannot trust it. Re-initialize the content protection
5037 * file offset back to 0 before initiating the write portion of this I/O.
5038 */
5039 buf_setcpoff (bp, 0);
5040 }
5041 #endif
5042
5043 /* Do the write */
5044 vnode_startwrite(hfsmp->hfs_devvp);
5045 err = VNOP_STRATEGY(bp);
5046 if (!err) {
5047 err = buf_biowait(bp);
5048 }
5049 #if CONFIG_PROTECT
5050 /* Turn the flag off regardless once the strategy call finishes. */
5051 if (cpenabled) {
5052 cp->c_cpentry->cp_flags &= ~CP_RELOCATION_INFLIGHT;
5053 }
5054 #endif
5055 if (err) {
5056 printf("hfs_copy_extent: Error %d from VNOP_STRATEGY (write)\n", err);
5057 break;
5058 }
5059
5060 resid -= ioSize;
5061 srcSector += ioSizeSectors;
5062 destSector += ioSizeSectors;
5063 }
5064 if (bp)
5065 buf_free(bp);
5066 if (buffer)
5067 kmem_free(kernel_map, (vm_offset_t)buffer, bufferSize);
5068
5069 /* Make sure all writes have been flushed to disk. */
5070 if (vnode_issystem(vp) && !journal_uses_fua(hfsmp->jnl)) {
5071 err = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
5072 if (err) {
5073 printf("hfs_copy_extent: DKIOCSYNCHRONIZECACHE failed (%d)\n", err);
5074 err = 0; /* Don't fail the copy. */
5075 }
5076 }
5077
/* On success, drop stale cached physical block numbers for the moved extent. */
5078 if (!err)
5079 hfs_invalidate_sectors(vp, (daddr64_t)oldStart*sectorsPerBlock, (daddr64_t)blockCount*sectorsPerBlock);
5080
5081 return err;
5082 }
5083
5084
5085 /* Structure to store state of reclaiming extents from a
5086 * given file. hfs_reclaim_file()/hfs_reclaim_xattr()
5087 * initializes the values in this structure which are then
5088 * used by code that reclaims and splits the extents.
5089 */
5090 struct hfs_reclaim_extent_info {
5091 struct vnode *vp; /* Vnode of the file whose extents are being reclaimed */
5092 u_int32_t fileID; /* Catalog node ID of the file that owns the extents */
5093 u_int8_t forkType; /* Data fork or resource fork of the file */
5094 u_int8_t is_dirlink; /* Extent belongs to directory hard link */
5095 u_int8_t is_sysfile; /* Extent belongs to system file */
5096 u_int8_t is_xattr; /* Extent belongs to extent-based xattr */
5097 u_int8_t extent_index; /* Index of the current extent within the current extent record */
5098 int lockflags; /* Locks that reclaim and split code should grab before modifying the extent record */
5099 u_int32_t blocks_relocated; /* Total blocks relocated for this file till now */
5100 u_int32_t recStartBlock; /* File allocation block number (FABN) for current extent record */
5101 u_int32_t cur_blockCount; /* Number of allocation blocks that have been checked for reclaim */
5102 struct filefork *catalog_fp; /* If non-NULL, extent is from catalog record */
5103 union record {
5104 HFSPlusExtentRecord overflow;/* Extent record from overflow extents btree */
5105 HFSPlusAttrRecord xattr; /* Attribute record for large EAs */
5106 } record;
5107 HFSPlusExtentDescriptor *extents; /* Pointer to current extent record being processed.
5108 * For catalog extent record, points to the correct
5109 * extent information in filefork. For overflow extent
5110 * record, or xattr record, points to extent record
5111 * in the structure above
5112 */
5113 struct cat_desc *dirlink_desc;
5114 struct cat_attr *dirlink_attr;
5115 struct filefork *dirlink_fork; /* For directory hard links, fp points actually to this */
5116 struct BTreeIterator *iterator; /* Shared read/write iterator, hfs_reclaim_file/xattr()
5117 * use it for reading and hfs_reclaim_extent()/hfs_split_extent()
5118 * use it for writing updated extent record
5119 */
5120 struct FSBufferDescriptor btdata; /* Shared btdata for reading/writing extent record, same as iterator above */
5121 u_int16_t recordlen;
5122 int overflow_count; /* For debugging, counter for overflow extent record */
5123 FCB *fcb; /* Pointer to the current btree being traversed */
5124 };
5125
5126 /*
5127 * Split the current extent into two extents, with first extent
5128 * to contain given number of allocation blocks. Splitting of
5129 * extent creates one new extent entry which can result in
5130 * shifting of many entries through all the extent records of a
5131 * file, and/or creating a new extent record in the overflow
5132 * extent btree.
5133 *
5134 * Example:
5135 * The diagram below represents two consecutive extent records,
5136 * for simplicity, lets call them record X and X+1 respectively.
5137 * Interesting extent entries have been denoted by letters.
5138 * If the letter is unchanged before and after split, it means
5139 * that the extent entry was not modified during the split.
5140 * A '.' means that the entry remains unchanged after the split
5141 * and is not relevant for our example. A '0' means that the
5142 * extent entry is empty.
5143 *
5144 * If there isn't sufficient contiguous free space to relocate
5145 * an extent (extent "C" below), we will have to break the one
5146 * extent into multiple smaller extents, and relocate each of
5147 * the smaller extents individually. The way we do this is by
5148 * finding the largest contiguous free space that is currently
5149 * available (N allocation blocks), and then convert extent "C"
5150 * into two extents, C1 and C2, that occupy exactly the same
5151 * allocation blocks as extent C. Extent C1 is the first
5152 * N allocation blocks of extent C, and extent C2 is the remainder
5153 * of extent C. Then we can relocate extent C1 since we know
5154 * we have enough contiguous free space to relocate it in its
5155 * entirety. We then repeat the process starting with extent C2.
5156 *
5157 * In record X, only the entries following entry C are shifted, and
5158 * the original entry C is replaced with two entries C1 and C2 which
5159 * are actually two extent entries for contiguous allocation blocks.
5160 *
5161 * Note that the entry E from record X is shifted into record X+1 as
5162 * the new first entry. Since the first entry of record X+1 is updated,
5163 * the FABN will also get updated with the blockCount of entry E.
5164 * This also results in shifting of all extent entries in record X+1.
5165 * Note that the number of empty entries after the split has been
5166 * changed from 3 to 2.
5167 *
5168 * Before:
5169 * record X record X+1
5170 * ---------------------===--------- ---------------------------------
5171 * | A | . | . | . | B | C | D | E | | F | . | . | . | G | 0 | 0 | 0 |
5172 * ---------------------===--------- ---------------------------------
5173 *
5174 * After:
5175 * ---------------------=======----- ---------------------------------
5176 * | A | . | . | . | B | C1| C2| D | | E | F | . | . | . | G | 0 | 0 |
5177 * ---------------------=======----- ---------------------------------
5178 *
5179 * C1.startBlock = C.startBlock
5180 * C1.blockCount = N
5181 *
5182 * C2.startBlock = C.startBlock + N
5183 * C2.blockCount = C.blockCount - N
5184 *
5185 * FABN = old FABN - E.blockCount
5186 *
5187 * Inputs:
5188 * extent_info - This is the structure that contains state about
5189 * the current file, extent, and extent record that
5190 * is being relocated. This structure is shared
5191 * among code that traverses through all the extents
5192 * of the file, code that relocates extents, and
5193 * code that splits the extent.
5194 * newBlockCount - The desired blockCount of the first extent after
5195 * a successful split operation.
5196 * Output:
5197 * Zero on success, non-zero on failure.
5198 */
5199 static int
5200 hfs_split_extent(struct hfs_reclaim_extent_info *extent_info, uint32_t newBlockCount)
5201 {
5202 int error = 0;
5203 int index = extent_info->extent_index; /* Index of the extent being split within the current record */
5204 int i;
5205 HFSPlusExtentDescriptor shift_extent; /* Extent entry that should be shifted into next extent record */
5206 HFSPlusExtentDescriptor last_extent;
5207 HFSPlusExtentDescriptor *extents; /* Pointer to current extent record being manipulated */
5208 HFSPlusExtentRecord *extents_rec = NULL;
5209 HFSPlusExtentKey *extents_key = NULL;
5210 HFSPlusAttrRecord *xattr_rec = NULL;
5211 HFSPlusAttrKey *xattr_key = NULL;
5212 struct BTreeIterator iterator;
5213 struct FSBufferDescriptor btdata;
5214 uint16_t reclen;
5215 uint32_t read_recStartBlock; /* Starting allocation block number to read old extent record */
5216 uint32_t write_recStartBlock; /* Starting allocation block number to insert newly updated extent record */
5217 Boolean create_record = false;
5218 Boolean is_xattr;
5219 struct cnode *cp;
5220
5221 is_xattr = extent_info->is_xattr;
5222 extents = extent_info->extents;
5223 cp = VTOC(extent_info->vp);
5224
/* A zero-length first half means there is nothing to split. */
5225 if (newBlockCount == 0) {
5226 if (hfs_resize_debug) {
5227 printf ("hfs_split_extent: No splitting required for newBlockCount=0\n");
5228 }
5229 return error;
5230 }
5231
5232 if (hfs_resize_debug) {
5233 printf ("hfs_split_extent: Split record:%u recStartBlock=%u %u:(%u,%u) for %u blocks\n", extent_info->overflow_count, extent_info->recStartBlock, index, extents[index].startBlock, extents[index].blockCount, newBlockCount);
5234 }
5235
5236 /* Extents overflow btree can not have more than 8 extents.
5237 * No split allowed if the 8th extent is already used.
5238 */
5239 if ((extent_info->fileID == kHFSExtentsFileID) && (extents[kHFSPlusExtentDensity - 1].blockCount != 0)) {
5240 printf ("hfs_split_extent: Maximum 8 extents allowed for extents overflow btree, cannot split further.\n");
5241 error = ENOSPC;
5242 goto out;
5243 }
5244
5245 /* Determine the starting allocation block number for the following
5246 * overflow extent record, if any, before the current record
5247 * gets modified.
5248 */
5249 read_recStartBlock = extent_info->recStartBlock;
5250 for (i = 0; i < kHFSPlusExtentDensity; i++) {
5251 if (extents[i].blockCount == 0) {
5252 break;
5253 }
5254 read_recStartBlock += extents[i].blockCount;
5255 }
5256
5257 /* Shift and split */
5258 if (index == kHFSPlusExtentDensity-1) {
5259 /* The new extent created after split will go into following overflow extent record */
5260 shift_extent.startBlock = extents[index].startBlock + newBlockCount;
5261 shift_extent.blockCount = extents[index].blockCount - newBlockCount;
5262
5263 /* Last extent in the record will be split, so nothing to shift */
5264 } else {
5265 /* Splitting of extents can result in at most of one
5266 * extent entry to be shifted into following overflow extent
5267 * record. So, store the last extent entry for later.
5268 */
5269 shift_extent = extents[kHFSPlusExtentDensity-1];
5270 if ((hfs_resize_debug) && (shift_extent.blockCount != 0)) {
5271 printf ("hfs_split_extent: Save 7:(%u,%u) to shift into overflow record\n", shift_extent.startBlock, shift_extent.blockCount);
5272 }
5273
5274 /* Start shifting extent information from the end of the extent
5275 * record to the index where we want to insert the new extent.
5276 * Note that kHFSPlusExtentDensity-1 is already saved above, and
5277 * does not need to be shifted. The extent entry that is being
5278 * split does not get shifted.
5279 */
5280 for (i = kHFSPlusExtentDensity-2; i > index; i--) {
5281 if (hfs_resize_debug) {
5282 if (extents[i].blockCount) {
5283 printf ("hfs_split_extent: Shift %u:(%u,%u) to %u:(%u,%u)\n", i, extents[i].startBlock, extents[i].blockCount, i+1, extents[i].startBlock, extents[i].blockCount);
5284 }
5285 }
5286 extents[i+1] = extents[i];
5287 }
5288 }
5289
5290 if (index == kHFSPlusExtentDensity-1) {
5291 /* The second half of the extent being split will be the overflow
5292 * entry that will go into following overflow extent record. The
5293 * value has been stored in 'shift_extent' above, so there is
5294 * nothing to be done here.
5295 */
5296 } else {
5297 /* Update the values in the second half of the extent being split
5298 * before updating the first half of the split. Note that the
5299 * extent to split or first half of the split is at index 'index'
5300 * and a new extent or second half of the split will be inserted at
5301 * 'index+1' or into following overflow extent record.
5302 */
5303 extents[index+1].startBlock = extents[index].startBlock + newBlockCount;
5304 extents[index+1].blockCount = extents[index].blockCount - newBlockCount;
5305 }
5306 /* Update the extent being split, only the block count will change */
5307 extents[index].blockCount = newBlockCount;
5308
5309 if (hfs_resize_debug) {
5310 printf ("hfs_split_extent: Split %u:(%u,%u) and ", index, extents[index].startBlock, extents[index].blockCount);
5311 if (index != kHFSPlusExtentDensity-1) {
5312 printf ("%u:(%u,%u)\n", index+1, extents[index+1].startBlock, extents[index+1].blockCount);
5313 } else {
5314 printf ("overflow:(%u,%u)\n", shift_extent.startBlock, shift_extent.blockCount);
5315 }
5316 }
5317
5318 /* Write out information about the newly split extent to the disk */
5319 if (extent_info->catalog_fp) {
5320 /* (extent_info->catalog_fp != NULL) means the newly split
5321 * extent exists in the catalog record. This means that
5322 * the cnode was updated. Therefore, to write out the changes,
5323 * mark the cnode as modified. We cannot call hfs_update()
5324 * in this function because the caller hfs_reclaim_extent()
5325 * is holding the catalog lock currently.
5326 */
5327 cp->c_flag |= C_MODIFIED;
5328 } else {
5329 /* The newly split extent is for large EAs or is in overflow
5330 * extent record, so update it directly in the btree using the
5331 * iterator information from the shared extent_info structure
5332 */
5333 error = BTReplaceRecord(extent_info->fcb, extent_info->iterator,
5334 &(extent_info->btdata), extent_info->recordlen);
5335 if (error) {
5336 printf ("hfs_split_extent: fileID=%u BTReplaceRecord returned error=%d\n", extent_info->fileID, error);
5337 goto out;
5338 }
5339 }
5340
5341 /* No extent entry to be shifted into another extent overflow record */
5342 if (shift_extent.blockCount == 0) {
5343 if (hfs_resize_debug) {
5344 printf ("hfs_split_extent: No extent entry to be shifted into overflow records\n");
5345 }
5346 error = 0;
5347 goto out;
5348 }
5349
5350 /* The overflow extent entry has to be shifted into an extent
5351 * overflow record. This means that we might have to shift
5352 * extent entries from all subsequent overflow records by one.
5353 * We start iteration from the first record to the last record,
5354 * and shift the extent entry from one record to another.
5355 * We might have to create a new extent record for the last
5356 * extent entry for the file.
5357 */
5358
5359 /* Initialize iterator to search the next record */
5360 bzero(&iterator, sizeof(iterator));
5361 if (is_xattr) {
5362 /* Copy the key from the iterator that was used to update the modified attribute record. */
5363 xattr_key = (HFSPlusAttrKey *)&(iterator.key);
5364 bcopy((HFSPlusAttrKey *)&(extent_info->iterator->key), xattr_key, sizeof(HFSPlusAttrKey));
5365 /* Note: xattr_key->startBlock will be initialized later in the iteration loop */
5366
5367 MALLOC(xattr_rec, HFSPlusAttrRecord *,
5368 sizeof(HFSPlusAttrRecord), M_TEMP, M_WAITOK);
5369 if (xattr_rec == NULL) {
5370 error = ENOMEM;
5371 goto out;
5372 }
5373 btdata.bufferAddress = xattr_rec;
5374 btdata.itemSize = sizeof(HFSPlusAttrRecord);
5375 btdata.itemCount = 1;
5376 extents = xattr_rec->overflowExtents.extents;
5377 } else {
5378 /* Initialize the extent key for the current file */
5379 extents_key = (HFSPlusExtentKey *) &(iterator.key);
5380 extents_key->keyLength = kHFSPlusExtentKeyMaximumLength;
5381 extents_key->forkType = extent_info->forkType;
5382 extents_key->fileID = extent_info->fileID;
5383 /* Note: extents_key->startBlock will be initialized later in the iteration loop */
5384
5385 MALLOC(extents_rec, HFSPlusExtentRecord *,
5386 sizeof(HFSPlusExtentRecord), M_TEMP, M_WAITOK);
5387 if (extents_rec == NULL) {
5388 error = ENOMEM;
5389 goto out;
5390 }
5391 btdata.bufferAddress = extents_rec;
5392 btdata.itemSize = sizeof(HFSPlusExtentRecord);
5393 btdata.itemCount = 1;
5394 extents = extents_rec[0];
5395 }
5396
5397 /* The overflow extent entry has to be shifted into an extent
5398 * overflow record. This means that we might have to shift
5399 * extent entries from all subsequent overflow records by one.
5400 * We start iteration from the first record to the last record,
5401 * examine one extent record in each iteration and shift one
5402 * extent entry from one record to another. We might have to
5403 * create a new extent record for the last extent entry for the
5404 * file.
5405 *
5406 * If shift_extent.blockCount is non-zero, it means that there is
5407 * an extent entry that needs to be shifted into the next
5408 * overflow extent record. We keep on going till there are no such
5409 * entries left to be shifted. This will also change the starting
5410 * allocation block number of the extent record which is part of
5411 * the key for the extent record in each iteration. Note that
5412 * because the extent record key is changing while we are searching,
5413 * the record can not be updated directly, instead it has to be
5414 * deleted and inserted again.
5415 */
5416 while (shift_extent.blockCount) {
5417 if (hfs_resize_debug) {
5418 printf ("hfs_split_extent: Will shift (%u,%u) into overflow record with startBlock=%u\n", shift_extent.startBlock, shift_extent.blockCount, read_recStartBlock);
5419 }
5420
5421 /* Search if there is any existing overflow extent record
5422 * that matches the current file and the logical start block
5423 * number.
5424 *
5425 * For this, the logical start block number in the key is
5426 * the value calculated based on the logical start block
5427 * number of the current extent record and the total number
5428 * of blocks existing in the current extent record.
5429 */
5430 if (is_xattr) {
5431 xattr_key->startBlock = read_recStartBlock;
5432 } else {
5433 extents_key->startBlock = read_recStartBlock;
5434 }
5435 error = BTSearchRecord(extent_info->fcb, &iterator, &btdata, &reclen, &iterator);
5436 if (error) {
5437 if (error != btNotFound) {
5438 printf ("hfs_split_extent: fileID=%u startBlock=%u BTSearchRecord error=%d\n", extent_info->fileID, read_recStartBlock, error);
5439 goto out;
5440 }
5441 /* No matching record was found, so create a new extent record.
5442 * Note: Since no record was found, we can't rely on the
5443 * btree key in the iterator any longer. This will be initialized
5444 * later before we insert the record.
5445 */
5446 create_record = true;
5447 }
5448
5449 /* The extra extent entry from the previous record is being inserted
5450 * as the first entry in the current extent record. This will change
5451 * the file allocation block number (FABN) of the current extent
5452 * record, which is the startBlock value from the extent record key.
5453 * Since one extra entry is being inserted in the record, the new
5454 * FABN for the record will be less than the old FABN by the number of blocks
5455 * in the new extent entry being inserted at the start. We have to
5456 * do this before we update read_recStartBlock to point at the
5457 * startBlock of the following record.
5458 */
5459 write_recStartBlock = read_recStartBlock - shift_extent.blockCount;
5460 if (hfs_resize_debug) {
5461 if (create_record) {
5462 printf ("hfs_split_extent: No records found for startBlock=%u, will create new with startBlock=%u\n", read_recStartBlock, write_recStartBlock);
5463 }
5464 }
5465
5466 /* Now update the read_recStartBlock to account for total number
5467 * of blocks in this extent record. It will now point to the
5468 * starting allocation block number for the next extent record.
5469 */
5470 for (i = 0; i < kHFSPlusExtentDensity; i++) {
5471 if (extents[i].blockCount == 0) {
5472 break;
5473 }
5474 read_recStartBlock += extents[i].blockCount;
5475 }
5476
5477 if (create_record == true) {
5478 /* Initialize new record content with only one extent entry */
5479 bzero(extents, sizeof(HFSPlusExtentRecord));
5480 /* The new record will contain only one extent entry */
5481 extents[0] = shift_extent;
5482 /* There are no more overflow extents to be shifted */
5483 shift_extent.startBlock = shift_extent.blockCount = 0;
5484
5485 if (is_xattr) {
5486 /* BTSearchRecord above returned btNotFound,
5487 * but since the attribute btree is never empty
5488 * if we are trying to insert new overflow
5489 * record for the xattrs, the extents_key will
5490 * contain correct data. So we don't need to
5491 * re-initialize it again like below.
5492 */
5493
5494 /* Initialize the new xattr record */
5495 xattr_rec->recordType = kHFSPlusAttrExtents;
5496 xattr_rec->overflowExtents.reserved = 0;
5497 reclen = sizeof(HFSPlusAttrExtents);
5498 } else {
5499 /* BTSearchRecord above returned btNotFound,
5500 * which means that extents_key content might
5501 * not correspond to the record that we are
5502 * trying to create, especially when the extents
5503 * overflow btree is empty. So we reinitialize
5504 * the extents_key again always.
5505 */
5506 extents_key->keyLength = kHFSPlusExtentKeyMaximumLength;
5507 extents_key->forkType = extent_info->forkType;
5508 extents_key->fileID = extent_info->fileID;
5509
5510 /* Initialize the new extent record */
5511 reclen = sizeof(HFSPlusExtentRecord);
5512 }
5513 } else {
5514 /* The overflow extent entry from previous record will be
5515 * the first entry in this extent record. If the last
5516 * extent entry in this record is valid, it will be shifted
5517 * into the following extent record as its first entry. So
5518 * save the last entry before shifting entries in current
5519 * record.
5520 */
5521 last_extent = extents[kHFSPlusExtentDensity-1];
5522
5523 /* Shift all entries by one index towards the end */
5524 for (i = kHFSPlusExtentDensity-2; i >= 0; i--) {
5525 extents[i+1] = extents[i];
5526 }
5527
5528 /* Overflow extent entry saved from previous record
5529 * is now the first entry in the current record.
5530 */
5531 extents[0] = shift_extent;
5532
5533 if (hfs_resize_debug) {
5534 printf ("hfs_split_extent: Shift overflow=(%u,%u) to record with updated startBlock=%u\n", shift_extent.startBlock, shift_extent.blockCount, write_recStartBlock);
5535 }
5536
5537 /* The last entry from current record will be the
5538 * overflow entry which will be the first entry for
5539 * the following extent record.
5540 */
5541 shift_extent = last_extent;
5542
5543 /* Since the key->startBlock is being changed for this record,
5544 * it should be deleted and inserted with the new key.
5545 */
5546 error = BTDeleteRecord(extent_info->fcb, &iterator);
5547 if (error) {
5548 printf ("hfs_split_extent: fileID=%u startBlock=%u BTDeleteRecord error=%d\n", extent_info->fileID, read_recStartBlock, error);
5549 goto out;
5550 }
5551 if (hfs_resize_debug) {
5552 printf ("hfs_split_extent: Deleted extent record with startBlock=%u\n", (is_xattr ? xattr_key->startBlock : extents_key->startBlock));
5553 }
5554 }
5555
5556 /* Insert the newly created or modified extent record */
5557 bzero(&iterator.hint, sizeof(iterator.hint));
5558 if (is_xattr) {
5559 xattr_key->startBlock = write_recStartBlock;
5560 } else {
5561 extents_key->startBlock = write_recStartBlock;
5562 }
5563 error = BTInsertRecord(extent_info->fcb, &iterator, &btdata, reclen);
5564 if (error) {
5565 printf ("hfs_split_extent: fileID=%u, startBlock=%u BTInsertRecord error=%d\n", extent_info->fileID, write_recStartBlock, error);
5566 goto out;
5567 }
5568 if (hfs_resize_debug) {
5569 printf ("hfs_split_extent: Inserted extent record with startBlock=%u\n", write_recStartBlock);
5570 }
5571 }
5572
5573 out:
5574 /*
5575 * Extents overflow btree or attributes btree headers might have
5576 * been modified during the split/shift operation, so flush the
5577 * changes to the disk while we are inside journal transaction.
5578 * We should only be able to generate I/O that modifies the B-Tree
5579 * header nodes while we're in the middle of a journal transaction.
5580 * Otherwise it might result in panic during unmount.
5581 */
5582 BTFlushPath(extent_info->fcb);
5583
5584 if (extents_rec) {
5585 FREE (extents_rec, M_TEMP);
5586 }
5587 if (xattr_rec) {
5588 FREE (xattr_rec, M_TEMP);
5589 }
5590 return error;
5591 }
5592
5593
5594 /*
5595 * Relocate an extent if it lies beyond the expected end of volume.
5596 *
5597 * This function is called for every extent of the file being relocated.
5598 * It allocates space for relocation, copies the data, deallocates
5599 * the old extent, and updates the corresponding on-disk extent. If the function
5600 * does not find contiguous space to relocate an extent, it splits the
5601 * extent in smaller size to be able to relocate it out of the area of
5602 * disk being reclaimed. As an optimization, if an extent lies partially
5603 * in the area of the disk being reclaimed, it is split so that we only
5604 * have to relocate the area that was overlapping with the area of disk
5605 * being reclaimed.
5606 *
5607 * Note that every extent is relocated in its own transaction so that
5608 * they do not overwhelm the journal. This function handles the extent
5609 * record that exists in the catalog record, extent record from overflow
5610 * extents btree, and extents for large EAs.
5611 *
5612 * Inputs:
5613 * extent_info - This is the structure that contains state about
5614 * the current file, extent, and extent record that
5615 * is being relocated. This structure is shared
5616 * among code that traverses through all the extents
5617 * of the file, code that relocates extents, and
5618 * code that splits the extent.
5619 */
/*
 * Relocate one extent (extent_info->extents[extent_info->extent_index])
 * if any of its blocks lie at or beyond allocLimit.  Runs inside its own
 * journal transaction with the system-file locks given by
 * extent_info->lockflags held exclusive.  Returns 0 on success or an
 * errno/HFS error code; on failure any newly allocated blocks are freed.
 */
static int
hfs_reclaim_extent(struct hfsmount *hfsmp, const u_long allocLimit, struct hfs_reclaim_extent_info *extent_info, vfs_context_t context)
{
	int error = 0;
	int index;
	struct cnode *cp;
	u_int32_t oldStartBlock;
	u_int32_t oldBlockCount;
	u_int32_t newStartBlock;
	u_int32_t newBlockCount;
	u_int32_t roundedBlockCount;
	uint16_t node_size;
	uint32_t remainder_blocks;
	u_int32_t alloc_flags;
	int blocks_allocated = false;

	index = extent_info->extent_index;
	cp = VTOC(extent_info->vp);

	oldStartBlock = extent_info->extents[index].startBlock;
	oldBlockCount = extent_info->extents[index].blockCount;

	/* Extra-verbose trace, deliberately compiled out via "0 &&" */
	if (0 && hfs_resize_debug) {
		printf ("hfs_reclaim_extent: Examine record:%u recStartBlock=%u, %u:(%u,%u)\n", extent_info->overflow_count, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount);
	}

	/* If the current extent lies completely within allocLimit,
	 * it does not require any relocation.
	 */
	if ((oldStartBlock + oldBlockCount) <= allocLimit) {
		extent_info->cur_blockCount += oldBlockCount;
		return error;
	}

	/* Every extent should be relocated in its own transaction
	 * to make sure that we don't overflow the journal buffer.
	 */
	error = hfs_start_transaction(hfsmp);
	if (error) {
		return error;
	}
	extent_info->lockflags = hfs_systemfile_lock(hfsmp, extent_info->lockflags, HFS_EXCLUSIVE_LOCK);

	/* Check if the extent lies partially in the area to reclaim,
	 * i.e. it starts before allocLimit and ends beyond allocLimit.
	 * We have already skipped extents that lie completely within
	 * allocLimit in the check above, so we only check for the
	 * startBlock.  If it lies partially, split it so that we
	 * only relocate part of the extent.
	 */
	if (oldStartBlock < allocLimit) {
		newBlockCount = allocLimit - oldStartBlock;

		if (hfs_resize_debug) {
			int idx = extent_info->extent_index;
			printf ("hfs_reclaim_extent: Split straddling extent %u:(%u,%u) for %u blocks\n", idx, extent_info->extents[idx].startBlock, extent_info->extents[idx].blockCount, newBlockCount);
		}

		/* If the extent belongs to a btree, check and trim
		 * it to be multiple of the node size.
		 */
		if (extent_info->is_sysfile) {
			node_size = get_btree_nodesize(extent_info->vp);
			/* If the btree node size is less than the block size,
			 * splitting this extent will not split a node across
			 * different extents.  So we only check and trim if
			 * node size is more than the allocation block size.
			 */
			if (node_size > hfsmp->blockSize) {
				remainder_blocks = newBlockCount % (node_size / hfsmp->blockSize);
				if (remainder_blocks) {
					newBlockCount -= remainder_blocks;
					if (hfs_resize_debug) {
						printf ("hfs_reclaim_extent: Round-down newBlockCount to be multiple of nodeSize, node_allocblks=%u, old=%u, new=%u\n", node_size/hfsmp->blockSize, newBlockCount + remainder_blocks, newBlockCount);
					}
				}
			}
			/* The newBlockCount is zero because of rounding-down so that
			 * btree nodes are not split across extents.  Therefore this
			 * straddling extent across resize-boundary does not require
			 * splitting.  Skip over to relocating of complete extent.
			 */
			if (newBlockCount == 0) {
				if (hfs_resize_debug) {
					printf ("hfs_reclaim_extent: After round-down newBlockCount=0, skip split, relocate full extent\n");
				}
				goto relocate_full_extent;
			}
		}

		/* Split the extents into two parts --- the first extent lies
		 * completely within allocLimit and therefore does not require
		 * relocation.  The second extent will require relocation which
		 * will be handled when the caller calls this function again
		 * for the next extent.
		 */
		error = hfs_split_extent(extent_info, newBlockCount);
		if (error == 0) {
			/* Split success, no relocation required */
			goto out;
		}
		/* Split failed, so try to relocate entire extent */
		if (hfs_resize_debug) {
			int idx = extent_info->extent_index;
			printf ("hfs_reclaim_extent: Split straddling extent %u:(%u,%u) for %u blocks failed, relocate full extent\n", idx, extent_info->extents[idx].startBlock, extent_info->extents[idx].blockCount, newBlockCount);
		}
	}

relocate_full_extent:
	/* At this point, the current extent requires relocation.
	 * We will try to allocate space equal to the size of the extent
	 * being relocated first to try to relocate it without splitting.
	 * If the allocation fails, we will try to allocate contiguous
	 * blocks out of metadata zone.  If that allocation also fails,
	 * then we will take a whatever contiguous block run is returned
	 * by the allocation, split the extent into two parts, and then
	 * relocate the first splitted extent.
	 */
	alloc_flags = HFS_ALLOC_FORCECONTIG | HFS_ALLOC_SKIPFREEBLKS;
	if (extent_info->is_sysfile) {
		alloc_flags |= HFS_ALLOC_METAZONE;
	}

	error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount, alloc_flags,
			&newStartBlock, &newBlockCount);
	if ((extent_info->is_sysfile == false) &&
	    ((error == dskFulErr) || (error == ENOSPC))) {
		/* For non-system files, try reallocating space in metadata zone */
		alloc_flags |= HFS_ALLOC_METAZONE;
		error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount,
				alloc_flags, &newStartBlock, &newBlockCount);
	}
	if ((error == dskFulErr) || (error == ENOSPC)) {
		/* We did not find desired contiguous space for this extent.
		 * So don't worry about getting contiguity anymore.  Also, allow using
		 * blocks that were recently deallocated.
		 */
		alloc_flags &= ~HFS_ALLOC_FORCECONTIG;
		alloc_flags |= HFS_ALLOC_FLUSHTXN;

		error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount,
				alloc_flags, &newStartBlock, &newBlockCount);
		if (error) {
			printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) BlockAllocate error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error);
			goto out;
		}
		blocks_allocated = true;

		/* The number of blocks allocated is less than the requested
		 * number of blocks.  For btree extents, check and trim the
		 * extent to be multiple of the node size.
		 */
		if (extent_info->is_sysfile) {
			node_size = get_btree_nodesize(extent_info->vp);
			if (node_size > hfsmp->blockSize) {
				remainder_blocks = newBlockCount % (node_size / hfsmp->blockSize);
				if (remainder_blocks) {
					roundedBlockCount = newBlockCount - remainder_blocks;
					/* Free tail-end blocks of the newly allocated extent */
					BlockDeallocate(hfsmp, newStartBlock + roundedBlockCount,
							newBlockCount - roundedBlockCount,
							HFS_ALLOC_SKIPFREEBLKS);
					newBlockCount = roundedBlockCount;
					if (hfs_resize_debug) {
						printf ("hfs_reclaim_extent: Fixing extent block count, node_blks=%u, old=%u, new=%u\n", node_size/hfsmp->blockSize, newBlockCount + remainder_blocks, newBlockCount);
					}
					if (newBlockCount == 0) {
						printf ("hfs_reclaim_extent: Not enough contiguous blocks available to relocate fileID=%d\n", extent_info->fileID);
						error = ENOSPC;
						goto out;
					}
				}
			}
		}

		/* The number of blocks allocated is less than the number of
		 * blocks requested, so split this extent --- the first extent
		 * will be relocated as part of this function call and the caller
		 * will handle relocating the second extent by calling this
		 * function again for the second extent.
		 */
		error = hfs_split_extent(extent_info, newBlockCount);
		if (error) {
			printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) split error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error);
			goto out;
		}
		oldBlockCount = newBlockCount;
	}
	if (error) {
		printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) contig BlockAllocate error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error);
		goto out;
	}
	blocks_allocated = true;

	/* Copy data from old location to new location */
	error = hfs_copy_extent(hfsmp, extent_info->vp, oldStartBlock,
			newStartBlock, newBlockCount, context);
	if (error) {
		printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u)=>(%u,%u) hfs_copy_extent error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, newStartBlock, newBlockCount, error);
		goto out;
	}

	/* Update the extent record with the new start block information */
	extent_info->extents[index].startBlock = newStartBlock;

	/* Sync the content back to the disk */
	if (extent_info->catalog_fp) {
		/* Update the extents in catalog record */
		if (extent_info->is_dirlink) {
			error = cat_update_dirlink(hfsmp, extent_info->forkType,
					extent_info->dirlink_desc, extent_info->dirlink_attr,
					&(extent_info->dirlink_fork->ff_data));
		} else {
			cp->c_flag |= C_MODIFIED;
			/* If this is a system file, sync volume headers on disk */
			if (extent_info->is_sysfile) {
				error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
			}
		}
	} else {
		/* Replace record for extents overflow or extents-based xattrs */
		error = BTReplaceRecord(extent_info->fcb, extent_info->iterator,
				&(extent_info->btdata), extent_info->recordlen);
	}
	if (error) {
		printf ("hfs_reclaim_extent: fileID=%u, update record error=%u\n", extent_info->fileID, error);
		goto out;
	}

	/* Deallocate the old extent */
	error = BlockDeallocate(hfsmp, oldStartBlock, oldBlockCount, HFS_ALLOC_SKIPFREEBLKS);
	if (error) {
		printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) BlockDeallocate error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error);
		goto out;
	}
	extent_info->blocks_relocated += newBlockCount;

	if (hfs_resize_debug) {
		printf ("hfs_reclaim_extent: Relocated record:%u %u:(%u,%u) to (%u,%u)\n", extent_info->overflow_count, index, oldStartBlock, oldBlockCount, newStartBlock, newBlockCount);
	}

out:
	if (error != 0) {
		/* Undo the new allocation; the old extent is still intact */
		if (blocks_allocated == true) {
			BlockDeallocate(hfsmp, newStartBlock, newBlockCount, HFS_ALLOC_SKIPFREEBLKS);
		}
	} else {
		/* On success, increment the total allocation blocks processed.
		 * Note: after a successful split (no relocation), newBlockCount
		 * holds the size of the first (retained) part of the extent. */
		extent_info->cur_blockCount += newBlockCount;
	}

	hfs_systemfile_unlock(hfsmp, extent_info->lockflags);

	/* For a non-system file, if an extent entry from catalog record
	 * was modified, sync the in-memory changes to the catalog record
	 * on disk before ending the transaction.
	 */
	if ((extent_info->catalog_fp) &&
	    (extent_info->is_sysfile == false)) {
		(void) hfs_update(extent_info->vp, MNT_WAIT);
	}

	hfs_end_transaction(hfsmp);

	return error;
}
5886
5887 /* Report intermediate progress during volume resize */
5888 static void
5889 hfs_truncatefs_progress(struct hfsmount *hfsmp)
5890 {
5891 u_int32_t cur_progress = 0;
5892
5893 hfs_resize_progress(hfsmp, &cur_progress);
5894 if (cur_progress > (hfsmp->hfs_resize_progress + 9)) {
5895 printf("hfs_truncatefs: %d%% done...\n", cur_progress);
5896 hfsmp->hfs_resize_progress = cur_progress;
5897 }
5898 return;
5899 }
5900
5901 /*
5902 * Reclaim space at the end of a volume for given file and forktype.
5903 *
5904 * This routine attempts to move any extent which contains allocation blocks
5905 * at or after "allocLimit." A separate transaction is used for every extent
5906 * that needs to be moved. If there is not contiguous space available for
5907 * moving an extent, it can be split into smaller extents. The contents of
5908 * any moved extents are read and written via the volume's device vnode --
5909 * NOT via "vp." During the move, moved blocks which are part of a transaction
5910 * have their physical block numbers invalidated so they will eventually be
5911 * written to their new locations.
5912 *
5913 * This function is also called for directory hard links. Directory hard links
5914 * are regular files with no data fork and resource fork that contains alias
5915 * information for backward compatibility with pre-Leopard systems. However
5916 * non-Mac OS X implementation can add/modify data fork or resource fork
5917 * information to directory hard links, so we check, and if required, relocate
5918 * both data fork and resource fork.
5919 *
5920 * Inputs:
5921 * hfsmp The volume being resized.
5922 * vp The vnode for the system file.
5923 * fileID ID of the catalog record that needs to be relocated
5924 * forktype The type of fork that needs relocated,
5925 * kHFSResourceForkType for resource fork,
5926 * kHFSDataForkType for data fork
5927 * allocLimit Allocation limit for the new volume size,
5928 * do not use this block or beyond. All extents
5929 * that use this block or any blocks beyond this limit
5930 * will be relocated.
5931 *
5932 * Side Effects:
5933 * hfsmp->hfs_resize_blocksmoved is incremented by the number of allocation
5934 * blocks that were relocated.
5935 */
static int
hfs_reclaim_file(struct hfsmount *hfsmp, struct vnode *vp, u_int32_t fileID,
		u_int8_t forktype, u_long allocLimit, vfs_context_t context)
{
	int error = 0;
	struct hfs_reclaim_extent_info *extent_info;
	int i;
	int lockflags = 0;
	struct cnode *cp;
	struct filefork *fp;
	int took_truncate_lock = false;
	int release_desc = false;
	HFSPlusExtentKey *key;

	/* If there is no vnode for this file, then there's nothing to do. */
	if (vp == NULL) {
		return 0;
	}

	cp = VTOC(vp);

	if (hfs_resize_debug) {
		const char *filename = (const char *) cp->c_desc.cd_nameptr;
		int namelen = cp->c_desc.cd_namelen;

		if (filename == NULL) {
			filename = "";
			namelen = 0;
		}
		printf("hfs_reclaim_file: reclaiming '%.*s'\n", namelen, filename);
	}

	/* Shared state for the per-extent relocation helpers; zeroed so all
	 * optional fields (iterator, dirlink_*) start out NULL for cleanup. */
	MALLOC(extent_info, struct hfs_reclaim_extent_info *,
	       sizeof(struct hfs_reclaim_extent_info), M_TEMP, M_WAITOK);
	if (extent_info == NULL) {
		return ENOMEM;
	}
	bzero(extent_info, sizeof(struct hfs_reclaim_extent_info));
	extent_info->vp = vp;
	extent_info->fileID = fileID;
	extent_info->forkType = forktype;
	extent_info->is_sysfile = vnode_issystem(vp);
	if (vnode_isdir(vp) && (cp->c_flag & C_HARDLINK)) {
		extent_info->is_dirlink = true;
	}
	/* We always need allocation bitmap and extent btree lock */
	lockflags = SFL_BITMAP | SFL_EXTENTS;
	if ((fileID == kHFSCatalogFileID) || (extent_info->is_dirlink == true)) {
		lockflags |= SFL_CATALOG;
	} else if (fileID == kHFSAttributesFileID) {
		lockflags |= SFL_ATTRIBUTE;
	} else if (fileID == kHFSStartupFileID) {
		lockflags |= SFL_STARTUP;
	}
	extent_info->lockflags = lockflags;
	extent_info->fcb = VTOF(hfsmp->hfs_extents_vp);

	/* Flush data associated with current file on disk.
	 *
	 * If the current vnode is directory hard link, no flushing of
	 * journal or vnode is required.  The current kernel does not
	 * modify data/resource fork of directory hard links, so nothing
	 * will be in the cache.  If a directory hard link is newly created,
	 * the resource fork data is written directly using devvp and
	 * the code that actually relocates data (hfs_copy_extent()) also
	 * uses devvp for its I/O --- so they will see a consistent copy.
	 */
	if (extent_info->is_sysfile) {
		/* If the current vnode is system vnode, flush journal
		 * to make sure that all data is written to the disk.
		 */
		error = hfs_journal_flush(hfsmp, TRUE);
		if (error) {
			printf ("hfs_reclaim_file: journal_flush returned %d\n", error);
			goto out;
		}
	} else if (extent_info->is_dirlink == false) {
		/* Flush all blocks associated with this regular file vnode.
		 * Normally there should not be buffer cache blocks for regular
		 * files, but for objects like symlinks, we can have buffer cache
		 * blocks associated with the vnode.  Therefore we call
		 * buf_flushdirtyblks() also.
		 */
		buf_flushdirtyblks(vp, 0, BUF_SKIP_LOCKED, "hfs_reclaim_file");

		/* Drop the cnode lock while taking the truncate lock to
		 * respect lock ordering, then re-acquire the cnode lock. */
		hfs_unlock(cp);
		hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
		took_truncate_lock = true;
		(void) cluster_push(vp, 0);
		error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
		if (error) {
			goto out;
		}

		/* If the file no longer exists, nothing left to do */
		if (cp->c_flag & C_NOEXISTS) {
			error = 0;
			goto out;
		}

		/* Wait for any in-progress writes to this vnode to complete, so that we'll
		 * be copying consistent bits.  (Otherwise, it's possible that an async
		 * write will complete to the old extent after we read from it.  That
		 * could lead to corruption.)
		 */
		error = vnode_waitforwrites(vp, 0, 0, 0, "hfs_reclaim_file");
		if (error) {
			goto out;
		}
	}

	if (hfs_resize_debug) {
		printf("hfs_reclaim_file: === Start reclaiming %sfork for %sid=%u ===\n", (forktype ? "rsrc" : "data"), (extent_info->is_dirlink ? "dirlink" : "file"), fileID);
	}

	if (extent_info->is_dirlink) {
		MALLOC(extent_info->dirlink_desc, struct cat_desc *,
		       sizeof(struct cat_desc), M_TEMP, M_WAITOK);
		MALLOC(extent_info->dirlink_attr, struct cat_attr *,
		       sizeof(struct cat_attr), M_TEMP, M_WAITOK);
		MALLOC(extent_info->dirlink_fork, struct filefork *,
		       sizeof(struct filefork), M_TEMP, M_WAITOK);
		if ((extent_info->dirlink_desc == NULL) ||
		    (extent_info->dirlink_attr == NULL) ||
		    (extent_info->dirlink_fork == NULL)) {
			error = ENOMEM;
			goto out;
		}

		/* Lookup catalog record for directory hard link and
		 * create a fake filefork for the value looked up from
		 * the disk.
		 */
		fp = extent_info->dirlink_fork;
		bzero(extent_info->dirlink_fork, sizeof(struct filefork));
		extent_info->dirlink_fork->ff_cp = cp;
		lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
		error = cat_lookup_dirlink(hfsmp, fileID, forktype,
				extent_info->dirlink_desc, extent_info->dirlink_attr,
				&(extent_info->dirlink_fork->ff_data));
		hfs_systemfile_unlock(hfsmp, lockflags);
		if (error) {
			printf ("hfs_reclaim_file: cat_lookup_dirlink for fileID=%u returned error=%u\n", fileID, error);
			goto out;
		}
		release_desc = true;
	} else {
		fp = VTOF(vp);
	}

	extent_info->catalog_fp = fp;
	extent_info->recStartBlock = 0;
	extent_info->extents = extent_info->catalog_fp->ff_extents;
	/* Relocate extents from the catalog record */
	for (i = 0; i < kHFSPlusExtentDensity; ++i) {
		if (fp->ff_extents[i].blockCount == 0) {
			break;
		}
		extent_info->extent_index = i;
		error = hfs_reclaim_extent(hfsmp, allocLimit, extent_info, context);
		if (error) {
			printf ("hfs_reclaim_file: fileID=%u #%d %u:(%u,%u) hfs_reclaim_extent error=%d\n", fileID, extent_info->overflow_count, i, fp->ff_extents[i].startBlock, fp->ff_extents[i].blockCount, error);
			goto out;
		}
	}

	/* If the number of allocation blocks processed for reclaiming
	 * are less than total number of blocks for the file, continuing
	 * working on overflow extents record.
	 */
	if (fp->ff_blocks <= extent_info->cur_blockCount) {
		/* Extra-verbose trace, deliberately compiled out via "0 &&" */
		if (0 && hfs_resize_debug) {
			printf ("hfs_reclaim_file: Nothing more to relocate, offset=%d, ff_blocks=%u, cur_blockCount=%u\n", i, fp->ff_blocks, extent_info->cur_blockCount);
		}
		goto out;
	}

	if (hfs_resize_debug) {
		printf ("hfs_reclaim_file: Will check overflow records, offset=%d, ff_blocks=%u, cur_blockCount=%u\n", i, fp->ff_blocks, extent_info->cur_blockCount);
	}

	MALLOC(extent_info->iterator, struct BTreeIterator *, sizeof(struct BTreeIterator), M_TEMP, M_WAITOK);
	if (extent_info->iterator == NULL) {
		error = ENOMEM;
		goto out;
	}
	bzero(extent_info->iterator, sizeof(struct BTreeIterator));
	key = (HFSPlusExtentKey *) &(extent_info->iterator->key);
	key->keyLength = kHFSPlusExtentKeyMaximumLength;
	key->forkType = forktype;
	key->fileID = fileID;
	key->startBlock = extent_info->cur_blockCount;

	extent_info->btdata.bufferAddress = extent_info->record.overflow;
	extent_info->btdata.itemSize = sizeof(HFSPlusExtentRecord);
	extent_info->btdata.itemCount = 1;

	/* NULL catalog_fp tells hfs_reclaim_extent() to update the overflow
	 * btree record instead of the catalog record. */
	extent_info->catalog_fp = NULL;

	/* Search the first overflow extent with expected startBlock as 'cur_blockCount' */
	lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
	error = BTSearchRecord(extent_info->fcb, extent_info->iterator,
			&(extent_info->btdata), &(extent_info->recordlen),
			extent_info->iterator);
	hfs_systemfile_unlock(hfsmp, lockflags);
	while (error == 0) {
		extent_info->overflow_count++;
		extent_info->recStartBlock = key->startBlock;
		extent_info->extents = extent_info->record.overflow;
		for (i = 0; i < kHFSPlusExtentDensity; i++) {
			if (extent_info->record.overflow[i].blockCount == 0) {
				goto out;
			}
			extent_info->extent_index = i;
			error = hfs_reclaim_extent(hfsmp, allocLimit, extent_info, context);
			if (error) {
				printf ("hfs_reclaim_file: fileID=%u #%d %u:(%u,%u) hfs_reclaim_extent error=%d\n", fileID, extent_info->overflow_count, i, extent_info->record.overflow[i].startBlock, extent_info->record.overflow[i].blockCount, error);
				goto out;
			}
		}

		/* Look for more overflow records */
		lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
		error = BTIterateRecord(extent_info->fcb, kBTreeNextRecord,
				extent_info->iterator, &(extent_info->btdata),
				&(extent_info->recordlen));
		hfs_systemfile_unlock(hfsmp, lockflags);
		if (error) {
			break;
		}
		/* Stop when we encounter a different file or fork. */
		if ((key->fileID != fileID) || (key->forkType != forktype)) {
			break;
		}
	}
	if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) {
		/* Running out of records is the normal end of iteration */
		error = 0;
	}

out:
	/* If any blocks were relocated, account them and report progress */
	if (extent_info->blocks_relocated) {
		hfsmp->hfs_resize_blocksmoved += extent_info->blocks_relocated;
		hfs_truncatefs_progress(hfsmp);
		if (fileID < kHFSFirstUserCatalogNodeID) {
			printf ("hfs_reclaim_file: Relocated %u blocks from fileID=%u on \"%s\"\n",
					extent_info->blocks_relocated, fileID, hfsmp->vcbVN);
		}
	}
	if (extent_info->iterator) {
		FREE(extent_info->iterator, M_TEMP);
	}
	if (release_desc == true) {
		cat_releasedesc(extent_info->dirlink_desc);
	}
	if (extent_info->dirlink_desc) {
		FREE(extent_info->dirlink_desc, M_TEMP);
	}
	if (extent_info->dirlink_attr) {
		FREE(extent_info->dirlink_attr, M_TEMP);
	}
	if (extent_info->dirlink_fork) {
		FREE(extent_info->dirlink_fork, M_TEMP);
	}
	if ((extent_info->blocks_relocated != 0) && (extent_info->is_sysfile == false)) {
		(void) hfs_update(vp, MNT_WAIT);
	}
	if (took_truncate_lock) {
		hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
	}
	if (extent_info) {
		FREE(extent_info, M_TEMP);
	}
	if (hfs_resize_debug) {
		printf("hfs_reclaim_file: === Finished relocating %sfork for fileid=%u (error=%d) ===\n", (forktype ? "rsrc" : "data"), fileID, error);
	}

	return error;
}
6215
6216
6217 /*
6218 * This journal_relocate callback updates the journal info block to point
6219 * at the new journal location. This write must NOT be done using the
6220 * transaction. We must write the block immediately. We must also force
6221 * it to get to the media so that the new journal location will be seen by
6222 * the replay code before we can safely let journaled blocks be written
6223 * to their normal locations.
6224 *
6225 * The tests for journal_uses_fua below are mildly hacky. Since the journal
6226 * and the file system are both on the same device, I'm leveraging what
6227 * the journal has decided about FUA.
6228 */
/* Argument bundle passed to hfs_journal_relocate_callback() */
struct hfs_journal_relocate_args {
	struct hfsmount *hfsmp;		/* volume whose journal is being moved */
	vfs_context_t context;		/* caller's context, used for I/O credentials */
	u_int32_t newStartBlock;	/* new journal start, in allocation blocks */
	u_int32_t newBlockCount;	/* new journal length, in allocation blocks */
};
6235
/*
 * Rewrite the journal info block (JIB) so it points at the journal's new
 * location.  Invoked by journal_relocate(); see the block comment above
 * for why this write bypasses the transaction and must reach the media
 * before journaled blocks are written to their normal locations.
 * Returns 0 on success or an errno value.
 */
static errno_t
hfs_journal_relocate_callback(void *_args)
{
	int error;
	struct hfs_journal_relocate_args *args = _args;
	struct hfsmount *hfsmp = args->hfsmp;
	buf_t bp;
	JournalInfoBlock *jibp;

	/* Read the JIB directly from the device, converting the allocation
	 * block number to logical device blocks. */
	error = buf_meta_bread(hfsmp->hfs_devvp,
			hfsmp->vcbJinfoBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size),
			hfsmp->blockSize, vfs_context_ucred(args->context), &bp);
	if (error) {
		printf("hfs_journal_relocate_callback: failed to read JIB (%d)\n", error);
		if (bp) {
			buf_brelse(bp);
		}
		return error;
	}
	/* On-disk JIB fields are big-endian byte offsets/lengths */
	jibp = (JournalInfoBlock*) buf_dataptr(bp);
	jibp->offset = SWAP_BE64((u_int64_t)args->newStartBlock * hfsmp->blockSize);
	jibp->size = SWAP_BE64((u_int64_t)args->newBlockCount * hfsmp->blockSize);
	if (journal_uses_fua(hfsmp->jnl))
		buf_markfua(bp);
	/* Synchronous write; buf_bwrite releases the buffer either way */
	error = buf_bwrite(bp);
	if (error) {
		printf("hfs_journal_relocate_callback: failed to write JIB (%d)\n", error);
		return error;
	}
	/* Without FUA, force the drive cache so the new JIB is durable before
	 * journaled blocks can land in their normal locations. */
	if (!journal_uses_fua(hfsmp->jnl)) {
		error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, args->context);
		if (error) {
			printf("hfs_journal_relocate_callback: DKIOCSYNCHRONIZECACHE failed (%d)\n", error);
			error = 0; /* Don't fail the operation. */
		}
	}

	return error;
}
6275
6276
6277 /* Type of resize operation in progress */
6278 #define HFS_RESIZE_TRUNCATE 1
6279 #define HFS_RESIZE_EXTEND 2
6280
6281 /*
6282 * Core function to relocate the journal file. This function takes the
6283 * journal size of the newly relocated journal --- the caller can
6284 * provide a new journal size if they want to change the size of
6285 * the journal. The function takes care of updating the journal info
6286 * block and all other data structures correctly.
6287 *
6288 * Note: This function starts a transaction and grabs the btree locks.
6289 */
6290 static int
6291 hfs_relocate_journal_file(struct hfsmount *hfsmp, u_int32_t jnl_size, int resize_type, vfs_context_t context)
6292 {
6293 int error;
6294 int journal_err;
6295 int lockflags;
6296 u_int32_t oldStartBlock;
6297 u_int32_t newStartBlock;
6298 u_int32_t oldBlockCount;
6299 u_int32_t newBlockCount;
6300 u_int32_t jnlBlockCount;
6301 u_int32_t alloc_skipfreeblks;
6302 struct cat_desc journal_desc;
6303 struct cat_attr journal_attr;
6304 struct cat_fork journal_fork;
6305 struct hfs_journal_relocate_args callback_args;
6306
6307 /* Calculate the number of allocation blocks required for the journal */
6308 jnlBlockCount = howmany(jnl_size, hfsmp->blockSize);
6309
6310 /*
6311 * During truncatefs(), the volume free block count is updated
6312 * before relocating data and reflects the total number of free
6313 * blocks that will exist on volume after the resize is successful.
6314 * This means that the allocation blocks required for relocation
6315 * have already been reserved and accounted for in the free block
6316 * count. Therefore, block allocation and deallocation routines
6317 * can skip the free block check by passing HFS_ALLOC_SKIPFREEBLKS
6318 * flag.
6319 *
6320 * This special handling is not required when the file system
6321 * is being extended as we want all the allocated and deallocated
6322 * blocks to be accounted for correctly.
6323 */
6324 if (resize_type == HFS_RESIZE_TRUNCATE) {
6325 alloc_skipfreeblks = HFS_ALLOC_SKIPFREEBLKS;
6326 } else {
6327 alloc_skipfreeblks = 0;
6328 }
6329
6330 error = hfs_start_transaction(hfsmp);
6331 if (error) {
6332 printf("hfs_relocate_journal_file: hfs_start_transaction returned %d\n", error);
6333 return error;
6334 }
6335 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
6336
6337 error = BlockAllocate(hfsmp, 1, jnlBlockCount, jnlBlockCount,
6338 HFS_ALLOC_METAZONE | HFS_ALLOC_FORCECONTIG | HFS_ALLOC_FLUSHTXN | alloc_skipfreeblks,
6339 &newStartBlock, &newBlockCount);
6340 if (error) {
6341 printf("hfs_relocate_journal_file: BlockAllocate returned %d\n", error);
6342 goto fail;
6343 }
6344 if (newBlockCount != jnlBlockCount) {
6345 printf("hfs_relocate_journal_file: newBlockCount != jnlBlockCount (%u, %u)\n", newBlockCount, jnlBlockCount);
6346 goto free_fail;
6347 }
6348
6349 error = cat_idlookup(hfsmp, hfsmp->hfs_jnlfileid, 1, 0, &journal_desc, &journal_attr, &journal_fork);
6350 if (error) {
6351 printf("hfs_relocate_journal_file: cat_idlookup returned %d\n", error);
6352 goto free_fail;
6353 }
6354
6355 oldStartBlock = journal_fork.cf_extents[0].startBlock;
6356 oldBlockCount = journal_fork.cf_extents[0].blockCount;
6357 error = BlockDeallocate(hfsmp, oldStartBlock, oldBlockCount, alloc_skipfreeblks);
6358 if (error) {
6359 printf("hfs_relocate_journal_file: BlockDeallocate returned %d\n", error);
6360 goto free_fail;
6361 }
6362
6363 /* Update the catalog record for .journal */
6364 journal_fork.cf_size = newBlockCount * hfsmp->blockSize;
6365 journal_fork.cf_extents[0].startBlock = newStartBlock;
6366 journal_fork.cf_extents[0].blockCount = newBlockCount;
6367 journal_fork.cf_blocks = newBlockCount;
6368 error = cat_update(hfsmp, &journal_desc, &journal_attr, &journal_fork, NULL);
6369 cat_releasedesc(&journal_desc); /* all done with cat descriptor */
6370 if (error) {
6371 printf("hfs_relocate_journal_file: cat_update returned %d\n", error);
6372 goto free_fail;
6373 }
6374
6375 /*
6376 * If the journal is part of the file system, then tell the journal
6377 * code about the new location. If the journal is on an external
6378 * device, then just keep using it as-is.
6379 */
6380 if (hfsmp->jvp == hfsmp->hfs_devvp) {
6381 callback_args.hfsmp = hfsmp;
6382 callback_args.context = context;
6383 callback_args.newStartBlock = newStartBlock;
6384 callback_args.newBlockCount = newBlockCount;
6385
6386 error = journal_relocate(hfsmp->jnl, (off_t)newStartBlock*hfsmp->blockSize,
6387 (off_t)newBlockCount*hfsmp->blockSize, 0,
6388 hfs_journal_relocate_callback, &callback_args);
6389 if (error) {
6390 /* NOTE: journal_relocate will mark the journal invalid. */
6391 printf("hfs_relocate_journal_file: journal_relocate returned %d\n", error);
6392 goto fail;
6393 }
6394 if (hfs_resize_debug) {
6395 printf ("hfs_relocate_journal_file: Successfully relocated journal from (%u,%u) to (%u,%u)\n", oldStartBlock, oldBlockCount, newStartBlock, newBlockCount);
6396 }
6397 hfsmp->jnl_start = newStartBlock;
6398 hfsmp->jnl_size = (off_t)newBlockCount * hfsmp->blockSize;
6399 }
6400
6401 hfs_systemfile_unlock(hfsmp, lockflags);
6402 error = hfs_end_transaction(hfsmp);
6403 if (error) {
6404 printf("hfs_relocate_journal_file: hfs_end_transaction returned %d\n", error);
6405 }
6406
6407 return error;
6408
6409 free_fail:
6410 journal_err = BlockDeallocate(hfsmp, newStartBlock, newBlockCount, HFS_ALLOC_SKIPFREEBLKS);
6411 if (journal_err) {
6412 printf("hfs_relocate_journal_file: BlockDeallocate returned %d\n", error);
6413 hfs_mark_volume_inconsistent(hfsmp);
6414 }
6415 fail:
6416 hfs_systemfile_unlock(hfsmp, lockflags);
6417 (void) hfs_end_transaction(hfsmp);
6418 if (hfs_resize_debug) {
6419 printf ("hfs_relocate_journal_file: Error relocating journal file (error=%d)\n", error);
6420 }
6421 return error;
6422 }
6423
6424
6425 /*
6426 * Relocate the journal file when the file system is being truncated.
6427 * We do not down-size the journal when the file system size is
6428 * reduced, so we always provide the current journal size to the
6429 * relocate code.
6430 */
6431 static int
6432 hfs_reclaim_journal_file(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context)
6433 {
6434 int error = 0;
6435 u_int32_t startBlock;
6436 u_int32_t blockCount = hfsmp->jnl_size / hfsmp->blockSize;
6437
6438 /*
6439 * Figure out the location of the .journal file. When the journal
6440 * is on an external device, we need to look up the .journal file.
6441 */
6442 if (hfsmp->jvp == hfsmp->hfs_devvp) {
6443 startBlock = hfsmp->jnl_start;
6444 blockCount = hfsmp->jnl_size / hfsmp->blockSize;
6445 } else {
6446 u_int32_t fileid;
6447 u_int32_t old_jnlfileid;
6448 struct cat_attr attr;
6449 struct cat_fork fork;
6450
6451 /*
6452 * The cat_lookup inside GetFileInfo will fail because hfs_jnlfileid
6453 * is set, and it is trying to hide the .journal file. So temporarily
6454 * unset the field while calling GetFileInfo.
6455 */
6456 old_jnlfileid = hfsmp->hfs_jnlfileid;
6457 hfsmp->hfs_jnlfileid = 0;
6458 fileid = GetFileInfo(hfsmp, kHFSRootFolderID, ".journal", &attr, &fork);
6459 hfsmp->hfs_jnlfileid = old_jnlfileid;
6460 if (fileid != old_jnlfileid) {
6461 printf("hfs_reclaim_journal_file: cannot find .journal file!\n");
6462 return EIO;
6463 }
6464
6465 startBlock = fork.cf_extents[0].startBlock;
6466 blockCount = fork.cf_extents[0].blockCount;
6467 }
6468
6469 if (startBlock + blockCount <= allocLimit) {
6470 /* The journal file does not require relocation */
6471 return 0;
6472 }
6473
6474 error = hfs_relocate_journal_file(hfsmp, blockCount * hfsmp->blockSize, HFS_RESIZE_TRUNCATE, context);
6475 if (error == 0) {
6476 hfsmp->hfs_resize_blocksmoved += blockCount;
6477 hfs_truncatefs_progress(hfsmp);
6478 printf ("hfs_reclaim_journal_file: Relocated %u blocks from journal on \"%s\"\n",
6479 blockCount, hfsmp->vcbVN);
6480 }
6481
6482 return error;
6483 }
6484
6485
6486 /*
6487 * Move the journal info block to a new location. We have to make sure the
6488 * new copy of the journal info block gets to the media first, then change
6489 * the field in the volume header and the catalog record.
6490 */
6491 static int
6492 hfs_reclaim_journal_info_block(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context)
6493 {
6494 int error;
6495 int journal_err;
6496 int lockflags;
6497 u_int32_t oldBlock;
6498 u_int32_t newBlock;
6499 u_int32_t blockCount;
6500 struct cat_desc jib_desc;
6501 struct cat_attr jib_attr;
6502 struct cat_fork jib_fork;
6503 buf_t old_bp, new_bp;
6504
6505 if (hfsmp->vcbJinfoBlock <= allocLimit) {
6506 /* The journal info block does not require relocation */
6507 return 0;
6508 }
6509
6510 error = hfs_start_transaction(hfsmp);
6511 if (error) {
6512 printf("hfs_reclaim_journal_info_block: hfs_start_transaction returned %d\n", error);
6513 return error;
6514 }
6515 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
6516
6517 error = BlockAllocate(hfsmp, 1, 1, 1,
6518 HFS_ALLOC_METAZONE | HFS_ALLOC_FORCECONTIG | HFS_ALLOC_SKIPFREEBLKS | HFS_ALLOC_FLUSHTXN,
6519 &newBlock, &blockCount);
6520 if (error) {
6521 printf("hfs_reclaim_journal_info_block: BlockAllocate returned %d\n", error);
6522 goto fail;
6523 }
6524 if (blockCount != 1) {
6525 printf("hfs_reclaim_journal_info_block: blockCount != 1 (%u)\n", blockCount);
6526 goto free_fail;
6527 }
6528
6529 /* Copy the old journal info block content to the new location */
6530 error = buf_meta_bread(hfsmp->hfs_devvp,
6531 hfsmp->vcbJinfoBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size),
6532 hfsmp->blockSize, vfs_context_ucred(context), &old_bp);
6533 if (error) {
6534 printf("hfs_reclaim_journal_info_block: failed to read JIB (%d)\n", error);
6535 if (old_bp) {
6536 buf_brelse(old_bp);
6537 }
6538 goto free_fail;
6539 }
6540 new_bp = buf_getblk(hfsmp->hfs_devvp,
6541 newBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size),
6542 hfsmp->blockSize, 0, 0, BLK_META);
6543 bcopy((char*)buf_dataptr(old_bp), (char*)buf_dataptr(new_bp), hfsmp->blockSize);
6544 buf_brelse(old_bp);
6545 if (journal_uses_fua(hfsmp->jnl))
6546 buf_markfua(new_bp);
6547 error = buf_bwrite(new_bp);
6548 if (error) {
6549 printf("hfs_reclaim_journal_info_block: failed to write new JIB (%d)\n", error);
6550 goto free_fail;
6551 }
6552 if (!journal_uses_fua(hfsmp->jnl)) {
6553 error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
6554 if (error) {
6555 printf("hfs_reclaim_journal_info_block: DKIOCSYNCHRONIZECACHE failed (%d)\n", error);
6556 /* Don't fail the operation. */
6557 }
6558 }
6559
6560 /* Deallocate the old block once the new one has the new valid content */
6561 error = BlockDeallocate(hfsmp, hfsmp->vcbJinfoBlock, 1, HFS_ALLOC_SKIPFREEBLKS);
6562 if (error) {
6563 printf("hfs_reclaim_journal_info_block: BlockDeallocate returned %d\n", error);
6564 goto free_fail;
6565 }
6566
6567
6568 /* Update the catalog record for .journal_info_block */
6569 error = cat_idlookup(hfsmp, hfsmp->hfs_jnlinfoblkid, 1, 0, &jib_desc, &jib_attr, &jib_fork);
6570 if (error) {
6571 printf("hfs_reclaim_journal_info_block: cat_idlookup returned %d\n", error);
6572 goto fail;
6573 }
6574 oldBlock = jib_fork.cf_extents[0].startBlock;
6575 jib_fork.cf_size = hfsmp->blockSize;
6576 jib_fork.cf_extents[0].startBlock = newBlock;
6577 jib_fork.cf_extents[0].blockCount = 1;
6578 jib_fork.cf_blocks = 1;
6579 error = cat_update(hfsmp, &jib_desc, &jib_attr, &jib_fork, NULL);
6580 cat_releasedesc(&jib_desc); /* all done with cat descriptor */
6581 if (error) {
6582 printf("hfs_reclaim_journal_info_block: cat_update returned %d\n", error);
6583 goto fail;
6584 }
6585
6586 /* Update the pointer to the journal info block in the volume header. */
6587 hfsmp->vcbJinfoBlock = newBlock;
6588 error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
6589 if (error) {
6590 printf("hfs_reclaim_journal_info_block: hfs_flushvolumeheader returned %d\n", error);
6591 goto fail;
6592 }
6593 hfs_systemfile_unlock(hfsmp, lockflags);
6594 error = hfs_end_transaction(hfsmp);
6595 if (error) {
6596 printf("hfs_reclaim_journal_info_block: hfs_end_transaction returned %d\n", error);
6597 }
6598 error = hfs_journal_flush(hfsmp, FALSE);
6599 if (error) {
6600 printf("hfs_reclaim_journal_info_block: journal_flush returned %d\n", error);
6601 }
6602
6603 /* Account for the block relocated and print progress */
6604 hfsmp->hfs_resize_blocksmoved += 1;
6605 hfs_truncatefs_progress(hfsmp);
6606 if (!error) {
6607 printf ("hfs_reclaim_journal_info: Relocated 1 block from journal info on \"%s\"\n",
6608 hfsmp->vcbVN);
6609 if (hfs_resize_debug) {
6610 printf ("hfs_reclaim_journal_info_block: Successfully relocated journal info block from (%u,%u) to (%u,%u)\n", oldBlock, blockCount, newBlock, blockCount);
6611 }
6612 }
6613 return error;
6614
6615 free_fail:
6616 journal_err = BlockDeallocate(hfsmp, newBlock, blockCount, HFS_ALLOC_SKIPFREEBLKS);
6617 if (journal_err) {
6618 printf("hfs_reclaim_journal_info_block: BlockDeallocate returned %d\n", error);
6619 hfs_mark_volume_inconsistent(hfsmp);
6620 }
6621
6622 fail:
6623 hfs_systemfile_unlock(hfsmp, lockflags);
6624 (void) hfs_end_transaction(hfsmp);
6625 if (hfs_resize_debug) {
6626 printf ("hfs_reclaim_journal_info_block: Error relocating journal info block (error=%d)\n", error);
6627 }
6628 return error;
6629 }
6630
6631
6632 static u_int64_t
6633 calculate_journal_size(struct hfsmount *hfsmp, u_int32_t sector_size, u_int64_t sector_count)
6634 {
6635 u_int64_t journal_size;
6636 u_int32_t journal_scale;
6637
6638 #define DEFAULT_JOURNAL_SIZE (8*1024*1024)
6639 #define MAX_JOURNAL_SIZE (512*1024*1024)
6640
6641 /* Calculate the journal size for this volume. We want
6642 * at least 8 MB of journal for each 100 GB of disk space.
6643 * We cap the size at 512 MB, unless the allocation block
6644 * size is larger, in which case, we use one allocation
6645 * block.
6646 */
6647 journal_scale = (sector_size * sector_count) / ((u_int64_t)100 * 1024 * 1024 * 1024);
6648 journal_size = DEFAULT_JOURNAL_SIZE * (journal_scale + 1);
6649 if (journal_size > MAX_JOURNAL_SIZE) {
6650 journal_size = MAX_JOURNAL_SIZE;
6651 }
6652 if (journal_size < hfsmp->blockSize) {
6653 journal_size = hfsmp->blockSize;
6654 }
6655 return journal_size;
6656 }
6657
6658
6659 /*
6660 * Calculate the expected journal size based on current partition size.
6661 * If the size of the current journal is less than the calculated size,
6662 * force journal relocation with the new journal size.
6663 */
6664 static int
6665 hfs_extend_journal(struct hfsmount *hfsmp, u_int32_t sector_size, u_int64_t sector_count, vfs_context_t context)
6666 {
6667 int error = 0;
6668 u_int64_t calc_journal_size;
6669
6670 if (hfsmp->jvp != hfsmp->hfs_devvp) {
6671 if (hfs_resize_debug) {
6672 printf("hfs_extend_journal: not resizing the journal because it is on an external device.\n");
6673 }
6674 return 0;
6675 }
6676
6677 calc_journal_size = calculate_journal_size(hfsmp, sector_size, sector_count);
6678 if (calc_journal_size <= hfsmp->jnl_size) {
6679 /* The journal size requires no modification */
6680 goto out;
6681 }
6682
6683 if (hfs_resize_debug) {
6684 printf ("hfs_extend_journal: journal old=%u, new=%qd\n", hfsmp->jnl_size, calc_journal_size);
6685 }
6686
6687 /* Extend the journal to the new calculated size */
6688 error = hfs_relocate_journal_file(hfsmp, calc_journal_size, HFS_RESIZE_EXTEND, context);
6689 if (error == 0) {
6690 printf ("hfs_extend_journal: Extended journal size to %u bytes on \"%s\"\n",
6691 hfsmp->jnl_size, hfsmp->vcbVN);
6692 }
6693 out:
6694 return error;
6695 }
6696
6697
/*
 * This function traverses through all extended attribute records for a given
 * fileID, and calls function that reclaims data blocks that exist in the
 * area of the disk being reclaimed which in turn is responsible for allocating
 * new space, copying extent data, deallocating new space, and if required,
 * splitting the extent.
 *
 * Note: The caller has already acquired the cnode lock on the file. Therefore
 * we are assured that no other thread would be creating/deleting/modifying
 * extended attributes for this file.
 *
 * Side Effects:
 * hfsmp->hfs_resize_blocksmoved is incremented by the number of allocation
 * blocks that were relocated.
 *
 * Returns:
 * 0 on success, non-zero on failure.
 */
static int
hfs_reclaim_xattr(struct hfsmount *hfsmp, struct vnode *vp, u_int32_t fileID, u_int32_t allocLimit, vfs_context_t context)
{
	int error = 0;
	struct hfs_reclaim_extent_info *extent_info;
	int i;
	HFSPlusAttrKey *key;
	int *lockflags;

	if (hfs_resize_debug) {
		printf("hfs_reclaim_xattr: === Start reclaiming xattr for id=%u ===\n", fileID);
	}

	/* State shared with hfs_reclaim_extent(): current record, extents
	 * array, iterator, and lock flags all live in this one structure.
	 */
	MALLOC(extent_info, struct hfs_reclaim_extent_info *,
	       sizeof(struct hfs_reclaim_extent_info), M_TEMP, M_WAITOK);
	if (extent_info == NULL) {
		return ENOMEM;
	}
	bzero(extent_info, sizeof(struct hfs_reclaim_extent_info));
	extent_info->vp = vp;
	extent_info->fileID = fileID;
	extent_info->is_xattr = true;
	extent_info->is_sysfile = vnode_issystem(vp);
	extent_info->fcb = VTOF(hfsmp->hfs_attribute_vp);
	/* hfs_reclaim_extent() drops/retakes these locks around copying data,
	 * so the flags are shared via a pointer into extent_info.
	 */
	lockflags = &(extent_info->lockflags);
	*lockflags = SFL_ATTRIBUTE | SFL_BITMAP;

	/* Initialize iterator from the extent_info structure */
	MALLOC(extent_info->iterator, struct BTreeIterator *,
	       sizeof(struct BTreeIterator), M_TEMP, M_WAITOK);
	if (extent_info->iterator == NULL) {
		error = ENOMEM;
		goto out;
	}
	bzero(extent_info->iterator, sizeof(struct BTreeIterator));

	/* Build attribute key */
	key = (HFSPlusAttrKey *)&(extent_info->iterator->key);
	error = hfs_buildattrkey(fileID, NULL, key);
	if (error) {
		goto out;
	}

	/* Initialize btdata from extent_info structure.  Note that the
	 * buffer pointer actually points to the xattr record from the
	 * extent_info structure itself.
	 */
	extent_info->btdata.bufferAddress = &(extent_info->record.xattr);
	extent_info->btdata.itemSize = sizeof(HFSPlusAttrRecord);
	extent_info->btdata.itemCount = 1;

	/*
	 * Sync all extent-based attribute data to the disk.
	 *
	 * All extent-based attribute data I/O is performed via cluster
	 * I/O using a virtual file that spans across entire file system
	 * space.
	 */
	hfs_lock_truncate(VTOC(hfsmp->hfs_attrdata_vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
	(void)cluster_push(hfsmp->hfs_attrdata_vp, 0);
	error = vnode_waitforwrites(hfsmp->hfs_attrdata_vp, 0, 0, 0, "hfs_reclaim_xattr");
	hfs_unlock_truncate(VTOC(hfsmp->hfs_attrdata_vp), HFS_LOCK_DEFAULT);
	if (error) {
		goto out;
	}

	/* Search for extended attribute for current file.  This
	 * will place the iterator before the first matching record.
	 */
	*lockflags = hfs_systemfile_lock(hfsmp, *lockflags, HFS_EXCLUSIVE_LOCK);
	error = BTSearchRecord(extent_info->fcb, extent_info->iterator,
			&(extent_info->btdata), &(extent_info->recordlen),
			extent_info->iterator);
	hfs_systemfile_unlock(hfsmp, *lockflags);
	if (error) {
		if (error != btNotFound) {
			goto out;
		}
		/* btNotFound is expected here, so just mask it */
		error = 0;
	}

	while (1) {
		/* Iterate to the next record */
		*lockflags = hfs_systemfile_lock(hfsmp, *lockflags, HFS_EXCLUSIVE_LOCK);
		error = BTIterateRecord(extent_info->fcb, kBTreeNextRecord,
				extent_info->iterator, &(extent_info->btdata),
				&(extent_info->recordlen));
		hfs_systemfile_unlock(hfsmp, *lockflags);

		/* Stop the iteration if we encounter end of btree or xattr with different fileID */
		if (error || key->fileID != fileID) {
			if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) {
				error = 0;
			}
			break;
		}

		/* We only care about extent-based EAs */
		if ((extent_info->record.xattr.recordType != kHFSPlusAttrForkData) &&
		    (extent_info->record.xattr.recordType != kHFSPlusAttrExtents)) {
			continue;
		}

		/* Point extent_info->extents at the extent array embedded in the
		 * record just read; hfs_reclaim_extent() updates it in place.
		 */
		if (extent_info->record.xattr.recordType == kHFSPlusAttrForkData) {
			extent_info->overflow_count = 0;
			extent_info->extents = extent_info->record.xattr.forkData.theFork.extents;
		} else if (extent_info->record.xattr.recordType == kHFSPlusAttrExtents) {
			extent_info->overflow_count++;
			extent_info->extents = extent_info->record.xattr.overflowExtents.extents;
		}

		extent_info->recStartBlock = key->startBlock;
		for (i = 0; i < kHFSPlusExtentDensity; i++) {
			/* A zero-length extent terminates the array. */
			if (extent_info->extents[i].blockCount == 0) {
				break;
			}
			extent_info->extent_index = i;
			error = hfs_reclaim_extent(hfsmp, allocLimit, extent_info, context);
			if (error) {
				printf ("hfs_reclaim_xattr: fileID=%u hfs_reclaim_extent error=%d\n", fileID, error);
				goto out;
			}
		}
	}

out:
	/* If any blocks were relocated, account them and report progress */
	if (extent_info->blocks_relocated) {
		hfsmp->hfs_resize_blocksmoved += extent_info->blocks_relocated;
		hfs_truncatefs_progress(hfsmp);
	}
	if (extent_info->iterator) {
		FREE(extent_info->iterator, M_TEMP);
	}
	if (extent_info) {
		FREE(extent_info, M_TEMP);
	}
	if (hfs_resize_debug) {
		printf("hfs_reclaim_xattr: === Finished relocating xattr for fileid=%u (error=%d) ===\n", fileID, error);
	}
	return error;
}
6859
/*
 * Reclaim any extent-based extended attributes allocation blocks from
 * the area of the disk that is being truncated.
 *
 * The function traverses the attribute btree to find out the fileIDs
 * of the extended attributes that need to be relocated.  For every
 * file whose large EA requires relocation, it looks up the cnode and
 * calls hfs_reclaim_xattr() to do all the work for allocating
 * new space, copying data, deallocating old space, and if required,
 * splitting the extents.
 *
 * Inputs:
 * allocLimit - starting block of the area being reclaimed
 *
 * Returns:
 * returns 0 on success, non-zero on failure.
 */
static int
hfs_reclaim_xattrspace(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context)
{
	int error = 0;
	FCB *fcb;
	struct BTreeIterator *iterator = NULL;
	struct FSBufferDescriptor btdata;
	HFSPlusAttrKey *key;
	HFSPlusAttrRecord rec;
	int lockflags = 0;
	cnid_t prev_fileid = 0;	/* last fileID whose xattrs were relocated */
	struct vnode *vp;
	int need_relocate;
	int btree_operation;
	u_int32_t files_moved = 0;
	u_int32_t prev_blocksmoved;
	int i;

	fcb = VTOF(hfsmp->hfs_attribute_vp);
	/* Store the value to print total blocks moved by this function in end */
	prev_blocksmoved = hfsmp->hfs_resize_blocksmoved;

	if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) {
		return ENOMEM;
	}
	bzero(iterator, sizeof(*iterator));
	key = (HFSPlusAttrKey *)&iterator->key;
	btdata.bufferAddress = &rec;
	btdata.itemSize = sizeof(rec);
	btdata.itemCount = 1;

	need_relocate = false;
	btree_operation = kBTreeFirstRecord;
	/* Traverse the attribute btree to find extent-based EAs to reclaim */
	while (1) {
		/* Shared lock is enough to read; hfs_reclaim_xattr() takes the
		 * exclusive lock itself when it actually moves blocks.
		 */
		lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE, HFS_SHARED_LOCK);
		error = BTIterateRecord(fcb, btree_operation, iterator, &btdata, NULL);
		hfs_systemfile_unlock(hfsmp, lockflags);
		if (error) {
			/* End-of-tree conditions are the normal loop exit. */
			if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) {
				error = 0;
			}
			break;
		}
		btree_operation = kBTreeNextRecord;

		/* If the extents of current fileID were already relocated, skip it */
		if (prev_fileid == key->fileID) {
			continue;
		}

		/* Check if any of the extents in the current record need to be relocated */
		need_relocate = false;
		switch(rec.recordType) {
		case kHFSPlusAttrForkData:
			for (i = 0; i < kHFSPlusExtentDensity; i++) {
				if (rec.forkData.theFork.extents[i].blockCount == 0) {
					break;
				}
				if ((rec.forkData.theFork.extents[i].startBlock +
				     rec.forkData.theFork.extents[i].blockCount) > allocLimit) {
					need_relocate = true;
					break;
				}
			}
			break;

		case kHFSPlusAttrExtents:
			for (i = 0; i < kHFSPlusExtentDensity; i++) {
				if (rec.overflowExtents.extents[i].blockCount == 0) {
					break;
				}
				if ((rec.overflowExtents.extents[i].startBlock +
				     rec.overflowExtents.extents[i].blockCount) > allocLimit) {
					need_relocate = true;
					break;
				}
			}
			break;
		};

		/* Continue iterating to next attribute record */
		if (need_relocate == false) {
			continue;
		}

		/* Look up the vnode for corresponding file.  The cnode
		 * will be locked which will ensure that no one modifies
		 * the xattrs when we are relocating them.
		 *
		 * We want to allow open-unlinked files to be moved,
		 * so provide allow_deleted == 1 for hfs_vget().
		 */
		if (hfs_vget(hfsmp, key->fileID, &vp, 0, 1) != 0) {
			/* Best effort: a file we cannot look up is simply skipped. */
			continue;
		}

		error = hfs_reclaim_xattr(hfsmp, vp, key->fileID, allocLimit, context);
		hfs_unlock(VTOC(vp));
		vnode_put(vp);
		if (error) {
			printf ("hfs_reclaim_xattrspace: Error relocating xattrs for fileid=%u (error=%d)\n", key->fileID, error);
			break;
		}
		prev_fileid = key->fileID;
		files_moved++;
	}

	if (files_moved) {
		printf("hfs_reclaim_xattrspace: Relocated %u xattr blocks from %u files on \"%s\"\n",
				(hfsmp->hfs_resize_blocksmoved - prev_blocksmoved),
				files_moved, hfsmp->vcbVN);
	}

	kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator));
	return error;
}
6994
/*
 * Reclaim blocks from regular files.
 *
 * This function iterates over all the record in catalog btree looking
 * for files with extents that overlap into the space we're trying to
 * free up.  If a file extent requires relocation, it looks up the vnode
 * and calls function to relocate the data.
 *
 * Returns:
 * Zero on success, non-zero on failure.
 */
static int
hfs_reclaim_filespace(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context)
{
	int error;
	FCB *fcb;
	struct BTreeIterator *iterator = NULL;
	struct FSBufferDescriptor btdata;
	int btree_operation;
	int lockflags;
	struct HFSPlusCatalogFile filerec;
	struct vnode *vp;
	struct vnode *rvp;
	struct filefork *datafork;
	u_int32_t files_moved = 0;
	u_int32_t prev_blocksmoved;

#if CONFIG_PROTECT
	int keys_generated = 0;
#endif

	fcb = VTOF(hfsmp->hfs_catalog_vp);
	/* Store the value to print total blocks moved by this function at the end */
	prev_blocksmoved = hfsmp->hfs_resize_blocksmoved;

	if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) {
		error = ENOMEM;
		goto reclaim_filespace_done;
	}

#if CONFIG_PROTECT
	/*
	 * For content-protected filesystems, we may need to relocate files that
	 * are encrypted.  If they use the new-style offset-based IVs, then
	 * we can move them regardless of the lock state.  We create a temporary
	 * key here that we use to read/write the data, then we discard it at the
	 * end of the function.
	 */
	if (cp_fs_protected (hfsmp->hfs_mp)) {
		int needs = 0;
		error = cp_needs_tempkeys(hfsmp, &needs);

		if ((error == 0) && (needs)) {
			error = cp_entry_gentempkeys(&hfsmp->hfs_resize_cpentry, hfsmp);
			if (error == 0) {
				keys_generated = 1;
			}
		}

		if (error) {
			printf("hfs_reclaimspace: Error generating temporary keys for resize (%d)\n", error);
			goto reclaim_filespace_done;
		}
	}

#endif

	bzero(iterator, sizeof(*iterator));

	btdata.bufferAddress = &filerec;
	btdata.itemSize = sizeof(filerec);
	btdata.itemCount = 1;

	btree_operation = kBTreeFirstRecord;
	while (1) {
		/* Shared lock only while reading the catalog record; the heavy
		 * lifting in hfs_reclaim_file() takes its own locks.
		 */
		lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
		error = BTIterateRecord(fcb, btree_operation, iterator, &btdata, NULL);
		hfs_systemfile_unlock(hfsmp, lockflags);
		if (error) {
			/* End-of-tree conditions are the normal loop exit. */
			if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) {
				error = 0;
			}
			break;
		}
		btree_operation = kBTreeNextRecord;

		if (filerec.recordType != kHFSPlusFileRecord) {
			continue;
		}

		/* Check if any of the extents require relocation */
		if (hfs_file_extent_overlaps(hfsmp, allocLimit, &filerec) == false) {
			continue;
		}

		/* We want to allow open-unlinked files to be moved, so allow_deleted == 1 */
		if (hfs_vget(hfsmp, filerec.fileID, &vp, 0, 1) != 0) {
			if (hfs_resize_debug) {
				printf("hfs_reclaim_filespace: hfs_vget(%u) failed.\n", filerec.fileID);
			}
			continue;
		}

		/* If data fork exists or item is a directory hard link, relocate blocks */
		datafork = VTOF(vp);
		if ((datafork && datafork->ff_blocks > 0) || vnode_isdir(vp)) {
			error = hfs_reclaim_file(hfsmp, vp, filerec.fileID,
					kHFSDataForkType, allocLimit, context);
			if (error) {
				printf ("hfs_reclaimspace: Error reclaiming datafork blocks of fileid=%u (error=%d)\n", filerec.fileID, error);
				hfs_unlock(VTOC(vp));
				vnode_put(vp);
				break;
			}
		}

		/* If resource fork exists or item is a directory hard link, relocate blocks */
		if (((VTOC(vp)->c_blocks - (datafork ? datafork->ff_blocks : 0)) > 0) || vnode_isdir(vp)) {
			if (vnode_isdir(vp)) {
				/* Resource fork vnode lookup is invalid for directory hard link.
				 * So we fake data fork vnode as resource fork vnode.
				 */
				rvp = vp;
			} else {
				error = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE, FALSE);
				if (error) {
					printf ("hfs_reclaimspace: Error looking up rvp for fileid=%u (error=%d)\n", filerec.fileID, error);
					hfs_unlock(VTOC(vp));
					vnode_put(vp);
					break;
				}
				/* Let reclaim of the cnode drop the rsrc vnode iocount later. */
				VTOC(rvp)->c_flag |= C_NEED_RVNODE_PUT;
			}

			error = hfs_reclaim_file(hfsmp, rvp, filerec.fileID,
					kHFSResourceForkType, allocLimit, context);
			if (error) {
				printf ("hfs_reclaimspace: Error reclaiming rsrcfork blocks of fileid=%u (error=%d)\n", filerec.fileID, error);
				hfs_unlock(VTOC(vp));
				vnode_put(vp);
				break;
			}
		}

		/* The file forks were relocated successfully, now drop the
		 * cnode lock and vnode reference, and continue iterating to
		 * next catalog record.
		 */
		hfs_unlock(VTOC(vp));
		vnode_put(vp);
		files_moved++;
	}

	if (files_moved) {
		printf("hfs_reclaim_filespace: Relocated %u blocks from %u files on \"%s\"\n",
				(hfsmp->hfs_resize_blocksmoved - prev_blocksmoved),
				files_moved, hfsmp->vcbVN);
	}

reclaim_filespace_done:
	if (iterator) {
		kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator));
	}

#if CONFIG_PROTECT
	if (keys_generated) {
		cp_entry_destroy(hfsmp->hfs_resize_cpentry);
		hfsmp->hfs_resize_cpentry = NULL;
	}
#endif
	return error;
}
7167
/*
 * Reclaim space at the end of a file system.
 *
 * Inputs -
 * allocLimit - start block of the space being reclaimed
 * reclaimblks - number of allocation blocks to reclaim
 *
 * NOTE: The order of relocation below is deliberate and must be
 * preserved: journal first (it must remain contiguous), then the
 * system B-trees (extents first, so it gets the largest contiguous
 * free range), then regular file data, then extent-based xattrs.
 *
 * Returns 0 on success, non-zero on the first failure.
 */
static int
hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t allocLimit, u_int32_t reclaimblks, vfs_context_t context)
{
	int error = 0;

	/*
	 * Preflight the bitmap to find out total number of blocks that need
	 * relocation.
	 *
	 * Note: Since allocLimit is set to the location of new alternate volume
	 * header, the check below does not account for blocks allocated for old
	 * alternate volume header.
	 */
	error = hfs_count_allocated(hfsmp, allocLimit, reclaimblks, &(hfsmp->hfs_resize_totalblocks));
	if (error) {
		printf ("hfs_reclaimspace: Unable to determine total blocks to reclaim error=%d\n", error);
		return error;
	}
	if (hfs_resize_debug) {
		printf ("hfs_reclaimspace: Total number of blocks to reclaim = %u\n", hfsmp->hfs_resize_totalblocks);
	}

	/* Just to be safe, sync the content of the journal to the disk before we proceed */
	hfs_journal_flush(hfsmp, TRUE);

	/* First, relocate journal file blocks if they're in the way.
	 * Doing this first will make sure that journal relocate code
	 * gets access to contiguous blocks on disk first.  The journal
	 * file has to be contiguous on the disk, otherwise resize will
	 * fail.
	 */
	error = hfs_reclaim_journal_file(hfsmp, allocLimit, context);
	if (error) {
		printf("hfs_reclaimspace: hfs_reclaim_journal_file failed (%d)\n", error);
		return error;
	}

	/* Relocate journal info block blocks if they're in the way. */
	error = hfs_reclaim_journal_info_block(hfsmp, allocLimit, context);
	if (error) {
		printf("hfs_reclaimspace: hfs_reclaim_journal_info_block failed (%d)\n", error);
		return error;
	}

	/* Relocate extents of the Extents B-tree if they're in the way.
	 * Relocating extents btree before other btrees is important as
	 * this will provide access to largest contiguous block range on
	 * the disk for relocating extents btree.  Note that extents btree
	 * can only have maximum of 8 extents.
	 */
	error = hfs_reclaim_file(hfsmp, hfsmp->hfs_extents_vp, kHFSExtentsFileID,
			kHFSDataForkType, allocLimit, context);
	if (error) {
		printf("hfs_reclaimspace: reclaim extents b-tree returned %d\n", error);
		return error;
	}

	/* Relocate extents of the Allocation file if they're in the way. */
	error = hfs_reclaim_file(hfsmp, hfsmp->hfs_allocation_vp, kHFSAllocationFileID,
			kHFSDataForkType, allocLimit, context);
	if (error) {
		printf("hfs_reclaimspace: reclaim allocation file returned %d\n", error);
		return error;
	}

	/* Relocate extents of the Catalog B-tree if they're in the way. */
	error = hfs_reclaim_file(hfsmp, hfsmp->hfs_catalog_vp, kHFSCatalogFileID,
			kHFSDataForkType, allocLimit, context);
	if (error) {
		printf("hfs_reclaimspace: reclaim catalog b-tree returned %d\n", error);
		return error;
	}

	/* Relocate extents of the Attributes B-tree if they're in the way. */
	error = hfs_reclaim_file(hfsmp, hfsmp->hfs_attribute_vp, kHFSAttributesFileID,
			kHFSDataForkType, allocLimit, context);
	if (error) {
		printf("hfs_reclaimspace: reclaim attribute b-tree returned %d\n", error);
		return error;
	}

	/* Relocate extents of the Startup File if there is one and they're in the way. */
	error = hfs_reclaim_file(hfsmp, hfsmp->hfs_startup_vp, kHFSStartupFileID,
			kHFSDataForkType, allocLimit, context);
	if (error) {
		printf("hfs_reclaimspace: reclaim startup file returned %d\n", error);
		return error;
	}

	/*
	 * We need to make sure the alternate volume header gets flushed if we moved
	 * any extents in the volume header.  But we need to do that before
	 * shrinking the size of the volume, or else the journal code will panic
	 * with an invalid (too large) block number.
	 *
	 * Note that blks_moved will be set if ANY extent was moved, even
	 * if it was just an overflow extent.  In this case, the journal_flush isn't
	 * strictly required, but shouldn't hurt.
	 */
	if (hfsmp->hfs_resize_blocksmoved) {
		hfs_journal_flush(hfsmp, TRUE);
	}

	/* Reclaim extents from catalog file records */
	error = hfs_reclaim_filespace(hfsmp, allocLimit, context);
	if (error) {
		printf ("hfs_reclaimspace: hfs_reclaim_filespace returned error=%d\n", error);
		return error;
	}

	/* Reclaim extents from extent-based extended attributes, if any */
	error = hfs_reclaim_xattrspace(hfsmp, allocLimit, context);
	if (error) {
		printf ("hfs_reclaimspace: hfs_reclaim_xattrspace returned error=%d\n", error);
		return error;
	}

	return error;
}
7294
7295
/*
 * Check if there are any extents (including overflow extents) that overlap
 * into the disk space that is being reclaimed.
 *
 * Output -
 * true - One of the extents need to be relocated
 * false - No overflow extents need to be relocated, or there was an error
 */
static int
hfs_file_extent_overlaps(struct hfsmount *hfsmp, u_int32_t allocLimit, struct HFSPlusCatalogFile *filerec)
{
	struct BTreeIterator * iterator = NULL;
	struct FSBufferDescriptor btdata;
	HFSPlusExtentRecord extrec;
	HFSPlusExtentKey *extkeyptr;
	FCB *fcb;
	int overlapped = false;
	int i, j;
	int error;
	int lockflags = 0;		/* 0 means "extents B-tree not locked" */
	u_int32_t endblock;

	/* Check if data fork overlaps the target space */
	for (i = 0; i < kHFSPlusExtentDensity; ++i) {
		/* A zero blockCount terminates the extent list early. */
		if (filerec->dataFork.extents[i].blockCount == 0) {
			break;
		}
		endblock = filerec->dataFork.extents[i].startBlock +
			filerec->dataFork.extents[i].blockCount;
		/* Any block at or beyond allocLimit is in the reclaimed region. */
		if (endblock > allocLimit) {
			overlapped = true;
			goto out;
		}
	}

	/* Check if resource fork overlaps the target space */
	for (j = 0; j < kHFSPlusExtentDensity; ++j) {
		if (filerec->resourceFork.extents[j].blockCount == 0) {
			break;
		}
		endblock = filerec->resourceFork.extents[j].startBlock +
			filerec->resourceFork.extents[j].blockCount;
		if (endblock > allocLimit) {
			overlapped = true;
			goto out;
		}
	}

	/* Return back if there are no overflow extents for this file.
	 * Both loops ended before exhausting the in-record extent slots,
	 * so neither fork can have records in the overflow extents B-tree.
	 */
	if ((i < kHFSPlusExtentDensity) && (j < kHFSPlusExtentDensity)) {
		goto out;
	}

	/* NOTE(review): an allocation failure here reports "no overlap"
	 * (returns 0/false) rather than an error — confirm this best-effort
	 * behavior is acceptable to all callers.
	 */
	if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) {
		return 0;
	}
	bzero(iterator, sizeof(*iterator));

	/* Build a key for the lowest possible overflow record of this file:
	 * forkType 0, startBlock 0.
	 */
	extkeyptr = (HFSPlusExtentKey *)&iterator->key;
	extkeyptr->keyLength = kHFSPlusExtentKeyMaximumLength;
	extkeyptr->forkType = 0;
	extkeyptr->fileID = filerec->fileID;
	extkeyptr->startBlock = 0;

	btdata.bufferAddress = &extrec;
	btdata.itemSize = sizeof(extrec);
	btdata.itemCount = 1;

	fcb = VTOF(hfsmp->hfs_extents_vp);

	/* Shared lock is sufficient: we only read the extents B-tree. */
	lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_SHARED_LOCK);

	/* This will position the iterator just before the first overflow
	 * extent record for given fileID. It will always return btNotFound,
	 * so we special case the error code.
	 */
	error = BTSearchRecord(fcb, iterator, &btdata, NULL, iterator);
	if (error && (error != btNotFound)) {
		goto out;
	}

	/* BTIterateRecord() might return error if the btree is empty, and
	 * therefore we return that the extent does not overflow to the caller
	 */
	error = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL);
	while (error == 0) {
		/* Stop when we encounter a different file. */
		if (extkeyptr->fileID != filerec->fileID) {
			break;
		}
		/* Check if any of the forks exist in the target space. */
		for (i = 0; i < kHFSPlusExtentDensity; ++i) {
			if (extrec[i].blockCount == 0) {
				break;
			}
			endblock = extrec[i].startBlock + extrec[i].blockCount;
			if (endblock > allocLimit) {
				overlapped = true;
				goto out;
			}
		}
		/* Look for more records. */
		error = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL);
	}

out:
	/* lockflags is non-zero only if the extents lock was taken above. */
	if (lockflags) {
		hfs_systemfile_unlock(hfsmp, lockflags);
	}
	if (iterator) {
		kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator));
	}
	return overlapped;
}
7409
7410
7411 /*
7412 * Calculate the progress of a file system resize operation.
7413 */
7414 __private_extern__
7415 int
7416 hfs_resize_progress(struct hfsmount *hfsmp, u_int32_t *progress)
7417 {
7418 if ((hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) == 0) {
7419 return (ENXIO);
7420 }
7421
7422 if (hfsmp->hfs_resize_totalblocks > 0) {
7423 *progress = (u_int32_t)((hfsmp->hfs_resize_blocksmoved * 100ULL) / hfsmp->hfs_resize_totalblocks);
7424 } else {
7425 *progress = 0;
7426 }
7427
7428 return (0);
7429 }
7430
7431
7432 /*
7433 * Creates a UUID from a unique "name" in the HFS UUID Name space.
7434 * See version 3 UUID.
7435 */
7436 static void
7437 hfs_getvoluuid(struct hfsmount *hfsmp, uuid_t result)
7438 {
7439 MD5_CTX md5c;
7440 uint8_t rawUUID[8];
7441
7442 ((uint32_t *)rawUUID)[0] = hfsmp->vcbFndrInfo[6];
7443 ((uint32_t *)rawUUID)[1] = hfsmp->vcbFndrInfo[7];
7444
7445 MD5Init( &md5c );
7446 MD5Update( &md5c, HFS_UUID_NAMESPACE_ID, sizeof( uuid_t ) );
7447 MD5Update( &md5c, rawUUID, sizeof (rawUUID) );
7448 MD5Final( result, &md5c );
7449
7450 result[6] = 0x30 | ( result[6] & 0x0F );
7451 result[8] = 0x80 | ( result[8] & 0x3F );
7452 }
7453
/*
 * Get file system attributes.
 *
 * Fills in the requested fields of *fsap from the mounted volume's
 * in-memory VCB/hfsmount state.  Each field set is flagged via
 * VFSATTR_RETURN / VFSATTR_SET_SUPPORTED so the VFS layer knows which
 * attributes were actually supplied.
 */
static int
hfs_vfs_getattr(struct mount *mp, struct vfs_attr *fsap, __unused vfs_context_t context)
{
#define HFS_ATTR_CMN_VALIDMASK ATTR_CMN_VALIDMASK
#define HFS_ATTR_FILE_VALIDMASK (ATTR_FILE_VALIDMASK & ~(ATTR_FILE_FILETYPE | ATTR_FILE_FORKCOUNT | ATTR_FILE_FORKLIST))
#define HFS_ATTR_CMN_VOL_VALIDMASK (ATTR_CMN_VALIDMASK & ~(ATTR_CMN_ACCTIME))

	ExtendedVCB *vcb = VFSTOVCB(mp);
	struct hfsmount *hfsmp = VFSTOHFS(mp);
	u_int32_t freeCNIDs;

	/* Runtime-conditional interface capabilities (see below). */
	int searchfs_on = 0;
	int exchangedata_on = 1;

#if CONFIG_SEARCHFS
	searchfs_on = 1;
#endif

#if CONFIG_PROTECT
	/* exchangedata is disabled on content-protected volumes. */
	if (cp_fs_protected(mp)) {
		exchangedata_on = 0;
	}
#endif

	/* CNIDs are 32-bit and allocated sequentially; everything above the
	 * next id to be assigned is considered free.
	 */
	freeCNIDs = (u_int32_t)0xFFFFFFFF - (u_int32_t)hfsmp->vcbNxtCNID;

	VFSATTR_RETURN(fsap, f_objcount, (u_int64_t)hfsmp->vcbFilCnt + (u_int64_t)hfsmp->vcbDirCnt);
	VFSATTR_RETURN(fsap, f_filecount, (u_int64_t)hfsmp->vcbFilCnt);
	VFSATTR_RETURN(fsap, f_dircount, (u_int64_t)hfsmp->vcbDirCnt);
	VFSATTR_RETURN(fsap, f_maxobjcount, (u_int64_t)0xFFFFFFFF);
	VFSATTR_RETURN(fsap, f_iosize, (size_t)cluster_max_io_size(mp, 0));
	VFSATTR_RETURN(fsap, f_blocks, (u_int64_t)hfsmp->totalBlocks);
	VFSATTR_RETURN(fsap, f_bfree, (u_int64_t)hfs_freeblks(hfsmp, 0));
	VFSATTR_RETURN(fsap, f_bavail, (u_int64_t)hfs_freeblks(hfsmp, 1));
	VFSATTR_RETURN(fsap, f_bsize, (u_int32_t)vcb->blockSize);
	/* XXX needs clarification */
	VFSATTR_RETURN(fsap, f_bused, hfsmp->totalBlocks - hfs_freeblks(hfsmp, 1));
	/* Maximum files is constrained by total blocks. */
	VFSATTR_RETURN(fsap, f_files, (u_int64_t)(hfsmp->totalBlocks - 2));
	VFSATTR_RETURN(fsap, f_ffree, MIN((u_int64_t)freeCNIDs, (u_int64_t)hfs_freeblks(hfsmp, 1)));

	fsap->f_fsid.val[0] = hfsmp->hfs_raw_dev;
	fsap->f_fsid.val[1] = vfs_typenum(mp);
	VFSATTR_SET_SUPPORTED(fsap, f_fsid);

	VFSATTR_RETURN(fsap, f_signature, vcb->vcbSigWord);
	VFSATTR_RETURN(fsap, f_carbon_fsid, 0);

	if (VFSATTR_IS_ACTIVE(fsap, f_capabilities)) {
		vol_capabilities_attr_t *cap;

		cap = &fsap->f_capabilities;

		if ((hfsmp->hfs_flags & HFS_STANDARD) == 0) {
			/* HFS+ & variants */
			cap->capabilities[VOL_CAPABILITIES_FORMAT] =
				VOL_CAP_FMT_PERSISTENTOBJECTIDS |
				VOL_CAP_FMT_SYMBOLICLINKS |
				VOL_CAP_FMT_HARDLINKS |
				VOL_CAP_FMT_JOURNAL |
				VOL_CAP_FMT_ZERO_RUNS |
				(hfsmp->jnl ? VOL_CAP_FMT_JOURNAL_ACTIVE : 0) |
				(hfsmp->hfs_flags & HFS_CASE_SENSITIVE ? VOL_CAP_FMT_CASE_SENSITIVE : 0) |
				VOL_CAP_FMT_CASE_PRESERVING |
				VOL_CAP_FMT_FAST_STATFS |
				VOL_CAP_FMT_2TB_FILESIZE |
				VOL_CAP_FMT_HIDDEN_FILES |
#if HFS_COMPRESSION
				VOL_CAP_FMT_PATH_FROM_ID |
				VOL_CAP_FMT_DECMPFS_COMPRESSION;
#else
				VOL_CAP_FMT_PATH_FROM_ID;
#endif
		}
#if CONFIG_HFS_STD
		else {
			/* HFS standard */
			cap->capabilities[VOL_CAPABILITIES_FORMAT] =
				VOL_CAP_FMT_PERSISTENTOBJECTIDS |
				VOL_CAP_FMT_CASE_PRESERVING |
				VOL_CAP_FMT_FAST_STATFS |
				VOL_CAP_FMT_HIDDEN_FILES |
				VOL_CAP_FMT_PATH_FROM_ID;
		}
#endif

		/*
		 * The capabilities word in 'cap' tell you whether or not
		 * this particular filesystem instance has feature X enabled.
		 */

		cap->capabilities[VOL_CAPABILITIES_INTERFACES] =
			VOL_CAP_INT_ATTRLIST |
			VOL_CAP_INT_NFSEXPORT |
			VOL_CAP_INT_READDIRATTR |
			VOL_CAP_INT_ALLOCATE |
			VOL_CAP_INT_VOL_RENAME |
			VOL_CAP_INT_ADVLOCK |
			VOL_CAP_INT_FLOCK |
#if NAMEDSTREAMS
			VOL_CAP_INT_EXTENDED_ATTR |
			VOL_CAP_INT_NAMEDSTREAMS;
#else
			VOL_CAP_INT_EXTENDED_ATTR;
#endif

		/* HFS may conditionally support searchfs and exchangedata depending on the runtime */

		if (searchfs_on) {
			cap->capabilities[VOL_CAPABILITIES_INTERFACES] |= VOL_CAP_INT_SEARCHFS;
		}
		if (exchangedata_on) {
			cap->capabilities[VOL_CAPABILITIES_INTERFACES] |= VOL_CAP_INT_EXCHANGEDATA;
		}

		cap->capabilities[VOL_CAPABILITIES_RESERVED1] = 0;
		cap->capabilities[VOL_CAPABILITIES_RESERVED2] = 0;

		cap->valid[VOL_CAPABILITIES_FORMAT] =
			VOL_CAP_FMT_PERSISTENTOBJECTIDS |
			VOL_CAP_FMT_SYMBOLICLINKS |
			VOL_CAP_FMT_HARDLINKS |
			VOL_CAP_FMT_JOURNAL |
			VOL_CAP_FMT_JOURNAL_ACTIVE |
			VOL_CAP_FMT_NO_ROOT_TIMES |
			VOL_CAP_FMT_SPARSE_FILES |
			VOL_CAP_FMT_ZERO_RUNS |
			VOL_CAP_FMT_CASE_SENSITIVE |
			VOL_CAP_FMT_CASE_PRESERVING |
			VOL_CAP_FMT_FAST_STATFS |
			VOL_CAP_FMT_2TB_FILESIZE |
			VOL_CAP_FMT_OPENDENYMODES |
			VOL_CAP_FMT_HIDDEN_FILES |
#if HFS_COMPRESSION
			VOL_CAP_FMT_PATH_FROM_ID |
			VOL_CAP_FMT_DECMPFS_COMPRESSION;
#else
			VOL_CAP_FMT_PATH_FROM_ID;
#endif

		/*
		 * Bits in the "valid" field tell you whether or not the on-disk
		 * format supports feature X.
		 */

		cap->valid[VOL_CAPABILITIES_INTERFACES] =
			VOL_CAP_INT_ATTRLIST |
			VOL_CAP_INT_NFSEXPORT |
			VOL_CAP_INT_READDIRATTR |
			VOL_CAP_INT_COPYFILE |
			VOL_CAP_INT_ALLOCATE |
			VOL_CAP_INT_VOL_RENAME |
			VOL_CAP_INT_ADVLOCK |
			VOL_CAP_INT_FLOCK |
			VOL_CAP_INT_MANLOCK |
#if NAMEDSTREAMS
			VOL_CAP_INT_EXTENDED_ATTR |
			VOL_CAP_INT_NAMEDSTREAMS;
#else
			VOL_CAP_INT_EXTENDED_ATTR;
#endif

		/* HFS always supports exchangedata and searchfs in the on-disk format natively */
		cap->valid[VOL_CAPABILITIES_INTERFACES] |= (VOL_CAP_INT_SEARCHFS | VOL_CAP_INT_EXCHANGEDATA);


		cap->valid[VOL_CAPABILITIES_RESERVED1] = 0;
		cap->valid[VOL_CAPABILITIES_RESERVED2] = 0;
		VFSATTR_SET_SUPPORTED(fsap, f_capabilities);
	}
	if (VFSATTR_IS_ACTIVE(fsap, f_attributes)) {
		vol_attributes_attr_t *attrp = &fsap->f_attributes;

		/* "valid" = attributes getattrlist can return for this volume */
		attrp->validattr.commonattr = HFS_ATTR_CMN_VOL_VALIDMASK;
		attrp->validattr.volattr = ATTR_VOL_VALIDMASK & ~ATTR_VOL_INFO;
		attrp->validattr.dirattr = ATTR_DIR_VALIDMASK;
		attrp->validattr.fileattr = HFS_ATTR_FILE_VALIDMASK;
		attrp->validattr.forkattr = 0;

		/* "native" = attributes stored natively by the file system */
		attrp->nativeattr.commonattr = HFS_ATTR_CMN_VOL_VALIDMASK;
		attrp->nativeattr.volattr = ATTR_VOL_VALIDMASK & ~ATTR_VOL_INFO;
		attrp->nativeattr.dirattr = ATTR_DIR_VALIDMASK;
		attrp->nativeattr.fileattr = HFS_ATTR_FILE_VALIDMASK;
		attrp->nativeattr.forkattr = 0;
		VFSATTR_SET_SUPPORTED(fsap, f_attributes);
	}
	fsap->f_create_time.tv_sec = hfsmp->hfs_itime;
	fsap->f_create_time.tv_nsec = 0;
	VFSATTR_SET_SUPPORTED(fsap, f_create_time);
	fsap->f_modify_time.tv_sec = hfsmp->vcbLsMod;
	fsap->f_modify_time.tv_nsec = 0;
	VFSATTR_SET_SUPPORTED(fsap, f_modify_time);

	fsap->f_backup_time.tv_sec = hfsmp->vcbVolBkUp;
	fsap->f_backup_time.tv_nsec = 0;
	VFSATTR_SET_SUPPORTED(fsap, f_backup_time);
	if (VFSATTR_IS_ACTIVE(fsap, f_fssubtype)) {
		u_int16_t subtype = 0;

		/*
		 * Subtypes (flavors) for HFS
		 * 0: Mac OS Extended
		 * 1: Mac OS Extended (Journaled)
		 * 2: Mac OS Extended (Case Sensitive)
		 * 3: Mac OS Extended (Case Sensitive, Journaled)
		 * 4 - 127: Reserved
		 * 128: Mac OS Standard
		 *
		 */
		if ((hfsmp->hfs_flags & HFS_STANDARD) == 0) {
			if (hfsmp->jnl) {
				subtype |= HFS_SUBTYPE_JOURNALED;
			}
			if (hfsmp->hfs_flags & HFS_CASE_SENSITIVE) {
				subtype |= HFS_SUBTYPE_CASESENSITIVE;
			}
		}
#if CONFIG_HFS_STD
		else {
			subtype = HFS_SUBTYPE_STANDARDHFS;
		}
#endif
		fsap->f_fssubtype = subtype;
		VFSATTR_SET_SUPPORTED(fsap, f_fssubtype);
	}

	if (VFSATTR_IS_ACTIVE(fsap, f_vol_name)) {
		strlcpy(fsap->f_vol_name, (char *) hfsmp->vcbVN, MAXPATHLEN);
		VFSATTR_SET_SUPPORTED(fsap, f_vol_name);
	}
	if (VFSATTR_IS_ACTIVE(fsap, f_uuid)) {
		hfs_getvoluuid(hfsmp, fsap->f_uuid);
		VFSATTR_SET_SUPPORTED(fsap, f_uuid);
	}
	return (0);
}
7693
/*
 * Perform a volume rename. Requires the FS' root vp.
 *
 * Renames the root folder in the catalog, updates the name in the VCB,
 * and (best-effort) propagates the name to CoreStorage.  On success the
 * root cnode's catalog descriptor is swapped for the new one.
 */
static int
hfs_rename_volume(struct vnode *vp, const char *name, proc_t p)
{
	ExtendedVCB *vcb = VTOVCB(vp);
	struct cnode *cp = VTOC(vp);
	struct hfsmount *hfsmp = VTOHFS(vp);
	struct cat_desc to_desc;
	struct cat_desc todir_desc;
	struct cat_desc new_desc;
	cat_cookie_t cookie;
	int lockflags;
	int error = 0;
	char converted_volname[256];
	size_t volname_length = 0;
	size_t conv_volname_length = 0;


	/*
	 * Ignore attempts to rename a volume to a zero-length name.
	 */
	if (name[0] == 0)
		return(0);

	bzero(&to_desc, sizeof(to_desc));
	bzero(&todir_desc, sizeof(todir_desc));
	bzero(&new_desc, sizeof(new_desc));
	bzero(&cookie, sizeof(cookie));

	/* Destination directory descriptor: the volume's root folder. */
	todir_desc.cd_parentcnid = kHFSRootParentID;
	todir_desc.cd_cnid = kHFSRootFolderID;
	todir_desc.cd_flags = CD_ISDIR;

	/* Destination descriptor: same cnid as the root, but the new name. */
	to_desc.cd_nameptr = (const u_int8_t *)name;
	to_desc.cd_namelen = strlen(name);
	to_desc.cd_parentcnid = kHFSRootParentID;
	to_desc.cd_cnid = cp->c_cnid;
	to_desc.cd_flags = CD_ISDIR;

	/* Lock ordering: cnode lock, then transaction, then catalog lock. */
	if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT)) == 0) {
		if ((error = hfs_start_transaction(hfsmp)) == 0) {
			if ((error = cat_preflight(hfsmp, CAT_RENAME, &cookie, p)) == 0) {
				lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);

				error = cat_rename(hfsmp, &cp->c_desc, &todir_desc, &to_desc, &new_desc);

				/*
				 * If successful, update the name in the VCB, ensure it's terminated.
				 */
				if (error == 0) {
					strlcpy((char *)vcb->vcbVN, name, sizeof(vcb->vcbVN));

					volname_length = strlen ((const char*)vcb->vcbVN);
#define DKIOCCSSETLVNAME _IOW('d', 198, char[256])
					/* Send the volume name down to CoreStorage if necessary */
					error = utf8_normalizestr(vcb->vcbVN, volname_length, (u_int8_t*)converted_volname, &conv_volname_length, 256, UTF_PRECOMPOSED);
					if (error == 0) {
						/* Best effort; the ioctl result is deliberately ignored. */
						(void) VNOP_IOCTL (hfsmp->hfs_devvp, DKIOCCSSETLVNAME, converted_volname, 0, vfs_context_current());
					}
					/* A normalization failure must not fail the rename itself. */
					error = 0;
				}

				hfs_systemfile_unlock(hfsmp, lockflags);
				cat_postflight(hfsmp, &cookie, p);

				/*
				 * NOTE(review): the VCB is marked dirty only on the error
				 * path; on success the unconditional flush below writes the
				 * header.  Confirm the on-error-only MarkVCBDirty is
				 * intentional and not an inverted condition.
				 */
				if (error)
					MarkVCBDirty(vcb);
				(void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
			}
			hfs_end_transaction(hfsmp);
		}
		if (!error) {
			/* Release old allocated name buffer */
			if (cp->c_desc.cd_flags & CD_HASBUF) {
				const char *tmp_name = (const char *)cp->c_desc.cd_nameptr;

				cp->c_desc.cd_nameptr = 0;
				cp->c_desc.cd_namelen = 0;
				cp->c_desc.cd_flags &= ~CD_HASBUF;
				vfs_removename(tmp_name);
			}
			/* Update cnode's catalog descriptor */
			replace_desc(cp, &new_desc);
			vcb->volumeNameEncodingHint = new_desc.cd_encoding;
			cp->c_touch_chgtime = TRUE;
		}

		hfs_unlock(cp);
	}

	return(error);
}
7788
7789 /*
7790 * Get file system attributes.
7791 */
7792 static int
7793 hfs_vfs_setattr(struct mount *mp, struct vfs_attr *fsap, __unused vfs_context_t context)
7794 {
7795 kauth_cred_t cred = vfs_context_ucred(context);
7796 int error = 0;
7797
7798 /*
7799 * Must be superuser or owner of filesystem to change volume attributes
7800 */
7801 if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(mp)->f_owner))
7802 return(EACCES);
7803
7804 if (VFSATTR_IS_ACTIVE(fsap, f_vol_name)) {
7805 vnode_t root_vp;
7806
7807 error = hfs_vfs_root(mp, &root_vp, context);
7808 if (error)
7809 goto out;
7810
7811 error = hfs_rename_volume(root_vp, fsap->f_vol_name, vfs_context_proc(context));
7812 (void) vnode_put(root_vp);
7813 if (error)
7814 goto out;
7815
7816 VFSATTR_SET_SUPPORTED(fsap, f_vol_name);
7817 }
7818
7819 out:
7820 return error;
7821 }
7822
7823 /* If a runtime corruption is detected, set the volume inconsistent
7824 * bit in the volume attributes. The volume inconsistent bit is a persistent
7825 * bit which represents that the volume is corrupt and needs repair.
7826 * The volume inconsistent bit can be set from the kernel when it detects
7827 * runtime corruption or from file system repair utilities like fsck_hfs when
7828 * a repair operation fails. The bit should be cleared only from file system
7829 * verify/repair utility like fsck_hfs when a verify/repair succeeds.
7830 */
7831 void hfs_mark_volume_inconsistent(struct hfsmount *hfsmp)
7832 {
7833 hfs_lock_mount (hfsmp);
7834 if ((hfsmp->vcbAtrb & kHFSVolumeInconsistentMask) == 0) {
7835 hfsmp->vcbAtrb |= kHFSVolumeInconsistentMask;
7836 MarkVCBDirty(hfsmp);
7837 }
7838 if ((hfsmp->hfs_flags & HFS_READ_ONLY)==0) {
7839 /* Log information to ASL log */
7840 fslog_fs_corrupt(hfsmp->hfs_mp);
7841 printf("hfs: Runtime corruption detected on %s, fsck will be forced on next mount.\n", hfsmp->vcbVN);
7842 }
7843 hfs_unlock_mount (hfsmp);
7844 }
7845
7846 /* Replay the journal on the device node provided. Returns zero if
7847 * journal replay succeeded or no journal was supposed to be replayed.
7848 */
7849 static int hfs_journal_replay(vnode_t devvp, vfs_context_t context)
7850 {
7851 int retval = 0;
7852 int error = 0;
7853 struct mount *mp = NULL;
7854 struct hfs_mount_args *args = NULL;
7855
7856 /* Replay allowed only on raw devices */
7857 if (!vnode_ischr(devvp) && !vnode_isblk(devvp)) {
7858 retval = EINVAL;
7859 goto out;
7860 }
7861
7862 /* Create dummy mount structures */
7863 MALLOC(mp, struct mount *, sizeof(struct mount), M_TEMP, M_WAITOK);
7864 if (mp == NULL) {
7865 retval = ENOMEM;
7866 goto out;
7867 }
7868 bzero(mp, sizeof(struct mount));
7869 mount_lock_init(mp);
7870
7871 MALLOC(args, struct hfs_mount_args *, sizeof(struct hfs_mount_args), M_TEMP, M_WAITOK);
7872 if (args == NULL) {
7873 retval = ENOMEM;
7874 goto out;
7875 }
7876 bzero(args, sizeof(struct hfs_mount_args));
7877
7878 retval = hfs_mountfs(devvp, mp, args, 1, context);
7879 buf_flushdirtyblks(devvp, TRUE, 0, "hfs_journal_replay");
7880
7881 /* FSYNC the devnode to be sure all data has been flushed */
7882 error = VNOP_FSYNC(devvp, MNT_WAIT, context);
7883 if (error) {
7884 retval = error;
7885 }
7886
7887 out:
7888 if (mp) {
7889 mount_lock_destroy(mp);
7890 FREE(mp, M_TEMP);
7891 }
7892 if (args) {
7893 FREE(args, M_TEMP);
7894 }
7895 return retval;
7896 }
7897
/*
 * hfs vfs operations.
 *
 * NOTE: this is a positional initializer; the entry order must match the
 * field order of struct vfsops (see <sys/mount.h>).
 */
struct vfsops hfs_vfsops = {
	hfs_mount,
	hfs_start,
	hfs_unmount,
	hfs_vfs_root,
	hfs_quotactl,
	hfs_vfs_getattr, 	/* was hfs_statfs */
	hfs_sync,
	hfs_vfs_vget,
	hfs_fhtovp,
	hfs_vptofh,
	hfs_init,
	hfs_sysctl,
	hfs_vfs_setattr,
	{NULL}			/* remaining entries unused */
};