]> git.saurik.com Git - apple/xnu.git/blob - bsd/hfs/hfs_vfsops.c
adf02520b5e3648d66bbe69508189c6d3a1865cc
[apple/xnu.git] / bsd / hfs / hfs_vfsops.c
1 /*
2 * Copyright (c) 1999-2010 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1991, 1993, 1994
30 * The Regents of the University of California. All rights reserved.
31 * (c) UNIX System Laboratories, Inc.
32 * All or some portions of this file are derived from material licensed
33 * to the University of California by American Telephone and Telegraph
34 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35 * the permission of UNIX System Laboratories, Inc.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * hfs_vfsops.c
66 * derived from @(#)ufs_vfsops.c 8.8 (Berkeley) 5/20/95
67 *
68 * (c) Copyright 1997-2002 Apple Computer, Inc. All rights reserved.
69 *
70 * hfs_vfsops.c -- VFS layer for loadable HFS file system.
71 *
72 */
73 #include <sys/param.h>
74 #include <sys/systm.h>
75 #include <sys/kauth.h>
76
77 #include <sys/ubc.h>
78 #include <sys/ubc_internal.h>
79 #include <sys/vnode_internal.h>
80 #include <sys/mount_internal.h>
81 #include <sys/sysctl.h>
82 #include <sys/malloc.h>
83 #include <sys/stat.h>
84 #include <sys/quota.h>
85 #include <sys/disk.h>
86 #include <sys/paths.h>
87 #include <sys/utfconv.h>
88 #include <sys/kdebug.h>
89 #include <sys/fslog.h>
90 #include <sys/ubc.h>
91
92 #include <kern/locks.h>
93
94 #include <vfs/vfs_journal.h>
95
96 #include <miscfs/specfs/specdev.h>
97 #include <hfs/hfs_mount.h>
98
99 #include <libkern/crypto/md5.h>
100 #include <uuid/uuid.h>
101
102 #include "hfs.h"
103 #include "hfs_catalog.h"
104 #include "hfs_cnode.h"
105 #include "hfs_dbg.h"
106 #include "hfs_endian.h"
107 #include "hfs_hotfiles.h"
108 #include "hfs_quota.h"
109
110 #include "hfscommon/headers/FileMgrInternal.h"
111 #include "hfscommon/headers/BTreesInternal.h"
112
113 #if CONFIG_PROTECT
114 #include <sys/cprotect.h>
115 #endif
116
117 #if CONFIG_HFS_ALLOC_RBTREE
118 #include "hfscommon/headers/HybridAllocator.h"
119 #endif
120
/* When non-zero, mount-path failures are logged via printf. */
#define HFS_MOUNT_DEBUG 1

#if HFS_DIAGNOSTIC
/* Diagnostic-build debug switches (off by default). */
int hfs_dbg_all = 0;
int hfs_dbg_err = 0;
#endif

/* Enable/disable debugging code for live volume resizing */
int hfs_resize_debug = 0;

/* Lock groups/attributes shared by all HFS mounts; set up at hfs_init() time. */
lck_grp_attr_t *  hfs_group_attr;
lck_attr_t *  hfs_lock_attr;
lck_grp_t *  hfs_mutex_group;
lck_grp_t *  hfs_rwlock_group;
lck_grp_t *  hfs_spinlock_group;

extern struct vnodeopv_desc hfs_vnodeop_opv_desc;
extern struct vnodeopv_desc hfs_std_vnodeop_opv_desc;

/* not static so we can re-use in hfs_readwrite.c for build_path calls */
int hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context);

/* File-local VFS operation implementations and helpers. */
static int hfs_changefs(struct mount *mp, struct hfs_mount_args *args);
static int hfs_fhtovp(struct mount *mp, int fhlen, unsigned char *fhp, struct vnode **vpp, vfs_context_t context);
static int hfs_flushfiles(struct mount *, int, struct proc *);
static int hfs_flushMDB(struct hfsmount *hfsmp, int waitfor, int altflush);
static int hfs_getmountpoint(struct vnode *vp, struct hfsmount **hfsmpp);
static int hfs_init(struct vfsconf *vfsp);
static int hfs_vfs_root(struct mount *mp, struct vnode **vpp, vfs_context_t context);
static int hfs_quotactl(struct mount *, int, uid_t, caddr_t, vfs_context_t context);
static int hfs_start(struct mount *mp, int flags, vfs_context_t context);
static int hfs_vptofh(struct vnode *vp, int *fhlenp, unsigned char *fhp, vfs_context_t context);
static int hfs_file_extent_overlaps(struct hfsmount *hfsmp, u_int32_t allocLimit, struct HFSPlusCatalogFile *filerec);
static int hfs_journal_replay(vnode_t devvp, vfs_context_t context);
static int hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t allocLimit, u_int32_t reclaimblks, vfs_context_t context);

/* Red-black tree allocator bring-up/tear-down (CONFIG_HFS_ALLOC_RBTREE). */
void hfs_initialize_allocator (struct hfsmount *hfsmp);
int hfs_teardown_allocator (struct hfsmount *hfsmp);

/* Non-static entry points shared with other HFS source files. */
int hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t context);
int hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, int journal_replay_only, vfs_context_t context);
int hfs_reload(struct mount *mp);
int hfs_statfs(struct mount *mp, register struct vfsstatfs *sbp, vfs_context_t context);
int hfs_sync(struct mount *mp, int waitfor, vfs_context_t context);
int hfs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp,
                      user_addr_t newp, size_t newlen, vfs_context_t context);
int hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context);
168
169 /*
170 * Called by vfs_mountroot when mounting HFS Plus as root.
171 */
172
173 int
174 hfs_mountroot(mount_t mp, vnode_t rvp, vfs_context_t context)
175 {
176 struct hfsmount *hfsmp;
177 ExtendedVCB *vcb;
178 struct vfsstatfs *vfsp;
179 int error;
180
181 if ((error = hfs_mountfs(rvp, mp, NULL, 0, context))) {
182 if (HFS_MOUNT_DEBUG) {
183 printf("hfs_mountroot: hfs_mountfs returned %d, rvp (%p) name (%s) \n",
184 error, rvp, (rvp->v_name ? rvp->v_name : "unknown device"));
185 }
186 return (error);
187 }
188
189 /* Init hfsmp */
190 hfsmp = VFSTOHFS(mp);
191
192 hfsmp->hfs_uid = UNKNOWNUID;
193 hfsmp->hfs_gid = UNKNOWNGID;
194 hfsmp->hfs_dir_mask = (S_IRWXU | S_IRGRP|S_IXGRP | S_IROTH|S_IXOTH); /* 0755 */
195 hfsmp->hfs_file_mask = (S_IRWXU | S_IRGRP|S_IXGRP | S_IROTH|S_IXOTH); /* 0755 */
196
197 /* Establish the free block reserve. */
198 vcb = HFSTOVCB(hfsmp);
199 vcb->reserveBlocks = ((u_int64_t)vcb->totalBlocks * HFS_MINFREE) / 100;
200 vcb->reserveBlocks = MIN(vcb->reserveBlocks, HFS_MAXRESERVE / vcb->blockSize);
201
202 vfsp = vfs_statfs(mp);
203 (void)hfs_statfs(mp, vfsp, NULL);
204
205 return (0);
206 }
207
208
209 /*
210 * VFS Operations.
211 *
212 * mount system call
213 */
214
/*
 * hfs_mount: VFS mount entry point for HFS/HFS+.
 *
 * Handles both fresh mounts and MNT_UPDATE requests.  An update request
 * can be one of:
 *   - MNT_RELOAD: re-read incore data after an fsck (read-only mounts only)
 *   - read-write -> read-only downgrade (flush files, close the journal)
 *   - read-only -> read-write upgrade (reopen journal, clear clean bit)
 *   - plain parameter change, delegated to hfs_changefs()
 *
 * mp      - mount structure for this volume
 * devvp   - block device vnode (used for fresh mounts only)
 * data    - user-space pointer to a struct hfs_mount_args
 * context - caller's VFS context
 *
 * Returns 0 on success or an errno value.
 */
int
hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t context)
{
	struct proc *p = vfs_context_proc(context);
	struct hfsmount *hfsmp = NULL;
	struct hfs_mount_args args;
	int retval = E_NONE;
	u_int32_t cmdflags;

	/* Mount arguments live in user space; copy them in before anything else. */
	if ((retval = copyin(data, (caddr_t)&args, sizeof(args)))) {
		if (HFS_MOUNT_DEBUG) {
			printf("hfs_mount: copyin returned %d for fs\n", retval);
		}
		return (retval);
	}
	cmdflags = (u_int32_t)vfs_flags(mp) & MNT_CMDFLAGS;
	if (cmdflags & MNT_UPDATE) {
		hfsmp = VFSTOHFS(mp);

		/* Reload incore data after an fsck. */
		if (cmdflags & MNT_RELOAD) {
			if (vfs_isrdonly(mp)) {
				int error = hfs_reload(mp);
				if (error && HFS_MOUNT_DEBUG) {
					printf("hfs_mount: hfs_reload returned %d on %s \n", error, hfsmp->vcbVN);
				}
				return error;
			}
			else {
				/* Reload is only meaningful (and safe) on a read-only mount. */
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: MNT_RELOAD not supported on rdwr filesystem %s\n", hfsmp->vcbVN);
				}
				return (EINVAL);
			}
		}

		/* Change to a read-only file system. */
		if (((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) &&
		    vfs_isrdonly(mp)) {
			int flags;

			/* Set flag to indicate that a downgrade to read-only
			 * is in progress and therefore block any further
			 * modifications to the file system.
			 */
			hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);
			hfsmp->hfs_flags |= HFS_RDONLY_DOWNGRADE;
			hfsmp->hfs_downgrading_proc = current_thread();
			hfs_unlock_global (hfsmp);

			/* use VFS_SYNC to push out System (btree) files */
			retval = VFS_SYNC(mp, MNT_WAIT, context);
			if (retval && ((cmdflags & MNT_FORCE) == 0)) {
				/* Sync failed and this is not a forced downgrade: back out. */
				hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE;
				hfsmp->hfs_downgrading_proc = NULL;
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: VFS_SYNC returned %d during b-tree sync of %s \n", retval, hfsmp->vcbVN);
				}
				goto out;
			}

			flags = WRITECLOSE;
			if (cmdflags & MNT_FORCE)
				flags |= FORCECLOSE;

			if ((retval = hfs_flushfiles(mp, flags, p))) {
				hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE;
				hfsmp->hfs_downgrading_proc = NULL;
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: hfs_flushfiles returned %d on %s \n", retval, hfsmp->vcbVN);
				}
				goto out;
			}

			/* mark the volume cleanly unmounted */
			hfsmp->vcbAtrb |= kHFSVolumeUnmountedMask;
			retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
			/* NOTE(review): HFS_READ_ONLY is set here even if the header flush
			 * failed; the error path below clears it again. */
			hfsmp->hfs_flags |= HFS_READ_ONLY;

			/* also get the volume bitmap blocks */
			if (!retval) {
				if (vnode_mount(hfsmp->hfs_devvp) == mp) {
					retval = hfs_fsync(hfsmp->hfs_devvp, MNT_WAIT, 0, p);
				} else {
					/* devvp belongs to another mount; take an iocount around the fsync. */
					vnode_get(hfsmp->hfs_devvp);
					retval = VNOP_FSYNC(hfsmp->hfs_devvp, MNT_WAIT, context);
					vnode_put(hfsmp->hfs_devvp);
				}
			}
			if (retval) {
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: FSYNC on devvp returned %d for fs %s\n", retval, hfsmp->vcbVN);
				}
				/* Downgrade failed: restore the read-write state. */
				hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE;
				hfsmp->hfs_downgrading_proc = NULL;
				hfsmp->hfs_flags &= ~HFS_READ_ONLY;
				goto out;
			}
			if (hfsmp->jnl) {
				hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);

				journal_close(hfsmp->jnl);
				hfsmp->jnl = NULL;

				// Note: we explicitly don't want to shutdown
				// access to the jvp because we may need
				// it later if we go back to being read-write.

				hfs_unlock_global (hfsmp);
			}

#if CONFIG_HFS_ALLOC_RBTREE
			(void) hfs_teardown_allocator(hfsmp);
#endif
			hfsmp->hfs_downgrading_proc = NULL;
		}

		/* Change to a writable file system. */
		if (vfs_iswriteupgrade(mp)) {
#if CONFIG_HFS_ALLOC_RBTREE
			thread_t allocator_thread;
#endif

			/*
			 * On inconsistent disks, do not allow read-write mount
			 * unless it is the boot volume being mounted.
			 */
			if (!(vfs_flags(mp) & MNT_ROOTFS) &&
					(hfsmp->vcbAtrb & kHFSVolumeInconsistentMask)) {
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: attempting to mount inconsistent non-root volume %s\n",  (hfsmp->vcbVN));
				}
				retval = EINVAL;
				goto out;
			}

			// If the journal was shut-down previously because we were
			// asked to be read-only, let's start it back up again now

			if (   (HFSTOVCB(hfsmp)->vcbAtrb & kHFSVolumeJournaledMask)
			    && hfsmp->jnl == NULL
			    && hfsmp->jvp != NULL) {
				int jflags;

				if (hfsmp->hfs_flags & HFS_NEED_JNL_RESET) {
					jflags = JOURNAL_RESET;
				} else {
					jflags = 0;
				}

				hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);

				hfsmp->jnl = journal_open(hfsmp->jvp,
						(hfsmp->jnl_start * HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset,
						hfsmp->jnl_size,
						hfsmp->hfs_devvp,
						hfsmp->hfs_logical_block_size,
						jflags,
						0,
						hfs_sync_metadata, hfsmp->hfs_mp);

				/*
				 * Set up the trim callback function so that we can add
				 * recently freed extents to the free extent cache once
				 * the transaction that freed them is written to the
				 * journal on disk.
				 */
				if (hfsmp->jnl)
					journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);

				hfs_unlock_global (hfsmp);

				if (hfsmp->jnl == NULL) {
					if (HFS_MOUNT_DEBUG) {
						printf("hfs_mount: journal_open == NULL; couldn't be opened on %s \n", (hfsmp->vcbVN));
					}
					retval = EINVAL;
					goto out;
				} else {
					hfsmp->hfs_flags &= ~HFS_NEED_JNL_RESET;
				}

			}

			/* See if we need to erase unused Catalog nodes due to <rdar://problem/6947811>. */
			retval = hfs_erase_unused_nodes(hfsmp);
			if (retval != E_NONE) {
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: hfs_erase_unused_nodes returned %d for fs %s\n", retval, hfsmp->vcbVN);
				}
				goto out;
			}

			/* If this mount point was downgraded from read-write
			 * to read-only, clear that information as we are now
			 * moving back to read-write.
			 */
			hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE;
			hfsmp->hfs_downgrading_proc = NULL;

			/* mark the volume dirty (clear clean unmount bit) */
			hfsmp->vcbAtrb &= ~kHFSVolumeUnmountedMask;

			retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
			if (retval != E_NONE) {
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: hfs_flushvolumeheader returned %d for fs %s\n", retval, hfsmp->vcbVN);
				}
				goto out;
			}

			/* Only clear HFS_READ_ONLY after a successful write */
			hfsmp->hfs_flags &= ~HFS_READ_ONLY;


			if (!(hfsmp->hfs_flags & (HFS_READ_ONLY | HFS_STANDARD))) {
				/* Setup private/hidden directories for hardlinks. */
				hfs_privatedir_init(hfsmp, FILE_HARDLINKS);
				hfs_privatedir_init(hfsmp, DIR_HARDLINKS);

				/* Clean up files orphaned by a previous crash/unclean unmount. */
				hfs_remove_orphans(hfsmp);

				/*
				 * Allow hot file clustering if conditions allow.
				 */
				if ((hfsmp->hfs_flags & HFS_METADATA_ZONE) &&
				    ((hfsmp->hfs_flags & HFS_SSD) == 0)) {
					(void) hfs_recording_init(hfsmp);
				}
				/* Force ACLs on HFS+ file systems. */
				if (vfs_extendedsecurity(HFSTOVFS(hfsmp)) == 0) {
					vfs_setextendedsecurity(HFSTOVFS(hfsmp));
				}
			}

#if CONFIG_HFS_ALLOC_RBTREE
			/*
			 * Like the normal mount case, we need to handle creation of the allocation red-black tree
			 * if we're upgrading from read-only to read-write.
			 *
			 * We spawn a thread to create the pair of red-black trees for this volume.
			 * However, in so doing, we must be careful to ensure that if this thread is still
			 * running after mount has finished, it doesn't interfere with an unmount. Specifically,
			 * we'll need to set a bit that indicates we're in progress building the trees here.
			 * Unmount will check for this bit, and then if it's set, mark a corresponding bit that
			 * notifies the tree generation code that an unmount is waiting.  Also, mark the extent
			 * tree flags that the allocator is enabled for use before we spawn the thread that will start
			 * scanning the RB tree.
			 *
			 * Only do this if we're operating on a read-write mount (we wouldn't care for read-only),
			 * which has not previously encountered a bad error on the red-black tree code.  Also, don't
			 * try to re-build a tree that already exists.
			 */

			if (hfsmp->extent_tree_flags == 0) {
				hfsmp->extent_tree_flags |= (HFS_ALLOC_TREEBUILD_INFLIGHT | HFS_ALLOC_RB_ENABLED);
				/* Initialize EOF counter so that the thread can assume it started at initial values */
				hfsmp->offset_block_end = 0;

				InitTree(hfsmp);

				kernel_thread_start ((thread_continue_t) hfs_initialize_allocator , hfsmp, &allocator_thread);
				thread_deallocate(allocator_thread);
			}

#endif
		}

		/* Update file system parameters. */
		retval = hfs_changefs(mp, &args);
		if (retval &&  HFS_MOUNT_DEBUG) {
			printf("hfs_mount: hfs_changefs returned %d for %s\n", retval, hfsmp->vcbVN);
		}

	} else /* not an update request */ {

		/* Set the mount flag to indicate that we support volfs  */
		vfs_setflags(mp, (u_int64_t)((unsigned int)MNT_DOVOLFS));

		retval = hfs_mountfs(devvp, mp, &args, 0, context);
		if (retval && HFS_MOUNT_DEBUG) {
			printf("hfs_mount: hfs_mountfs returned %d\n", retval);
		}
#if CONFIG_PROTECT
		/*
		 * If above mount call was successful, and this mount is content protection
		 * enabled, then verify the on-disk EA on the root to ensure that the filesystem
		 * is of a suitable vintage to allow the mount to proceed.
		 */
		if ((retval == 0) && (cp_fs_protected (mp))) {
			int err = 0;
			struct cp_root_xattr xattr;
			bzero (&xattr, sizeof(struct cp_root_xattr));
			hfsmp = vfs_fsprivate(mp);

			/* go get the EA to get the version information */
			err = cp_getrootxattr (hfsmp, &xattr);
			/* If there was no EA there, then write one out. */
			if (err == ENOATTR) {
				bzero(&xattr, sizeof(struct cp_root_xattr));
				xattr.major_version = CP_CURRENT_MAJOR_VERS;
				xattr.minor_version = CP_CURRENT_MINOR_VERS;
				xattr.flags = 0;

				err = cp_setrootxattr (hfsmp, &xattr);
			}
			/*
			 * For any other error, including having an out of date CP version in the
			 * EA, or for an error out of cp_setrootxattr, deny the mount
			 * and do not proceed further.
			 */
			if (err || xattr.major_version != CP_CURRENT_MAJOR_VERS) {
				/* Deny the mount and tear down. */
				retval = EPERM;
				(void) hfs_unmount (mp, MNT_FORCE, context);
			}
		}
#endif
	}
out:
	/* On success, refresh the cached statfs information. */
	if (retval == 0) {
		(void)hfs_statfs(mp, vfs_statfs(mp), context);
	}
	return (retval);
}
540
541
/*
 * Argument package handed to hfs_changefs_callback() for each vnode
 * visited by vnode_iterate() during hfs_changefs().
 */
struct hfs_changefs_cargs {
	struct hfsmount *hfsmp;		/* mount whose parameters changed */
	int		namefix;	/* non-zero: name encoding converter changed */
	int		permfix;	/* non-zero: default uid/gid/mask changed */
	int		permswitch;	/* non-zero: MNT_UNKNOWNPERMISSIONS was toggled */
};
548
/*
 * Per-vnode callback for hfs_changefs(), run under vnode_iterate().
 *
 * Re-reads the cnode's catalog record and, depending on the flags in
 * the hfs_changefs_cargs package, refreshes on-disk ownership/mode
 * (permswitch/permfix) and/or replaces the cached name with one produced
 * by the newly-installed encoding converter (namefix).
 *
 * Always returns VNODE_RETURNED so iteration continues.
 */
static int
hfs_changefs_callback(struct vnode *vp, void *cargs)
{
	ExtendedVCB *vcb;
	struct cnode *cp;
	struct cat_desc cndesc;
	struct cat_attr cnattr;
	struct hfs_changefs_cargs *args;
	int lockflags;
	int error;

	args = (struct hfs_changefs_cargs *)cargs;

	cp = VTOC(vp);
	vcb = HFSTOVCB(args->hfsmp);

	/* Look up the catalog record under a shared catalog lock. */
	lockflags = hfs_systemfile_lock(args->hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
	error = cat_lookup(args->hfsmp, &cp->c_desc, 0, &cndesc, &cnattr, NULL, NULL);
	hfs_systemfile_unlock(args->hfsmp, lockflags);
	if (error) {
	        /*
	         * If we couldn't find this guy skip to the next one
	         */
	        if (args->namefix)
	        	cache_purge(vp);

	        return (VNODE_RETURNED);
	}
	/*
	 * Get the real uid/gid and perm mask from disk.
	 */
	if (args->permswitch || args->permfix) {
		cp->c_uid = cnattr.ca_uid;
		cp->c_gid = cnattr.ca_gid;
		cp->c_mode = cnattr.ca_mode;
	}
	/*
	 * If we're switching name converters then...
	 *   Remove the existing entry from the namei cache.
	 *   Update name to one based on new encoder.
	 */
	if (args->namefix) {
		cache_purge(vp);
		/* replace_desc consumes cndesc; no release needed on this path. */
		replace_desc(cp, &cndesc);

		if (cndesc.cd_cnid == kHFSRootFolderID) {
			/* Root folder: also refresh the volume name on the VCB. */
			strlcpy((char *)vcb->vcbVN, (const char *)cp->c_desc.cd_nameptr, NAME_MAX+1);
			cp->c_desc.cd_encoding = args->hfsmp->hfs_encoding;
		}
	} else {
		/* Descriptor was not consumed above; release its name buffer. */
		cat_releasedesc(&cndesc);
	}
	return (VNODE_RETURNED);
}
603
604 /* Change fs mount parameters */
/*
 * Change fs mount parameters.
 *
 * Applies new mount arguments to a live volume: timezone, default
 * uid/gid/mask, unknown-permissions mode, and (HFS standard only) the
 * name encoding.  If any of those changed in a way that affects cached
 * cnodes, every active vnode is revisited via hfs_changefs_callback().
 *
 * HFS_IN_CHANGEFS is held in hfs_flags for the duration of the update.
 * Returns 0 on success or an errno value.
 */
static int
hfs_changefs(struct mount *mp, struct hfs_mount_args *args)
{
	int retval = 0;
	int namefix, permfix, permswitch;
	struct hfsmount *hfsmp;
	ExtendedVCB *vcb;
	hfs_to_unicode_func_t	get_unicode_func;
	unicode_to_hfs_func_t	get_hfsname_func;
	u_int32_t old_encoding = 0;
	struct hfs_changefs_cargs cargs;
	u_int32_t mount_flags;

	hfsmp = VFSTOHFS(mp);
	vcb = HFSTOVCB(hfsmp);
	mount_flags = (unsigned int)vfs_flags(mp);

	hfsmp->hfs_flags |= HFS_IN_CHANGEFS;

	/* permswitch is set when MNT_UNKNOWNPERMISSIONS differs from the
	 * volume's current HFS_UNKNOWN_PERMS state (i.e. the mode is toggling). */
	permswitch = (((hfsmp->hfs_flags & HFS_UNKNOWN_PERMS) &&
	               ((mount_flags & MNT_UNKNOWNPERMISSIONS) == 0)) ||
	              (((hfsmp->hfs_flags & HFS_UNKNOWN_PERMS) == 0) &&
	               (mount_flags & MNT_UNKNOWNPERMISSIONS)));

	/* The root filesystem must operate with actual permissions: */
	if (permswitch && (mount_flags & MNT_ROOTFS) && (mount_flags & MNT_UNKNOWNPERMISSIONS)) {
		vfs_clearflags(mp, (u_int64_t)((unsigned int)MNT_UNKNOWNPERMISSIONS));	/* Just say "No". */
		retval = EINVAL;
		goto exit;
	}
	if (mount_flags & MNT_UNKNOWNPERMISSIONS)
		hfsmp->hfs_flags |= HFS_UNKNOWN_PERMS;
	else
		hfsmp->hfs_flags &= ~HFS_UNKNOWN_PERMS;

	namefix = permfix = 0;

	/*
	 * Tracking of hot files requires up-to-date access times.  So if
	 * access time updates are disabled, we must also disable hot files.
	 */
	if (mount_flags & MNT_NOATIME) {
		(void) hfs_recording_suspend(hfsmp);
	}

	/* Change the timezone (Note: this affects all hfs volumes and hfs+ volume create dates) */
	if (args->hfs_timezone.tz_minuteswest != VNOVAL) {
		gTimeZone = args->hfs_timezone;
	}

	/* Change the default uid, gid and/or mask */
	if ((args->hfs_uid != (uid_t)VNOVAL) && (hfsmp->hfs_uid != args->hfs_uid)) {
		hfsmp->hfs_uid = args->hfs_uid;
		if (vcb->vcbSigWord == kHFSPlusSigWord)
			++permfix;
	}
	if ((args->hfs_gid != (gid_t)VNOVAL) && (hfsmp->hfs_gid != args->hfs_gid)) {
		hfsmp->hfs_gid = args->hfs_gid;
		if (vcb->vcbSigWord == kHFSPlusSigWord)
			++permfix;
	}
	if (args->hfs_mask != (mode_t)VNOVAL) {
		if (hfsmp->hfs_dir_mask != (args->hfs_mask & ALLPERMS)) {
			hfsmp->hfs_dir_mask = args->hfs_mask & ALLPERMS;
			hfsmp->hfs_file_mask = args->hfs_mask & ALLPERMS;
			/* HFSFSMNT_NOXONFILES strips execute bits from the file mask. */
			if ((args->flags != VNOVAL) && (args->flags & HFSFSMNT_NOXONFILES))
				hfsmp->hfs_file_mask = (args->hfs_mask & DEFFILEMODE);
			if (vcb->vcbSigWord == kHFSPlusSigWord)
				++permfix;
		}
	}

	/* Change the hfs encoding value (hfs only) */
	if ((vcb->vcbSigWord == kHFSSigWord)	&&
	    (args->hfs_encoding != (u_int32_t)VNOVAL)              &&
	    (hfsmp->hfs_encoding != args->hfs_encoding)) {

		retval = hfs_getconverter(args->hfs_encoding, &get_unicode_func, &get_hfsname_func);
		if (retval)
			goto exit;

		/*
		 * Connect the new hfs_get_unicode converter but leave
		 * the old hfs_get_hfsname converter in place so that
		 * we can lookup existing vnodes to get their correctly
		 * encoded names.
		 *
		 * When we're all finished, we can then connect the new
		 * hfs_get_hfsname converter and release our interest
		 * in the old converters.
		 */
		hfsmp->hfs_get_unicode = get_unicode_func;
		old_encoding = hfsmp->hfs_encoding;
		hfsmp->hfs_encoding = args->hfs_encoding;
		++namefix;
	}

	/* Nothing that affects cached cnodes changed; skip the vnode walk. */
	if (!(namefix || permfix || permswitch))
		goto exit;

	/* XXX 3762912 hack to support HFS filesystem 'owner' */
	if (permfix)
		vfs_setowner(mp,
		    hfsmp->hfs_uid == UNKNOWNUID ? KAUTH_UID_NONE : hfsmp->hfs_uid,
		    hfsmp->hfs_gid == UNKNOWNGID ? KAUTH_GID_NONE : hfsmp->hfs_gid);

	/*
	 * For each active vnode fix things that changed
	 *
	 * Note that we can visit a vnode more than once
	 * and we can race with fsync.
	 *
	 * hfs_changefs_callback will be called for each vnode
	 * hung off of this mount point
	 *
	 * The vnode will be properly referenced and unreferenced
	 * around the callback
	 */
	cargs.hfsmp = hfsmp;
	cargs.namefix = namefix;
	cargs.permfix = permfix;
	cargs.permswitch = permswitch;

	vnode_iterate(mp, 0, hfs_changefs_callback, (void *)&cargs);

	/*
	 * If we're switching name converters we can now
	 * connect the new hfs_get_hfsname converter and
	 * release our interest in the old converters.
	 */
	if (namefix) {
		/* get_hfsname_func was filled in by the successful
		 * hfs_getconverter() call that set namefix above. */
		hfsmp->hfs_get_hfsname = get_hfsname_func;
		vcb->volumeNameEncodingHint = args->hfs_encoding;
		(void) hfs_relconverter(old_encoding);
	}
exit:
	hfsmp->hfs_flags &= ~HFS_IN_CHANGEFS;
	return (retval);
}
744
745
/*
 * Argument package handed to hfs_reload_callback() for each vnode
 * visited by vnode_iterate() during hfs_reload().
 */
struct hfs_reload_cargs {
	struct hfsmount *hfsmp;		/* mount being reloaded */
	int		error;		/* first cat_idlookup error; stops the iteration */
};
750
751 static int
752 hfs_reload_callback(struct vnode *vp, void *cargs)
753 {
754 struct cnode *cp;
755 struct hfs_reload_cargs *args;
756 int lockflags;
757
758 args = (struct hfs_reload_cargs *)cargs;
759 /*
760 * flush all the buffers associated with this node
761 */
762 (void) buf_invalidateblks(vp, 0, 0, 0);
763
764 cp = VTOC(vp);
765 /*
766 * Remove any directory hints
767 */
768 if (vnode_isdir(vp))
769 hfs_reldirhints(cp, 0);
770
771 /*
772 * Re-read cnode data for all active vnodes (non-metadata files).
773 */
774 if (!vnode_issystem(vp) && !VNODE_IS_RSRC(vp) && (cp->c_fileid >= kHFSFirstUserCatalogNodeID)) {
775 struct cat_fork *datafork;
776 struct cat_desc desc;
777
778 datafork = cp->c_datafork ? &cp->c_datafork->ff_data : NULL;
779
780 /* lookup by fileID since name could have changed */
781 lockflags = hfs_systemfile_lock(args->hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
782 args->error = cat_idlookup(args->hfsmp, cp->c_fileid, 0, &desc, &cp->c_attr, datafork);
783 hfs_systemfile_unlock(args->hfsmp, lockflags);
784 if (args->error) {
785 return (VNODE_RETURNED_DONE);
786 }
787
788 /* update cnode's catalog descriptor */
789 (void) replace_desc(cp, &desc);
790 }
791 return (VNODE_RETURNED);
792 }
793
794 /*
795 * Reload all incore data for a filesystem (used after running fsck on
796 * the root filesystem and finding things to fix). The filesystem must
797 * be mounted read-only.
798 *
799 * Things to do to update the mount:
800 * invalidate all cached meta-data.
801 * invalidate all inactive vnodes.
802 * invalidate all cached file data.
803 * re-read volume header from disk.
804 * re-load meta-file info (extents, file size).
805 * re-load B-tree header data.
806 * re-read cnode data for all active vnodes.
807 */
/*
 * Reload all incore data for a filesystem (used after running fsck on
 * the root filesystem and finding things to fix).  The filesystem must
 * be mounted read-only.
 *
 * Things to do to update the mount:
 *	invalidate all cached meta-data.
 *	invalidate all inactive vnodes.
 *	invalidate all cached file data.
 *	re-read volume header from disk.
 *	re-load meta-file info (extents, file size).
 *	re-load B-tree header data.
 *	re-read cnode data for all active vnodes.
 */
int
hfs_reload(struct mount *mountp)
{
	register struct vnode *devvp;
	struct buf *bp;
	int error, i;
	struct hfsmount *hfsmp;
	struct HFSPlusVolumeHeader *vhp;
	ExtendedVCB *vcb;
	struct filefork *forkp;
	struct cat_desc cndesc;
	struct hfs_reload_cargs args;
	daddr64_t priIDSector;

	hfsmp = VFSTOHFS(mountp);
	vcb = HFSTOVCB(hfsmp);

	if (vcb->vcbSigWord == kHFSSigWord)
		return (EINVAL);	/* rooting from HFS is not supported! */

	/*
	 * Invalidate all cached meta-data.
	 */
	devvp = hfsmp->hfs_devvp;
	if (buf_invalidateblks(devvp, 0, 0, 0))
		panic("hfs_reload: dirty1");

	args.hfsmp = hfsmp;
	args.error = 0;
	/*
	 * hfs_reload_callback will be called for each vnode
	 * hung off of this mount point that can't be recycled...
	 * vnode_iterate will recycle those that it can (the VNODE_RELOAD option)
	 * the vnode will be in an 'unbusy' state (VNODE_WAIT) and
	 * properly referenced and unreferenced around the callback
	 */
	vnode_iterate(mountp, VNODE_RELOAD | VNODE_WAIT, hfs_reload_callback, (void *)&args);

	if (args.error)
		return (args.error);

	/*
	 * Re-read VolumeHeader from disk.
	 */
	priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
			HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size));

	error = (int)buf_meta_bread(hfsmp->hfs_devvp,
			HFS_PHYSBLK_ROUNDDOWN(priIDSector, hfsmp->hfs_log_per_phys),
			hfsmp->hfs_physical_block_size, NOCRED, &bp);
	if (error) {
        	if (bp != NULL)
        		buf_brelse(bp);
		return (error);
	}

	vhp = (HFSPlusVolumeHeader *) (buf_dataptr(bp) + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));

	/* Do a quick sanity check */
	if ((SWAP_BE16(vhp->signature) != kHFSPlusSigWord &&
	     SWAP_BE16(vhp->signature) != kHFSXSigWord) ||
	    (SWAP_BE16(vhp->version) != kHFSPlusVersion &&
	     SWAP_BE16(vhp->version) != kHFSXVersion) ||
	    SWAP_BE32(vhp->blockSize) != vcb->blockSize) {
		buf_brelse(bp);
		return (EIO);
	}

	/* Copy the (big-endian) on-disk header fields into the in-core VCB. */
	vcb->vcbLsMod		= to_bsd_time(SWAP_BE32(vhp->modifyDate));
	vcb->vcbAtrb		= SWAP_BE32 (vhp->attributes);
	vcb->vcbJinfoBlock  = SWAP_BE32(vhp->journalInfoBlock);
	vcb->vcbClpSiz		= SWAP_BE32 (vhp->rsrcClumpSize);
	vcb->vcbNxtCNID		= SWAP_BE32 (vhp->nextCatalogID);
	vcb->vcbVolBkUp		= to_bsd_time(SWAP_BE32(vhp->backupDate));
	vcb->vcbWrCnt		= SWAP_BE32 (vhp->writeCount);
	vcb->vcbFilCnt		= SWAP_BE32 (vhp->fileCount);
	vcb->vcbDirCnt		= SWAP_BE32 (vhp->folderCount);
	HFS_UPDATE_NEXT_ALLOCATION(vcb, SWAP_BE32 (vhp->nextAllocation));
	vcb->totalBlocks	= SWAP_BE32 (vhp->totalBlocks);
	vcb->freeBlocks		= SWAP_BE32 (vhp->freeBlocks);
	vcb->encodingsBitmap	= SWAP_BE64 (vhp->encodingsBitmap);
	bcopy(vhp->finderInfo, vcb->vcbFndrInfo, sizeof(vhp->finderInfo));
	vcb->localCreateDate	= SWAP_BE32 (vhp->createDate); /* hfs+ create date is in local time */

	/*
	 * Re-load meta-file vnode data (extent info, file size, etc).
	 */
	forkp = VTOF((struct vnode *)vcb->extentsRefNum);
	for (i = 0; i < kHFSPlusExtentDensity; i++) {
		forkp->ff_extents[i].startBlock =
			SWAP_BE32 (vhp->extentsFile.extents[i].startBlock);
		forkp->ff_extents[i].blockCount =
			SWAP_BE32 (vhp->extentsFile.extents[i].blockCount);
	}
	forkp->ff_size      = SWAP_BE64 (vhp->extentsFile.logicalSize);
	forkp->ff_blocks    = SWAP_BE32 (vhp->extentsFile.totalBlocks);
	forkp->ff_clumpsize = SWAP_BE32 (vhp->extentsFile.clumpSize);


	forkp = VTOF((struct vnode *)vcb->catalogRefNum);
	for (i = 0; i < kHFSPlusExtentDensity; i++) {
		forkp->ff_extents[i].startBlock	=
			SWAP_BE32 (vhp->catalogFile.extents[i].startBlock);
		forkp->ff_extents[i].blockCount	=
			SWAP_BE32 (vhp->catalogFile.extents[i].blockCount);
	}
	forkp->ff_size      = SWAP_BE64 (vhp->catalogFile.logicalSize);
	forkp->ff_blocks    = SWAP_BE32 (vhp->catalogFile.totalBlocks);
	forkp->ff_clumpsize = SWAP_BE32 (vhp->catalogFile.clumpSize);

	/* The attributes B-tree is optional; only reload it if present. */
	if (hfsmp->hfs_attribute_vp) {
		forkp = VTOF(hfsmp->hfs_attribute_vp);
		for (i = 0; i < kHFSPlusExtentDensity; i++) {
			forkp->ff_extents[i].startBlock	=
				SWAP_BE32 (vhp->attributesFile.extents[i].startBlock);
			forkp->ff_extents[i].blockCount	=
				SWAP_BE32 (vhp->attributesFile.extents[i].blockCount);
		}
		forkp->ff_size      = SWAP_BE64 (vhp->attributesFile.logicalSize);
		forkp->ff_blocks    = SWAP_BE32 (vhp->attributesFile.totalBlocks);
		forkp->ff_clumpsize = SWAP_BE32 (vhp->attributesFile.clumpSize);
	}

	forkp = VTOF((struct vnode *)vcb->allocationsRefNum);
	for (i = 0; i < kHFSPlusExtentDensity; i++) {
		forkp->ff_extents[i].startBlock	=
			SWAP_BE32 (vhp->allocationFile.extents[i].startBlock);
		forkp->ff_extents[i].blockCount	=
			SWAP_BE32 (vhp->allocationFile.extents[i].blockCount);
	}
	forkp->ff_size      = SWAP_BE64 (vhp->allocationFile.logicalSize);
	forkp->ff_blocks    = SWAP_BE32 (vhp->allocationFile.totalBlocks);
	forkp->ff_clumpsize = SWAP_BE32 (vhp->allocationFile.clumpSize);

	buf_brelse(bp);
	vhp = NULL;

	/*
	 * Re-load B-tree header data
	 */
	forkp = VTOF((struct vnode *)vcb->extentsRefNum);
	if ( (error = MacToVFSError( BTReloadData((FCB*)forkp) )) )
		return (error);

	forkp = VTOF((struct vnode *)vcb->catalogRefNum);
	if ( (error = MacToVFSError( BTReloadData((FCB*)forkp) )) )
		return (error);

	if (hfsmp->hfs_attribute_vp) {
		forkp = VTOF(hfsmp->hfs_attribute_vp);
		if ( (error = MacToVFSError( BTReloadData((FCB*)forkp) )) )
			return (error);
	}

	/* Reload the volume name */
	if ((error = cat_idlookup(hfsmp, kHFSRootFolderID, 0, &cndesc, NULL, NULL)))
		return (error);
	vcb->volumeNameEncodingHint = cndesc.cd_encoding;
	/* NOTE(review): bcopy does not NUL-terminate vcbVN; presumably the
	 * prior contents guarantee termination — confirm before relying on it. */
	bcopy(cndesc.cd_nameptr, vcb->vcbVN, min(255, cndesc.cd_namelen));
	cat_releasedesc(&cndesc);

	/* Re-establish private/hidden directories. */
	hfs_privatedir_init(hfsmp, FILE_HARDLINKS);
	hfs_privatedir_init(hfsmp, DIR_HARDLINKS);

	/* In case any volume information changed to trigger a notification */
	hfs_generate_volume_notifications(hfsmp);

	return (0);
}
978
979
980
981 static void
982 hfs_syncer(void *arg0, void *unused)
983 {
984 #pragma unused(unused)
985
986 struct hfsmount *hfsmp = arg0;
987 clock_sec_t secs;
988 clock_usec_t usecs;
989 uint32_t delay = HFS_META_DELAY;
990 uint64_t now;
991 static int no_max=1;
992
993 clock_get_calendar_microtime(&secs, &usecs);
994 now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs;
995
996 //
997 // If the amount of pending writes is more than our limit, wait
998 // for 2/3 of it to drain and then flush the journal.
999 //
1000 if (hfsmp->hfs_mp->mnt_pending_write_size > hfsmp->hfs_max_pending_io) {
1001 int counter=0;
1002 uint64_t pending_io, start, rate = 0;
1003
1004 no_max = 0;
1005
1006 hfs_start_transaction(hfsmp); // so we hold off any new i/o's
1007
1008 pending_io = hfsmp->hfs_mp->mnt_pending_write_size;
1009
1010 clock_get_calendar_microtime(&secs, &usecs);
1011 start = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs;
1012
1013 while(hfsmp->hfs_mp->mnt_pending_write_size > (pending_io/3) && counter++ < 500) {
1014 tsleep((caddr_t)hfsmp, PRIBIO, "hfs-wait-for-io-to-drain", 10);
1015 }
1016
1017 if (counter >= 500) {
1018 printf("hfs: timed out waiting for io to drain (%lld)\n", (int64_t)hfsmp->hfs_mp->mnt_pending_write_size);
1019 }
1020
1021 if (hfsmp->jnl) {
1022 journal_flush(hfsmp->jnl, FALSE);
1023 } else {
1024 hfs_sync(hfsmp->hfs_mp, MNT_WAIT, vfs_context_kernel());
1025 }
1026
1027 clock_get_calendar_microtime(&secs, &usecs);
1028 now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs;
1029 hfsmp->hfs_last_sync_time = now;
1030 if (now != start) {
1031 rate = ((pending_io * 1000000ULL) / (now - start)); // yields bytes per second
1032 }
1033
1034 hfs_end_transaction(hfsmp);
1035
1036 //
1037 // If a reasonable amount of time elapsed then check the
1038 // i/o rate. If it's taking less than 1 second or more
1039 // than 2 seconds, adjust hfs_max_pending_io so that we
1040 // will allow about 1.5 seconds of i/o to queue up.
1041 //
1042 if (((now - start) >= 300000) && (rate != 0)) {
1043 uint64_t scale = (pending_io * 100) / rate;
1044
1045 if (scale < 100 || scale > 200) {
1046 // set it so that it should take about 1.5 seconds to drain
1047 hfsmp->hfs_max_pending_io = (rate * 150ULL) / 100ULL;
1048 }
1049 }
1050
1051 } else if ( ((now - hfsmp->hfs_last_sync_time) >= 5000000ULL)
1052 || (((now - hfsmp->hfs_last_sync_time) >= 100000LL)
1053 && ((now - hfsmp->hfs_last_sync_request_time) >= 100000LL)
1054 && (hfsmp->hfs_active_threads == 0)
1055 && (hfsmp->hfs_global_lock_nesting == 0))) {
1056
1057 //
1058 // Flush the journal if more than 5 seconds elapsed since
1059 // the last sync OR we have not sync'ed recently and the
1060 // last sync request time was more than 100 milliseconds
1061 // ago and no one is in the middle of a transaction right
1062 // now. Else we defer the sync and reschedule it.
1063 //
1064 if (hfsmp->jnl) {
1065 hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
1066
1067 journal_flush(hfsmp->jnl, FALSE);
1068
1069 hfs_unlock_global (hfsmp);
1070 } else {
1071 hfs_sync(hfsmp->hfs_mp, MNT_WAIT, vfs_context_kernel());
1072 }
1073
1074 clock_get_calendar_microtime(&secs, &usecs);
1075 now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs;
1076 hfsmp->hfs_last_sync_time = now;
1077
1078 } else if (hfsmp->hfs_active_threads == 0) {
1079 uint64_t deadline;
1080
1081 clock_interval_to_deadline(delay, HFS_MILLISEC_SCALE, &deadline);
1082 thread_call_enter_delayed(hfsmp->hfs_syncer, deadline);
1083
1084 // note: we intentionally return early here and do not
1085 // decrement the sync_scheduled and sync_incomplete
1086 // variables because we rescheduled the timer.
1087
1088 return;
1089 }
1090
1091 //
1092 // NOTE: we decrement these *after* we're done the journal_flush() since
1093 // it can take a significant amount of time and so we don't want more
1094 // callbacks scheduled until we're done this one.
1095 //
1096 OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_scheduled);
1097 OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_incomplete);
1098 wakeup((caddr_t)&hfsmp->hfs_sync_incomplete);
1099 }
1100
1101
1102 extern int IOBSDIsMediaEjectable( const char *cdev_name );
1103
1104 /*
1105 * Initialization code for Red-Black Tree Allocator
1106 *
1107 * This function will build the two red-black trees necessary for allocating space
1108 * from the metadata zone as well as normal allocations. Currently, we use
1109 * an advisory read to get most of the data into the buffer cache.
1110 * This function is intended to be run in a separate thread so as not to slow down mount.
1111 *
1112 */
1113
void
hfs_initialize_allocator (struct hfsmount *hfsmp) {

#if CONFIG_HFS_ALLOC_RBTREE
	/* Result of GenerateTree(); nonzero means the tree build failed. */
	u_int32_t err;

	/*
	 * Take the allocation file lock.  Journal transactions will block until
	 * we're done here.
	 */
	int flags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);

	/*
	 * GenerateTree assumes that the bitmap lock is held when you call the function.
	 * It will drop and re-acquire the lock periodically as needed to let other allocations
	 * through.  It returns with the bitmap lock held. Since we only maintain one tree,
	 * we don't need to specify a start block (always starts at 0).
	 */
	err = GenerateTree(hfsmp, hfsmp->totalBlocks, &flags, 1);
	if (err) {
		goto bailout;
	}
	/* Mark offset tree as built */
	hfsmp->extent_tree_flags |= HFS_ALLOC_RB_ACTIVE;

bailout:
	/*
	 * GenerateTree may drop the bitmap lock during operation in order to give other
	 * threads a chance to allocate blocks, but it will always return with the lock held, so
	 * we don't need to re-grab the lock in order to update the TREEBUILD_INFLIGHT bit.
	 */
	hfsmp->extent_tree_flags &= ~HFS_ALLOC_TREEBUILD_INFLIGHT;
	if (err != 0) {
		/*
		 * Wakeup any waiters on the allocation bitmap lock.
		 * hfs_teardown_allocator sleeps on &hfsmp->extent_tree_flags while
		 * TREEBUILD_INFLIGHT is set, so wake that channel on failure.
		 */
		wakeup((caddr_t)&hfsmp->extent_tree_flags);
	}

	hfs_systemfile_unlock(hfsmp, flags);
#else
#pragma unused (hfsmp)
#endif
}
1156
1157
1158 /*
1159 * Teardown code for the Red-Black Tree allocator.
1160 * This function consolidates the code which serializes with respect
1161 * to a thread that may be potentially still building the tree when we need to begin
1162 * tearing it down. Since the red-black tree may not be live when we enter this function
1163 * we return:
1164 * 1 -> Tree was live.
1165 * 0 -> Tree was not active at time of call.
1166 */
1167
int
hfs_teardown_allocator (struct hfsmount *hfsmp) {
	/* 1 if the red-black tree was live and we destroyed it; 0 otherwise. */
	int rb_used = 0;

#if CONFIG_HFS_ALLOC_RBTREE

	int flags = 0;

	/*
	 * Check to see if the tree-generation is still on-going.
	 * If it is, then block until it's done.
	 */

	flags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);


	while (hfsmp->extent_tree_flags & HFS_ALLOC_TREEBUILD_INFLIGHT) {
		/* Tell the builder thread that a teardown is waiting on it. */
		hfsmp->extent_tree_flags |= HFS_ALLOC_TEARDOWN_INFLIGHT;

		/*
		 * Sleep (uninterruptibly) on the extent_tree_flags channel,
		 * dropping the allocation file's cnode rwlock while we wait;
		 * the builder wakes this channel when it finishes or fails.
		 */
		lck_rw_sleep(&(VTOC(hfsmp->hfs_allocation_vp))->c_rwlock, LCK_SLEEP_EXCLUSIVE,
				&hfsmp->extent_tree_flags, THREAD_UNINT);
	}

	if (hfs_isrbtree_active (hfsmp)) {
		rb_used = 1;

		/* Tear down the RB Trees while we have the bitmap locked */
		DestroyTrees(hfsmp);

	}

	hfs_systemfile_unlock(hfsmp, flags);
#else
	#pragma unused (hfsmp)
#endif
	return rb_used;

}
1206
1207
1208 static int hfs_root_unmounted_cleanly = 0;
1209
1210 SYSCTL_DECL(_vfs_generic);
1211 SYSCTL_INT(_vfs_generic, OID_AUTO, root_unmounted_cleanly, CTLFLAG_RD, &hfs_root_unmounted_cleanly, 0, "Root filesystem was unmounted cleanly");
1212
1213 /*
1214 * Common code for mount and mountroot
1215 */
1216 int
1217 hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
1218 int journal_replay_only, vfs_context_t context)
1219 {
1220 struct proc *p = vfs_context_proc(context);
1221 int retval = E_NONE;
1222 struct hfsmount *hfsmp = NULL;
1223 struct buf *bp;
1224 dev_t dev;
1225 HFSMasterDirectoryBlock *mdbp = NULL;
1226 int ronly;
1227 #if QUOTA
1228 int i;
1229 #endif
1230 int mntwrapper;
1231 kauth_cred_t cred;
1232 u_int64_t disksize;
1233 daddr64_t log_blkcnt;
1234 u_int32_t log_blksize;
1235 u_int32_t phys_blksize;
1236 u_int32_t minblksize;
1237 u_int32_t iswritable;
1238 daddr64_t mdb_offset;
1239 int isvirtual = 0;
1240 int isroot = 0;
1241 int isssd;
1242 #if CONFIG_HFS_ALLOC_RBTREE
1243 thread_t allocator_thread;
1244 #endif
1245
1246 if (args == NULL) {
1247 /* only hfs_mountroot passes us NULL as the 'args' argument */
1248 isroot = 1;
1249 }
1250
1251 ronly = vfs_isrdonly(mp);
1252 dev = vnode_specrdev(devvp);
1253 cred = p ? vfs_context_ucred(context) : NOCRED;
1254 mntwrapper = 0;
1255
1256 bp = NULL;
1257 hfsmp = NULL;
1258 mdbp = NULL;
1259 minblksize = kHFSBlockSize;
1260
1261 /* Advisory locking should be handled at the VFS layer */
1262 vfs_setlocklocal(mp);
1263
1264 /* Get the logical block size (treated as physical block size everywhere) */
1265 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&log_blksize, 0, context)) {
1266 if (HFS_MOUNT_DEBUG) {
1267 printf("hfs_mountfs: DKIOCGETBLOCKSIZE failed\n");
1268 }
1269 retval = ENXIO;
1270 goto error_exit;
1271 }
1272 if (log_blksize == 0 || log_blksize > 1024*1024*1024) {
1273 printf("hfs: logical block size 0x%x looks bad. Not mounting.\n", log_blksize);
1274 retval = ENXIO;
1275 goto error_exit;
1276 }
1277
1278 /* Get the physical block size. */
1279 retval = VNOP_IOCTL(devvp, DKIOCGETPHYSICALBLOCKSIZE, (caddr_t)&phys_blksize, 0, context);
1280 if (retval) {
1281 if ((retval != ENOTSUP) && (retval != ENOTTY)) {
1282 if (HFS_MOUNT_DEBUG) {
1283 printf("hfs_mountfs: DKIOCGETPHYSICALBLOCKSIZE failed\n");
1284 }
1285 retval = ENXIO;
1286 goto error_exit;
1287 }
1288 /* If device does not support this ioctl, assume that physical
1289 * block size is same as logical block size
1290 */
1291 phys_blksize = log_blksize;
1292 }
1293 if (phys_blksize == 0 || phys_blksize > 1024*1024*1024) {
1294 printf("hfs: physical block size 0x%x looks bad. Not mounting.\n", phys_blksize);
1295 retval = ENXIO;
1296 goto error_exit;
1297 }
1298
1299 /* Switch to 512 byte sectors (temporarily) */
1300 if (log_blksize > 512) {
1301 u_int32_t size512 = 512;
1302
1303 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&size512, FWRITE, context)) {
1304 if (HFS_MOUNT_DEBUG) {
1305 printf("hfs_mountfs: DKIOCSETBLOCKSIZE failed \n");
1306 }
1307 retval = ENXIO;
1308 goto error_exit;
1309 }
1310 }
1311 /* Get the number of 512 byte physical blocks. */
1312 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) {
1313 /* resetting block size may fail if getting block count did */
1314 (void)VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context);
1315 if (HFS_MOUNT_DEBUG) {
1316 printf("hfs_mountfs: DKIOCGETBLOCKCOUNT failed\n");
1317 }
1318 retval = ENXIO;
1319 goto error_exit;
1320 }
1321 /* Compute an accurate disk size (i.e. within 512 bytes) */
1322 disksize = (u_int64_t)log_blkcnt * (u_int64_t)512;
1323
1324 /*
1325 * On Tiger it is not necessary to switch the device
1326 * block size to be 4k if there are more than 31-bits
1327 * worth of blocks but to insure compatibility with
1328 * pre-Tiger systems we have to do it.
1329 *
1330 * If the device size is not a multiple of 4K (8 * 512), then
1331 * switching the logical block size isn't going to help because
1332 * we will be unable to write the alternate volume header.
1333 * In this case, just leave the logical block size unchanged.
1334 */
1335 if (log_blkcnt > 0x000000007fffffff && (log_blkcnt & 7) == 0) {
1336 minblksize = log_blksize = 4096;
1337 if (phys_blksize < log_blksize)
1338 phys_blksize = log_blksize;
1339 }
1340
1341 /*
1342 * The cluster layer is not currently prepared to deal with a logical
1343 * block size larger than the system's page size. (It can handle
1344 * blocks per page, but not multiple pages per block.) So limit the
1345 * logical block size to the page size.
1346 */
1347 if (log_blksize > PAGE_SIZE)
1348 log_blksize = PAGE_SIZE;
1349
1350 /* Now switch to our preferred physical block size. */
1351 if (log_blksize > 512) {
1352 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context)) {
1353 if (HFS_MOUNT_DEBUG) {
1354 printf("hfs_mountfs: DKIOCSETBLOCKSIZE (2) failed\n");
1355 }
1356 retval = ENXIO;
1357 goto error_exit;
1358 }
1359 /* Get the count of physical blocks. */
1360 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) {
1361 if (HFS_MOUNT_DEBUG) {
1362 printf("hfs_mountfs: DKIOCGETBLOCKCOUNT (2) failed\n");
1363 }
1364 retval = ENXIO;
1365 goto error_exit;
1366 }
1367 }
1368 /*
1369 * At this point:
1370 * minblksize is the minimum physical block size
1371 * log_blksize has our preferred physical block size
1372 * log_blkcnt has the total number of physical blocks
1373 */
1374
1375 mdb_offset = (daddr64_t)HFS_PRI_SECTOR(log_blksize);
1376 if ((retval = (int)buf_meta_bread(devvp,
1377 HFS_PHYSBLK_ROUNDDOWN(mdb_offset, (phys_blksize/log_blksize)),
1378 phys_blksize, cred, &bp))) {
1379 if (HFS_MOUNT_DEBUG) {
1380 printf("hfs_mountfs: buf_meta_bread failed with %d\n", retval);
1381 }
1382 goto error_exit;
1383 }
1384 MALLOC(mdbp, HFSMasterDirectoryBlock *, kMDBSize, M_TEMP, M_WAITOK);
1385 if (mdbp == NULL) {
1386 retval = ENOMEM;
1387 if (HFS_MOUNT_DEBUG) {
1388 printf("hfs_mountfs: MALLOC failed\n");
1389 }
1390 goto error_exit;
1391 }
1392 bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize), mdbp, kMDBSize);
1393 buf_brelse(bp);
1394 bp = NULL;
1395
1396 MALLOC(hfsmp, struct hfsmount *, sizeof(struct hfsmount), M_HFSMNT, M_WAITOK);
1397 if (hfsmp == NULL) {
1398 if (HFS_MOUNT_DEBUG) {
1399 printf("hfs_mountfs: MALLOC (2) failed\n");
1400 }
1401 retval = ENOMEM;
1402 goto error_exit;
1403 }
1404 bzero(hfsmp, sizeof(struct hfsmount));
1405
1406 hfs_chashinit_finish(hfsmp);
1407
1408 /*
1409 * See if the disk is a solid state device. We need this to decide what to do about
1410 * hotfiles.
1411 */
1412 if (VNOP_IOCTL(devvp, DKIOCISSOLIDSTATE, (caddr_t)&isssd, 0, context) == 0) {
1413 if (isssd) {
1414 hfsmp->hfs_flags |= HFS_SSD;
1415 }
1416 }
1417
1418
1419 /*
1420 * Init the volume information structure
1421 */
1422
1423 lck_mtx_init(&hfsmp->hfs_mutex, hfs_mutex_group, hfs_lock_attr);
1424 lck_mtx_init(&hfsmp->hfc_mutex, hfs_mutex_group, hfs_lock_attr);
1425 lck_rw_init(&hfsmp->hfs_global_lock, hfs_rwlock_group, hfs_lock_attr);
1426 lck_rw_init(&hfsmp->hfs_insync, hfs_rwlock_group, hfs_lock_attr);
1427 lck_spin_init(&hfsmp->vcbFreeExtLock, hfs_spinlock_group, hfs_lock_attr);
1428
1429 vfs_setfsprivate(mp, hfsmp);
1430 hfsmp->hfs_mp = mp; /* Make VFSTOHFS work */
1431 hfsmp->hfs_raw_dev = vnode_specrdev(devvp);
1432 hfsmp->hfs_devvp = devvp;
1433 vnode_ref(devvp); /* Hold a ref on the device, dropped when hfsmp is freed. */
1434 hfsmp->hfs_logical_block_size = log_blksize;
1435 hfsmp->hfs_logical_block_count = log_blkcnt;
1436 hfsmp->hfs_physical_block_size = phys_blksize;
1437 hfsmp->hfs_log_per_phys = (phys_blksize / log_blksize);
1438 hfsmp->hfs_flags |= HFS_WRITEABLE_MEDIA;
1439 if (ronly)
1440 hfsmp->hfs_flags |= HFS_READ_ONLY;
1441 if (((unsigned int)vfs_flags(mp)) & MNT_UNKNOWNPERMISSIONS)
1442 hfsmp->hfs_flags |= HFS_UNKNOWN_PERMS;
1443
1444 #if QUOTA
1445 for (i = 0; i < MAXQUOTAS; i++)
1446 dqfileinit(&hfsmp->hfs_qfiles[i]);
1447 #endif
1448
1449 if (args) {
1450 hfsmp->hfs_uid = (args->hfs_uid == (uid_t)VNOVAL) ? UNKNOWNUID : args->hfs_uid;
1451 if (hfsmp->hfs_uid == 0xfffffffd) hfsmp->hfs_uid = UNKNOWNUID;
1452 hfsmp->hfs_gid = (args->hfs_gid == (gid_t)VNOVAL) ? UNKNOWNGID : args->hfs_gid;
1453 if (hfsmp->hfs_gid == 0xfffffffd) hfsmp->hfs_gid = UNKNOWNGID;
1454 vfs_setowner(mp, hfsmp->hfs_uid, hfsmp->hfs_gid); /* tell the VFS */
1455 if (args->hfs_mask != (mode_t)VNOVAL) {
1456 hfsmp->hfs_dir_mask = args->hfs_mask & ALLPERMS;
1457 if (args->flags & HFSFSMNT_NOXONFILES) {
1458 hfsmp->hfs_file_mask = (args->hfs_mask & DEFFILEMODE);
1459 } else {
1460 hfsmp->hfs_file_mask = args->hfs_mask & ALLPERMS;
1461 }
1462 } else {
1463 hfsmp->hfs_dir_mask = UNKNOWNPERMISSIONS & ALLPERMS; /* 0777: rwx---rwx */
1464 hfsmp->hfs_file_mask = UNKNOWNPERMISSIONS & DEFFILEMODE; /* 0666: no --x by default? */
1465 }
1466 if ((args->flags != (int)VNOVAL) && (args->flags & HFSFSMNT_WRAPPER))
1467 mntwrapper = 1;
1468 } else {
1469 /* Even w/o explicit mount arguments, MNT_UNKNOWNPERMISSIONS requires setting up uid, gid, and mask: */
1470 if (((unsigned int)vfs_flags(mp)) & MNT_UNKNOWNPERMISSIONS) {
1471 hfsmp->hfs_uid = UNKNOWNUID;
1472 hfsmp->hfs_gid = UNKNOWNGID;
1473 vfs_setowner(mp, hfsmp->hfs_uid, hfsmp->hfs_gid); /* tell the VFS */
1474 hfsmp->hfs_dir_mask = UNKNOWNPERMISSIONS & ALLPERMS; /* 0777: rwx---rwx */
1475 hfsmp->hfs_file_mask = UNKNOWNPERMISSIONS & DEFFILEMODE; /* 0666: no --x by default? */
1476 }
1477 }
1478
1479 /* Find out if disk media is writable. */
1480 if (VNOP_IOCTL(devvp, DKIOCISWRITABLE, (caddr_t)&iswritable, 0, context) == 0) {
1481 if (iswritable)
1482 hfsmp->hfs_flags |= HFS_WRITEABLE_MEDIA;
1483 else
1484 hfsmp->hfs_flags &= ~HFS_WRITEABLE_MEDIA;
1485 }
1486
1487 // record the current time at which we're mounting this volume
1488 struct timeval tv;
1489 microtime(&tv);
1490 hfsmp->hfs_mount_time = tv.tv_sec;
1491
1492 /* Mount a standard HFS disk */
1493 if ((SWAP_BE16(mdbp->drSigWord) == kHFSSigWord) &&
1494 (mntwrapper || (SWAP_BE16(mdbp->drEmbedSigWord) != kHFSPlusSigWord))) {
1495
1496 /* On 10.6 and beyond, non read-only mounts for HFS standard vols get rejected */
1497 if (vfs_isrdwr(mp)) {
1498 retval = EROFS;
1499 goto error_exit;
1500 }
1501
1502 printf("hfs_mountfs: Mounting HFS Standard volumes was deprecated in Mac OS 10.7 \n");
1503
1504 /* Treat it as if it's read-only and not writeable */
1505 hfsmp->hfs_flags |= HFS_READ_ONLY;
1506 hfsmp->hfs_flags &= ~HFS_WRITEABLE_MEDIA;
1507
1508 /* If only journal replay is requested, exit immediately */
1509 if (journal_replay_only) {
1510 retval = 0;
1511 goto error_exit;
1512 }
1513
1514 if ((vfs_flags(mp) & MNT_ROOTFS)) {
1515 retval = EINVAL; /* Cannot root from HFS standard disks */
1516 goto error_exit;
1517 }
1518 /* HFS disks can only use 512 byte physical blocks */
1519 if (log_blksize > kHFSBlockSize) {
1520 log_blksize = kHFSBlockSize;
1521 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context)) {
1522 retval = ENXIO;
1523 goto error_exit;
1524 }
1525 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) {
1526 retval = ENXIO;
1527 goto error_exit;
1528 }
1529 hfsmp->hfs_logical_block_size = log_blksize;
1530 hfsmp->hfs_logical_block_count = log_blkcnt;
1531 hfsmp->hfs_physical_block_size = log_blksize;
1532 hfsmp->hfs_log_per_phys = 1;
1533 }
1534 if (args) {
1535 hfsmp->hfs_encoding = args->hfs_encoding;
1536 HFSTOVCB(hfsmp)->volumeNameEncodingHint = args->hfs_encoding;
1537
1538 /* establish the timezone */
1539 gTimeZone = args->hfs_timezone;
1540 }
1541
1542 retval = hfs_getconverter(hfsmp->hfs_encoding, &hfsmp->hfs_get_unicode,
1543 &hfsmp->hfs_get_hfsname);
1544 if (retval)
1545 goto error_exit;
1546
1547 retval = hfs_MountHFSVolume(hfsmp, mdbp, p);
1548 if (retval)
1549 (void) hfs_relconverter(hfsmp->hfs_encoding);
1550
1551 } else /* Mount an HFS Plus disk */ {
1552 HFSPlusVolumeHeader *vhp;
1553 off_t embeddedOffset;
1554 int jnl_disable = 0;
1555
1556 /* Get the embedded Volume Header */
1557 if (SWAP_BE16(mdbp->drEmbedSigWord) == kHFSPlusSigWord) {
1558 embeddedOffset = SWAP_BE16(mdbp->drAlBlSt) * kHFSBlockSize;
1559 embeddedOffset += (u_int64_t)SWAP_BE16(mdbp->drEmbedExtent.startBlock) *
1560 (u_int64_t)SWAP_BE32(mdbp->drAlBlkSiz);
1561
1562 /*
1563 * If the embedded volume doesn't start on a block
1564 * boundary, then switch the device to a 512-byte
1565 * block size so everything will line up on a block
1566 * boundary.
1567 */
1568 if ((embeddedOffset % log_blksize) != 0) {
1569 printf("hfs_mountfs: embedded volume offset not"
1570 " a multiple of physical block size (%d);"
1571 " switching to 512\n", log_blksize);
1572 log_blksize = 512;
1573 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE,
1574 (caddr_t)&log_blksize, FWRITE, context)) {
1575
1576 if (HFS_MOUNT_DEBUG) {
1577 printf("hfs_mountfs: DKIOCSETBLOCKSIZE (3) failed\n");
1578 }
1579 retval = ENXIO;
1580 goto error_exit;
1581 }
1582 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT,
1583 (caddr_t)&log_blkcnt, 0, context)) {
1584 if (HFS_MOUNT_DEBUG) {
1585 printf("hfs_mountfs: DKIOCGETBLOCKCOUNT (3) failed\n");
1586 }
1587 retval = ENXIO;
1588 goto error_exit;
1589 }
1590 /* Note: relative block count adjustment */
1591 hfsmp->hfs_logical_block_count *=
1592 hfsmp->hfs_logical_block_size / log_blksize;
1593
1594 /* Update logical /physical block size */
1595 hfsmp->hfs_logical_block_size = log_blksize;
1596 hfsmp->hfs_physical_block_size = log_blksize;
1597 phys_blksize = log_blksize;
1598 hfsmp->hfs_log_per_phys = 1;
1599 }
1600
1601 disksize = (u_int64_t)SWAP_BE16(mdbp->drEmbedExtent.blockCount) *
1602 (u_int64_t)SWAP_BE32(mdbp->drAlBlkSiz);
1603
1604 hfsmp->hfs_logical_block_count = disksize / log_blksize;
1605
1606 mdb_offset = (daddr64_t)((embeddedOffset / log_blksize) + HFS_PRI_SECTOR(log_blksize));
1607 retval = (int)buf_meta_bread(devvp, HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
1608 phys_blksize, cred, &bp);
1609 if (retval) {
1610 if (HFS_MOUNT_DEBUG) {
1611 printf("hfs_mountfs: buf_meta_bread (2) failed with %d\n", retval);
1612 }
1613 goto error_exit;
1614 }
1615 bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize), mdbp, 512);
1616 buf_brelse(bp);
1617 bp = NULL;
1618 vhp = (HFSPlusVolumeHeader*) mdbp;
1619
1620 } else /* pure HFS+ */ {
1621 embeddedOffset = 0;
1622 vhp = (HFSPlusVolumeHeader*) mdbp;
1623 }
1624
1625 if (isroot) {
1626 hfs_root_unmounted_cleanly = (SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) != 0;
1627 }
1628
1629 /*
1630 * On inconsistent disks, do not allow read-write mount
1631 * unless it is the boot volume being mounted. We also
1632 * always want to replay the journal if the journal_replay_only
1633 * flag is set because that will (most likely) get the
1634 * disk into a consistent state before fsck_hfs starts
1635 * looking at it.
1636 */
1637 if ( !(vfs_flags(mp) & MNT_ROOTFS)
1638 && (SWAP_BE32(vhp->attributes) & kHFSVolumeInconsistentMask)
1639 && !journal_replay_only
1640 && !(hfsmp->hfs_flags & HFS_READ_ONLY)) {
1641
1642 if (HFS_MOUNT_DEBUG) {
1643 printf("hfs_mountfs: failed to mount non-root inconsistent disk\n");
1644 }
1645 retval = EINVAL;
1646 goto error_exit;
1647 }
1648
1649
1650 // XXXdbg
1651 //
1652 hfsmp->jnl = NULL;
1653 hfsmp->jvp = NULL;
1654 if (args != NULL && (args->flags & HFSFSMNT_EXTENDED_ARGS) &&
1655 args->journal_disable) {
1656 jnl_disable = 1;
1657 }
1658
1659 //
1660 // We only initialize the journal here if the last person
1661 // to mount this volume was journaling aware. Otherwise
1662 // we delay journal initialization until later at the end
1663 // of hfs_MountHFSPlusVolume() because the last person who
1664 // mounted it could have messed things up behind our back
1665 // (so we need to go find the .journal file, make sure it's
1666 // the right size, re-sync up if it was moved, etc).
1667 //
1668 if ( (SWAP_BE32(vhp->lastMountedVersion) == kHFSJMountVersion)
1669 && (SWAP_BE32(vhp->attributes) & kHFSVolumeJournaledMask)
1670 && !jnl_disable) {
1671
1672 // if we're able to init the journal, mark the mount
1673 // point as journaled.
1674 //
1675 if ((retval = hfs_early_journal_init(hfsmp, vhp, args, embeddedOffset, mdb_offset, mdbp, cred)) == 0) {
1676 vfs_setflags(mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
1677 } else {
1678 if (retval == EROFS) {
1679 // EROFS is a special error code that means the volume has an external
1680 // journal which we couldn't find. in that case we do not want to
1681 // rewrite the volume header - we'll just refuse to mount the volume.
1682 if (HFS_MOUNT_DEBUG) {
1683 printf("hfs_mountfs: hfs_early_journal_init indicated external jnl \n");
1684 }
1685 retval = EINVAL;
1686 goto error_exit;
1687 }
1688
1689 // if the journal failed to open, then set the lastMountedVersion
1690 // to be "FSK!" which fsck_hfs will see and force the fsck instead
1691 // of just bailing out because the volume is journaled.
1692 if (!ronly) {
1693 if (HFS_MOUNT_DEBUG) {
1694 printf("hfs_mountfs: hfs_early_journal_init failed, setting to FSK \n");
1695 }
1696
1697 HFSPlusVolumeHeader *jvhp;
1698
1699 hfsmp->hfs_flags |= HFS_NEED_JNL_RESET;
1700
1701 if (mdb_offset == 0) {
1702 mdb_offset = (daddr64_t)((embeddedOffset / log_blksize) + HFS_PRI_SECTOR(log_blksize));
1703 }
1704
1705 bp = NULL;
1706 retval = (int)buf_meta_bread(devvp,
1707 HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
1708 phys_blksize, cred, &bp);
1709 if (retval == 0) {
1710 jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize));
1711
1712 if (SWAP_BE16(jvhp->signature) == kHFSPlusSigWord || SWAP_BE16(jvhp->signature) == kHFSXSigWord) {
1713 printf ("hfs(1): Journal replay fail. Writing lastMountVersion as FSK!\n");
1714 jvhp->lastMountedVersion = SWAP_BE32(kFSKMountVersion);
1715 buf_bwrite(bp);
1716 } else {
1717 buf_brelse(bp);
1718 }
1719 bp = NULL;
1720 } else if (bp) {
1721 buf_brelse(bp);
1722 // clear this so the error exit path won't try to use it
1723 bp = NULL;
1724 }
1725 }
1726
1727 // if this isn't the root device just bail out.
1728 // If it is the root device we just continue on
1729 // in the hopes that fsck_hfs will be able to
1730 // fix any damage that exists on the volume.
1731 if ( !(vfs_flags(mp) & MNT_ROOTFS)) {
1732 if (HFS_MOUNT_DEBUG) {
1733 printf("hfs_mountfs: hfs_early_journal_init failed, erroring out \n");
1734 }
1735 retval = EINVAL;
1736 goto error_exit;
1737 }
1738 }
1739 }
1740 // XXXdbg
1741
1742 /* Either the journal is replayed successfully, or there
1743 * was nothing to replay, or no journal exists. In any case,
1744 * return success.
1745 */
1746 if (journal_replay_only) {
1747 retval = 0;
1748 goto error_exit;
1749 }
1750
1751 (void) hfs_getconverter(0, &hfsmp->hfs_get_unicode, &hfsmp->hfs_get_hfsname);
1752
1753 retval = hfs_MountHFSPlusVolume(hfsmp, vhp, embeddedOffset, disksize, p, args, cred);
1754 /*
1755 * If the backend didn't like our physical blocksize
1756 * then retry with physical blocksize of 512.
1757 */
1758 if ((retval == ENXIO) && (log_blksize > 512) && (log_blksize != minblksize)) {
1759 printf("hfs_mountfs: could not use physical block size "
1760 "(%d) switching to 512\n", log_blksize);
1761 log_blksize = 512;
1762 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context)) {
1763 if (HFS_MOUNT_DEBUG) {
1764 printf("hfs_mountfs: DKIOCSETBLOCKSIZE (4) failed \n");
1765 }
1766 retval = ENXIO;
1767 goto error_exit;
1768 }
1769 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) {
1770 if (HFS_MOUNT_DEBUG) {
1771 printf("hfs_mountfs: DKIOCGETBLOCKCOUNT (4) failed \n");
1772 }
1773 retval = ENXIO;
1774 goto error_exit;
1775 }
1776 devvp->v_specsize = log_blksize;
1777 /* Note: relative block count adjustment (in case this is an embedded volume). */
1778 hfsmp->hfs_logical_block_count *= hfsmp->hfs_logical_block_size / log_blksize;
1779 hfsmp->hfs_logical_block_size = log_blksize;
1780 hfsmp->hfs_log_per_phys = hfsmp->hfs_physical_block_size / log_blksize;
1781
1782 if (hfsmp->jnl && hfsmp->jvp == devvp) {
1783 // close and re-open this with the new block size
1784 journal_close(hfsmp->jnl);
1785 hfsmp->jnl = NULL;
1786 if (hfs_early_journal_init(hfsmp, vhp, args, embeddedOffset, mdb_offset, mdbp, cred) == 0) {
1787 vfs_setflags(mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
1788 } else {
1789 // if the journal failed to open, then set the lastMountedVersion
1790 // to be "FSK!" which fsck_hfs will see and force the fsck instead
1791 // of just bailing out because the volume is journaled.
1792 if (!ronly) {
1793 if (HFS_MOUNT_DEBUG) {
1794 printf("hfs_mountfs: hfs_early_journal_init (2) resetting.. \n");
1795 }
1796 HFSPlusVolumeHeader *jvhp;
1797
1798 hfsmp->hfs_flags |= HFS_NEED_JNL_RESET;
1799
1800 if (mdb_offset == 0) {
1801 mdb_offset = (daddr64_t)((embeddedOffset / log_blksize) + HFS_PRI_SECTOR(log_blksize));
1802 }
1803
1804 bp = NULL;
1805 retval = (int)buf_meta_bread(devvp, HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
1806 phys_blksize, cred, &bp);
1807 if (retval == 0) {
1808 jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize));
1809
1810 if (SWAP_BE16(jvhp->signature) == kHFSPlusSigWord || SWAP_BE16(jvhp->signature) == kHFSXSigWord) {
1811 printf ("hfs(2): Journal replay fail. Writing lastMountVersion as FSK!\n");
1812 jvhp->lastMountedVersion = SWAP_BE32(kFSKMountVersion);
1813 buf_bwrite(bp);
1814 } else {
1815 buf_brelse(bp);
1816 }
1817 bp = NULL;
1818 } else if (bp) {
1819 buf_brelse(bp);
1820 // clear this so the error exit path won't try to use it
1821 bp = NULL;
1822 }
1823 }
1824
1825 // if this isn't the root device just bail out.
1826 // If it is the root device we just continue on
1827 // in the hopes that fsck_hfs will be able to
1828 // fix any damage that exists on the volume.
1829 if ( !(vfs_flags(mp) & MNT_ROOTFS)) {
1830 if (HFS_MOUNT_DEBUG) {
1831 printf("hfs_mountfs: hfs_early_journal_init (2) failed \n");
1832 }
1833 retval = EINVAL;
1834 goto error_exit;
1835 }
1836 }
1837 }
1838
1839 /* Try again with a smaller block size... */
1840 retval = hfs_MountHFSPlusVolume(hfsmp, vhp, embeddedOffset, disksize, p, args, cred);
1841 if (retval && HFS_MOUNT_DEBUG) {
1842 printf("hfs_MountHFSPlusVolume (late) returned %d\n",retval);
1843 }
1844 }
1845 if (retval)
1846 (void) hfs_relconverter(0);
1847 }
1848
1849 // save off a snapshot of the mtime from the previous mount
1850 // (for matador).
1851 hfsmp->hfs_last_mounted_mtime = hfsmp->hfs_mtime;
1852
1853 if ( retval ) {
1854 if (HFS_MOUNT_DEBUG) {
1855 printf("hfs_mountfs: encountered failure %d \n", retval);
1856 }
1857 goto error_exit;
1858 }
1859
1860 mp->mnt_vfsstat.f_fsid.val[0] = (long)dev;
1861 mp->mnt_vfsstat.f_fsid.val[1] = vfs_typenum(mp);
1862 vfs_setmaxsymlen(mp, 0);
1863
1864 mp->mnt_vtable->vfc_vfsflags |= VFC_VFSNATIVEXATTR;
1865 #if NAMEDSTREAMS
1866 mp->mnt_kern_flag |= MNTK_NAMED_STREAMS;
1867 #endif
1868 if (!(hfsmp->hfs_flags & HFS_STANDARD)) {
1869 /* Tell VFS that we support directory hard links. */
1870 mp->mnt_vtable->vfc_vfsflags |= VFC_VFSDIRLINKS;
1871 } else {
1872 /* HFS standard doesn't support extended readdir! */
1873 mount_set_noreaddirext (mp);
1874 }
1875
1876 if (args) {
1877 /*
1878 * Set the free space warning levels for a non-root volume:
1879 *
1880 * Set the "danger" limit to 1% of the volume size or 100MB, whichever
1881 * is less. Set the "warning" limit to 2% of the volume size or 150MB,
1882 * whichever is less. And last, set the "desired" freespace level to
1883 * to 3% of the volume size or 200MB, whichever is less.
1884 */
1885 hfsmp->hfs_freespace_notify_dangerlimit =
1886 MIN(HFS_VERYLOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize,
1887 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_VERYLOWDISKTRIGGERFRACTION);
1888 hfsmp->hfs_freespace_notify_warninglimit =
1889 MIN(HFS_LOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize,
1890 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_LOWDISKTRIGGERFRACTION);
1891 hfsmp->hfs_freespace_notify_desiredlevel =
1892 MIN(HFS_LOWDISKSHUTOFFLEVEL / HFSTOVCB(hfsmp)->blockSize,
1893 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_LOWDISKSHUTOFFFRACTION);
1894 } else {
1895 /*
1896 * Set the free space warning levels for the root volume:
1897 *
1898 * Set the "danger" limit to 5% of the volume size or 512MB, whichever
1899 * is less. Set the "warning" limit to 10% of the volume size or 1GB,
1900 * whichever is less. And last, set the "desired" freespace level to
1901 * to 11% of the volume size or 1.25GB, whichever is less.
1902 */
1903 hfsmp->hfs_freespace_notify_dangerlimit =
1904 MIN(HFS_ROOTVERYLOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize,
1905 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_ROOTVERYLOWDISKTRIGGERFRACTION);
1906 hfsmp->hfs_freespace_notify_warninglimit =
1907 MIN(HFS_ROOTLOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize,
1908 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_ROOTLOWDISKTRIGGERFRACTION);
1909 hfsmp->hfs_freespace_notify_desiredlevel =
1910 MIN(HFS_ROOTLOWDISKSHUTOFFLEVEL / HFSTOVCB(hfsmp)->blockSize,
1911 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_ROOTLOWDISKSHUTOFFFRACTION);
1912 };
1913
1914 /* Check if the file system exists on virtual device, like disk image */
1915 if (VNOP_IOCTL(devvp, DKIOCISVIRTUAL, (caddr_t)&isvirtual, 0, context) == 0) {
1916 if (isvirtual) {
1917 hfsmp->hfs_flags |= HFS_VIRTUAL_DEVICE;
1918 }
1919 }
1920
1921 /* do not allow ejectability checks on the root device */
1922 if (isroot == 0) {
1923 if ((hfsmp->hfs_flags & HFS_VIRTUAL_DEVICE) == 0 &&
1924 IOBSDIsMediaEjectable(mp->mnt_vfsstat.f_mntfromname)) {
1925 hfsmp->hfs_max_pending_io = 4096*1024; // a reasonable value to start with.
1926 hfsmp->hfs_syncer = thread_call_allocate(hfs_syncer, hfsmp);
1927 if (hfsmp->hfs_syncer == NULL) {
1928 printf("hfs: failed to allocate syncer thread callback for %s (%s)\n",
1929 mp->mnt_vfsstat.f_mntfromname, mp->mnt_vfsstat.f_mntonname);
1930 }
1931 }
1932 }
1933
1934 #if CONFIG_HFS_ALLOC_RBTREE
1935 /*
1936 * We spawn a thread to create the pair of red-black trees for this volume.
1937 * However, in so doing, we must be careful to ensure that if this thread is still
1938 * running after mount has finished, it doesn't interfere with an unmount. Specifically,
1939 * we'll need to set a bit that indicates we're in progress building the trees here.
1940 * Unmount will check for this bit, and then if it's set, mark a corresponding bit that
1941 * notifies the tree generation code that an unmount is waiting. Also mark the bit that
1942 * indicates the tree is live and operating.
1943 *
1944 * Only do this if we're operating on a read-write mount (we wouldn't care for read-only).
1945 */
1946
1947 if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) {
1948 hfsmp->extent_tree_flags |= (HFS_ALLOC_TREEBUILD_INFLIGHT | HFS_ALLOC_RB_ENABLED);
1949
1950 /* Initialize EOF counter so that the thread can assume it started at initial values */
1951 hfsmp->offset_block_end = 0;
1952 InitTree(hfsmp);
1953
1954 kernel_thread_start ((thread_continue_t) hfs_initialize_allocator , hfsmp, &allocator_thread);
1955 thread_deallocate(allocator_thread);
1956 }
1957
1958 #endif
1959
1960 /*
1961 * Start looking for free space to drop below this level and generate a
1962 * warning immediately if needed:
1963 */
1964 hfsmp->hfs_notification_conditions = 0;
1965 hfs_generate_volume_notifications(hfsmp);
1966
1967 if (ronly == 0) {
1968 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
1969 }
1970 FREE(mdbp, M_TEMP);
1971 return (0);
1972
1973 error_exit:
1974 if (bp)
1975 buf_brelse(bp);
1976 if (mdbp)
1977 FREE(mdbp, M_TEMP);
1978
1979 if (hfsmp && hfsmp->jvp && hfsmp->jvp != hfsmp->hfs_devvp) {
1980 vnode_clearmountedon(hfsmp->jvp);
1981 (void)VNOP_CLOSE(hfsmp->jvp, ronly ? FREAD : FREAD|FWRITE, vfs_context_kernel());
1982 hfsmp->jvp = NULL;
1983 }
1984 if (hfsmp) {
1985 if (hfsmp->hfs_devvp) {
1986 vnode_rele(hfsmp->hfs_devvp);
1987 }
1988 hfs_delete_chash(hfsmp);
1989
1990 FREE(hfsmp, M_HFSMNT);
1991 vfs_setfsprivate(mp, NULL);
1992 }
1993 return (retval);
1994 }
1995
1996
1997 /*
1998 * Make a filesystem operational.
1999 * Nothing to do at the moment.
2000 */
2001 /* ARGSUSED */
2002 static int
2003 hfs_start(__unused struct mount *mp, __unused int flags, __unused vfs_context_t context)
2004 {
2005 return (0);
2006 }
2007
2008
/*
 * Unmount an HFS volume (VFS unmount entry point).
 *
 * Teardown order: flush user files, stop the background syncer and wait
 * for in-flight callbacks, fsync each metadata B-tree file inside a
 * transaction, record clean/dirty state in the volume attributes, flush
 * the volume header and journal, release metadata vnodes and the
 * journal device, and finally free the mount structure.
 *
 * With MNT_FORCE most intermediate failures are ignored so teardown
 * proceeds regardless; without it, the first failure aborts the unmount
 * and returns that error.
 */
int
hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context)
{
	struct proc *p = vfs_context_proc(context);
	struct hfsmount *hfsmp = VFSTOHFS(mp);
	int retval = E_NONE;
	int flags;
	int force;
	int started_tr = 0;	/* nonzero once hfs_start_transaction succeeded */
	int rb_used = 0;	/* nonzero if the red-black allocator tree was live */

	flags = 0;
	force = 0;
	if (mntflags & MNT_FORCE) {
		flags |= FORCECLOSE;
		force = 1;
	}

	/* Push out all user files; bail early on failure unless forced. */
	if ((retval = hfs_flushfiles(mp, flags, p)) && !force)
		return (retval);

	if (hfsmp->hfs_flags & HFS_METADATA_ZONE)
		(void) hfs_recording_suspend(hfsmp);

	/*
	 * Cancel any pending timers for this volume.  Then wait for any timers
	 * which have fired, but whose callbacks have not yet completed.
	 */
	if (hfsmp->hfs_syncer)
	{
		struct timespec ts = {0, 100000000};	/* 0.1 seconds */

		/*
		 * Cancel any timers that have been scheduled, but have not
		 * fired yet.  NOTE: The kernel considers a timer complete as
		 * soon as it starts your callback, so the kernel does not
		 * keep track of the number of callbacks in progress.
		 */
		if (thread_call_cancel(hfsmp->hfs_syncer))
			OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_incomplete);
		thread_call_free(hfsmp->hfs_syncer);
		hfsmp->hfs_syncer = NULL;

		/*
		 * This waits for all of the callbacks that were entered before
		 * we did thread_call_cancel above, but have not completed yet.
		 */
		while(hfsmp->hfs_sync_incomplete > 0)
		{
			/* Sleep in 0.1s slices until the counter drains to zero. */
			msleep((caddr_t)&hfsmp->hfs_sync_incomplete, NULL, PWAIT, "hfs_unmount", &ts);
		}

		if (hfsmp->hfs_sync_incomplete < 0)
			panic("hfs_unmount: pm_sync_incomplete underflow!\n");
	}

#if CONFIG_HFS_ALLOC_RBTREE
	rb_used = hfs_teardown_allocator(hfsmp);
#endif

	/*
	 * Flush out the b-trees, volume bitmap and Volume Header
	 */
	if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) {
		retval = hfs_start_transaction(hfsmp);
		if (retval == 0) {
			started_tr = 1;
		} else if (!force) {
			goto err_exit;
		}

		/* Optional startup file. */
		if (hfsmp->hfs_startup_vp) {
			(void) hfs_lock(VTOC(hfsmp->hfs_startup_vp), HFS_EXCLUSIVE_LOCK);
			retval = hfs_fsync(hfsmp->hfs_startup_vp, MNT_WAIT, 0, p);
			hfs_unlock(VTOC(hfsmp->hfs_startup_vp));
			if (retval && !force)
				goto err_exit;
		}

		/* Optional attributes B-tree. */
		if (hfsmp->hfs_attribute_vp) {
			(void) hfs_lock(VTOC(hfsmp->hfs_attribute_vp), HFS_EXCLUSIVE_LOCK);
			retval = hfs_fsync(hfsmp->hfs_attribute_vp, MNT_WAIT, 0, p);
			hfs_unlock(VTOC(hfsmp->hfs_attribute_vp));
			if (retval && !force)
				goto err_exit;
		}

		(void) hfs_lock(VTOC(hfsmp->hfs_catalog_vp), HFS_EXCLUSIVE_LOCK);
		retval = hfs_fsync(hfsmp->hfs_catalog_vp, MNT_WAIT, 0, p);
		hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
		if (retval && !force)
			goto err_exit;

		(void) hfs_lock(VTOC(hfsmp->hfs_extents_vp), HFS_EXCLUSIVE_LOCK);
		retval = hfs_fsync(hfsmp->hfs_extents_vp, MNT_WAIT, 0, p);
		hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
		if (retval && !force)
			goto err_exit;

		/* Optional allocation (bitmap) file. */
		if (hfsmp->hfs_allocation_vp) {
			(void) hfs_lock(VTOC(hfsmp->hfs_allocation_vp), HFS_EXCLUSIVE_LOCK);
			retval = hfs_fsync(hfsmp->hfs_allocation_vp, MNT_WAIT, 0, p);
			hfs_unlock(VTOC(hfsmp->hfs_allocation_vp));
			if (retval && !force)
				goto err_exit;
		}

		/* Hot-file clustering metadata, if present and system-owned. */
		if (hfsmp->hfc_filevp && vnode_issystem(hfsmp->hfc_filevp)) {
			retval = hfs_fsync(hfsmp->hfc_filevp, MNT_WAIT, 0, p);
			if (retval && !force)
				goto err_exit;
		}

		/* If runtime corruption was detected, indicate that the volume
		 * was not unmounted cleanly.
		 */
		if (hfsmp->vcbAtrb & kHFSVolumeInconsistentMask) {
			HFSTOVCB(hfsmp)->vcbAtrb &= ~kHFSVolumeUnmountedMask;
		} else {
			HFSTOVCB(hfsmp)->vcbAtrb |= kHFSVolumeUnmountedMask;
		}


		if (rb_used) {
			/* If the rb-tree was live, just set min_start to 0 */
			hfsmp->nextAllocation = 0;
		}
		else {
			if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
				int i;
				u_int32_t min_start = hfsmp->totalBlocks;

				// set the nextAllocation pointer to the smallest free block number
				// we've seen so on the next mount we won't rescan unnecessarily
				lck_spin_lock(&hfsmp->vcbFreeExtLock);
				for(i=0; i < (int)hfsmp->vcbFreeExtCnt; i++) {
					if (hfsmp->vcbFreeExt[i].startBlock < min_start) {
						min_start = hfsmp->vcbFreeExt[i].startBlock;
					}
				}
				lck_spin_unlock(&hfsmp->vcbFreeExtLock);
				if (min_start < hfsmp->nextAllocation) {
					hfsmp->nextAllocation = min_start;
				}
			}
		}


		retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
		if (retval) {
			/* Header flush failed: undo the "cleanly unmounted" claim. */
			HFSTOVCB(hfsmp)->vcbAtrb &= ~kHFSVolumeUnmountedMask;
			if (!force)
				goto err_exit;	/* could not flush everything */
		}

		if (started_tr) {
			hfs_end_transaction(hfsmp);
			started_tr = 0;
		}
	}

	if (hfsmp->jnl) {
		hfs_journal_flush(hfsmp, FALSE);
	}

	/*
	 * Invalidate our caches and release metadata vnodes
	 */
	(void) hfsUnmount(hfsmp, p);

	if (HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord)
		(void) hfs_relconverter(hfsmp->hfs_encoding);

	// XXXdbg
	if (hfsmp->jnl) {
		journal_close(hfsmp->jnl);
		hfsmp->jnl = NULL;
	}

	VNOP_FSYNC(hfsmp->hfs_devvp, MNT_WAIT, context);

	/* Release the journal device if it is separate from the data device. */
	if (hfsmp->jvp && hfsmp->jvp != hfsmp->hfs_devvp) {
		vnode_clearmountedon(hfsmp->jvp);
		retval = VNOP_CLOSE(hfsmp->jvp,
		                    hfsmp->hfs_flags & HFS_READ_ONLY ? FREAD : FREAD|FWRITE,
		                    vfs_context_kernel());
		vnode_put(hfsmp->jvp);
		hfsmp->jvp = NULL;
	}
	// XXXdbg

	/*
	 * Last chance to dump unreferenced system files.
	 */
	(void) vflush(mp, NULLVP, FORCECLOSE);

#if HFS_SPARSE_DEV
	/* Drop our reference on the backing fs (if any). */
	if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) && hfsmp->hfs_backingfs_rootvp) {
		struct vnode * tmpvp;

		hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
		tmpvp = hfsmp->hfs_backingfs_rootvp;
		hfsmp->hfs_backingfs_rootvp = NULLVP;
		vnode_rele(tmpvp);
	}
#endif /* HFS_SPARSE_DEV */
	lck_mtx_destroy(&hfsmp->hfc_mutex, hfs_mutex_group);
	lck_spin_destroy(&hfsmp->vcbFreeExtLock, hfs_spinlock_group);
	vnode_rele(hfsmp->hfs_devvp);

	hfs_delete_chash(hfsmp);
	FREE(hfsmp, M_HFSMNT);

	return (0);

err_exit:
	if (started_tr) {
		hfs_end_transaction(hfsmp);
	}
	return retval;
}
2234
2235
2236 /*
2237 * Return the root of a filesystem.
2238 */
2239 static int
2240 hfs_vfs_root(struct mount *mp, struct vnode **vpp, __unused vfs_context_t context)
2241 {
2242 return hfs_vget(VFSTOHFS(mp), (cnid_t)kHFSRootFolderID, vpp, 1, 0);
2243 }
2244
2245
2246 /*
2247 * Do operations associated with quotas
2248 */
2249 #if !QUOTA
2250 static int
2251 hfs_quotactl(__unused struct mount *mp, __unused int cmds, __unused uid_t uid, __unused caddr_t datap, __unused vfs_context_t context)
2252 {
2253 return (ENOTSUP);
2254 }
2255 #else
/*
 * Dispatch a quotactl(2) request for an HFS volume.
 *
 * 'cmds' packs a major command (high bits, >> SUBCMDSHIFT) and a quota
 * type (low bits, & SUBCMDMASK).  Q_SYNC and Q_QUOTASTAT need no
 * privilege; Q_GETQUOTA is allowed when a user queries their own quota;
 * everything else requires superuser.
 *
 * Returns 0 on success or an errno.  NOTE: if the mount is busy, the
 * call returns 0 without performing the operation.
 */
static int
hfs_quotactl(struct mount *mp, int cmds, uid_t uid, caddr_t datap, vfs_context_t context)
{
	struct proc *p = vfs_context_proc(context);
	int cmd, type, error;

	/* uid == ~0 means "the calling user". */
	if (uid == ~0U)
		uid = kauth_cred_getuid(vfs_context_ucred(context));
	cmd = cmds >> SUBCMDSHIFT;

	/* Privilege check: non-exempt commands require superuser. */
	switch (cmd) {
	case Q_SYNC:
	case Q_QUOTASTAT:
		break;
	case Q_GETQUOTA:
		if (uid == kauth_cred_getuid(vfs_context_ucred(context)))
			break;
		/* fall through */
	default:
		if ( (error = vfs_context_suser(context)) )
			return (error);
	}

	type = cmds & SUBCMDMASK;
	if ((u_int)type >= MAXQUOTAS)
		return (EINVAL);
	if (vfs_busy(mp, LK_NOWAIT))
		return (0);

	switch (cmd) {

	case Q_QUOTAON:
		error = hfs_quotaon(p, mp, type, datap);
		break;

	case Q_QUOTAOFF:
		error = hfs_quotaoff(p, mp, type);
		break;

	case Q_SETQUOTA:
		error = hfs_setquota(mp, uid, type, datap);
		break;

	case Q_SETUSE:
		error = hfs_setuse(mp, uid, type, datap);
		break;

	case Q_GETQUOTA:
		error = hfs_getquota(mp, uid, type, datap);
		break;

	case Q_SYNC:
		error = hfs_qsync(mp);
		break;

	case Q_QUOTASTAT:
		error = hfs_quotastat(mp, type, datap);
		break;

	default:
		error = EINVAL;
		break;
	}
	vfs_unbusy(mp);

	return (error);
}
2323 #endif /* QUOTA */
2324
2325 /* Subtype is composite of bits */
2326 #define HFS_SUBTYPE_JOURNALED 0x01
2327 #define HFS_SUBTYPE_CASESENSITIVE 0x02
2328 /* bits 2 - 6 reserved */
2329 #define HFS_SUBTYPE_STANDARDHFS 0x80
2330
2331 /*
2332 * Get file system statistics.
2333 */
2334 int
2335 hfs_statfs(struct mount *mp, register struct vfsstatfs *sbp, __unused vfs_context_t context)
2336 {
2337 ExtendedVCB *vcb = VFSTOVCB(mp);
2338 struct hfsmount *hfsmp = VFSTOHFS(mp);
2339 u_int32_t freeCNIDs;
2340 u_int16_t subtype = 0;
2341
2342 freeCNIDs = (u_int32_t)0xFFFFFFFF - (u_int32_t)vcb->vcbNxtCNID;
2343
2344 sbp->f_bsize = (u_int32_t)vcb->blockSize;
2345 sbp->f_iosize = (size_t)cluster_max_io_size(mp, 0);
2346 sbp->f_blocks = (u_int64_t)((u_int32_t)vcb->totalBlocks);
2347 sbp->f_bfree = (u_int64_t)((u_int32_t )hfs_freeblks(hfsmp, 0));
2348 sbp->f_bavail = (u_int64_t)((u_int32_t )hfs_freeblks(hfsmp, 1));
2349 sbp->f_files = (u_int64_t)((u_int32_t )(vcb->totalBlocks - 2)); /* max files is constrained by total blocks */
2350 sbp->f_ffree = (u_int64_t)((u_int32_t )(MIN(freeCNIDs, sbp->f_bavail)));
2351
2352 /*
2353 * Subtypes (flavors) for HFS
2354 * 0: Mac OS Extended
2355 * 1: Mac OS Extended (Journaled)
2356 * 2: Mac OS Extended (Case Sensitive)
2357 * 3: Mac OS Extended (Case Sensitive, Journaled)
2358 * 4 - 127: Reserved
2359 * 128: Mac OS Standard
2360 *
2361 */
2362 if (hfsmp->hfs_flags & HFS_STANDARD) {
2363 subtype = HFS_SUBTYPE_STANDARDHFS;
2364 } else /* HFS Plus */ {
2365 if (hfsmp->jnl)
2366 subtype |= HFS_SUBTYPE_JOURNALED;
2367 if (hfsmp->hfs_flags & HFS_CASE_SENSITIVE)
2368 subtype |= HFS_SUBTYPE_CASESENSITIVE;
2369 }
2370 sbp->f_fssubtype = subtype;
2371
2372 return (0);
2373 }
2374
2375
//
// XXXdbg -- this is a callback to be used by the journal to
//           get meta data blocks flushed out to disk.
//
// XXXdbg -- be smarter and don't flush *every* block on each
//           call.  try to only flush some so we don't wind up
//           being too synchronous.
//
/*
 * Write the primary (and alternate, if present) volume header buffer
 * to disk when it is dirty.  Registered with the journal layer (see
 * the journal_create call in hfs_sysctl) so metadata reaches disk.
 */
__private_extern__
void
hfs_sync_metadata(void *arg)
{
	struct mount *mp = (struct mount *)arg;
	struct hfsmount *hfsmp;
	ExtendedVCB *vcb;
	buf_t bp;
	int retval;
	daddr64_t priIDSector;	/* logical sector of the primary volume header */
	hfsmp = VFSTOHFS(mp);
	vcb = HFSTOVCB(hfsmp);

	// now make sure the super block is flushed
	priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
				  HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size));

	retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
			HFS_PHYSBLK_ROUNDDOWN(priIDSector, hfsmp->hfs_log_per_phys),
			hfsmp->hfs_physical_block_size, NOCRED, &bp);
	/* ENXIO is deliberately not logged — NOTE(review): presumably the
	 * device-has-vanished case (e.g. eject); confirm. */
	if ((retval != 0 ) && (retval != ENXIO)) {
		printf("hfs_sync_metadata: can't read volume header at %d! (retval 0x%x)\n",
		       (int)priIDSector, retval);
	}

	/* Write the buffer only if it is delayed-write and not locked. */
	if (retval == 0 && ((buf_flags(bp) & (B_DELWRI | B_LOCKED)) == B_DELWRI)) {
		buf_bwrite(bp);
	} else if (bp) {
		buf_brelse(bp);
	}

	// the alternate super block...
	// XXXdbg - we probably don't need to do this each and every time.
	//          hfs_btreeio.c:FlushAlternate() should flag when it was
	//          written...
	if (hfsmp->hfs_alt_id_sector) {
		retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
				HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys),
				hfsmp->hfs_physical_block_size, NOCRED, &bp);
		if (retval == 0 && ((buf_flags(bp) & (B_DELWRI | B_LOCKED)) == B_DELWRI)) {
			buf_bwrite(bp);
		} else if (bp) {
			buf_brelse(bp);
		}
	}
}
2430
2431
/* Arguments threaded through vnode_iterate() to hfs_sync_callback(). */
struct hfs_sync_cargs {
	kauth_cred_t cred;	/* caller's credentials (not read by the callback) */
	struct proc *p;		/* requesting process, forwarded to hfs_fsync() */
	int waitfor;		/* sync mode, forwarded to hfs_fsync() */
	int error;		/* most recent fsync error seen by the callback */
};
2438
2439
2440 static int
2441 hfs_sync_callback(struct vnode *vp, void *cargs)
2442 {
2443 struct cnode *cp;
2444 struct hfs_sync_cargs *args;
2445 int error;
2446
2447 args = (struct hfs_sync_cargs *)cargs;
2448
2449 if (hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK) != 0) {
2450 return (VNODE_RETURNED);
2451 }
2452 cp = VTOC(vp);
2453
2454 if ((cp->c_flag & C_MODIFIED) ||
2455 (cp->c_touch_acctime | cp->c_touch_chgtime | cp->c_touch_modtime) ||
2456 vnode_hasdirtyblks(vp)) {
2457 error = hfs_fsync(vp, args->waitfor, 0, args->p);
2458
2459 if (error)
2460 args->error = error;
2461 }
2462 hfs_unlock(cp);
2463 return (VNODE_RETURNED);
2464 }
2465
2466
2467
/*
 * Synchronize an HFS volume (VFS sync entry point).
 *
 * Writes back every modified vnode on the mount, then the metadata
 * B-tree files, quota data, hot-file state, the volume header, and
 * finally flushes the journal.  Work is best-effort: a failure on one
 * file does not stop the rest; the last error seen is returned.
 *
 * Note: we are always called with the filesystem marked `MPBUSY'.
 */
int
hfs_sync(struct mount *mp, int waitfor, vfs_context_t context)
{
	struct proc *p = vfs_context_proc(context);
	struct cnode *cp;
	struct hfsmount *hfsmp;
	ExtendedVCB *vcb;
	struct vnode *meta_vp[4];
	int i;
	int error, allerror = 0;
	struct hfs_sync_cargs args;

	hfsmp = VFSTOHFS(mp);

	/*
	 * hfs_changefs might be manipulating vnodes so back off
	 */
	if (hfsmp->hfs_flags & HFS_IN_CHANGEFS)
		return (0);

	if (hfsmp->hfs_flags & HFS_READ_ONLY)
		return (EROFS);

	/* skip over frozen volumes */
	if (!lck_rw_try_lock_shared(&hfsmp->hfs_insync))
		return 0;

	args.cred = kauth_cred_get();
	args.waitfor = waitfor;
	args.p = p;
	args.error = 0;
	/*
	 * hfs_sync_callback will be called for each vnode
	 * hung off of this mount point... the vnode will be
	 * properly referenced and unreferenced around the callback
	 */
	vnode_iterate(mp, 0, hfs_sync_callback, (void *)&args);

	if (args.error)
		allerror = args.error;

	vcb = HFSTOVCB(hfsmp);

	meta_vp[0] = vcb->extentsRefNum;
	meta_vp[1] = vcb->catalogRefNum;
	meta_vp[2] = vcb->allocationsRefNum;	/* This is NULL for standard HFS */
	meta_vp[3] = hfsmp->hfs_attribute_vp;	/* Optional file */

	/* Now sync our three metadata files */
	for (i = 0; i < 4; ++i) {
		struct vnode *btvp;

		btvp = meta_vp[i];;
		if ((btvp==0) || (vnode_mount(btvp) != mp))
			continue;

		/* XXX use hfs_systemfile_lock instead ? */
		(void) hfs_lock(VTOC(btvp), HFS_EXCLUSIVE_LOCK);
		cp = VTOC(btvp);

		/* Skip B-tree files with no pending changes or dirty buffers. */
		if (((cp->c_flag & C_MODIFIED) == 0) &&
		    (cp->c_touch_acctime == 0) &&
		    (cp->c_touch_chgtime == 0) &&
		    (cp->c_touch_modtime == 0) &&
		    vnode_hasdirtyblks(btvp) == 0) {
			hfs_unlock(VTOC(btvp));
			continue;
		}
		error = vnode_get(btvp);
		if (error) {
			hfs_unlock(VTOC(btvp));
			continue;
		}
		if ((error = hfs_fsync(btvp, waitfor, 0, p)))
			allerror = error;

		hfs_unlock(cp);
		vnode_put(btvp);
	};

	/*
	 * Force stale file system control information to be flushed.
	 */
	if (vcb->vcbSigWord == kHFSSigWord) {
		if ((error = VNOP_FSYNC(hfsmp->hfs_devvp, waitfor, context))) {
			allerror = error;
		}
	}
#if QUOTA
	hfs_qsync(mp);
#endif /* QUOTA */

	hfs_hotfilesync(hfsmp, vfs_context_kernel());

	/*
	 * Write back modified superblock.
	 */
	if (IsVCBDirty(vcb)) {
		error = hfs_flushvolumeheader(hfsmp, waitfor, 0);
		if (error)
			allerror = error;
	}

	if (hfsmp->jnl) {
		hfs_journal_flush(hfsmp, FALSE);
	}

	/* Record the wall-clock time of this sync. */
	{
		clock_sec_t secs;
		clock_usec_t usecs;
		uint64_t now;

		clock_get_calendar_microtime(&secs, &usecs);
		now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs;
		hfsmp->hfs_last_sync_time = now;
	}

	lck_rw_unlock_shared(&hfsmp->hfs_insync);
	return (allerror);
}
2595
2596
2597 /*
2598 * File handle to vnode
2599 *
2600 * Have to be really careful about stale file handles:
2601 * - check that the cnode id is valid
2602 * - call hfs_vget() to get the locked cnode
2603 * - check for an unallocated cnode (i_mode == 0)
2604 * - check that the given client host has export rights and return
2605 * those rights via. exflagsp and credanonp
2606 */
2607 static int
2608 hfs_fhtovp(struct mount *mp, int fhlen, unsigned char *fhp, struct vnode **vpp, __unused vfs_context_t context)
2609 {
2610 struct hfsfid *hfsfhp;
2611 struct vnode *nvp;
2612 int result;
2613
2614 *vpp = NULL;
2615 hfsfhp = (struct hfsfid *)fhp;
2616
2617 if (fhlen < (int)sizeof(struct hfsfid))
2618 return (EINVAL);
2619
2620 result = hfs_vget(VFSTOHFS(mp), ntohl(hfsfhp->hfsfid_cnid), &nvp, 0, 0);
2621 if (result) {
2622 if (result == ENOENT)
2623 result = ESTALE;
2624 return result;
2625 }
2626
2627 /*
2628 * We used to use the create time as the gen id of the file handle,
2629 * but it is not static enough because it can change at any point
2630 * via system calls. We still don't have another volume ID or other
2631 * unique identifier to use for a generation ID across reboots that
2632 * persists until the file is removed. Using only the CNID exposes
2633 * us to the potential wrap-around case, but as of 2/2008, it would take
2634 * over 2 months to wrap around if the machine did nothing but allocate
2635 * CNIDs. Using some kind of wrap counter would only be effective if
2636 * each file had the wrap counter associated with it. For now,
2637 * we use only the CNID to identify the file as it's good enough.
2638 */
2639
2640 *vpp = nvp;
2641
2642 hfs_unlock(VTOC(nvp));
2643 return (0);
2644 }
2645
2646
2647 /*
2648 * Vnode pointer to File handle
2649 */
2650 /* ARGSUSED */
2651 static int
2652 hfs_vptofh(struct vnode *vp, int *fhlenp, unsigned char *fhp, __unused vfs_context_t context)
2653 {
2654 struct cnode *cp;
2655 struct hfsfid *hfsfhp;
2656
2657 if (ISHFS(VTOVCB(vp)))
2658 return (ENOTSUP); /* hfs standard is not exportable */
2659
2660 if (*fhlenp < (int)sizeof(struct hfsfid))
2661 return (EOVERFLOW);
2662
2663 cp = VTOC(vp);
2664 hfsfhp = (struct hfsfid *)fhp;
2665 /* only the CNID is used to identify the file now */
2666 hfsfhp->hfsfid_cnid = htonl(cp->c_fileid);
2667 hfsfhp->hfsfid_gen = htonl(cp->c_fileid);
2668 *fhlenp = sizeof(struct hfsfid);
2669
2670 return (0);
2671 }
2672
2673
2674 /*
2675 * Initial HFS filesystems, done only once.
2676 */
2677 static int
2678 hfs_init(__unused struct vfsconf *vfsp)
2679 {
2680 static int done = 0;
2681
2682 if (done)
2683 return (0);
2684 done = 1;
2685 hfs_chashinit();
2686 hfs_converterinit();
2687
2688 BTReserveSetup();
2689
2690
2691 hfs_lock_attr = lck_attr_alloc_init();
2692 hfs_group_attr = lck_grp_attr_alloc_init();
2693 hfs_mutex_group = lck_grp_alloc_init("hfs-mutex", hfs_group_attr);
2694 hfs_rwlock_group = lck_grp_alloc_init("hfs-rwlock", hfs_group_attr);
2695 hfs_spinlock_group = lck_grp_alloc_init("hfs-spinlock", hfs_group_attr);
2696
2697 #if HFS_COMPRESSION
2698 decmpfs_init();
2699 #endif
2700
2701 return (0);
2702 }
2703
2704 static int
2705 hfs_getmountpoint(struct vnode *vp, struct hfsmount **hfsmpp)
2706 {
2707 struct hfsmount * hfsmp;
2708 char fstypename[MFSNAMELEN];
2709
2710 if (vp == NULL)
2711 return (EINVAL);
2712
2713 if (!vnode_isvroot(vp))
2714 return (EINVAL);
2715
2716 vnode_vfsname(vp, fstypename);
2717 if (strncmp(fstypename, "hfs", sizeof(fstypename)) != 0)
2718 return (EINVAL);
2719
2720 hfsmp = VTOHFS(vp);
2721
2722 if (HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord)
2723 return (EINVAL);
2724
2725 *hfsmpp = hfsmp;
2726
2727 return (0);
2728 }
2729
2730 // XXXdbg
2731 #include <sys/filedesc.h>
2732
2733 /*
2734 * HFS filesystem related variables.
2735 */
2736 int
2737 hfs_sysctl(int *name, __unused u_int namelen, user_addr_t oldp, size_t *oldlenp,
2738 user_addr_t newp, size_t newlen, vfs_context_t context)
2739 {
2740 struct proc *p = vfs_context_proc(context);
2741 int error;
2742 struct hfsmount *hfsmp;
2743
2744 /* all sysctl names at this level are terminal */
2745
2746 if (name[0] == HFS_ENCODINGBIAS) {
2747 int bias;
2748
2749 bias = hfs_getencodingbias();
2750 error = sysctl_int(oldp, oldlenp, newp, newlen, &bias);
2751 if (error == 0 && newp)
2752 hfs_setencodingbias(bias);
2753 return (error);
2754
2755 } else if (name[0] == HFS_EXTEND_FS) {
2756 u_int64_t newsize;
2757 vnode_t vp = vfs_context_cwd(context);
2758
2759 if (newp == USER_ADDR_NULL || vp == NULLVP)
2760 return (EINVAL);
2761 if ((error = hfs_getmountpoint(vp, &hfsmp)))
2762 return (error);
2763 error = sysctl_quad(oldp, oldlenp, newp, newlen, (quad_t *)&newsize);
2764 if (error)
2765 return (error);
2766
2767 error = hfs_extendfs(hfsmp, newsize, context);
2768 return (error);
2769
2770 } else if (name[0] == HFS_ENCODINGHINT) {
2771 size_t bufsize;
2772 size_t bytes;
2773 u_int32_t hint;
2774 u_int16_t *unicode_name = NULL;
2775 char *filename = NULL;
2776
2777 if ((newlen <= 0) || (newlen > MAXPATHLEN))
2778 return (EINVAL);
2779
2780 bufsize = MAX(newlen * 3, MAXPATHLEN);
2781 MALLOC(filename, char *, newlen, M_TEMP, M_WAITOK);
2782 if (filename == NULL) {
2783 error = ENOMEM;
2784 goto encodinghint_exit;
2785 }
2786 MALLOC(unicode_name, u_int16_t *, bufsize, M_TEMP, M_WAITOK);
2787 if (filename == NULL) {
2788 error = ENOMEM;
2789 goto encodinghint_exit;
2790 }
2791
2792 error = copyin(newp, (caddr_t)filename, newlen);
2793 if (error == 0) {
2794 error = utf8_decodestr((u_int8_t *)filename, newlen - 1, unicode_name,
2795 &bytes, bufsize, 0, UTF_DECOMPOSED);
2796 if (error == 0) {
2797 hint = hfs_pickencoding(unicode_name, bytes / 2);
2798 error = sysctl_int(oldp, oldlenp, USER_ADDR_NULL, 0, (int32_t *)&hint);
2799 }
2800 }
2801
2802 encodinghint_exit:
2803 if (unicode_name)
2804 FREE(unicode_name, M_TEMP);
2805 if (filename)
2806 FREE(filename, M_TEMP);
2807 return (error);
2808
2809 } else if (name[0] == HFS_ENABLE_JOURNALING) {
2810 // make the file system journaled...
2811 vnode_t vp = vfs_context_cwd(context);
2812 vnode_t jvp;
2813 ExtendedVCB *vcb;
2814 struct cat_attr jnl_attr, jinfo_attr;
2815 struct cat_fork jnl_fork, jinfo_fork;
2816 void *jnl = NULL;
2817 int lockflags;
2818
2819 /* Only root can enable journaling */
2820 if (!is_suser()) {
2821 return (EPERM);
2822 }
2823 if (vp == NULLVP)
2824 return EINVAL;
2825
2826 hfsmp = VTOHFS(vp);
2827 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2828 return EROFS;
2829 }
2830 if (HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord) {
2831 printf("hfs: can't make a plain hfs volume journaled.\n");
2832 return EINVAL;
2833 }
2834
2835 if (hfsmp->jnl) {
2836 printf("hfs: volume @ mp %p is already journaled!\n", vnode_mount(vp));
2837 return EAGAIN;
2838 }
2839
2840 vcb = HFSTOVCB(hfsmp);
2841 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_EXTENTS, HFS_EXCLUSIVE_LOCK);
2842 if (BTHasContiguousNodes(VTOF(vcb->catalogRefNum)) == 0 ||
2843 BTHasContiguousNodes(VTOF(vcb->extentsRefNum)) == 0) {
2844
2845 printf("hfs: volume has a btree w/non-contiguous nodes. can not enable journaling.\n");
2846 hfs_systemfile_unlock(hfsmp, lockflags);
2847 return EINVAL;
2848 }
2849 hfs_systemfile_unlock(hfsmp, lockflags);
2850
2851 // make sure these both exist!
2852 if ( GetFileInfo(vcb, kHFSRootFolderID, ".journal_info_block", &jinfo_attr, &jinfo_fork) == 0
2853 || GetFileInfo(vcb, kHFSRootFolderID, ".journal", &jnl_attr, &jnl_fork) == 0) {
2854
2855 return EINVAL;
2856 }
2857
2858 hfs_sync(hfsmp->hfs_mp, MNT_WAIT, context);
2859
2860 printf("hfs: Initializing the journal (joffset 0x%llx sz 0x%llx)...\n",
2861 (off_t)name[2], (off_t)name[3]);
2862
2863 //
2864 // XXXdbg - note that currently (Sept, 08) hfs_util does not support
2865 // enabling the journal on a separate device so it is safe
2866 // to just copy hfs_devvp here. If hfs_util gets the ability
2867 // to dynamically enable the journal on a separate device then
2868 // we will have to do the same thing as hfs_early_journal_init()
2869 // to locate and open the journal device.
2870 //
2871 jvp = hfsmp->hfs_devvp;
2872 jnl = journal_create(jvp,
2873 (off_t)name[2] * (off_t)HFSTOVCB(hfsmp)->blockSize
2874 + HFSTOVCB(hfsmp)->hfsPlusIOPosOffset,
2875 (off_t)((unsigned)name[3]),
2876 hfsmp->hfs_devvp,
2877 hfsmp->hfs_logical_block_size,
2878 0,
2879 0,
2880 hfs_sync_metadata, hfsmp->hfs_mp);
2881
2882 /*
2883 * Set up the trim callback function so that we can add
2884 * recently freed extents to the free extent cache once
2885 * the transaction that freed them is written to the
2886 * journal on disk.
2887 */
2888 if (jnl)
2889 journal_trim_set_callback(jnl, hfs_trim_callback, hfsmp);
2890
2891 if (jnl == NULL) {
2892 printf("hfs: FAILED to create the journal!\n");
2893 if (jvp && jvp != hfsmp->hfs_devvp) {
2894 vnode_clearmountedon(jvp);
2895 VNOP_CLOSE(jvp, hfsmp->hfs_flags & HFS_READ_ONLY ? FREAD : FREAD|FWRITE, vfs_context_kernel());
2896 }
2897 jvp = NULL;
2898
2899 return EINVAL;
2900 }
2901
2902 hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);
2903
2904 /*
2905 * Flush all dirty metadata buffers.
2906 */
2907 buf_flushdirtyblks(hfsmp->hfs_devvp, TRUE, 0, "hfs_sysctl");
2908 buf_flushdirtyblks(hfsmp->hfs_extents_vp, TRUE, 0, "hfs_sysctl");
2909 buf_flushdirtyblks(hfsmp->hfs_catalog_vp, TRUE, 0, "hfs_sysctl");
2910 buf_flushdirtyblks(hfsmp->hfs_allocation_vp, TRUE, 0, "hfs_sysctl");
2911 if (hfsmp->hfs_attribute_vp)
2912 buf_flushdirtyblks(hfsmp->hfs_attribute_vp, TRUE, 0, "hfs_sysctl");
2913
2914 HFSTOVCB(hfsmp)->vcbJinfoBlock = name[1];
2915 HFSTOVCB(hfsmp)->vcbAtrb |= kHFSVolumeJournaledMask;
2916 hfsmp->jvp = jvp;
2917 hfsmp->jnl = jnl;
2918
2919 // save this off for the hack-y check in hfs_remove()
2920 hfsmp->jnl_start = (u_int32_t)name[2];
2921 hfsmp->jnl_size = (off_t)((unsigned)name[3]);
2922 hfsmp->hfs_jnlinfoblkid = jinfo_attr.ca_fileid;
2923 hfsmp->hfs_jnlfileid = jnl_attr.ca_fileid;
2924
2925 vfs_setflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
2926
2927 hfs_unlock_global (hfsmp);
2928 hfs_flushvolumeheader(hfsmp, MNT_WAIT, 1);
2929
2930 {
2931 fsid_t fsid;
2932
2933 fsid.val[0] = (int32_t)hfsmp->hfs_raw_dev;
2934 fsid.val[1] = (int32_t)vfs_typenum(HFSTOVFS(hfsmp));
2935 vfs_event_signal(&fsid, VQ_UPDATE, (intptr_t)NULL);
2936 }
2937 return 0;
2938 } else if (name[0] == HFS_DISABLE_JOURNALING) {
2939 // clear the journaling bit
2940 vnode_t vp = vfs_context_cwd(context);
2941
2942 /* Only root can disable journaling */
2943 if (!is_suser()) {
2944 return (EPERM);
2945 }
2946 if (vp == NULLVP)
2947 return EINVAL;
2948
2949 hfsmp = VTOHFS(vp);
2950
2951 /*
2952 * Disabling journaling is disallowed on volumes with directory hard links
2953 * because we have not tested the relevant code path.
2954 */
2955 if (hfsmp->hfs_private_attr[DIR_HARDLINKS].ca_entries != 0){
2956 printf("hfs: cannot disable journaling on volumes with directory hardlinks\n");
2957 return EPERM;
2958 }
2959
2960 printf("hfs: disabling journaling for mount @ %p\n", vnode_mount(vp));
2961
2962 hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);
2963
2964 // Lights out for you buddy!
2965 journal_close(hfsmp->jnl);
2966 hfsmp->jnl = NULL;
2967
2968 if (hfsmp->jvp && hfsmp->jvp != hfsmp->hfs_devvp) {
2969 vnode_clearmountedon(hfsmp->jvp);
2970 VNOP_CLOSE(hfsmp->jvp, hfsmp->hfs_flags & HFS_READ_ONLY ? FREAD : FREAD|FWRITE, vfs_context_kernel());
2971 vnode_put(hfsmp->jvp);
2972 }
2973 hfsmp->jvp = NULL;
2974 vfs_clearflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
2975 hfsmp->jnl_start = 0;
2976 hfsmp->hfs_jnlinfoblkid = 0;
2977 hfsmp->hfs_jnlfileid = 0;
2978
2979 HFSTOVCB(hfsmp)->vcbAtrb &= ~kHFSVolumeJournaledMask;
2980
2981 hfs_unlock_global (hfsmp);
2982
2983 hfs_flushvolumeheader(hfsmp, MNT_WAIT, 1);
2984
2985 {
2986 fsid_t fsid;
2987
2988 fsid.val[0] = (int32_t)hfsmp->hfs_raw_dev;
2989 fsid.val[1] = (int32_t)vfs_typenum(HFSTOVFS(hfsmp));
2990 vfs_event_signal(&fsid, VQ_UPDATE, (intptr_t)NULL);
2991 }
2992 return 0;
2993 } else if (name[0] == HFS_GET_JOURNAL_INFO) {
2994 vnode_t vp = vfs_context_cwd(context);
2995 off_t jnl_start, jnl_size;
2996
2997 if (vp == NULLVP)
2998 return EINVAL;
2999
3000 /* 64-bit processes won't work with this sysctl -- can't fit a pointer into an int! */
3001 if (proc_is64bit(current_proc()))
3002 return EINVAL;
3003
3004 hfsmp = VTOHFS(vp);
3005 if (hfsmp->jnl == NULL) {
3006 jnl_start = 0;
3007 jnl_size = 0;
3008 } else {
3009 jnl_start = (off_t)(hfsmp->jnl_start * HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset;
3010 jnl_size = (off_t)hfsmp->jnl_size;
3011 }
3012
3013 if ((error = copyout((caddr_t)&jnl_start, CAST_USER_ADDR_T(name[1]), sizeof(off_t))) != 0) {
3014 return error;
3015 }
3016 if ((error = copyout((caddr_t)&jnl_size, CAST_USER_ADDR_T(name[2]), sizeof(off_t))) != 0) {
3017 return error;
3018 }
3019
3020 return 0;
3021 } else if (name[0] == HFS_SET_PKG_EXTENSIONS) {
3022
3023 return set_package_extensions_table((user_addr_t)((unsigned)name[1]), name[2], name[3]);
3024
3025 } else if (name[0] == VFS_CTL_QUERY) {
3026 struct sysctl_req *req;
3027 union union_vfsidctl vc;
3028 struct mount *mp;
3029 struct vfsquery vq;
3030
3031 req = CAST_DOWN(struct sysctl_req *, oldp); /* we're new style vfs sysctl. */
3032
3033 error = SYSCTL_IN(req, &vc, proc_is64bit(p)? sizeof(vc.vc64):sizeof(vc.vc32));
3034 if (error) return (error);
3035
3036 mp = vfs_getvfs(&vc.vc32.vc_fsid); /* works for 32 and 64 */
3037 if (mp == NULL) return (ENOENT);
3038
3039 hfsmp = VFSTOHFS(mp);
3040 bzero(&vq, sizeof(vq));
3041 vq.vq_flags = hfsmp->hfs_notification_conditions;
3042 return SYSCTL_OUT(req, &vq, sizeof(vq));;
3043 } else if (name[0] == HFS_REPLAY_JOURNAL) {
3044 vnode_t devvp = NULL;
3045 int device_fd;
3046 if (namelen != 2) {
3047 return (EINVAL);
3048 }
3049 device_fd = name[1];
3050 error = file_vnode(device_fd, &devvp);
3051 if (error) {
3052 return error;
3053 }
3054 error = vnode_getwithref(devvp);
3055 if (error) {
3056 file_drop(device_fd);
3057 return error;
3058 }
3059 error = hfs_journal_replay(devvp, context);
3060 file_drop(device_fd);
3061 vnode_put(devvp);
3062 return error;
3063 } else if (name[0] == HFS_ENABLE_RESIZE_DEBUG) {
3064 hfs_resize_debug = 1;
3065 printf ("hfs_sysctl: Enabled volume resize debugging.\n");
3066 return 0;
3067 }
3068
3069 return (ENOTSUP);
3070 }
3071
3072 /*
3073 * hfs_vfs_vget is not static since it is used in hfs_readwrite.c to support
3074 * the build_path ioctl. We use it to leverage the code below that updates
3075 * the origin list cache if necessary
3076 */
3077
int
hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, __unused vfs_context_t context)
{
	int error;
	int lockflags;
	struct hfsmount *hfsmp;

	hfsmp = VFSTOHFS(mp);

	/*
	 * Look up the cnode by ID.  skiplock=1: hfs_vget returns the vnode
	 * with an iocount but with the cnode UNLOCKED, so we can safely take
	 * the cnode lock ourselves below.
	 */
	error = hfs_vget(hfsmp, (cnid_t)ino, vpp, 1, 0);
	if (error)
		return (error);

	/*
	 * ADLs may need to have their origin state updated
	 * since build_path needs a valid parent.  The same is true
	 * for hardlinked files as well.  There isn't a race window here
	 * in re-acquiring the cnode lock since we aren't pulling any data
	 * out of the cnode; instead, we're going to the catalog.
	 */
	if ((VTOC(*vpp)->c_flag & C_HARDLINK) &&
	    (hfs_lock(VTOC(*vpp), HFS_EXCLUSIVE_LOCK) == 0)) {
		cnode_t *cp = VTOC(*vpp);
		struct cat_desc cdesc;

		/* Only consult the catalog if no origin is cached yet. */
		if (!hfs_haslinkorigin(cp)) {
			lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
			error = cat_findname(hfsmp, (cnid_t)ino, &cdesc);
			hfs_systemfile_unlock(hfsmp, lockflags);
			if (error == 0) {
				/*
				 * Don't record the private metadata directories
				 * as an origin; only a real, visible parent is
				 * useful to build_path.
				 */
				if ((cdesc.cd_parentcnid != hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) &&
				    (cdesc.cd_parentcnid != hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid)) {
					hfs_savelinkorigin(cp, cdesc.cd_parentcnid);
				}
				cat_releasedesc(&cdesc);
			}
		}
		hfs_unlock(cp);
	}
	/*
	 * The origin update is best-effort: a cat_findname failure is not
	 * propagated — the vnode from hfs_vget is still returned as success.
	 */
	return (0);
}
3119
3120
3121 /*
3122 * Look up an HFS object by ID.
3123 *
3124 * The object is returned with an iocount reference and the cnode locked.
3125 *
3126 * If the object is a file then it will represent the data fork.
3127 */
int
hfs_vget(struct hfsmount *hfsmp, cnid_t cnid, struct vnode **vpp, int skiplock, int allow_deleted)
{
	struct vnode *vp = NULLVP;
	struct cat_desc cndesc;
	struct cat_attr cnattr;
	struct cat_fork cnfork;
	u_int32_t linkref = 0;
	int error;

	/* Check for cnids that shouldn't be exported. */
	if ((cnid < kHFSFirstUserCatalogNodeID) &&
	    (cnid != kHFSRootFolderID && cnid != kHFSRootParentID)) {
		return (ENOENT);
	}
	/* Don't export our private directories. */
	if (cnid == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid ||
	    cnid == hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) {
		return (ENOENT);
	}
	/*
	 * Check the cnode hash first; a hit returns the vnode directly
	 * (locked unless skiplock was requested).
	 */
	vp = hfs_chash_getvnode(hfsmp, cnid, 0, skiplock, allow_deleted);
	if (vp) {
		*vpp = vp;
		return(0);
	}

	bzero(&cndesc, sizeof(cndesc));
	bzero(&cnattr, sizeof(cnattr));
	bzero(&cnfork, sizeof(cnfork));

	/*
	 * Not in hash, lookup in catalog
	 */
	if (cnid == kHFSRootParentID) {
		/*
		 * The root parent has no catalog record; synthesize a
		 * descriptor/attributes for it by hand.
		 */
		static char hfs_rootname[] = "/";

		cndesc.cd_nameptr = (const u_int8_t *)&hfs_rootname[0];
		cndesc.cd_namelen = 1;
		cndesc.cd_parentcnid = kHFSRootParentID;
		cndesc.cd_cnid = kHFSRootFolderID;
		cndesc.cd_flags = CD_ISDIR;

		cnattr.ca_fileid = kHFSRootFolderID;
		cnattr.ca_linkcount = 1;
		cnattr.ca_entries = 1;
		cnattr.ca_dircount = 1;
		cnattr.ca_mode = (S_IFDIR | S_IRWXU | S_IRWXG | S_IRWXO);
	} else {
		int lockflags;
		cnid_t pid;
		const char *nameptr;

		lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
		error = cat_idlookup(hfsmp, cnid, 0, &cndesc, &cnattr, &cnfork);
		hfs_systemfile_unlock(hfsmp, lockflags);

		if (error) {
			*vpp = NULL;
			return (error);
		}

		/*
		 * Check for a raw hardlink inode and save its linkref.
		 * These live in the private metadata directories and are
		 * recognized by their name prefix.
		 */
		pid = cndesc.cd_parentcnid;
		nameptr = (const char *)cndesc.cd_nameptr;

		if ((pid == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid) &&
		    (bcmp(nameptr, HFS_INODE_PREFIX, HFS_INODE_PREFIX_LEN) == 0)) {
			linkref = strtoul(&nameptr[HFS_INODE_PREFIX_LEN], NULL, 10);

		} else if ((pid == hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) &&
		           (bcmp(nameptr, HFS_DIRINODE_PREFIX, HFS_DIRINODE_PREFIX_LEN) == 0)) {
			linkref = strtoul(&nameptr[HFS_DIRINODE_PREFIX_LEN], NULL, 10);

		} else if ((pid == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid) &&
		           (bcmp(nameptr, HFS_DELETE_PREFIX, HFS_DELETE_PREFIX_LEN) == 0)) {
			*vpp = NULL;
			cat_releasedesc(&cndesc);
			return (ENOENT);	/* open unlinked file */
		}
	}

	/*
	 * Finish initializing cnode descriptor for hardlinks.
	 *
	 * We need a valid name and parent for reverse lookups.
	 */
	if (linkref) {
		cnid_t nextlinkid;
		cnid_t prevlinkid;
		struct cat_desc linkdesc;
		int lockflags;

		cnattr.ca_linkref = linkref;

		/*
		 * Pick up the first link in the chain and get a descriptor for it.
		 * This allows blind volfs paths to work for hardlinks.
		 */
		if ((hfs_lookup_siblinglinks(hfsmp, linkref, &prevlinkid, &nextlinkid) == 0) &&
		    (nextlinkid != 0)) {
			lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
			error = cat_findname(hfsmp, nextlinkid, &linkdesc);
			hfs_systemfile_unlock(hfsmp, lockflags);
			if (error == 0) {
				/* Replace the raw-inode descriptor with the link's. */
				cat_releasedesc(&cndesc);
				bcopy(&linkdesc, &cndesc, sizeof(linkdesc));
			}
		}
	}

	if (linkref) {
		int newvnode_flags = 0;

		error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr,
		                        &cnfork, &vp, &newvnode_flags);
		if (error == 0) {
			VTOC(vp)->c_flag |= C_HARDLINK;
			vnode_setmultipath(vp);
		}
	} else {
		struct componentname cn;
		int newvnode_flags = 0;

		/* Supply hfs_getnewvnode with a component name. */
		MALLOC_ZONE(cn.cn_pnbuf, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK);
		cn.cn_nameiop = LOOKUP;
		cn.cn_flags = ISLASTCN | HASBUF;
		cn.cn_context = NULL;
		cn.cn_pnlen = MAXPATHLEN;
		cn.cn_nameptr = cn.cn_pnbuf;
		cn.cn_namelen = cndesc.cd_namelen;
		cn.cn_hash = 0;
		cn.cn_consume = 0;
		/* +1 copies the NUL terminator along with the name. */
		bcopy(cndesc.cd_nameptr, cn.cn_nameptr, cndesc.cd_namelen + 1);

		error = hfs_getnewvnode(hfsmp, NULLVP, &cn, &cndesc, 0, &cnattr,
		                        &cnfork, &vp, &newvnode_flags);

		if (error == 0 && (VTOC(vp)->c_flag & C_HARDLINK)) {
			hfs_savelinkorigin(VTOC(vp), cndesc.cd_parentcnid);
		}
		FREE_ZONE(cn.cn_pnbuf, cn.cn_pnlen, M_NAMEI);
	}
	cat_releasedesc(&cndesc);

	*vpp = vp;
	/* hfs_getnewvnode returns the cnode locked; drop it if asked to. */
	if (vp && skiplock) {
		hfs_unlock(VTOC(vp));
	}
	return (error);
}
3284
3285
3286 /*
3287 * Flush out all the files in a filesystem.
3288 */
/*
 * Flush (vflush) all user files on the mount.  System files are always
 * skipped; with quotas enabled, the root vnode is skipped on the first
 * passes because the open quota files hold indirect references on it.
 * Returns 0 or an errno (EBUSY if the root is still in use and
 * FORCECLOSE was not requested).
 */
static int
#if QUOTA
hfs_flushfiles(struct mount *mp, int flags, struct proc *p)
#else
hfs_flushfiles(struct mount *mp, int flags, __unused struct proc *p)
#endif /* QUOTA */
{
	struct hfsmount *hfsmp;
	struct vnode *skipvp = NULLVP;
	int error;
#if QUOTA
	int quotafilecnt;
	int i;
#endif

	hfsmp = VFSTOHFS(mp);

#if QUOTA
	/*
	 * The open quota files have an indirect reference on
	 * the root directory vnode.  We must account for this
	 * extra reference when doing the intial vflush.
	 */
	quotafilecnt = 0;
	if (((unsigned int)vfs_flags(mp)) & MNT_QUOTA) {

		/* Find out how many quota files we have open. */
		for (i = 0; i < MAXQUOTAS; i++) {
			if (hfsmp->hfs_qfiles[i].qf_vp != NULLVP)
				++quotafilecnt;
		}

		/* Obtain the root vnode so we can skip over it. */
		skipvp = hfs_chash_getvnode(hfsmp, kHFSRootFolderID, 0, 0, 0);
	}
#endif /* QUOTA */

	/* First pass: also skip swap files; second pass includes them. */
	error = vflush(mp, skipvp, SKIPSYSTEM | SKIPSWAP | flags);
	if (error != 0)
		return(error);

	error = vflush(mp, skipvp, SKIPSYSTEM | flags);

#if QUOTA
	if (((unsigned int)vfs_flags(mp)) & MNT_QUOTA) {
		if (skipvp) {
			/*
			 * See if there are additional references on the
			 * root vp besides the ones obtained from the open
			 * quota files and the hfs_chash_getvnode call above.
			 */
			if ((error == 0) &&
			    (vnode_isinuse(skipvp, quotafilecnt))) {
				error = EBUSY;  /* root directory is still open */
			}
			hfs_unlock(VTOC(skipvp));
			vnode_put(skipvp);
		}
		if (error && (flags & FORCECLOSE) == 0)
			return (error);

		/* Close the quota files, then flush whatever remains. */
		for (i = 0; i < MAXQUOTAS; i++) {
			if (hfsmp->hfs_qfiles[i].qf_vp == NULLVP)
				continue;
			hfs_quotaoff(p, mp, i);
		}
		error = vflush(mp, NULLVP, SKIPSYSTEM | flags);
	}
#endif /* QUOTA */

	return (error);
}
3361
3362 /*
3363 * Update volume encoding bitmap (HFS Plus only)
3364 */
3365 __private_extern__
3366 void
3367 hfs_setencodingbits(struct hfsmount *hfsmp, u_int32_t encoding)
3368 {
3369 #define kIndexMacUkrainian 48 /* MacUkrainian encoding is 152 */
3370 #define kIndexMacFarsi 49 /* MacFarsi encoding is 140 */
3371
3372 u_int32_t index;
3373
3374 switch (encoding) {
3375 case kTextEncodingMacUkrainian:
3376 index = kIndexMacUkrainian;
3377 break;
3378 case kTextEncodingMacFarsi:
3379 index = kIndexMacFarsi;
3380 break;
3381 default:
3382 index = encoding;
3383 break;
3384 }
3385
3386 if (index < 64 && (hfsmp->encodingsBitmap & (u_int64_t)(1ULL << index)) == 0) {
3387 HFS_MOUNT_LOCK(hfsmp, TRUE)
3388 hfsmp->encodingsBitmap |= (u_int64_t)(1ULL << index);
3389 MarkVCBDirty(hfsmp);
3390 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
3391 }
3392 }
3393
3394 /*
3395 * Update volume stats
3396 *
3397 * On journal volumes this will cause a volume header flush
3398 */
3399 int
3400 hfs_volupdate(struct hfsmount *hfsmp, enum volop op, int inroot)
3401 {
3402 struct timeval tv;
3403
3404 microtime(&tv);
3405
3406 lck_mtx_lock(&hfsmp->hfs_mutex);
3407
3408 MarkVCBDirty(hfsmp);
3409 hfsmp->hfs_mtime = tv.tv_sec;
3410
3411 switch (op) {
3412 case VOL_UPDATE:
3413 break;
3414 case VOL_MKDIR:
3415 if (hfsmp->hfs_dircount != 0xFFFFFFFF)
3416 ++hfsmp->hfs_dircount;
3417 if (inroot && hfsmp->vcbNmRtDirs != 0xFFFF)
3418 ++hfsmp->vcbNmRtDirs;
3419 break;
3420 case VOL_RMDIR:
3421 if (hfsmp->hfs_dircount != 0)
3422 --hfsmp->hfs_dircount;
3423 if (inroot && hfsmp->vcbNmRtDirs != 0xFFFF)
3424 --hfsmp->vcbNmRtDirs;
3425 break;
3426 case VOL_MKFILE:
3427 if (hfsmp->hfs_filecount != 0xFFFFFFFF)
3428 ++hfsmp->hfs_filecount;
3429 if (inroot && hfsmp->vcbNmFls != 0xFFFF)
3430 ++hfsmp->vcbNmFls;
3431 break;
3432 case VOL_RMFILE:
3433 if (hfsmp->hfs_filecount != 0)
3434 --hfsmp->hfs_filecount;
3435 if (inroot && hfsmp->vcbNmFls != 0xFFFF)
3436 --hfsmp->vcbNmFls;
3437 break;
3438 }
3439
3440 lck_mtx_unlock(&hfsmp->hfs_mutex);
3441
3442 if (hfsmp->jnl) {
3443 hfs_flushvolumeheader(hfsmp, 0, 0);
3444 }
3445
3446 return (0);
3447 }
3448
3449
/*
 * Flush the in-memory VCB state of an HFS (standard) volume out to its
 * on-disk Master Directory Block, byte-swapping each field to big-endian.
 * If 'altflush' is set, the alternate MDB is rewritten as well.
 * With waitfor == MNT_WAIT the primary write is synchronous and its
 * status is returned; otherwise the write is asynchronous.
 */
static int
hfs_flushMDB(struct hfsmount *hfsmp, int waitfor, int altflush)
{
	ExtendedVCB *vcb = HFSTOVCB(hfsmp);
	struct filefork *fp;
	HFSMasterDirectoryBlock	*mdb;
	struct buf *bp = NULL;
	int retval;
	int sectorsize;
	ByteCount namelen;

	sectorsize = hfsmp->hfs_logical_block_size;
	retval = (int)buf_bread(hfsmp->hfs_devvp, (daddr64_t)HFS_PRI_SECTOR(sectorsize), sectorsize, NOCRED, &bp);
	if (retval) {
		if (bp)
			buf_brelse(bp);
		return retval;
	}

	/* Hold the mount mutex while copying VCB fields into the buffer. */
	lck_mtx_lock(&hfsmp->hfs_mutex);

	mdb = (HFSMasterDirectoryBlock *)(buf_dataptr(bp) + HFS_PRI_OFFSET(sectorsize));

	/* HFS-standard on-disk dates are local time, not UTC. */
	mdb->drCrDate	= SWAP_BE32 (UTCToLocal(to_hfs_time(vcb->hfs_itime)));
	mdb->drLsMod	= SWAP_BE32 (UTCToLocal(to_hfs_time(vcb->vcbLsMod)));
	mdb->drAtrb	= SWAP_BE16 (vcb->vcbAtrb);
	mdb->drNmFls	= SWAP_BE16 (vcb->vcbNmFls);
	mdb->drAllocPtr	= SWAP_BE16 (vcb->nextAllocation);
	mdb->drClpSiz	= SWAP_BE32 (vcb->vcbClpSiz);
	mdb->drNxtCNID	= SWAP_BE32 (vcb->vcbNxtCNID);
	mdb->drFreeBks	= SWAP_BE16 (vcb->freeBlocks);

	/* Volume name: convert UTF-8 back to the on-disk Mac encoding. */
	namelen = strlen((char *)vcb->vcbVN);
	retval = utf8_to_hfs(vcb, namelen, vcb->vcbVN, mdb->drVN);
	/* Retry with MacRoman in case that's how it was exported. */
	if (retval)
		retval = utf8_to_mac_roman(namelen, vcb->vcbVN, mdb->drVN);

	mdb->drVolBkUp	= SWAP_BE32 (UTCToLocal(to_hfs_time(vcb->vcbVolBkUp)));
	mdb->drWrCnt	= SWAP_BE32 (vcb->vcbWrCnt);
	mdb->drNmRtDirs	= SWAP_BE16 (vcb->vcbNmRtDirs);
	mdb->drFilCnt	= SWAP_BE32 (vcb->vcbFilCnt);
	mdb->drDirCnt	= SWAP_BE32 (vcb->vcbDirCnt);

	bcopy(vcb->vcbFndrInfo, mdb->drFndrInfo, sizeof(mdb->drFndrInfo));

	/* Sync the extents overflow file's first 3 extents and sizes. */
	fp = VTOF(vcb->extentsRefNum);
	mdb->drXTExtRec[0].startBlock = SWAP_BE16 (fp->ff_extents[0].startBlock);
	mdb->drXTExtRec[0].blockCount = SWAP_BE16 (fp->ff_extents[0].blockCount);
	mdb->drXTExtRec[1].startBlock = SWAP_BE16 (fp->ff_extents[1].startBlock);
	mdb->drXTExtRec[1].blockCount = SWAP_BE16 (fp->ff_extents[1].blockCount);
	mdb->drXTExtRec[2].startBlock = SWAP_BE16 (fp->ff_extents[2].startBlock);
	mdb->drXTExtRec[2].blockCount = SWAP_BE16 (fp->ff_extents[2].blockCount);
	mdb->drXTFlSize	= SWAP_BE32 (fp->ff_blocks * vcb->blockSize);
	mdb->drXTClpSiz	= SWAP_BE32 (fp->ff_clumpsize);
	FTOC(fp)->c_flag &= ~C_MODIFIED;

	/* Sync the catalog file's first 3 extents and sizes. */
	fp = VTOF(vcb->catalogRefNum);
	mdb->drCTExtRec[0].startBlock = SWAP_BE16 (fp->ff_extents[0].startBlock);
	mdb->drCTExtRec[0].blockCount = SWAP_BE16 (fp->ff_extents[0].blockCount);
	mdb->drCTExtRec[1].startBlock = SWAP_BE16 (fp->ff_extents[1].startBlock);
	mdb->drCTExtRec[1].blockCount = SWAP_BE16 (fp->ff_extents[1].blockCount);
	mdb->drCTExtRec[2].startBlock = SWAP_BE16 (fp->ff_extents[2].startBlock);
	mdb->drCTExtRec[2].blockCount = SWAP_BE16 (fp->ff_extents[2].blockCount);
	mdb->drCTFlSize	= SWAP_BE32 (fp->ff_blocks * vcb->blockSize);
	mdb->drCTClpSiz	= SWAP_BE32 (fp->ff_clumpsize);
	FTOC(fp)->c_flag &= ~C_MODIFIED;

	MarkVCBClean( vcb );

	lck_mtx_unlock(&hfsmp->hfs_mutex);

	/* If requested, flush out the alternate MDB */
	if (altflush) {
		struct buf *alt_bp = NULL;

		if (buf_meta_bread(hfsmp->hfs_devvp, hfsmp->hfs_alt_id_sector, sectorsize, NOCRED, &alt_bp) == 0) {
			bcopy(mdb, (char *)buf_dataptr(alt_bp) + HFS_ALT_OFFSET(sectorsize), kMDBSize);

			(void) VNOP_BWRITE(alt_bp);
		} else if (alt_bp)
			buf_brelse(alt_bp);
	}

	if (waitfor != MNT_WAIT)
		buf_bawrite(bp);
	else
		retval = VNOP_BWRITE(bp);

	return (retval);
}
3541
3542 /*
3543 * Flush any dirty in-memory mount data to the on-disk
3544 * volume header.
3545 *
3546 * Note: the on-disk volume signature is intentionally
3547 * not flushed since the on-disk "H+" and "HX" signatures
3548 * are always stored in-memory as "H+".
3549 */
3550 int
3551 hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush)
3552 {
3553 ExtendedVCB *vcb = HFSTOVCB(hfsmp);
3554 struct filefork *fp;
3555 HFSPlusVolumeHeader *volumeHeader, *altVH;
3556 int retval;
3557 struct buf *bp, *alt_bp;
3558 int i;
3559 daddr64_t priIDSector;
3560 int critical;
3561 u_int16_t signature;
3562 u_int16_t hfsversion;
3563
3564 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
3565 return(0);
3566 }
3567 if (hfsmp->hfs_flags & HFS_STANDARD) {
3568 return hfs_flushMDB(hfsmp, waitfor, altflush);
3569 }
3570 critical = altflush;
3571 priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
3572 HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size));
3573
3574 if (hfs_start_transaction(hfsmp) != 0) {
3575 return EINVAL;
3576 }
3577
3578 bp = NULL;
3579 alt_bp = NULL;
3580
3581 retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
3582 HFS_PHYSBLK_ROUNDDOWN(priIDSector, hfsmp->hfs_log_per_phys),
3583 hfsmp->hfs_physical_block_size, NOCRED, &bp);
3584 if (retval) {
3585 printf("hfs: err %d reading VH blk (%s)\n", retval, vcb->vcbVN);
3586 goto err_exit;
3587 }
3588
3589 volumeHeader = (HFSPlusVolumeHeader *)((char *)buf_dataptr(bp) +
3590 HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));
3591
3592 /*
3593 * Sanity check what we just read. If it's bad, try the alternate
3594 * instead.
3595 */
3596 signature = SWAP_BE16 (volumeHeader->signature);
3597 hfsversion = SWAP_BE16 (volumeHeader->version);
3598 if ((signature != kHFSPlusSigWord && signature != kHFSXSigWord) ||
3599 (hfsversion < kHFSPlusVersion) || (hfsversion > 100) ||
3600 (SWAP_BE32 (volumeHeader->blockSize) != vcb->blockSize)) {
3601 printf("hfs: corrupt VH on %s, sig 0x%04x, ver %d, blksize %d%s\n",
3602 vcb->vcbVN, signature, hfsversion,
3603 SWAP_BE32 (volumeHeader->blockSize),
3604 hfsmp->hfs_alt_id_sector ? "; trying alternate" : "");
3605 hfs_mark_volume_inconsistent(hfsmp);
3606
3607 if (hfsmp->hfs_alt_id_sector) {
3608 retval = buf_meta_bread(hfsmp->hfs_devvp,
3609 HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys),
3610 hfsmp->hfs_physical_block_size, NOCRED, &alt_bp);
3611 if (retval) {
3612 printf("hfs: err %d reading alternate VH (%s)\n", retval, vcb->vcbVN);
3613 goto err_exit;
3614 }
3615
3616 altVH = (HFSPlusVolumeHeader *)((char *)buf_dataptr(alt_bp) +
3617 HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size));
3618 signature = SWAP_BE16(altVH->signature);
3619 hfsversion = SWAP_BE16(altVH->version);
3620
3621 if ((signature != kHFSPlusSigWord && signature != kHFSXSigWord) ||
3622 (hfsversion < kHFSPlusVersion) || (kHFSPlusVersion > 100) ||
3623 (SWAP_BE32(altVH->blockSize) != vcb->blockSize)) {
3624 printf("hfs: corrupt alternate VH on %s, sig 0x%04x, ver %d, blksize %d\n",
3625 vcb->vcbVN, signature, hfsversion,
3626 SWAP_BE32(altVH->blockSize));
3627 retval = EIO;
3628 goto err_exit;
3629 }
3630
3631 /* The alternate is plausible, so use it. */
3632 bcopy(altVH, volumeHeader, kMDBSize);
3633 buf_brelse(alt_bp);
3634 alt_bp = NULL;
3635 } else {
3636 /* No alternate VH, nothing more we can do. */
3637 retval = EIO;
3638 goto err_exit;
3639 }
3640 }
3641
3642 if (hfsmp->jnl) {
3643 journal_modify_block_start(hfsmp->jnl, bp);
3644 }
3645
3646 /*
3647 * For embedded HFS+ volumes, update create date if it changed
3648 * (ie from a setattrlist call)
3649 */
3650 if ((vcb->hfsPlusIOPosOffset != 0) &&
3651 (SWAP_BE32 (volumeHeader->createDate) != vcb->localCreateDate)) {
3652 struct buf *bp2;
3653 HFSMasterDirectoryBlock *mdb;
3654
3655 retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
3656 HFS_PHYSBLK_ROUNDDOWN(HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size), hfsmp->hfs_log_per_phys),
3657 hfsmp->hfs_physical_block_size, NOCRED, &bp2);
3658 if (retval) {
3659 if (bp2)
3660 buf_brelse(bp2);
3661 retval = 0;
3662 } else {
3663 mdb = (HFSMasterDirectoryBlock *)(buf_dataptr(bp2) +
3664 HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));
3665
3666 if ( SWAP_BE32 (mdb->drCrDate) != vcb->localCreateDate )
3667 {
3668 if (hfsmp->jnl) {
3669 journal_modify_block_start(hfsmp->jnl, bp2);
3670 }
3671
3672 mdb->drCrDate = SWAP_BE32 (vcb->localCreateDate); /* pick up the new create date */
3673
3674 if (hfsmp->jnl) {
3675 journal_modify_block_end(hfsmp->jnl, bp2, NULL, NULL);
3676 } else {
3677 (void) VNOP_BWRITE(bp2); /* write out the changes */
3678 }
3679 }
3680 else
3681 {
3682 buf_brelse(bp2); /* just release it */
3683 }
3684 }
3685 }
3686
3687 lck_mtx_lock(&hfsmp->hfs_mutex);
3688
3689 /* Note: only update the lower 16 bits worth of attributes */
3690 volumeHeader->attributes = SWAP_BE32 (vcb->vcbAtrb);
3691 volumeHeader->journalInfoBlock = SWAP_BE32 (vcb->vcbJinfoBlock);
3692 if (hfsmp->jnl) {
3693 volumeHeader->lastMountedVersion = SWAP_BE32 (kHFSJMountVersion);
3694 } else {
3695 volumeHeader->lastMountedVersion = SWAP_BE32 (kHFSPlusMountVersion);
3696 }
3697 volumeHeader->createDate = SWAP_BE32 (vcb->localCreateDate); /* volume create date is in local time */
3698 volumeHeader->modifyDate = SWAP_BE32 (to_hfs_time(vcb->vcbLsMod));
3699 volumeHeader->backupDate = SWAP_BE32 (to_hfs_time(vcb->vcbVolBkUp));
3700 volumeHeader->fileCount = SWAP_BE32 (vcb->vcbFilCnt);
3701 volumeHeader->folderCount = SWAP_BE32 (vcb->vcbDirCnt);
3702 volumeHeader->totalBlocks = SWAP_BE32 (vcb->totalBlocks);
3703 volumeHeader->freeBlocks = SWAP_BE32 (vcb->freeBlocks);
3704 volumeHeader->nextAllocation = SWAP_BE32 (vcb->nextAllocation);
3705 volumeHeader->rsrcClumpSize = SWAP_BE32 (vcb->vcbClpSiz);
3706 volumeHeader->dataClumpSize = SWAP_BE32 (vcb->vcbClpSiz);
3707 volumeHeader->nextCatalogID = SWAP_BE32 (vcb->vcbNxtCNID);
3708 volumeHeader->writeCount = SWAP_BE32 (vcb->vcbWrCnt);
3709 volumeHeader->encodingsBitmap = SWAP_BE64 (vcb->encodingsBitmap);
3710
3711 if (bcmp(vcb->vcbFndrInfo, volumeHeader->finderInfo, sizeof(volumeHeader->finderInfo)) != 0) {
3712 bcopy(vcb->vcbFndrInfo, volumeHeader->finderInfo, sizeof(volumeHeader->finderInfo));
3713 critical = 1;
3714 }
3715
3716 /*
3717 * System files are only dirty when altflush is set.
3718 */
3719 if (altflush == 0) {
3720 goto done;
3721 }
3722
3723 /* Sync Extents over-flow file meta data */
3724 fp = VTOF(vcb->extentsRefNum);
3725 if (FTOC(fp)->c_flag & C_MODIFIED) {
3726 for (i = 0; i < kHFSPlusExtentDensity; i++) {
3727 volumeHeader->extentsFile.extents[i].startBlock =
3728 SWAP_BE32 (fp->ff_extents[i].startBlock);
3729 volumeHeader->extentsFile.extents[i].blockCount =
3730 SWAP_BE32 (fp->ff_extents[i].blockCount);
3731 }
3732 volumeHeader->extentsFile.logicalSize = SWAP_BE64 (fp->ff_size);
3733 volumeHeader->extentsFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
3734 volumeHeader->extentsFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
3735 FTOC(fp)->c_flag &= ~C_MODIFIED;
3736 }
3737
3738 /* Sync Catalog file meta data */
3739 fp = VTOF(vcb->catalogRefNum);
3740 if (FTOC(fp)->c_flag & C_MODIFIED) {
3741 for (i = 0; i < kHFSPlusExtentDensity; i++) {
3742 volumeHeader->catalogFile.extents[i].startBlock =
3743 SWAP_BE32 (fp->ff_extents[i].startBlock);
3744 volumeHeader->catalogFile.extents[i].blockCount =
3745 SWAP_BE32 (fp->ff_extents[i].blockCount);
3746 }
3747 volumeHeader->catalogFile.logicalSize = SWAP_BE64 (fp->ff_size);
3748 volumeHeader->catalogFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
3749 volumeHeader->catalogFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
3750 FTOC(fp)->c_flag &= ~C_MODIFIED;
3751 }
3752
3753 /* Sync Allocation file meta data */
3754 fp = VTOF(vcb->allocationsRefNum);
3755 if (FTOC(fp)->c_flag & C_MODIFIED) {
3756 for (i = 0; i < kHFSPlusExtentDensity; i++) {
3757 volumeHeader->allocationFile.extents[i].startBlock =
3758 SWAP_BE32 (fp->ff_extents[i].startBlock);
3759 volumeHeader->allocationFile.extents[i].blockCount =
3760 SWAP_BE32 (fp->ff_extents[i].blockCount);
3761 }
3762 volumeHeader->allocationFile.logicalSize = SWAP_BE64 (fp->ff_size);
3763 volumeHeader->allocationFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
3764 volumeHeader->allocationFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
3765 FTOC(fp)->c_flag &= ~C_MODIFIED;
3766 }
3767
3768 /* Sync Attribute file meta data */
3769 if (hfsmp->hfs_attribute_vp) {
3770 fp = VTOF(hfsmp->hfs_attribute_vp);
3771 for (i = 0; i < kHFSPlusExtentDensity; i++) {
3772 volumeHeader->attributesFile.extents[i].startBlock =
3773 SWAP_BE32 (fp->ff_extents[i].startBlock);
3774 volumeHeader->attributesFile.extents[i].blockCount =
3775 SWAP_BE32 (fp->ff_extents[i].blockCount);
3776 }
3777 FTOC(fp)->c_flag &= ~C_MODIFIED;
3778 volumeHeader->attributesFile.logicalSize = SWAP_BE64 (fp->ff_size);
3779 volumeHeader->attributesFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
3780 volumeHeader->attributesFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
3781 }
3782
3783 /* Sync Startup file meta data */
3784 if (hfsmp->hfs_startup_vp) {
3785 fp = VTOF(hfsmp->hfs_startup_vp);
3786 if (FTOC(fp)->c_flag & C_MODIFIED) {
3787 for (i = 0; i < kHFSPlusExtentDensity; i++) {
3788 volumeHeader->startupFile.extents[i].startBlock =
3789 SWAP_BE32 (fp->ff_extents[i].startBlock);
3790 volumeHeader->startupFile.extents[i].blockCount =
3791 SWAP_BE32 (fp->ff_extents[i].blockCount);
3792 }
3793 volumeHeader->startupFile.logicalSize = SWAP_BE64 (fp->ff_size);
3794 volumeHeader->startupFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
3795 volumeHeader->startupFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
3796 FTOC(fp)->c_flag &= ~C_MODIFIED;
3797 }
3798 }
3799
3800 done:
3801 MarkVCBClean(hfsmp);
3802 lck_mtx_unlock(&hfsmp->hfs_mutex);
3803
3804 /* If requested, flush out the alternate volume header */
3805 if (altflush && hfsmp->hfs_alt_id_sector) {
3806 if (buf_meta_bread(hfsmp->hfs_devvp,
3807 HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys),
3808 hfsmp->hfs_physical_block_size, NOCRED, &alt_bp) == 0) {
3809 if (hfsmp->jnl) {
3810 journal_modify_block_start(hfsmp->jnl, alt_bp);
3811 }
3812
3813 bcopy(volumeHeader, (char *)buf_dataptr(alt_bp) +
3814 HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size),
3815 kMDBSize);
3816
3817 if (hfsmp->jnl) {
3818 journal_modify_block_end(hfsmp->jnl, alt_bp, NULL, NULL);
3819 } else {
3820 (void) VNOP_BWRITE(alt_bp);
3821 }
3822 } else if (alt_bp)
3823 buf_brelse(alt_bp);
3824 }
3825
3826 if (hfsmp->jnl) {
3827 journal_modify_block_end(hfsmp->jnl, bp, NULL, NULL);
3828 } else {
3829 if (waitfor != MNT_WAIT)
3830 buf_bawrite(bp);
3831 else {
3832 retval = VNOP_BWRITE(bp);
3833 /* When critical data changes, flush the device cache */
3834 if (critical && (retval == 0)) {
3835 (void) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE,
3836 NULL, FWRITE, NULL);
3837 }
3838 }
3839 }
3840 hfs_end_transaction(hfsmp);
3841
3842 return (retval);
3843
3844 err_exit:
3845 if (alt_bp)
3846 buf_brelse(alt_bp);
3847 if (bp)
3848 buf_brelse(bp);
3849 hfs_end_transaction(hfsmp);
3850 return retval;
3851 }
3852
3853
3854 /*
3855 * Extend a file system.
3856 */
3857 int
hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
{
	struct proc *p = vfs_context_proc(context);
	kauth_cred_t cred = vfs_context_ucred(context);
	struct vnode *vp;
	struct vnode *devvp;
	struct buf *bp;
	struct filefork *fp = NULL;
	ExtendedVCB *vcb;
	struct cat_fork forkdata;		/* snapshot of the allocation fork, for rollback on error */
	u_int64_t oldsize;
	u_int64_t newblkcnt;
	u_int64_t prev_phys_block_count;
	u_int32_t addblks;
	u_int64_t sectorcnt;
	u_int32_t sectorsize;
	u_int32_t phys_sectorsize;
	daddr64_t prev_alt_sector;
	daddr_t bitmapblks;
	int lockflags = 0;
	int error;
	int64_t oldBitmapSize;
	Boolean usedExtendFileC = false;
	int transaction_begun = 0;

	devvp = hfsmp->hfs_devvp;
	vcb = HFSTOVCB(hfsmp);

	/*
	 * - HFS Plus file systems only.
	 * - Journaling must be enabled.
	 * - No embedded volumes.
	 */
	if ((vcb->vcbSigWord == kHFSSigWord) ||
	     (hfsmp->jnl == NULL) ||
	     (vcb->hfsPlusIOPosOffset != 0)) {
		return (EPERM);
	}
	/*
	 * If extending file system by non-root, then verify
	 * ownership and check permissions.
	 */
	if (suser(cred, NULL)) {
		/* Caller is not superuser: they must own the volume root and
		 * have write access to it, plus R/W access to the raw device. */
		error = hfs_vget(hfsmp, kHFSRootFolderID, &vp, 0, 0);

		if (error)
			return (error);
		error = hfs_owner_rights(hfsmp, VTOC(vp)->c_uid, cred, p, 0);
		if (error == 0) {
			error = hfs_write_access(vp, cred, p, false);
		}
		hfs_unlock(VTOC(vp));
		vnode_put(vp);
		if (error)
			return (error);

		error = vnode_authorize(devvp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, context);
		if (error)
			return (error);
	}
	/* The device's logical block size must match the size this volume
	 * was mounted with; otherwise our sector arithmetic would be wrong. */
	if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&sectorsize, 0, context)) {
		return (ENXIO);
	}
	if (sectorsize != hfsmp->hfs_logical_block_size) {
		return (ENXIO);
	}
	if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&sectorcnt, 0, context)) {
		return (ENXIO);
	}
	/* The underlying device must actually be big enough for the new size. */
	if ((sectorsize * sectorcnt) < newsize) {
		printf("hfs_extendfs: not enough space on device\n");
		return (ENOSPC);
	}
	error = VNOP_IOCTL(devvp, DKIOCGETPHYSICALBLOCKSIZE, (caddr_t)&phys_sectorsize, 0, context);
	if (error) {
		if ((error != ENOTSUP) && (error != ENOTTY)) {
			return (ENXIO);
		}
		/* If ioctl is not supported, force physical and logical sector size to be same */
		phys_sectorsize = sectorsize;
	}
	oldsize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;

	/*
	 * Validate new size: must grow the volume and be a multiple of both
	 * the logical and physical sector sizes.
	 */
	if ((newsize <= oldsize) || (newsize % sectorsize) || (newsize % phys_sectorsize)) {
		printf("hfs_extendfs: invalid size\n");
		return (EINVAL);
	}
	newblkcnt = newsize / vcb->blockSize;
	/* The allocation block count is a 32-bit field on disk. */
	if (newblkcnt > (u_int64_t)0xFFFFFFFF)
		return (EOVERFLOW);

	addblks = newblkcnt - vcb->totalBlocks;

	if (hfs_resize_debug) {
		printf ("hfs_extendfs: old: size=%qu, blkcnt=%u\n", oldsize, hfsmp->totalBlocks);
		printf ("hfs_extendfs: new: size=%qu, blkcnt=%u, addblks=%u\n", newsize, (u_int32_t)newblkcnt, addblks);
	}
	printf("hfs_extendfs: will extend \"%s\" by %d blocks\n", vcb->vcbVN, addblks);

	/* Only one resize (grow or shrink) may be in flight at a time. */
	HFS_MOUNT_LOCK(hfsmp, TRUE);
	if (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) {
		HFS_MOUNT_UNLOCK(hfsmp, TRUE);
		error = EALREADY;
		goto out;
	}
	hfsmp->hfs_flags |= HFS_RESIZE_IN_PROGRESS;
	HFS_MOUNT_UNLOCK(hfsmp, TRUE);

	/* Start with a clean journal. */
	hfs_journal_flush(hfsmp, TRUE);

	/*
	 * Enclose changes inside a transaction.
	 */
	if (hfs_start_transaction(hfsmp) != 0) {
		error = EINVAL;
		goto out;
	}
	transaction_begun = 1;

	/*
	 * Note: we take the attributes lock in case we have an attribute data vnode
	 * which needs to change size.
	 */
	lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
	vp = vcb->allocationsRefNum;
	fp = VTOF(vp);
	/* Save the allocation fork so it can be restored if we fail below. */
	bcopy(&fp->ff_data, &forkdata, sizeof(forkdata));

	/*
	 * Calculate additional space required (if any) by allocation bitmap.
	 */
	oldBitmapSize = fp->ff_size;
	bitmapblks = roundup((newblkcnt+7) / 8, vcb->vcbVBMIOSize) / vcb->blockSize;
	if (bitmapblks > (daddr_t)fp->ff_blocks)
		bitmapblks -= fp->ff_blocks;
	else
		bitmapblks = 0;

	/*
	 * The allocation bitmap can contain unused bits that are beyond end of
	 * current volume's allocation blocks.  Usually they are supposed to be
	 * zero'ed out but there can be cases where they might be marked as used.
	 * After extending the file system, those bits can represent valid
	 * allocation blocks, so we mark all the bits from the end of current
	 * volume to end of allocation bitmap as "free".
	 */
	BlockMarkFreeUnused(vcb, vcb->totalBlocks,
			(fp->ff_blocks * vcb->blockSize * 8) - vcb->totalBlocks);

	if (bitmapblks > 0) {
		daddr64_t blkno;
		daddr_t blkcnt;
		off_t bytesAdded;

		/*
		 * Get the bitmap's current size (in allocation blocks) so we know
		 * where to start zero filling once the new space is added.  We've
		 * got to do this before the bitmap is grown.
		 */
		blkno  = (daddr64_t)fp->ff_blocks;

		/*
		 * Try to grow the allocation file in the normal way, using allocation
		 * blocks already existing in the file system.  This way, we might be
		 * able to grow the bitmap contiguously, or at least in the metadata
		 * zone.
		 */
		error = ExtendFileC(vcb, fp, bitmapblks * vcb->blockSize, 0,
				kEFAllMask | kEFNoClumpMask | kEFReserveMask
				| kEFMetadataMask | kEFContigMask, &bytesAdded);

		if (error == 0) {
			usedExtendFileC = true;
		} else {
			/*
			 * If the above allocation failed, fall back to allocating the new
			 * extent of the bitmap from the space we're going to add.  Since those
			 * blocks don't yet belong to the file system, we have to update the
			 * extent list directly, and manually adjust the file size.
			 */
			bytesAdded = 0;
			error = AddFileExtent(vcb, fp, vcb->totalBlocks, bitmapblks);
			if (error) {
				printf("hfs_extendfs: error %d adding extents\n", error);
				goto out;
			}
			fp->ff_blocks += bitmapblks;
			VTOC(vp)->c_blocks = fp->ff_blocks;
			VTOC(vp)->c_flag |= C_MODIFIED;
		}

		/*
		 * Update the allocation file's size to include the newly allocated
		 * blocks.  Note that ExtendFileC doesn't do this, which is why this
		 * statement is outside the above "if" statement.
		 */
		fp->ff_size += (u_int64_t)bitmapblks * (u_int64_t)vcb->blockSize;

		/*
		 * Zero out the new bitmap blocks.
		 */
		{

			bp = NULL;
			blkcnt = bitmapblks;
			while (blkcnt > 0) {
				error = (int)buf_meta_bread(vp, blkno, vcb->blockSize, NOCRED, &bp);
				if (error) {
					if (bp) {
						buf_brelse(bp);
					}
					break;
				}
				bzero((char *)buf_dataptr(bp), vcb->blockSize);
				/* Aged buffers are recycled quickly; we won't re-read these. */
				buf_markaged(bp);
				error = (int)buf_bwrite(bp);
				if (error)
					break;
				--blkcnt;
				++blkno;
			}
		}
		if (error) {
			printf("hfs_extendfs: error %d clearing blocks\n", error);
			goto out;
		}
		/*
		 * Mark the new bitmap space as allocated.
		 *
		 * Note that ExtendFileC will have marked any blocks it allocated, so
		 * this is only needed if we used AddFileExtent.  Also note that this
		 * has to come *after* the zero filling of new blocks in the case where
		 * we used AddFileExtent (since the part of the bitmap we're touching
		 * is in those newly allocated blocks).
		 */
		if (!usedExtendFileC) {
			error = BlockMarkAllocated(vcb, vcb->totalBlocks, bitmapblks);
			if (error) {
				printf("hfs_extendfs: error %d setting bitmap\n", error);
				goto out;
			}
			vcb->freeBlocks -= bitmapblks;
		}
	}
	/*
	 * Mark the new alternate VH as allocated.
	 * The alternate volume header occupies the last 1KB of the volume,
	 * which is two allocation blocks when blockSize is 512, else one.
	 */
	if (vcb->blockSize == 512)
		error = BlockMarkAllocated(vcb, vcb->totalBlocks + addblks - 2, 2);
	else
		error = BlockMarkAllocated(vcb, vcb->totalBlocks + addblks - 1, 1);
	if (error) {
		printf("hfs_extendfs: error %d setting bitmap (VH)\n", error);
		goto out;
	}
	/*
	 * Mark the old alternate VH as free.
	 */
	if (vcb->blockSize == 512)
		(void) BlockMarkFree(vcb, vcb->totalBlocks - 2, 2);
	else
		(void) BlockMarkFree(vcb, vcb->totalBlocks - 1, 1);
	/*
	 * Adjust file system variables for new space.
	 * Save previous values first so the error path below can roll back.
	 */
	prev_phys_block_count = hfsmp->hfs_logical_block_count;
	prev_alt_sector = hfsmp->hfs_alt_id_sector;

	vcb->totalBlocks += addblks;
	vcb->freeBlocks += addblks;
	hfsmp->hfs_logical_block_count = newsize / sectorsize;
	hfsmp->hfs_alt_id_sector = (hfsmp->hfsPlusIOPosOffset / sectorsize) +
	                          HFS_ALT_SECTOR(sectorsize, hfsmp->hfs_logical_block_count);
	MarkVCBDirty(vcb);
	error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
	if (error) {
		/* NOTE(review): format string lacks a trailing newline. */
		printf("hfs_extendfs: couldn't flush volume headers (%d)", error);
		/*
		 * Restore to old state.
		 */
		if (usedExtendFileC) {
			(void) TruncateFileC(vcb, fp, oldBitmapSize, 0, FORK_IS_RSRC(fp),
								 FTOC(fp)->c_fileid, false);
		} else {
			fp->ff_blocks -= bitmapblks;
			fp->ff_size -= (u_int64_t)bitmapblks * (u_int64_t)vcb->blockSize;
			/*
			 * No need to mark the excess blocks free since those bitmap blocks
			 * are no longer part of the bitmap.  But we do need to undo the
			 * effect of the "vcb->freeBlocks -= bitmapblks" above.
			 */
			vcb->freeBlocks += bitmapblks;
		}
		vcb->totalBlocks -= addblks;
		vcb->freeBlocks -= addblks;
		hfsmp->hfs_logical_block_count = prev_phys_block_count;
		hfsmp->hfs_alt_id_sector = prev_alt_sector;
		MarkVCBDirty(vcb);
		/* Re-mark the old alternate VH (freed above) as allocated again;
		 * if that fails the bitmap no longer matches reality. */
		if (vcb->blockSize == 512) {
			if (BlockMarkAllocated(vcb, vcb->totalBlocks - 2, 2)) {
				hfs_mark_volume_inconsistent(hfsmp);
			}
		} else {
			if (BlockMarkAllocated(vcb, vcb->totalBlocks - 1, 1)) {
				hfs_mark_volume_inconsistent(hfsmp);
			}
		}
		goto out;
	}
	/*
	 * Invalidate the old alternate volume header.
	 */
	bp = NULL;
	if (prev_alt_sector) {
		if (buf_meta_bread(hfsmp->hfs_devvp,
				HFS_PHYSBLK_ROUNDDOWN(prev_alt_sector, hfsmp->hfs_log_per_phys),
				hfsmp->hfs_physical_block_size, NOCRED, &bp) == 0) {
			journal_modify_block_start(hfsmp->jnl, bp);

			bzero((char *)buf_dataptr(bp) + HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size), kMDBSize);

			journal_modify_block_end(hfsmp->jnl, bp, NULL, NULL);
		} else if (bp) {
			buf_brelse(bp);
		}
	}

	/*
	 * Update the metadata zone size based on current volume size
	 */
	hfs_metadatazone_init(hfsmp, false);

	/*
	 * Adjust the size of hfsmp->hfs_attrdata_vp
	 */
	if (hfsmp->hfs_attrdata_vp) {
		struct cnode *attr_cp;
		struct filefork *attr_fp;

		if (vnode_get(hfsmp->hfs_attrdata_vp) == 0) {
			attr_cp = VTOC(hfsmp->hfs_attrdata_vp);
			attr_fp = VTOF(hfsmp->hfs_attrdata_vp);

			attr_cp->c_blocks = newblkcnt;
			attr_fp->ff_blocks = newblkcnt;
			attr_fp->ff_extents[0].blockCount = newblkcnt;
			attr_fp->ff_size = (off_t) newblkcnt * hfsmp->blockSize;
			ubc_setsize(hfsmp->hfs_attrdata_vp, attr_fp->ff_size);
			vnode_put(hfsmp->hfs_attrdata_vp);
		}
	}

	/*
	 * Update the R/B Tree if necessary.  Since we don't have to drop the systemfile
	 * locks in the middle of these operations like we do in the truncate case
	 * where we have to relocate files, we can only update the red-black tree
	 * if there were actual changes made to the bitmap.  Also, we can't really scan the
	 * new portion of the bitmap before it has been allocated. The BlockMarkAllocated
	 * routines are smart enough to avoid the r/b tree if the portion they are manipulating is
	 * not currently controlled by the tree.
	 *
	 * We only update hfsmp->allocLimit if totalBlocks actually increased.
	 */

	if (error == 0) {
		UpdateAllocLimit(hfsmp, hfsmp->totalBlocks);
	}

	/* Log successful extending */
	printf("hfs_extendfs: extended \"%s\" to %d blocks (was %d blocks)\n",
	       hfsmp->vcbVN, hfsmp->totalBlocks, (u_int32_t)(oldsize/hfsmp->blockSize));

out:
	if (error && fp) {
		/* Restore allocation fork. */
		bcopy(&forkdata, &fp->ff_data, sizeof(forkdata));
		VTOC(vp)->c_blocks = fp->ff_blocks;

	}

	/* Clear the resize flag, drop locks, and close out the transaction
	 * (followed by a journal flush and a device cache flush so everything
	 * is durably on disk before we return). */
	HFS_MOUNT_LOCK(hfsmp, TRUE);
	hfsmp->hfs_flags &= ~HFS_RESIZE_IN_PROGRESS;
	HFS_MOUNT_UNLOCK(hfsmp, TRUE);
	if (lockflags) {
		hfs_systemfile_unlock(hfsmp, lockflags);
	}
	if (transaction_begun) {
		hfs_end_transaction(hfsmp);
		hfs_journal_flush(hfsmp, FALSE);
		/* Just to be sure, sync all data to the disk */
		(void) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
	}

	return MacToVFSError(error);
}
4257
4258 #define HFS_MIN_SIZE (32LL * 1024LL * 1024LL)
4259
4260 /*
4261 * Truncate a file system (while still mounted).
4262 */
int
hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
{
	struct buf *bp = NULL;
	u_int64_t oldsize;
	u_int32_t newblkcnt;
	u_int32_t reclaimblks = 0;
	int lockflags = 0;
	int transaction_begun = 0;
	Boolean updateFreeBlocks = false;
	Boolean disable_sparse = false;
	int error = 0;

	/* Only one resize (grow or shrink) may be in flight at a time.
	 * Reset the resize-progress counters under the mount lock. */
	lck_mtx_lock(&hfsmp->hfs_mutex);
	if (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) {
		lck_mtx_unlock(&hfsmp->hfs_mutex);
		return (EALREADY);
	}
	hfsmp->hfs_flags |= HFS_RESIZE_IN_PROGRESS;
	hfsmp->hfs_resize_blocksmoved = 0;
	hfsmp->hfs_resize_totalblocks = 0;
	hfsmp->hfs_resize_progress = 0;
	lck_mtx_unlock(&hfsmp->hfs_mutex);

	/*
	 * - Journaled HFS Plus volumes only.
	 * - No embedded volumes.
	 */
	if ((hfsmp->jnl == NULL) ||
	    (hfsmp->hfsPlusIOPosOffset != 0)) {
		error = EPERM;
		goto out;
	}
	oldsize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
	newblkcnt = newsize / hfsmp->blockSize;
	reclaimblks = hfsmp->totalBlocks - newblkcnt;

	if (hfs_resize_debug) {
		printf ("hfs_truncatefs: old: size=%qu, blkcnt=%u, freeblks=%u\n", oldsize, hfsmp->totalBlocks, hfs_freeblks(hfsmp, 1));
		printf ("hfs_truncatefs: new: size=%qu, blkcnt=%u, reclaimblks=%u\n", newsize, newblkcnt, reclaimblks);
	}

	/* Make sure new size is valid: not below the minimum, actually
	 * shrinking, and a multiple of both sector sizes. */
	if ((newsize < HFS_MIN_SIZE) ||
	    (newsize >= oldsize) ||
	    (newsize % hfsmp->hfs_logical_block_size) ||
	    (newsize % hfsmp->hfs_physical_block_size)) {
		printf ("hfs_truncatefs: invalid size (newsize=%qu, oldsize=%qu)\n", newsize, oldsize);
		error = EINVAL;
		goto out;
	}

	/*
	 * Make sure that the file system has enough free blocks reclaim.
	 *
	 * Before resize, the disk is divided into four zones -
	 * 	A. Allocated_Stationary - These are allocated blocks that exist
	 * 	   before the new end of disk.  These blocks will not be
	 * 	   relocated or modified during resize.
	 * 	B. Free_Stationary - These are free blocks that exist before the
	 * 	   new end of disk.  These blocks can be used for any new
	 * 	   allocations during resize, including allocation for relocating
	 * 	   data from the area of disk being reclaimed.
	 * 	C. Allocated_To-Reclaim - These are allocated blocks that exist
	 *	   beyond the new end of disk.  These blocks need to be reclaimed
	 * 	   during resize by allocating equal number of blocks in Free
	 * 	   Stationary zone and copying the data.
	 * 	D. Free_To-Reclaim - These are free blocks that exist beyond the
	 * 	   new end of disk.  Nothing special needs to be done to reclaim
	 * 	   them.
	 *
	 * Total number of blocks on the disk before resize:
	 * ------------------------------------------------
	 * 	Total Blocks = Allocated_Stationary + Free_Stationary +
	 * 	               Allocated_To-Reclaim + Free_To-Reclaim
	 *
	 * Total number of blocks that need to be reclaimed:
	 * ------------------------------------------------
	 *	Blocks to Reclaim = Allocated_To-Reclaim + Free_To-Reclaim
	 *
	 * Note that the check below also makes sure that we have enough space
	 * to relocate data from Allocated_To-Reclaim to Free_Stationary.
	 * Therefore we do not need to check total number of blocks to relocate
	 * later in the code.
	 *
	 * The condition below gets converted to:
	 *
	 * Allocated To-Reclaim + Free To-Reclaim >= Free Stationary + Free To-Reclaim
	 *
	 * which is equivalent to:
	 *
	 *              Allocated To-Reclaim >= Free Stationary
	 */
	if (reclaimblks >= hfs_freeblks(hfsmp, 1)) {
		printf("hfs_truncatefs: insufficient space (need %u blocks; have %u free blocks)\n", reclaimblks, hfs_freeblks(hfsmp, 1));
		error = ENOSPC;
		goto out;
	}

	/* Start with a clean journal. */
	hfs_journal_flush(hfsmp, TRUE);

	if (hfs_start_transaction(hfsmp) != 0) {
		error = EINVAL;
		goto out;
	}
	transaction_begun = 1;

	/* Take the bitmap lock to update the alloc limit field */
	lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);

	/*
	 * Prevent new allocations from using the part we're trying to truncate.
	 *
	 * NOTE: allocLimit is set to the allocation block number where the new
	 * alternate volume header will be.  That way there will be no files to
	 * interfere with allocating the new alternate volume header, and no files
	 * in the allocation blocks beyond (i.e. the blocks we're trying to
	 * truncate away).
	 *
	 * Also shrink the red-black tree if needed.
	 */
	if (hfsmp->blockSize == 512) {
		error = UpdateAllocLimit (hfsmp, newblkcnt - 2);
	}
	else {
		error = UpdateAllocLimit (hfsmp, newblkcnt - 1);
	}

	/* Sparse devices use first fit allocation which is not ideal
	 * for volume resize which requires best fit allocation.  If a
	 * sparse device is being truncated, disable the sparse device
	 * property temporarily for the duration of resize.  Also reset
	 * the free extent cache so that it is rebuilt as sorted by
	 * totalBlocks instead of startBlock.
	 *
	 * Note that this will affect all allocations on the volume and
	 * ideal fix would be just to modify resize-related allocations,
	 * but it will result in complexity like handling of two free
	 * extent caches sorted differently, etc.  So we stick to this
	 * solution for now.
	 */
	HFS_MOUNT_LOCK(hfsmp, TRUE);
	if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
		hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
		ResetVCBFreeExtCache(hfsmp);
		disable_sparse = true;
	}

	/*
	 * Update the volume free block count to reflect the total number
	 * of free blocks that will exist after a successful resize.
	 * Relocation of extents will result in no net change in the total
	 * free space on the disk.  Therefore the code that allocates
	 * space for new extent and deallocates the old extent explicitly
	 * prevents updating the volume free block count.  It will also
	 * prevent false disk full error when the number of blocks in
	 * an extent being relocated is more than the free blocks that
	 * will exist after the volume is resized.
	 */
	hfsmp->freeBlocks -= reclaimblks;
	updateFreeBlocks = true;
	HFS_MOUNT_UNLOCK(hfsmp, TRUE);

	if (lockflags) {
		hfs_systemfile_unlock(hfsmp, lockflags);
		lockflags = 0;
	}

	/*
	 * Update the metadata zone size to match the new volume size,
	 * and if the new size is too small, the metadata zone might be
	 * disabled entirely.
	 */
	hfs_metadatazone_init(hfsmp, false);

	/*
	 * If some files have blocks at or beyond the location of the
	 * new alternate volume header, recalculate free blocks and
	 * reclaim blocks.  Otherwise just update free blocks count.
	 *
	 * The current allocLimit is set to the location of new alternate
	 * volume header, and reclaimblks are the total number of blocks
	 * that need to be reclaimed.  So the check below is really
	 * ignoring the blocks allocated for old alternate volume header.
	 */
	if (hfs_isallocated(hfsmp, hfsmp->allocLimit, reclaimblks)) {
		/*
		 * hfs_reclaimspace will use separate transactions when
		 * relocating files (so we don't overwhelm the journal).
		 */
		hfs_end_transaction(hfsmp);
		transaction_begun = 0;

		/* Attempt to reclaim some space. */
		error = hfs_reclaimspace(hfsmp, hfsmp->allocLimit, reclaimblks, context);
		if (error != 0) {
			printf("hfs_truncatefs: couldn't reclaim space on %s (error=%d)\n", hfsmp->vcbVN, error);
			error = ENOSPC;
			goto out;
		}
		if (hfs_start_transaction(hfsmp) != 0) {
			error = EINVAL;
			goto out;
		}
		transaction_begun = 1;

		/* Check if we're clear now. */
		error = hfs_isallocated(hfsmp, hfsmp->allocLimit, reclaimblks);
		if (error != 0) {
			printf("hfs_truncatefs: didn't reclaim enough space on %s (error=%d)\n", hfsmp->vcbVN, error);
			error = EAGAIN;  /* tell client to try again */
			goto out;
		}
	}

	/*
	 * Note: we take the attributes lock in case we have an attribute data vnode
	 * which needs to change size.
	 */
	lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);

	/*
	 * Allocate last 1KB for alternate volume header.
	 * (Two allocation blocks when blockSize is 512, else one.)
	 */
	error = BlockMarkAllocated(hfsmp, hfsmp->allocLimit, (hfsmp->blockSize == 512) ? 2 : 1);
	if (error) {
		printf("hfs_truncatefs: Error %d allocating new alternate volume header\n", error);
		goto out;
	}

	/*
	 * Mark the old alternate volume header as free.
	 * We don't bother shrinking allocation bitmap file.
	 */
	if (hfsmp->blockSize == 512)
		(void) BlockMarkFree(hfsmp, hfsmp->totalBlocks - 2, 2);
	else
		(void) BlockMarkFree(hfsmp, hfsmp->totalBlocks - 1, 1);

	/*
	 * Invalidate the existing alternate volume header.
	 *
	 * Don't include this in a transaction (don't call journal_modify_block)
	 * since this block will be outside of the truncated file system!
	 */
	if (hfsmp->hfs_alt_id_sector) {
		error = buf_meta_bread(hfsmp->hfs_devvp,
				HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys),
				hfsmp->hfs_physical_block_size, NOCRED, &bp);
		if (error == 0) {
			bzero((void*)((char *)buf_dataptr(bp) + HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size)), kMDBSize);
			(void) VNOP_BWRITE(bp);
		} else {
			if (bp) {
				buf_brelse(bp);
			}
		}
		bp = NULL;
	}

	/* Log successful shrinking. */
	printf("hfs_truncatefs: shrank \"%s\" to %d blocks (was %d blocks)\n",
	       hfsmp->vcbVN, newblkcnt, hfsmp->totalBlocks);

	/*
	 * Adjust file system variables and flush them to disk.
	 */
	hfsmp->totalBlocks = newblkcnt;
	hfsmp->hfs_logical_block_count = newsize / hfsmp->hfs_logical_block_size;
	hfsmp->hfs_alt_id_sector = HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, hfsmp->hfs_logical_block_count);
	MarkVCBDirty(hfsmp);
	error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
	if (error)
		/* At this point the on-disk state is past the point of no return;
		 * failure to flush the header is unrecoverable. */
		panic("hfs_truncatefs: unexpected error flushing volume header (%d)\n", error);

	/*
	 * Adjust the size of hfsmp->hfs_attrdata_vp
	 */
	if (hfsmp->hfs_attrdata_vp) {
		struct cnode *cp;
		struct filefork *fp;

		if (vnode_get(hfsmp->hfs_attrdata_vp) == 0) {
			cp = VTOC(hfsmp->hfs_attrdata_vp);
			fp = VTOF(hfsmp->hfs_attrdata_vp);

			cp->c_blocks = newblkcnt;
			fp->ff_blocks = newblkcnt;
			fp->ff_extents[0].blockCount = newblkcnt;
			fp->ff_size = (off_t) newblkcnt * hfsmp->blockSize;
			ubc_setsize(hfsmp->hfs_attrdata_vp, fp->ff_size);
			vnode_put(hfsmp->hfs_attrdata_vp);
		}
	}

out:
	/*
	 * Update the allocLimit to acknowledge the last one or two blocks now.
	 * Add it to the tree as well if necessary.
	 */
	UpdateAllocLimit (hfsmp, hfsmp->totalBlocks);

	HFS_MOUNT_LOCK(hfsmp, TRUE);
	if (disable_sparse == true) {
		/* Now that resize is completed, set the volume to be sparse
		 * device again so that all further allocations will be first
		 * fit instead of best fit.  Reset free extent cache so that
		 * it is rebuilt.
		 */
		hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
		ResetVCBFreeExtCache(hfsmp);
	}

	/* On failure, undo the speculative free-block reduction made above. */
	if (error && (updateFreeBlocks == true)) {
		hfsmp->freeBlocks += reclaimblks;
	}

	if (hfsmp->nextAllocation >= hfsmp->allocLimit) {
		hfsmp->nextAllocation = hfsmp->hfs_metazone_end + 1;
	}
	hfsmp->hfs_flags &= ~HFS_RESIZE_IN_PROGRESS;
	HFS_MOUNT_UNLOCK(hfsmp, TRUE);

	/* On error, reset the metadata zone for original volume size */
	if (error && (updateFreeBlocks == true)) {
		hfs_metadatazone_init(hfsmp, false);
	}

	if (lockflags) {
		hfs_systemfile_unlock(hfsmp, lockflags);
	}
	if (transaction_begun) {
		hfs_end_transaction(hfsmp);
		hfs_journal_flush(hfsmp, FALSE);
		/* Just to be sure, sync all data to the disk */
		(void) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
	}

	return MacToVFSError(error);
}
4603
4604
4605 /*
4606 * Invalidate the physical block numbers associated with buffer cache blocks
4607 * in the given extent of the given vnode.
4608 */
/* Argument block passed through buf_iterate() to
 * hfs_invalidate_block_numbers_callback: the range of device sectors
 * whose cached buffers should have their physical block numbers reset. */
struct hfs_inval_blk_no {
	daddr64_t sectorStart;	/* first device sector of the range */
	daddr64_t sectorCount;	/* number of device sectors in the range */
};
4613 static int
4614 hfs_invalidate_block_numbers_callback(buf_t bp, void *args_in)
4615 {
4616 daddr64_t blkno;
4617 struct hfs_inval_blk_no *args;
4618
4619 blkno = buf_blkno(bp);
4620 args = args_in;
4621
4622 if (blkno >= args->sectorStart && blkno < args->sectorStart+args->sectorCount)
4623 buf_setblkno(bp, buf_lblkno(bp));
4624
4625 return BUF_RETURNED;
4626 }
4627 static void
4628 hfs_invalidate_sectors(struct vnode *vp, daddr64_t sectorStart, daddr64_t sectorCount)
4629 {
4630 struct hfs_inval_blk_no args;
4631 args.sectorStart = sectorStart;
4632 args.sectorCount = sectorCount;
4633
4634 buf_iterate(vp, hfs_invalidate_block_numbers_callback, BUF_SCAN_DIRTY|BUF_SCAN_CLEAN, &args);
4635 }
4636
4637
4638 /*
4639 * Copy the contents of an extent to a new location. Also invalidates the
4640 * physical block number of any buffer cache block in the copied extent
4641 * (so that if the block is written, it will go through VNOP_BLOCKMAP to
4642 * determine the new physical block number).
4643 */
static int
hfs_copy_extent(
	struct hfsmount *hfsmp,
	struct vnode *vp,		/* The file whose extent is being copied. */
	u_int32_t oldStart,		/* The start of the source extent. */
	u_int32_t newStart,		/* The start of the destination extent. */
	u_int32_t blockCount,		/* The number of allocation blocks to copy. */
	vfs_context_t context)
{
	int err = 0;
	size_t bufferSize;
	void *buffer = NULL;
	struct vfsioattr ioattr;
	buf_t bp = NULL;
	off_t resid;			/* bytes remaining to copy */
	size_t ioSize;
	u_int32_t ioSizeSectors;	/* Device sectors in this I/O */
	daddr64_t srcSector, destSector;
	u_int32_t sectorsPerBlock = hfsmp->blockSize / hfsmp->hfs_logical_block_size;
#if CONFIG_PROTECT
	int cpenabled = 0;
#endif

	/*
	 * Sanity check that we have locked the vnode of the file we're copying.
	 *
	 * But since hfs_systemfile_lock() doesn't actually take the lock on
	 * the allocation file if a journal is active, ignore the check if the
	 * file being copied is the allocation file.
	 */
	struct cnode *cp = VTOC(vp);
	if (cp != hfsmp->hfs_allocation_cp && cp->c_lockowner != current_thread())
		panic("hfs_copy_extent: vp=%p (cp=%p) not owned?\n", vp, cp);

#if CONFIG_PROTECT
	/* Prepare the CP blob and get it ready for use */
	if (!vnode_issystem (vp) && vnode_isreg(vp) &&
			cp_fs_protected (hfsmp->hfs_mp)) {
		int cp_err = 0;
		cp_err = cp_handle_relocate (cp);
		if (cp_err) {
			/*
			 * can't copy the file because we couldn't set up keys.
			 * bail out
			 */
			return cp_err;
		}
		else {
			cpenabled = 1;
		}
	}
#endif

	/*
	 * Determine the I/O size to use
	 *
	 * NOTE: Many external drives will result in an ioSize of 128KB.
	 * TODO: Should we use a larger buffer, doing several consecutive
	 * reads, then several consecutive writes?
	 */
	vfs_ioattr(hfsmp->hfs_mp, &ioattr);
	bufferSize = MIN(ioattr.io_maxreadcnt, ioattr.io_maxwritecnt);
	if (kmem_alloc(kernel_map, (vm_offset_t*) &buffer, bufferSize))
		return ENOMEM;

	/* Get a buffer for doing the I/O */
	bp = buf_alloc(hfsmp->hfs_devvp);
	buf_setdataptr(bp, (uintptr_t)buffer);

	/* Convert allocation-block addresses to device-sector addresses. */
	resid  = (off_t) blockCount * (off_t) hfsmp->blockSize;
	srcSector  = (daddr64_t) oldStart * hfsmp->blockSize / hfsmp->hfs_logical_block_size;
	destSector = (daddr64_t) newStart * hfsmp->blockSize / hfsmp->hfs_logical_block_size;
	while (resid > 0) {
		ioSize = MIN(bufferSize, (size_t) resid);
		ioSizeSectors = ioSize / hfsmp->hfs_logical_block_size;

		/* Prepare the buffer for reading */
		buf_reset(bp, B_READ);
		buf_setsize(bp, ioSize);
		buf_setcount(bp, ioSize);
		buf_setblkno(bp, srcSector);
		buf_setlblkno(bp, srcSector);

		/* Attach the CP to the buffer */
#if CONFIG_PROTECT
		if (cpenabled) {
			buf_setcpaddr (bp, cp->c_cpentry);
		}
#endif

		/* Do the read */
		err = VNOP_STRATEGY(bp);
		if (!err)
			err = buf_biowait(bp);
		if (err) {
			printf("hfs_copy_extent: Error %d from VNOP_STRATEGY (read)\n", err);
			break;
		}

		/* Prepare the buffer for writing */
		buf_reset(bp, B_WRITE);
		buf_setsize(bp, ioSize);
		buf_setcount(bp, ioSize);
		buf_setblkno(bp, destSector);
		buf_setlblkno(bp, destSector);
		/* For system files, force-unit-access keeps the copy durable when
		 * the journal relies on FUA instead of explicit cache flushes. */
		if (vnode_issystem(vp) && journal_uses_fua(hfsmp->jnl))
			buf_markfua(bp);

#if CONFIG_PROTECT
		/* Attach the CP to the buffer */
		if (cpenabled) {
			buf_setcpaddr (bp, cp->c_cpentry);
		}
#endif

		/* Do the write */
		vnode_startwrite(hfsmp->hfs_devvp);
		err = VNOP_STRATEGY(bp);
		if (!err)
			err = buf_biowait(bp);
		if (err) {
			printf("hfs_copy_extent: Error %d from VNOP_STRATEGY (write)\n", err);
			break;
		}

		resid -= ioSize;
		srcSector += ioSizeSectors;
		destSector += ioSizeSectors;
	}
	if (bp)
		buf_free(bp);
	if (buffer)
		kmem_free(kernel_map, (vm_offset_t)buffer, bufferSize);

	/* Make sure all writes have been flushed to disk. */
	if (vnode_issystem(vp) && !journal_uses_fua(hfsmp->jnl)) {
		err = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
		if (err) {
			printf("hfs_copy_extent: DKIOCSYNCHRONIZECACHE failed (%d)\n", err);
			err = 0;	/* Don't fail the copy. */
		}
	}

	/* Detach cached buffers from the old physical location so future
	 * writes re-map to the new one. */
	if (!err)
		hfs_invalidate_sectors(vp, (daddr64_t)oldStart*sectorsPerBlock, (daddr64_t)blockCount*sectorsPerBlock);

	return err;
}
4792
4793
4794 /* Structure to store state of reclaiming extents from a
4795 * given file. hfs_reclaim_file()/hfs_reclaim_xattr()
4796 * initializes the values in this structure which are then
4797 * used by code that reclaims and splits the extents.
4798 */
struct hfs_reclaim_extent_info {
	struct vnode *vp;		/* Vnode of the file whose extents are being reclaimed */
	u_int32_t fileID;		/* Catalog node ID of the file */
	u_int8_t forkType;		/* Data or resource fork */
	u_int8_t is_dirlink;             /* Extent belongs to directory hard link */
	u_int8_t is_sysfile;             /* Extent belongs to system file */
	u_int8_t is_xattr;               /* Extent belongs to extent-based xattr */
	u_int8_t extent_index;		/* Index of current extent within the extent record */
	int lockflags;                   /* Locks that reclaim and split code should grab before modifying the extent record */
	u_int32_t blocks_relocated;      /* Total blocks relocated for this file till now */
	u_int32_t recStartBlock;         /* File allocation block number (FABN) for current extent record */
	u_int32_t cur_blockCount;        /* Number of allocation blocks that have been checked for reclaim */
	struct filefork *catalog_fp;     /* If non-NULL, extent is from catalog record */
	union record {
		HFSPlusExtentRecord overflow;/* Extent record from overflow extents btree */
		HFSPlusAttrRecord xattr;     /* Attribute record for large EAs */
	} record;
	HFSPlusExtentDescriptor *extents; /* Pointer to current extent record being processed.
					   * For catalog extent record, points to the correct
					   * extent information in filefork.  For overflow extent
					   * record, or xattr record, points to extent record
					   * in the structure above
					   */
	struct cat_desc *dirlink_desc;	/* Catalog descriptor for directory hard links */
	struct cat_attr *dirlink_attr;	/* Catalog attributes for directory hard links */
	struct filefork *dirlink_fork;	 /* For directory hard links, fp points actually to this */
	struct BTreeIterator *iterator;  /* Shared read/write iterator, hfs_reclaim_file/xattr()
					  * use it for reading and hfs_reclaim_extent()/hfs_split_extent()
					  * use it for writing updated extent record
					  */
	struct FSBufferDescriptor btdata; /* Shared btdata for reading/writing extent record, same as iterator above */
	u_int16_t recordlen;		/* Length of the btree record read into the union above */
	int overflow_count;              /* For debugging, counter for overflow extent record */
	FCB *fcb;                        /* Pointer to the current btree being traversed */
};
4834
4835 /*
4836 * Split the current extent into two extents, with first extent
4837 * to contain given number of allocation blocks. Splitting of
4838 * extent creates one new extent entry which can result in
4839 * shifting of many entries through all the extent records of a
4840 * file, and/or creating a new extent record in the overflow
4841 * extent btree.
4842 *
4843 * Example:
4844 * The diagram below represents two consecutive extent records,
4845 * for simplicity, lets call them record X and X+1 respectively.
4846 * Interesting extent entries have been denoted by letters.
4847 * If the letter is unchanged before and after split, it means
4848 * that the extent entry was not modified during the split.
4849 * A '.' means that the entry remains unchanged after the split
4850 * and is not relevant for our example. A '0' means that the
4851 * extent entry is empty.
4852 *
4853 * If there isn't sufficient contiguous free space to relocate
4854 * an extent (extent "C" below), we will have to break the one
4855 * extent into multiple smaller extents, and relocate each of
4856 * the smaller extents individually. The way we do this is by
4857 * finding the largest contiguous free space that is currently
4858 * available (N allocation blocks), and then convert extent "C"
4859 * into two extents, C1 and C2, that occupy exactly the same
4860 * allocation blocks as extent C. Extent C1 is the first
4861 * N allocation blocks of extent C, and extent C2 is the remainder
4862 * of extent C. Then we can relocate extent C1 since we know
4863 * we have enough contiguous free space to relocate it in its
4864 * entirety. We then repeat the process starting with extent C2.
4865 *
4866 * In record X, only the entries following entry C are shifted, and
4867 * the original entry C is replaced with two entries C1 and C2 which
4868 * are actually two extent entries for contiguous allocation blocks.
4869 *
4870 * Note that the entry E from record X is shifted into record X+1 as
4871 * the new first entry. Since the first entry of record X+1 is updated,
4872 * the FABN will also get updated with the blockCount of entry E.
4873 * This also results in shifting of all extent entries in record X+1.
4874 * Note that the number of empty entries after the split has been
4875 * changed from 3 to 2.
4876 *
4877 * Before:
4878 * record X record X+1
4879 * ---------------------===--------- ---------------------------------
4880 * | A | . | . | . | B | C | D | E | | F | . | . | . | G | 0 | 0 | 0 |
4881 * ---------------------===--------- ---------------------------------
4882 *
4883 * After:
4884 * ---------------------=======----- ---------------------------------
4885 * | A | . | . | . | B | C1| C2| D | | E | F | . | . | . | G | 0 | 0 |
4886 * ---------------------=======----- ---------------------------------
4887 *
4888 * C1.startBlock = C.startBlock
4889 * C1.blockCount = N
4890 *
4891 * C2.startBlock = C.startBlock + N
4892 * C2.blockCount = C.blockCount - N
4893 *
4894 * FABN = old FABN - E.blockCount
4895 *
4896 * Inputs:
4897 * extent_info - This is the structure that contains state about
4898 * the current file, extent, and extent record that
4899 * is being relocated. This structure is shared
4900 * among code that traverses through all the extents
4901 * of the file, code that relocates extents, and
4902 * code that splits the extent.
4903 * Output:
4904 * Zero on success, non-zero on failure.
4905 */
static int
hfs_split_extent(struct hfs_reclaim_extent_info *extent_info, uint32_t newBlockCount)
{
	int error = 0;
	int index = extent_info->extent_index;
	int i;
	HFSPlusExtentDescriptor shift_extent;	/* Entry overflowing into the following record */
	HFSPlusExtentDescriptor last_extent;
	HFSPlusExtentDescriptor *extents; /* Pointer to current extent record being manipulated */
	HFSPlusExtentRecord *extents_rec = NULL;
	HFSPlusExtentKey *extents_key = NULL;
	HFSPlusAttrRecord *xattr_rec = NULL;
	HFSPlusAttrKey *xattr_key = NULL;
	struct BTreeIterator iterator;		/* Local iterator; distinct from the shared extent_info->iterator */
	struct FSBufferDescriptor btdata;
	uint16_t reclen;
	uint32_t read_recStartBlock;	/* Starting allocation block number to read old extent record */
	uint32_t write_recStartBlock;	/* Starting allocation block number to insert newly updated extent record */
	Boolean create_record = false;
	Boolean is_xattr;

	is_xattr = extent_info->is_xattr;
	extents = extent_info->extents;

	if (hfs_resize_debug) {
		printf ("hfs_split_extent: Split record:%u recStartBlock=%u %u:(%u,%u) for %u blocks\n", extent_info->overflow_count, extent_info->recStartBlock, index, extents[index].startBlock, extents[index].blockCount, newBlockCount);
	}

	/* Determine the starting allocation block number for the following
	 * overflow extent record, if any, before the current record
	 * gets modified.
	 */
	read_recStartBlock = extent_info->recStartBlock;
	for (i = 0; i < kHFSPlusExtentDensity; i++) {
		if (extents[i].blockCount == 0) {
			break;
		}
		read_recStartBlock += extents[i].blockCount;
	}

	/* Shift and split */
	if (index == kHFSPlusExtentDensity-1) {
		/* The new extent created after split will go into following overflow extent record */
		shift_extent.startBlock = extents[index].startBlock + newBlockCount;
		shift_extent.blockCount = extents[index].blockCount - newBlockCount;

		/* Last extent in the record will be split, so nothing to shift */
	} else {
		/* Splitting of extents can result in at most of one
		 * extent entry to be shifted into following overflow extent
		 * record.  So, store the last extent entry for later.
		 */
		shift_extent = extents[kHFSPlusExtentDensity-1];

		/* Start shifting extent information from the end of the extent
		 * record to the index where we want to insert the new extent.
		 * Note that kHFSPlusExtentDensity-1 is already saved above, and
		 * does not need to be shifted.  The extent entry that is being
		 * split does not get shifted.
		 */
		for (i = kHFSPlusExtentDensity-2; i > index; i--) {
			if (hfs_resize_debug) {
				if (extents[i].blockCount) {
					printf ("hfs_split_extent: Shift %u:(%u,%u) to %u:(%u,%u)\n", i, extents[i].startBlock, extents[i].blockCount, i+1, extents[i].startBlock, extents[i].blockCount);
				}
			}
			extents[i+1] = extents[i];
		}
	}

	if (index == kHFSPlusExtentDensity-1) {
		/* The second half of the extent being split will be the overflow
		 * entry that will go into following overflow extent record.  The
		 * value has been stored in 'shift_extent' above, so there is
		 * nothing to be done here.
		 */
	} else {
		/* Update the values in the second half of the extent being split
		 * before updating the first half of the split.  Note that the
		 * extent to split or first half of the split is at index 'index'
		 * and a new extent or second half of the split will be inserted at
		 * 'index+1' or into following overflow extent record.
		 */
		extents[index+1].startBlock = extents[index].startBlock + newBlockCount;
		extents[index+1].blockCount = extents[index].blockCount - newBlockCount;
	}
	/* Update the extent being split, only the block count will change */
	extents[index].blockCount = newBlockCount;

	if (hfs_resize_debug) {
		printf ("hfs_split_extent: Split %u:(%u,%u) and ", index, extents[index].startBlock, extents[index].blockCount);
		if (index != kHFSPlusExtentDensity-1) {
			printf ("%u:(%u,%u)\n", index+1, extents[index+1].startBlock, extents[index+1].blockCount);
		} else {
			printf ("overflow:(%u,%u)\n", shift_extent.startBlock, shift_extent.blockCount);
		}
	}

	/* If the newly split extent belongs to a large EA or lives in an
	 * overflow extent record, update it directly in the btree using the
	 * iterator information from the shared extent_info structure.
	 * (A NULL catalog_fp means the extent did not come from the catalog
	 * record.)
	 */
	if (extent_info->catalog_fp == NULL) {
		error = BTReplaceRecord(extent_info->fcb, extent_info->iterator,
				&(extent_info->btdata), extent_info->recordlen);
		if (error) {
			printf ("hfs_split_extent: fileID=%u BTReplaceRecord returned error=%d\n", extent_info->fileID, error);
			goto out;
		}
	}

	/* No extent entry to be shifted into another extent overflow record */
	if (shift_extent.blockCount == 0) {
		if (hfs_resize_debug) {
			printf ("hfs_split_extent: No extent entry to be shifted into overflow records\n");
		}
		error = 0;
		goto out;
	}

	/* The overflow extent entry has to be shifted into an extent
	 * overflow record.  This would mean that we have to shift
	 * extent entries from all overflow records by one.  We will
	 * start iteration from the first record to the last record,
	 * and shift the extent entry from one record to another.
	 * We might have to create a new record for the last extent
	 * entry for the file.
	 */

	/* Initialize iterator to search the next record */
	bzero(&iterator, sizeof(iterator));
	if (is_xattr) {
		/* Copy the key from the iterator that was to update the modified attribute record. */
		xattr_key = (HFSPlusAttrKey *)&(iterator.key);
		bcopy((HFSPlusAttrKey *)&(extent_info->iterator->key), xattr_key, sizeof(HFSPlusAttrKey));
		/* Note: xattr_key->startBlock will be initialized later in the iteration loop */

		MALLOC(xattr_rec, HFSPlusAttrRecord *,
				sizeof(HFSPlusAttrRecord), M_TEMP, M_WAITOK);
		if (xattr_rec == NULL) {
			error = ENOMEM;
			goto out;
		}
		btdata.bufferAddress = xattr_rec;
		btdata.itemSize = sizeof(HFSPlusAttrRecord);
		btdata.itemCount = 1;
		extents = xattr_rec->overflowExtents.extents;
	} else {
		extents_key = (HFSPlusExtentKey *) &(iterator.key);
		extents_key->keyLength = kHFSPlusExtentKeyMaximumLength;
		extents_key->forkType = extent_info->forkType;
		extents_key->fileID = extent_info->fileID;
		/* Note: extents_key->startBlock will be initialized later in the iteration loop */

		MALLOC(extents_rec, HFSPlusExtentRecord *,
				sizeof(HFSPlusExtentRecord), M_TEMP, M_WAITOK);
		if (extents_rec == NULL) {
			error = ENOMEM;
			goto out;
		}
		btdata.bufferAddress = extents_rec;
		btdata.itemSize = sizeof(HFSPlusExtentRecord);
		btdata.itemCount = 1;
		extents = extents_rec[0];
	}

	/* An extent entry still needs to be shifted into following overflow
	 * extent record.  This will result in the starting allocation block
	 * number of the extent record being changed which is part of the key
	 * for the extent record.  Since the extent record key is changing,
	 * the record can not be updated, instead has to be deleted and
	 * inserted again.
	 */
	while (shift_extent.blockCount) {
		if (hfs_resize_debug) {
			printf ("hfs_split_extent: Will shift (%u,%u) into record with startBlock=%u\n", shift_extent.startBlock, shift_extent.blockCount, read_recStartBlock);
		}

		/* Search if there is any existing overflow extent record.
		 * For this, the logical start block number in the key is
		 * the value calculated based on the logical start block
		 * number of the current extent record and the total number
		 * of blocks existing in the current extent record.
		 */
		if (is_xattr) {
			xattr_key->startBlock = read_recStartBlock;
		} else {
			extents_key->startBlock = read_recStartBlock;
		}
		error = BTSearchRecord(extent_info->fcb, &iterator, &btdata, &reclen, &iterator);
		if (error) {
			if (error != btNotFound) {
				printf ("hfs_split_extent: fileID=%u startBlock=%u BTSearchRecord error=%d\n", extent_info->fileID, read_recStartBlock, error);
				goto out;
			}
			/* No record exists at this key; the loop body below
			 * creates a brand-new last record for the file.
			 */
			create_record = true;
		}

		/* The extra extent entry from the previous record is being inserted
		 * as the first entry in the current extent record.  This will change
		 * the file allocation block number (FABN) of the current extent
		 * record, which is the startBlock value from the extent record key.
		 * Since one extra entry is being inserted in the record, the new
		 * FABN for the record will be less than the old FABN by the number
		 * of blocks in the new extent entry being inserted at the start.
		 * We have to do this before we update read_recStartBlock to point
		 * at the startBlock of the following record.
		 */
		write_recStartBlock = read_recStartBlock - shift_extent.blockCount;
		if (hfs_resize_debug) {
			if (create_record) {
				printf ("hfs_split_extent: No records found for startBlock=%u, will create new with startBlock=%u\n", read_recStartBlock, write_recStartBlock);
			}
		}

		/* Now update the read_recStartBlock to account for total number
		 * of blocks in this extent record.  It will now point to the
		 * starting allocation block number for the next extent record.
		 */
		for (i = 0; i < kHFSPlusExtentDensity; i++) {
			if (extents[i].blockCount == 0) {
				break;
			}
			read_recStartBlock += extents[i].blockCount;
		}

		if (create_record == true) {
			/* Initialize new record content with only one extent entry */
			bzero(extents, sizeof(HFSPlusExtentRecord));
			/* The new record will contain only one extent entry */
			extents[0] = shift_extent;
			/* There are no more overflow extents to be shifted;
			 * zeroing blockCount terminates the while loop after
			 * the record is inserted below.
			 */
			shift_extent.startBlock = shift_extent.blockCount = 0;

			if (is_xattr) {
				xattr_rec->recordType = kHFSPlusAttrExtents;
				xattr_rec->overflowExtents.reserved = 0;
				reclen = sizeof(HFSPlusAttrExtents);
			} else {
				reclen = sizeof(HFSPlusExtentRecord);
			}
		} else {
			/* The overflow extent entry from previous record will be
			 * the first entry in this extent record.  If the last
			 * extent entry in this record is valid, it will be shifted
			 * into the following extent record as its first entry.  So
			 * save the last entry before shifting entries in current
			 * record.
			 */
			last_extent = extents[kHFSPlusExtentDensity-1];

			/* Shift all entries by one index towards the end */
			for (i = kHFSPlusExtentDensity-2; i >= 0; i--) {
				extents[i+1] = extents[i];
			}

			/* Overflow extent entry saved from previous record
			 * is now the first entry in the current record.
			 */
			extents[0] = shift_extent;

			if (hfs_resize_debug) {
				printf ("hfs_split_extent: Shift overflow=(%u,%u) to record with updated startBlock=%u\n", shift_extent.startBlock, shift_extent.blockCount, write_recStartBlock);
			}

			/* The last entry from current record will be the
			 * overflow entry which will be the first entry for
			 * the following extent record.
			 */
			shift_extent = last_extent;

			/* Since the key->startBlock is being changed for this record,
			 * it should be deleted and inserted with the new key.
			 */
			error = BTDeleteRecord(extent_info->fcb, &iterator);
			if (error) {
				printf ("hfs_split_extent: fileID=%u startBlock=%u BTDeleteRecord error=%d\n", extent_info->fileID, read_recStartBlock, error);
				goto out;
			}
			if (hfs_resize_debug) {
				printf ("hfs_split_extent: Deleted record with startBlock=%u\n", (is_xattr ? xattr_key->startBlock : extents_key->startBlock));
			}
		}

		/* Insert the newly created or modified extent record */
		bzero(&iterator.hint, sizeof(iterator.hint));
		if (is_xattr) {
			xattr_key->startBlock = write_recStartBlock;
		} else {
			extents_key->startBlock = write_recStartBlock;
		}
		error = BTInsertRecord(extent_info->fcb, &iterator, &btdata, reclen);
		if (error) {
			printf ("hfs_split_extent: fileID=%u, startBlock=%u BTInsertRecord error=%d\n", extent_info->fileID, write_recStartBlock, error);
			goto out;
		}
		if (hfs_resize_debug) {
			printf ("hfs_split_extent: Inserted extent record with startBlock=%u\n", write_recStartBlock);
		}
	}
	/* Flush the btree so the on-disk state matches what we just wrote */
	BTFlushPath(extent_info->fcb);
out:
	if (extents_rec) {
		FREE (extents_rec, M_TEMP);
	}
	if (xattr_rec) {
		FREE (xattr_rec, M_TEMP);
	}
	return error;
}
5216
5217
5218 /*
5219 * Relocate an extent if it lies beyond the expected end of volume.
5220 *
5221 * This function is called for every extent of the file being relocated.
5222 * It allocates space for relocation, copies the data, deallocates
 * the old extent, and updates the corresponding on-disk extent. If the function
5224 * does not find contiguous space to relocate an extent, it splits the
5225 * extent in smaller size to be able to relocate it out of the area of
5226 * disk being reclaimed. As an optimization, if an extent lies partially
5227 * in the area of the disk being reclaimed, it is split so that we only
5228 * have to relocate the area that was overlapping with the area of disk
5229 * being reclaimed.
5230 *
5231 * Note that every extent is relocated in its own transaction so that
5232 * they do not overwhelm the journal. This function handles the extent
5233 * record that exists in the catalog record, extent record from overflow
5234 * extents btree, and extents for large EAs.
5235 *
5236 * Inputs:
5237 * extent_info - This is the structure that contains state about
5238 * the current file, extent, and extent record that
5239 * is being relocated. This structure is shared
5240 * among code that traverses through all the extents
5241 * of the file, code that relocates extents, and
5242 * code that splits the extent.
5243 */
5244 static int
5245 hfs_reclaim_extent(struct hfsmount *hfsmp, const u_long allocLimit, struct hfs_reclaim_extent_info *extent_info, vfs_context_t context)
5246 {
5247 int error = 0;
5248 int index;
5249 struct cnode *cp;
5250 u_int32_t oldStartBlock;
5251 u_int32_t oldBlockCount;
5252 u_int32_t newStartBlock;
5253 u_int32_t newBlockCount;
5254 u_int32_t alloc_flags;
5255 int blocks_allocated = false;
5256
5257 index = extent_info->extent_index;
5258 cp = VTOC(extent_info->vp);
5259
5260 oldStartBlock = extent_info->extents[index].startBlock;
5261 oldBlockCount = extent_info->extents[index].blockCount;
5262
5263 if (0 && hfs_resize_debug) {
5264 printf ("hfs_reclaim_extent: Examine record:%u recStartBlock=%u, %u:(%u,%u)\n", extent_info->overflow_count, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount);
5265 }
5266
5267 /* Check if the current extent lies completely within allocLimit */
5268 if ((oldStartBlock + oldBlockCount) <= allocLimit) {
5269 extent_info->cur_blockCount += oldBlockCount;
5270 return error;
5271 }
5272
5273 /* Every extent should be relocated in its own transaction
5274 * to make sure that we don't overflow the journal buffer.
5275 */
5276 error = hfs_start_transaction(hfsmp);
5277 if (error) {
5278 return error;
5279 }
5280 extent_info->lockflags = hfs_systemfile_lock(hfsmp, extent_info->lockflags, HFS_EXCLUSIVE_LOCK);
5281
5282 /* Check if the extent lies partially in the area to reclaim,
5283 * i.e. it starts before allocLimit and ends beyond allocLimit.
5284 * We have already skipped extents that lie completely within
5285 * allocLimit in the check above, so we only check for the
5286 * startBlock. If it lies partially, split it so that we
5287 * only relocate part of the extent.
5288 */
5289 if (oldStartBlock < allocLimit) {
5290 newBlockCount = allocLimit - oldStartBlock;
5291 error = hfs_split_extent(extent_info, newBlockCount);
5292 if (error == 0) {
5293 /* After successful split, the current extent does not
5294 * need relocation, so just return back.
5295 */
5296 goto out;
5297 }
5298 /* Ignore error and try relocating the entire extent instead */
5299 }
5300
5301 alloc_flags = HFS_ALLOC_FORCECONTIG | HFS_ALLOC_SKIPFREEBLKS;
5302 if (extent_info->is_sysfile) {
5303 alloc_flags |= HFS_ALLOC_METAZONE;
5304 }
5305
5306 error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount, alloc_flags,
5307 &newStartBlock, &newBlockCount);
5308 if ((extent_info->is_sysfile == false) &&
5309 ((error == dskFulErr) || (error == ENOSPC))) {
5310 /* For non-system files, try reallocating space in metadata zone */
5311 alloc_flags |= HFS_ALLOC_METAZONE;
5312 error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount,
5313 alloc_flags, &newStartBlock, &newBlockCount);
5314 }
5315 if ((error == dskFulErr) || (error == ENOSPC)) {
5316 /* We did not find desired contiguous space for this extent.
5317 * So try to allocate the maximum contiguous space available.
5318 */
5319 alloc_flags &= ~HFS_ALLOC_FORCECONTIG;
5320
5321 error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount,
5322 alloc_flags, &newStartBlock, &newBlockCount);
5323 if (error) {
5324 printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) BlockAllocate error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error);
5325 goto out;
5326 }
5327 blocks_allocated = true;
5328
5329 error = hfs_split_extent(extent_info, newBlockCount);
5330 if (error) {
5331 printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) split error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error);
5332 goto out;
5333 }
5334 oldBlockCount = newBlockCount;
5335 }
5336 if (error) {
5337 printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) contig BlockAllocate error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error);
5338 goto out;
5339 }
5340 blocks_allocated = true;
5341
5342 /* Copy data from old location to new location */
5343 error = hfs_copy_extent(hfsmp, extent_info->vp, oldStartBlock,
5344 newStartBlock, newBlockCount, context);
5345 if (error) {
5346 printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u)=>(%u,%u) hfs_copy_extent error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, newStartBlock, newBlockCount, error);
5347 goto out;
5348 }
5349
5350 /* Update the extent record with the new start block information */
5351 extent_info->extents[index].startBlock = newStartBlock;
5352
5353 /* Sync the content back to the disk */
5354 if (extent_info->catalog_fp) {
5355 /* Update the extents in catalog record */
5356 if (extent_info->is_dirlink) {
5357 error = cat_update_dirlink(hfsmp, extent_info->forkType,
5358 extent_info->dirlink_desc, extent_info->dirlink_attr,
5359 &(extent_info->dirlink_fork->ff_data));
5360 } else {
5361 cp->c_flag |= C_MODIFIED;
5362 /* If this is a system file, sync volume headers on disk */
5363 if (extent_info->is_sysfile) {
5364 error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
5365 }
5366 }
5367 } else {
5368 /* Replace record for extents overflow or extents-based xattrs */
5369 error = BTReplaceRecord(extent_info->fcb, extent_info->iterator,
5370 &(extent_info->btdata), extent_info->recordlen);
5371 }
5372 if (error) {
5373 printf ("hfs_reclaim_extent: fileID=%u, update record error=%u\n", extent_info->fileID, error);
5374 goto out;
5375 }
5376
5377 /* Deallocate the old extent */
5378 error = BlockDeallocate(hfsmp, oldStartBlock, oldBlockCount, HFS_ALLOC_SKIPFREEBLKS);
5379 if (error) {
5380 printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) BlockDeallocate error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error);
5381 goto out;
5382 }
5383 extent_info->blocks_relocated += newBlockCount;
5384
5385 if (hfs_resize_debug) {
5386 printf ("hfs_reclaim_extent: Relocated record:%u %u:(%u,%u) to (%u,%u)\n", extent_info->overflow_count, index, oldStartBlock, oldBlockCount, newStartBlock, newBlockCount);
5387 }
5388
5389 out:
5390 if (error != 0) {
5391 if (blocks_allocated == true) {
5392 BlockDeallocate(hfsmp, newStartBlock, newBlockCount, HFS_ALLOC_SKIPFREEBLKS);
5393 }
5394 } else {
5395 /* On success, increment the total allocation blocks processed */
5396 extent_info->cur_blockCount += newBlockCount;
5397 }
5398
5399 hfs_systemfile_unlock(hfsmp, extent_info->lockflags);
5400
5401 /* For a non-system file, if an extent entry from catalog record
5402 * was modified, sync the in-memory changes to the catalog record
5403 * on disk before ending the transaction.
5404 */
5405 if ((error == 0) &&
5406 (extent_info->overflow_count < kHFSPlusExtentDensity) &&
5407 (extent_info->is_sysfile == false)) {
5408 (void) hfs_update(extent_info->vp, MNT_WAIT);
5409 }
5410
5411 hfs_end_transaction(hfsmp);
5412
5413 return error;
5414 }
5415
5416 /* Report intermediate progress during volume resize */
5417 static void
5418 hfs_truncatefs_progress(struct hfsmount *hfsmp)
5419 {
5420 u_int32_t cur_progress;
5421
5422 hfs_resize_progress(hfsmp, &cur_progress);
5423 if (cur_progress > (hfsmp->hfs_resize_progress + 9)) {
5424 printf("hfs_truncatefs: %d%% done...\n", cur_progress);
5425 hfsmp->hfs_resize_progress = cur_progress;
5426 }
5427 return;
5428 }
5429
5430 /*
5431 * Reclaim space at the end of a volume for given file and forktype.
5432 *
5433 * This routine attempts to move any extent which contains allocation blocks
5434 * at or after "allocLimit." A separate transaction is used for every extent
5435 * that needs to be moved. If there is not contiguous space available for
5436 * moving an extent, it can be split into smaller extents. The contents of
5437 * any moved extents are read and written via the volume's device vnode --
5438 * NOT via "vp." During the move, moved blocks which are part of a transaction
5439 * have their physical block numbers invalidated so they will eventually be
5440 * written to their new locations.
5441 *
5442 * This function is also called for directory hard links. Directory hard links
5443 * are regular files with no data fork and resource fork that contains alias
5444 * information for backward compatibility with pre-Leopard systems. However
5445 * non-Mac OS X implementation can add/modify data fork or resource fork
5446 * information to directory hard links, so we check, and if required, relocate
5447 * both data fork and resource fork.
5448 *
5449 * Inputs:
5450 * hfsmp The volume being resized.
5451 * vp The vnode for the system file.
5452 * fileID ID of the catalog record that needs to be relocated
 * forktype	The type of fork that needs to be relocated,
5454 * kHFSResourceForkType for resource fork,
5455 * kHFSDataForkType for data fork
5456 * allocLimit Allocation limit for the new volume size,
5457 * do not use this block or beyond. All extents
5458 * that use this block or any blocks beyond this limit
5459 * will be relocated.
5460 *
5461 * Side Effects:
5462 * hfsmp->hfs_resize_blocksmoved is incremented by the number of allocation
5463 * blocks that were relocated.
5464 */
5465 static int
5466 hfs_reclaim_file(struct hfsmount *hfsmp, struct vnode *vp, u_int32_t fileID,
5467 u_int8_t forktype, u_long allocLimit, vfs_context_t context)
5468 {
5469 int error = 0;
5470 struct hfs_reclaim_extent_info *extent_info;
5471 int i;
5472 int lockflags = 0;
5473 struct cnode *cp;
5474 struct filefork *fp;
5475 int took_truncate_lock = false;
5476 int release_desc = false;
5477 HFSPlusExtentKey *key;
5478
5479 /* If there is no vnode for this file, then there's nothing to do. */
5480 if (vp == NULL) {
5481 return 0;
5482 }
5483
5484 cp = VTOC(vp);
5485
5486 MALLOC(extent_info, struct hfs_reclaim_extent_info *,
5487 sizeof(struct hfs_reclaim_extent_info), M_TEMP, M_WAITOK);
5488 if (extent_info == NULL) {
5489 return ENOMEM;
5490 }
5491 bzero(extent_info, sizeof(struct hfs_reclaim_extent_info));
5492 extent_info->vp = vp;
5493 extent_info->fileID = fileID;
5494 extent_info->forkType = forktype;
5495 extent_info->is_sysfile = vnode_issystem(vp);
5496 if (vnode_isdir(vp) && (cp->c_flag & C_HARDLINK)) {
5497 extent_info->is_dirlink = true;
5498 }
5499 /* We always need allocation bitmap and extent btree lock */
5500 lockflags = SFL_BITMAP | SFL_EXTENTS;
5501 if ((fileID == kHFSCatalogFileID) || (extent_info->is_dirlink == true)) {
5502 lockflags |= SFL_CATALOG;
5503 } else if (fileID == kHFSAttributesFileID) {
5504 lockflags |= SFL_ATTRIBUTE;
5505 } else if (fileID == kHFSStartupFileID) {
5506 lockflags |= SFL_STARTUP;
5507 }
5508 extent_info->lockflags = lockflags;
5509 extent_info->fcb = VTOF(hfsmp->hfs_extents_vp);
5510
5511 /* Flush data associated with current file on disk.
5512 *
5513 * If the current vnode is directory hard link, no flushing of
5514 * journal or vnode is required. The current kernel does not
5515 * modify data/resource fork of directory hard links, so nothing
5516 * will be in the cache. If a directory hard link is newly created,
5517 * the resource fork data is written directly using devvp and
5518 * the code that actually relocates data (hfs_copy_extent()) also
5519 * uses devvp for its I/O --- so they will see a consistent copy.
5520 */
5521 if (extent_info->is_sysfile) {
5522 /* If the current vnode is system vnode, flush journal
5523 * to make sure that all data is written to the disk.
5524 */
5525 error = hfs_journal_flush(hfsmp, TRUE);
5526 if (error) {
5527 printf ("hfs_reclaim_file: journal_flush returned %d\n", error);
5528 goto out;
5529 }
5530 } else if (extent_info->is_dirlink == false) {
5531 /* Flush all blocks associated with this regular file vnode.
5532 * Normally there should not be buffer cache blocks for regular
5533 * files, but for objects like symlinks, we can have buffer cache
5534 * blocks associated with the vnode. Therefore we call
5535 * buf_flushdirtyblks() also.
5536 */
5537 buf_flushdirtyblks(vp, 0, BUF_SKIP_LOCKED, "hfs_reclaim_file");
5538
5539 hfs_unlock(cp);
5540 hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK);
5541 took_truncate_lock = true;
5542 (void) cluster_push(vp, 0);
5543 error = hfs_lock(cp, HFS_FORCE_LOCK);
5544 if (error) {
5545 goto out;
5546 }
5547
5548 /* If the file no longer exists, nothing left to do */
5549 if (cp->c_flag & C_NOEXISTS) {
5550 error = 0;
5551 goto out;
5552 }
5553
5554 /* Wait for any in-progress writes to this vnode to complete, so that we'll
5555 * be copying consistent bits. (Otherwise, it's possible that an async
5556 * write will complete to the old extent after we read from it. That
5557 * could lead to corruption.)
5558 */
5559 error = vnode_waitforwrites(vp, 0, 0, 0, "hfs_reclaim_file");
5560 if (error) {
5561 goto out;
5562 }
5563 }
5564
5565 if (hfs_resize_debug) {
5566 printf("hfs_reclaim_file: === Start reclaiming %sfork for %sid=%u ===\n", (forktype ? "rsrc" : "data"), (extent_info->is_dirlink ? "dirlink" : "file"), fileID);
5567 }
5568
5569 if (extent_info->is_dirlink) {
5570 MALLOC(extent_info->dirlink_desc, struct cat_desc *,
5571 sizeof(struct cat_desc), M_TEMP, M_WAITOK);
5572 MALLOC(extent_info->dirlink_attr, struct cat_attr *,
5573 sizeof(struct cat_attr), M_TEMP, M_WAITOK);
5574 MALLOC(extent_info->dirlink_fork, struct filefork *,
5575 sizeof(struct filefork), M_TEMP, M_WAITOK);
5576 if ((extent_info->dirlink_desc == NULL) ||
5577 (extent_info->dirlink_attr == NULL) ||
5578 (extent_info->dirlink_fork == NULL)) {
5579 error = ENOMEM;
5580 goto out;
5581 }
5582
5583 /* Lookup catalog record for directory hard link and
5584 * create a fake filefork for the value looked up from
5585 * the disk.
5586 */
5587 fp = extent_info->dirlink_fork;
5588 bzero(extent_info->dirlink_fork, sizeof(struct filefork));
5589 extent_info->dirlink_fork->ff_cp = cp;
5590 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
5591 error = cat_lookup_dirlink(hfsmp, fileID, forktype,
5592 extent_info->dirlink_desc, extent_info->dirlink_attr,
5593 &(extent_info->dirlink_fork->ff_data));
5594 hfs_systemfile_unlock(hfsmp, lockflags);
5595 if (error) {
5596 printf ("hfs_reclaim_file: cat_lookup_dirlink for fileID=%u returned error=%u\n", fileID, error);
5597 goto out;
5598 }
5599 release_desc = true;
5600 } else {
5601 fp = VTOF(vp);
5602 }
5603
5604 extent_info->catalog_fp = fp;
5605 extent_info->recStartBlock = 0;
5606 extent_info->extents = extent_info->catalog_fp->ff_extents;
5607 /* Relocate extents from the catalog record */
5608 for (i = 0; i < kHFSPlusExtentDensity; ++i) {
5609 if (fp->ff_extents[i].blockCount == 0) {
5610 break;
5611 }
5612 extent_info->extent_index = i;
5613 error = hfs_reclaim_extent(hfsmp, allocLimit, extent_info, context);
5614 if (error) {
5615 printf ("hfs_reclaim_file: fileID=%u #%d %u:(%u,%u) hfs_reclaim_extent error=%d\n", fileID, extent_info->overflow_count, i, fp->ff_extents[i].startBlock, fp->ff_extents[i].blockCount, error);
5616 goto out;
5617 }
5618 }
5619
5620 /* If the number of allocation blocks processed for reclaiming
5621 * are less than total number of blocks for the file, continuing
5622 * working on overflow extents record.
5623 */
5624 if (fp->ff_blocks <= extent_info->cur_blockCount) {
5625 if (0 && hfs_resize_debug) {
5626 printf ("hfs_reclaim_file: Nothing more to relocate, offset=%d, ff_blocks=%u, cur_blockCount=%u\n", i, fp->ff_blocks, extent_info->cur_blockCount);
5627 }
5628 goto out;
5629 }
5630
5631 if (hfs_resize_debug) {
5632 printf ("hfs_reclaim_file: Will check overflow records, offset=%d, ff_blocks=%u, cur_blockCount=%u\n", i, fp->ff_blocks, extent_info->cur_blockCount);
5633 }
5634
5635 MALLOC(extent_info->iterator, struct BTreeIterator *, sizeof(struct BTreeIterator), M_TEMP, M_WAITOK);
5636 if (extent_info->iterator == NULL) {
5637 error = ENOMEM;
5638 goto out;
5639 }
5640 bzero(extent_info->iterator, sizeof(struct BTreeIterator));
5641 key = (HFSPlusExtentKey *) &(extent_info->iterator->key);
5642 key->keyLength = kHFSPlusExtentKeyMaximumLength;
5643 key->forkType = forktype;
5644 key->fileID = fileID;
5645 key->startBlock = extent_info->cur_blockCount;
5646
5647 extent_info->btdata.bufferAddress = extent_info->record.overflow;
5648 extent_info->btdata.itemSize = sizeof(HFSPlusExtentRecord);
5649 extent_info->btdata.itemCount = 1;
5650
5651 extent_info->catalog_fp = NULL;
5652
5653 /* Search the first overflow extent with expected startBlock as 'cur_blockCount' */
5654 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
5655 error = BTSearchRecord(extent_info->fcb, extent_info->iterator,
5656 &(extent_info->btdata), &(extent_info->recordlen),
5657 extent_info->iterator);
5658 hfs_systemfile_unlock(hfsmp, lockflags);
5659 while (error == 0) {
5660 extent_info->overflow_count++;
5661 extent_info->recStartBlock = key->startBlock;
5662 extent_info->extents = extent_info->record.overflow;
5663 for (i = 0; i < kHFSPlusExtentDensity; i++) {
5664 if (extent_info->record.overflow[i].blockCount == 0) {
5665 goto out;
5666 }
5667 extent_info->extent_index = i;
5668 error = hfs_reclaim_extent(hfsmp, allocLimit, extent_info, context);
5669 if (error) {
5670 printf ("hfs_reclaim_file: fileID=%u #%d %u:(%u,%u) hfs_reclaim_extent error=%d\n", fileID, extent_info->overflow_count, i, extent_info->record.overflow[i].startBlock, extent_info->record.overflow[i].blockCount, error);
5671 goto out;
5672 }
5673 }
5674
5675 /* Look for more overflow records */
5676 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
5677 error = BTIterateRecord(extent_info->fcb, kBTreeNextRecord,
5678 extent_info->iterator, &(extent_info->btdata),
5679 &(extent_info->recordlen));
5680 hfs_systemfile_unlock(hfsmp, lockflags);
5681 if (error) {
5682 break;
5683 }
5684 /* Stop when we encounter a different file or fork. */
5685 if ((key->fileID != fileID) || (key->forkType != forktype)) {
5686 break;
5687 }
5688 }
5689 if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) {
5690 error = 0;
5691 }
5692
5693 out:
5694 /* If any blocks were relocated, account them and report progress */
5695 if (extent_info->blocks_relocated) {
5696 hfsmp->hfs_resize_blocksmoved += extent_info->blocks_relocated;
5697 hfs_truncatefs_progress(hfsmp);
5698 if (fileID < kHFSFirstUserCatalogNodeID) {
5699 printf ("hfs_reclaim_file: Relocated %u blocks from fileID=%u on \"%s\"\n",
5700 extent_info->blocks_relocated, fileID, hfsmp->vcbVN);
5701 }
5702 }
5703 if (extent_info->iterator) {
5704 FREE(extent_info->iterator, M_TEMP);
5705 }
5706 if (release_desc == true) {
5707 cat_releasedesc(extent_info->dirlink_desc);
5708 }
5709 if (extent_info->dirlink_desc) {
5710 FREE(extent_info->dirlink_desc, M_TEMP);
5711 }
5712 if (extent_info->dirlink_attr) {
5713 FREE(extent_info->dirlink_attr, M_TEMP);
5714 }
5715 if (extent_info->dirlink_fork) {
5716 FREE(extent_info->dirlink_fork, M_TEMP);
5717 }
5718 if ((extent_info->blocks_relocated != 0) && (extent_info->is_sysfile == false)) {
5719 (void) hfs_update(vp, MNT_WAIT);
5720 }
5721 if (took_truncate_lock) {
5722 hfs_unlock_truncate(cp, 0);
5723 }
5724 if (extent_info) {
5725 FREE(extent_info, M_TEMP);
5726 }
5727 if (hfs_resize_debug) {
5728 printf("hfs_reclaim_file: === Finished relocating %sfork for fileid=%u (error=%d) ===\n", (forktype ? "rsrc" : "data"), fileID, error);
5729 }
5730
5731 return error;
5732 }
5733
5734
5735 /*
5736 * This journal_relocate callback updates the journal info block to point
5737 * at the new journal location. This write must NOT be done using the
5738 * transaction. We must write the block immediately. We must also force
5739 * it to get to the media so that the new journal location will be seen by
5740 * the replay code before we can safely let journaled blocks be written
5741 * to their normal locations.
5742 *
5743 * The tests for journal_uses_fua below are mildly hacky. Since the journal
5744 * and the file system are both on the same device, I'm leveraging what
5745 * the journal has decided about FUA.
5746 */
/*
 * Argument bundle handed to hfs_journal_relocate_callback() through
 * journal_relocate().  Bundles everything the callback needs to rewrite
 * the journal info block at the journal's new location.
 */
struct hfs_journal_relocate_args {
	struct hfsmount *hfsmp;		/* volume whose journal is being moved */
	vfs_context_t context;		/* caller's context; supplies credentials for buffer I/O */
	u_int32_t newStartBlock;	/* new journal start, in allocation blocks */
};
5752
5753 static errno_t
5754 hfs_journal_relocate_callback(void *_args)
5755 {
5756 int error;
5757 struct hfs_journal_relocate_args *args = _args;
5758 struct hfsmount *hfsmp = args->hfsmp;
5759 buf_t bp;
5760 JournalInfoBlock *jibp;
5761
5762 error = buf_meta_bread(hfsmp->hfs_devvp,
5763 hfsmp->vcbJinfoBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size),
5764 hfsmp->blockSize, vfs_context_ucred(args->context), &bp);
5765 if (error) {
5766 printf("hfs_reclaim_journal_file: failed to read JIB (%d)\n", error);
5767 if (bp) {
5768 buf_brelse(bp);
5769 }
5770 return error;
5771 }
5772 jibp = (JournalInfoBlock*) buf_dataptr(bp);
5773 jibp->offset = SWAP_BE64((u_int64_t)args->newStartBlock * hfsmp->blockSize);
5774 jibp->size = SWAP_BE64(hfsmp->jnl_size);
5775 if (journal_uses_fua(hfsmp->jnl))
5776 buf_markfua(bp);
5777 error = buf_bwrite(bp);
5778 if (error) {
5779 printf("hfs_reclaim_journal_file: failed to write JIB (%d)\n", error);
5780 return error;
5781 }
5782 if (!journal_uses_fua(hfsmp->jnl)) {
5783 error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, args->context);
5784 if (error) {
5785 printf("hfs_reclaim_journal_file: DKIOCSYNCHRONIZECACHE failed (%d)\n", error);
5786 error = 0; /* Don't fail the operation. */
5787 }
5788 }
5789
5790 return error;
5791 }
5792
5793
5794 static int
5795 hfs_reclaim_journal_file(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context)
5796 {
5797 int error;
5798 int journal_err;
5799 int lockflags;
5800 u_int32_t oldStartBlock;
5801 u_int32_t newStartBlock;
5802 u_int32_t oldBlockCount;
5803 u_int32_t newBlockCount;
5804 struct cat_desc journal_desc;
5805 struct cat_attr journal_attr;
5806 struct cat_fork journal_fork;
5807 struct hfs_journal_relocate_args callback_args;
5808
5809 if (hfsmp->jnl_start + (hfsmp->jnl_size / hfsmp->blockSize) <= allocLimit) {
5810 /* The journal does not require relocation */
5811 return 0;
5812 }
5813
5814 error = hfs_start_transaction(hfsmp);
5815 if (error) {
5816 printf("hfs_reclaim_journal_file: hfs_start_transaction returned %d\n", error);
5817 return error;
5818 }
5819 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
5820
5821 oldBlockCount = hfsmp->jnl_size / hfsmp->blockSize;
5822
5823 /* TODO: Allow the journal to change size based on the new volume size. */
5824 error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount,
5825 HFS_ALLOC_METAZONE | HFS_ALLOC_FORCECONTIG | HFS_ALLOC_SKIPFREEBLKS,
5826 &newStartBlock, &newBlockCount);
5827 if (error) {
5828 printf("hfs_reclaim_journal_file: BlockAllocate returned %d\n", error);
5829 goto fail;
5830 }
5831 if (newBlockCount != oldBlockCount) {
5832 printf("hfs_reclaim_journal_file: newBlockCount != oldBlockCount (%u, %u)\n", newBlockCount, oldBlockCount);
5833 goto free_fail;
5834 }
5835
5836 error = BlockDeallocate(hfsmp, hfsmp->jnl_start, oldBlockCount, HFS_ALLOC_SKIPFREEBLKS);
5837 if (error) {
5838 printf("hfs_reclaim_journal_file: BlockDeallocate returned %d\n", error);
5839 goto free_fail;
5840 }
5841
5842 /* Update the catalog record for .journal */
5843 error = cat_idlookup(hfsmp, hfsmp->hfs_jnlfileid, 1, &journal_desc, &journal_attr, &journal_fork);
5844 if (error) {
5845 printf("hfs_reclaim_journal_file: cat_idlookup returned %d\n", error);
5846 goto free_fail;
5847 }
5848 oldStartBlock = journal_fork.cf_extents[0].startBlock;
5849 journal_fork.cf_size = newBlockCount * hfsmp->blockSize;
5850 journal_fork.cf_extents[0].startBlock = newStartBlock;
5851 journal_fork.cf_extents[0].blockCount = newBlockCount;
5852 journal_fork.cf_blocks = newBlockCount;
5853 error = cat_update(hfsmp, &journal_desc, &journal_attr, &journal_fork, NULL);
5854 cat_releasedesc(&journal_desc); /* all done with cat descriptor */
5855 if (error) {
5856 printf("hfs_reclaim_journal_file: cat_update returned %d\n", error);
5857 goto free_fail;
5858 }
5859 callback_args.hfsmp = hfsmp;
5860 callback_args.context = context;
5861 callback_args.newStartBlock = newStartBlock;
5862
5863 error = journal_relocate(hfsmp->jnl, (off_t)newStartBlock*hfsmp->blockSize,
5864 (off_t)newBlockCount*hfsmp->blockSize, 0,
5865 hfs_journal_relocate_callback, &callback_args);
5866 if (error) {
5867 /* NOTE: journal_relocate will mark the journal invalid. */
5868 printf("hfs_reclaim_journal_file: journal_relocate returned %d\n", error);
5869 goto fail;
5870 }
5871 hfsmp->jnl_start = newStartBlock;
5872 hfsmp->jnl_size = (off_t)newBlockCount * hfsmp->blockSize;
5873
5874 hfs_systemfile_unlock(hfsmp, lockflags);
5875 error = hfs_end_transaction(hfsmp);
5876 if (error) {
5877 printf("hfs_reclaim_journal_file: hfs_end_transaction returned %d\n", error);
5878 }
5879
5880 /* Account for the blocks relocated and print progress */
5881 hfsmp->hfs_resize_blocksmoved += oldBlockCount;
5882 hfs_truncatefs_progress(hfsmp);
5883 if (!error) {
5884 printf ("hfs_reclaim_journal_file: Relocated %u blocks from journal on \"%s\"\n",
5885 oldBlockCount, hfsmp->vcbVN);
5886 if (hfs_resize_debug) {
5887 printf ("hfs_reclaim_journal_file: Successfully relocated journal from (%u,%u) to (%u,%u)\n", oldStartBlock, oldBlockCount, newStartBlock, newBlockCount);
5888 }
5889 }
5890 return error;
5891
5892 free_fail:
5893 journal_err = BlockDeallocate(hfsmp, newStartBlock, newBlockCount, HFS_ALLOC_SKIPFREEBLKS);
5894 if (journal_err) {
5895 printf("hfs_reclaim_journal_file: BlockDeallocate returned %d\n", error);
5896 hfs_mark_volume_inconsistent(hfsmp);
5897 }
5898 fail:
5899 hfs_systemfile_unlock(hfsmp, lockflags);
5900 (void) hfs_end_transaction(hfsmp);
5901 if (hfs_resize_debug) {
5902 printf ("hfs_reclaim_journal_file: Error relocating journal file (error=%d)\n", error);
5903 }
5904 return error;
5905 }
5906
5907
5908 /*
5909 * Move the journal info block to a new location. We have to make sure the
5910 * new copy of the journal info block gets to the media first, then change
5911 * the field in the volume header and the catalog record.
5912 */
5913 static int
5914 hfs_reclaim_journal_info_block(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context)
5915 {
5916 int error;
5917 int journal_err;
5918 int lockflags;
5919 u_int32_t oldBlock;
5920 u_int32_t newBlock;
5921 u_int32_t blockCount;
5922 struct cat_desc jib_desc;
5923 struct cat_attr jib_attr;
5924 struct cat_fork jib_fork;
5925 buf_t old_bp, new_bp;
5926
5927 if (hfsmp->vcbJinfoBlock <= allocLimit) {
5928 /* The journal info block does not require relocation */
5929 return 0;
5930 }
5931
5932 error = hfs_start_transaction(hfsmp);
5933 if (error) {
5934 printf("hfs_reclaim_journal_info_block: hfs_start_transaction returned %d\n", error);
5935 return error;
5936 }
5937 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
5938
5939 error = BlockAllocate(hfsmp, 1, 1, 1,
5940 HFS_ALLOC_METAZONE | HFS_ALLOC_FORCECONTIG | HFS_ALLOC_SKIPFREEBLKS,
5941 &newBlock, &blockCount);
5942 if (error) {
5943 printf("hfs_reclaim_journal_info_block: BlockAllocate returned %d\n", error);
5944 goto fail;
5945 }
5946 if (blockCount != 1) {
5947 printf("hfs_reclaim_journal_info_block: blockCount != 1 (%u)\n", blockCount);
5948 goto free_fail;
5949 }
5950 error = BlockDeallocate(hfsmp, hfsmp->vcbJinfoBlock, 1, HFS_ALLOC_SKIPFREEBLKS);
5951 if (error) {
5952 printf("hfs_reclaim_journal_info_block: BlockDeallocate returned %d\n", error);
5953 goto free_fail;
5954 }
5955
5956 /* Copy the old journal info block content to the new location */
5957 error = buf_meta_bread(hfsmp->hfs_devvp,
5958 hfsmp->vcbJinfoBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size),
5959 hfsmp->blockSize, vfs_context_ucred(context), &old_bp);
5960 if (error) {
5961 printf("hfs_reclaim_journal_info_block: failed to read JIB (%d)\n", error);
5962 if (old_bp) {
5963 buf_brelse(old_bp);
5964 }
5965 goto free_fail;
5966 }
5967 new_bp = buf_getblk(hfsmp->hfs_devvp,
5968 newBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size),
5969 hfsmp->blockSize, 0, 0, BLK_META);
5970 bcopy((char*)buf_dataptr(old_bp), (char*)buf_dataptr(new_bp), hfsmp->blockSize);
5971 buf_brelse(old_bp);
5972 if (journal_uses_fua(hfsmp->jnl))
5973 buf_markfua(new_bp);
5974 error = buf_bwrite(new_bp);
5975 if (error) {
5976 printf("hfs_reclaim_journal_info_block: failed to write new JIB (%d)\n", error);
5977 goto free_fail;
5978 }
5979 if (!journal_uses_fua(hfsmp->jnl)) {
5980 error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
5981 if (error) {
5982 printf("hfs_reclaim_journal_info_block: DKIOCSYNCHRONIZECACHE failed (%d)\n", error);
5983 /* Don't fail the operation. */
5984 }
5985 }
5986
5987 /* Update the catalog record for .journal_info_block */
5988 error = cat_idlookup(hfsmp, hfsmp->hfs_jnlinfoblkid, 1, &jib_desc, &jib_attr, &jib_fork);
5989 if (error) {
5990 printf("hfs_reclaim_journal_file: cat_idlookup returned %d\n", error);
5991 goto fail;
5992 }
5993 oldBlock = jib_fork.cf_extents[0].startBlock;
5994 jib_fork.cf_size = hfsmp->blockSize;
5995 jib_fork.cf_extents[0].startBlock = newBlock;
5996 jib_fork.cf_extents[0].blockCount = 1;
5997 jib_fork.cf_blocks = 1;
5998 error = cat_update(hfsmp, &jib_desc, &jib_attr, &jib_fork, NULL);
5999 cat_releasedesc(&jib_desc); /* all done with cat descriptor */
6000 if (error) {
6001 printf("hfs_reclaim_journal_info_block: cat_update returned %d\n", error);
6002 goto fail;
6003 }
6004
6005 /* Update the pointer to the journal info block in the volume header. */
6006 hfsmp->vcbJinfoBlock = newBlock;
6007 error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
6008 if (error) {
6009 printf("hfs_reclaim_journal_info_block: hfs_flushvolumeheader returned %d\n", error);
6010 goto fail;
6011 }
6012 hfs_systemfile_unlock(hfsmp, lockflags);
6013 error = hfs_end_transaction(hfsmp);
6014 if (error) {
6015 printf("hfs_reclaim_journal_info_block: hfs_end_transaction returned %d\n", error);
6016 }
6017 error = hfs_journal_flush(hfsmp, FALSE);
6018 if (error) {
6019 printf("hfs_reclaim_journal_info_block: journal_flush returned %d\n", error);
6020 }
6021
6022 /* Account for the block relocated and print progress */
6023 hfsmp->hfs_resize_blocksmoved += 1;
6024 hfs_truncatefs_progress(hfsmp);
6025 if (!error) {
6026 printf ("hfs_reclaim_journal_info: Relocated 1 block from journal info on \"%s\"\n",
6027 hfsmp->vcbVN);
6028 if (hfs_resize_debug) {
6029 printf ("hfs_reclaim_journal_info_block: Successfully relocated journal info block from (%u,%u) to (%u,%u)\n", oldBlock, blockCount, newBlock, blockCount);
6030 }
6031 }
6032 return error;
6033
6034 free_fail:
6035 journal_err = BlockDeallocate(hfsmp, newBlock, blockCount, HFS_ALLOC_SKIPFREEBLKS);
6036 if (journal_err) {
6037 printf("hfs_reclaim_journal_info_block: BlockDeallocate returned %d\n", error);
6038 hfs_mark_volume_inconsistent(hfsmp);
6039 }
6040
6041 fail:
6042 hfs_systemfile_unlock(hfsmp, lockflags);
6043 (void) hfs_end_transaction(hfsmp);
6044 if (hfs_resize_debug) {
6045 printf ("hfs_reclaim_journal_info_block: Error relocating journal info block (error=%d)\n", error);
6046 }
6047 return error;
6048 }
6049
6050
6051 /*
6052 * This function traverses through all extended attribute records for a given
6053 * fileID, and calls function that reclaims data blocks that exist in the
6054 * area of the disk being reclaimed which in turn is responsible for allocating
 * new space, copying extent data, deallocating old space, and if required,
6056 * splitting the extent.
6057 *
6058 * Note: The caller has already acquired the cnode lock on the file. Therefore
6059 * we are assured that no other thread would be creating/deleting/modifying
6060 * extended attributes for this file.
6061 *
6062 * Side Effects:
6063 * hfsmp->hfs_resize_blocksmoved is incremented by the number of allocation
6064 * blocks that were relocated.
6065 *
6066 * Returns:
6067 * 0 on success, non-zero on failure.
6068 */
static int
hfs_reclaim_xattr(struct hfsmount *hfsmp, struct vnode *vp, u_int32_t fileID, u_int32_t allocLimit, vfs_context_t context)
{
	int error = 0;
	struct hfs_reclaim_extent_info *extent_info;
	int i;
	HFSPlusAttrKey *key;
	int *lockflags;	/* points into extent_info so lock state stays shared
			 * with hfs_reclaim_extent -- NOTE(review): confirm the
			 * helper updates extent_info->lockflags */

	if (hfs_resize_debug) {
		printf("hfs_reclaim_xattr: === Start reclaiming xattr for id=%u ===\n", fileID);
	}

	/* Per-call scratch state; freed unconditionally at 'out'. */
	MALLOC(extent_info, struct hfs_reclaim_extent_info *,
	       sizeof(struct hfs_reclaim_extent_info), M_TEMP, M_WAITOK);
	if (extent_info == NULL) {
		return ENOMEM;
	}
	bzero(extent_info, sizeof(struct hfs_reclaim_extent_info));
	extent_info->vp = vp;
	extent_info->fileID = fileID;
	extent_info->is_xattr = true;
	extent_info->is_sysfile = vnode_issystem(vp);
	/* All xattr extents live in the attributes B-tree file. */
	extent_info->fcb = VTOF(hfsmp->hfs_attribute_vp);
	lockflags = &(extent_info->lockflags);
	*lockflags = SFL_ATTRIBUTE | SFL_BITMAP;

	/* Initialize iterator from the extent_info structure */
	MALLOC(extent_info->iterator, struct BTreeIterator *,
	       sizeof(struct BTreeIterator), M_TEMP, M_WAITOK);
	if (extent_info->iterator == NULL) {
		error = ENOMEM;
		goto out;
	}
	bzero(extent_info->iterator, sizeof(struct BTreeIterator));

	/* Build attribute key: fileID with a NULL name positions the search
	 * at the start of this file's attribute records. */
	key = (HFSPlusAttrKey *)&(extent_info->iterator->key);
	error = hfs_buildattrkey(fileID, NULL, key);
	if (error) {
		goto out;
	}

	/* Initialize btdata from extent_info structure. Note that the
	 * buffer pointer actually points to the xattr record from the
	 * extent_info structure itself.
	 */
	extent_info->btdata.bufferAddress = &(extent_info->record.xattr);
	extent_info->btdata.itemSize = sizeof(HFSPlusAttrRecord);
	extent_info->btdata.itemCount = 1;

	/*
	 * Sync all extent-based attribute data to the disk.
	 *
	 * All extent-based attribute data I/O is performed via cluster
	 * I/O using a virtual file that spans across entire file system
	 * space.
	 */
	hfs_lock_truncate(VTOC(hfsmp->hfs_attrdata_vp), HFS_EXCLUSIVE_LOCK);
	(void)cluster_push(hfsmp->hfs_attrdata_vp, 0);
	error = vnode_waitforwrites(hfsmp->hfs_attrdata_vp, 0, 0, 0, "hfs_reclaim_xattr");
	hfs_unlock_truncate(VTOC(hfsmp->hfs_attrdata_vp), 0);
	if (error) {
		goto out;
	}

	/* Search for extended attribute for current file. This
	 * will place the iterator before the first matching record.
	 */
	*lockflags = hfs_systemfile_lock(hfsmp, *lockflags, HFS_EXCLUSIVE_LOCK);
	error = BTSearchRecord(extent_info->fcb, extent_info->iterator,
			&(extent_info->btdata), &(extent_info->recordlen),
			extent_info->iterator);
	hfs_systemfile_unlock(hfsmp, *lockflags);
	if (error) {
		if (error != btNotFound) {
			goto out;
		}
		/* btNotFound is expected here, so just mask it */
		error = 0;
	}

	/* Walk every attribute record for this fileID, relocating any
	 * extent that reaches past allocLimit.  The system-file lock is
	 * dropped between records so other work can interleave. */
	while (1) {
		/* Iterate to the next record */
		*lockflags = hfs_systemfile_lock(hfsmp, *lockflags, HFS_EXCLUSIVE_LOCK);
		error = BTIterateRecord(extent_info->fcb, kBTreeNextRecord,
				extent_info->iterator, &(extent_info->btdata),
				&(extent_info->recordlen));
		hfs_systemfile_unlock(hfsmp, *lockflags);

		/* Stop the iteration if we encounter end of btree or xattr with different fileID */
		if (error || key->fileID != fileID) {
			if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) {
				error = 0;
			}
			break;
		}

		/* We only care about extent-based EAs */
		if ((extent_info->record.xattr.recordType != kHFSPlusAttrForkData) &&
		    (extent_info->record.xattr.recordType != kHFSPlusAttrExtents)) {
			continue;
		}

		/* ForkData carries the first eight extents; each Extents
		 * record carries an overflow group of eight more. */
		if (extent_info->record.xattr.recordType == kHFSPlusAttrForkData) {
			extent_info->overflow_count = 0;
			extent_info->extents = extent_info->record.xattr.forkData.theFork.extents;
		} else if (extent_info->record.xattr.recordType == kHFSPlusAttrExtents) {
			extent_info->overflow_count++;
			extent_info->extents = extent_info->record.xattr.overflowExtents.extents;
		}

		extent_info->recStartBlock = key->startBlock;
		for (i = 0; i < kHFSPlusExtentDensity; i++) {
			/* A zero-length extent terminates the record early. */
			if (extent_info->extents[i].blockCount == 0) {
				break;
			}
			extent_info->extent_index = i;
			error = hfs_reclaim_extent(hfsmp, allocLimit, extent_info, context);
			if (error) {
				printf ("hfs_reclaim_xattr: fileID=%u hfs_reclaim_extent error=%d\n", fileID, error);
				goto out;
			}
		}
	}

out:
	/* If any blocks were relocated, account them and report progress */
	if (extent_info->blocks_relocated) {
		hfsmp->hfs_resize_blocksmoved += extent_info->blocks_relocated;
		hfs_truncatefs_progress(hfsmp);
	}
	if (extent_info->iterator) {
		FREE(extent_info->iterator, M_TEMP);
	}
	if (extent_info) {
		FREE(extent_info, M_TEMP);
	}
	if (hfs_resize_debug) {
		printf("hfs_reclaim_xattr: === Finished relocating xattr for fileid=%u (error=%d) ===\n", fileID, error);
	}
	return error;
}
6212
6213 /*
6214 * Reclaim any extent-based extended attributes allocation blocks from
6215 * the area of the disk that is being truncated.
6216 *
6217 * The function traverses the attribute btree to find out the fileIDs
6218 * of the extended attributes that need to be relocated. For every
6219 * file whose large EA requires relocation, it looks up the cnode and
6220 * calls hfs_reclaim_xattr() to do all the work for allocating
6221 * new space, copying data, deallocating old space, and if required,
6222 * splitting the extents.
6223 *
6224 * Inputs:
6225 * allocLimit - starting block of the area being reclaimed
6226 *
6227 * Returns:
6228 * returns 0 on success, non-zero on failure.
6229 */
static int
hfs_reclaim_xattrspace(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context)
{
	int error = 0;
	FCB *fcb;
	struct BTreeIterator *iterator = NULL;
	struct FSBufferDescriptor btdata;
	HFSPlusAttrKey *key;
	HFSPlusAttrRecord rec;
	int lockflags = 0;
	cnid_t prev_fileid = 0;	/* last fileID already handled; used to skip duplicates */
	struct vnode *vp;
	int need_relocate;
	int btree_operation;
	u_int32_t files_moved = 0;
	u_int32_t prev_blocksmoved;
	int i;

	fcb = VTOF(hfsmp->hfs_attribute_vp);
	/* Store the value to print total blocks moved by this function in end */
	prev_blocksmoved = hfsmp->hfs_resize_blocksmoved;

	if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) {
		return ENOMEM;
	}
	/* Zeroed iterator => BTIterateRecord starts from the first record. */
	bzero(iterator, sizeof(*iterator));
	key = (HFSPlusAttrKey *)&iterator->key;
	btdata.bufferAddress = &rec;
	btdata.itemSize = sizeof(rec);
	btdata.itemCount = 1;

	need_relocate = false;
	btree_operation = kBTreeFirstRecord;
	/* Traverse the attribute btree to find extent-based EAs to reclaim.
	 * The shared lock is taken per record so the tree isn't held across
	 * the (potentially slow) relocation of a file's xattrs. */
	while (1) {
		lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE, HFS_SHARED_LOCK);
		error = BTIterateRecord(fcb, btree_operation, iterator, &btdata, NULL);
		hfs_systemfile_unlock(hfsmp, lockflags);
		if (error) {
			/* End-of-tree conditions are a normal termination. */
			if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) {
				error = 0;
			}
			break;
		}
		btree_operation = kBTreeNextRecord;

		/* If the extents of current fileID were already relocated, skip it */
		if (prev_fileid == key->fileID) {
			continue;
		}

		/* Check if any of the extents in the current record need to be relocated.
		 * Only ForkData and (overflow) Extents records carry extents; any other
		 * record type leaves need_relocate false and is skipped below. */
		need_relocate = false;
		switch(rec.recordType) {
			case kHFSPlusAttrForkData:
				for (i = 0; i < kHFSPlusExtentDensity; i++) {
					if (rec.forkData.theFork.extents[i].blockCount == 0) {
						break;
					}
					/* Extent crosses allocLimit => must move. */
					if ((rec.forkData.theFork.extents[i].startBlock +
					     rec.forkData.theFork.extents[i].blockCount) > allocLimit) {
						need_relocate = true;
						break;
					}
				}
				break;

			case kHFSPlusAttrExtents:
				for (i = 0; i < kHFSPlusExtentDensity; i++) {
					if (rec.overflowExtents.extents[i].blockCount == 0) {
						break;
					}
					if ((rec.overflowExtents.extents[i].startBlock +
					     rec.overflowExtents.extents[i].blockCount) > allocLimit) {
						need_relocate = true;
						break;
					}
				}
				break;
		};

		/* Continue iterating to next attribute record */
		if (need_relocate == false) {
			continue;
		}

		/* Look up the vnode for corresponding file. The cnode
		 * will be locked which will ensure that no one modifies
		 * the xattrs when we are relocating them.
		 *
		 * We want to allow open-unlinked files to be moved,
		 * so provide allow_deleted == 1 for hfs_vget().
		 */
		if (hfs_vget(hfsmp, key->fileID, &vp, 0, 1) != 0) {
			continue;
		}

		error = hfs_reclaim_xattr(hfsmp, vp, key->fileID, allocLimit, context);
		hfs_unlock(VTOC(vp));
		vnode_put(vp);
		if (error) {
			printf ("hfs_reclaim_xattrspace: Error relocating xattrs for fileid=%u (error=%d)\n", key->fileID, error);
			break;
		}
		/* hfs_reclaim_xattr handled every record for this fileID,
		 * so remember it and skip its remaining records. */
		prev_fileid = key->fileID;
		files_moved++;
	}

	if (files_moved) {
		printf("hfs_reclaim_xattrspace: Relocated %u xattr blocks from %u files on \"%s\"\n",
		       (hfsmp->hfs_resize_blocksmoved - prev_blocksmoved),
		       files_moved, hfsmp->vcbVN);
	}

	kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator));
	return error;
}
6347
6348 /*
6349 * Reclaim blocks from regular files.
6350 *
6351 * This function iterates over all the record in catalog btree looking
6352 * for files with extents that overlap into the space we're trying to
6353 * free up. If a file extent requires relocation, it looks up the vnode
6354 * and calls function to relocate the data.
6355 *
6356 * Returns:
6357 * Zero on success, non-zero on failure.
6358 */
static int
hfs_reclaim_filespace(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context)
{
	int error;
	FCB *fcb;
	struct BTreeIterator *iterator = NULL;
	struct FSBufferDescriptor btdata;
	int btree_operation;
	int lockflags;
	struct HFSPlusCatalogFile filerec;
	struct vnode *vp;
	struct vnode *rvp;
	struct filefork *datafork;
	u_int32_t files_moved = 0;
	u_int32_t prev_blocksmoved;

	fcb = VTOF(hfsmp->hfs_catalog_vp);
	/* Store the value to print total blocks moved by this function at the end */
	prev_blocksmoved = hfsmp->hfs_resize_blocksmoved;

	if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) {
		return ENOMEM;
	}
	/* Zeroed iterator => BTIterateRecord starts from the first record. */
	bzero(iterator, sizeof(*iterator));

	btdata.bufferAddress = &filerec;
	btdata.itemSize = sizeof(filerec);
	btdata.itemCount = 1;

	/* Walk every catalog record; the catalog lock is dropped per record
	 * so it is not held across the relocation of a file's forks. */
	btree_operation = kBTreeFirstRecord;
	while (1) {
		lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
		error = BTIterateRecord(fcb, btree_operation, iterator, &btdata, NULL);
		hfs_systemfile_unlock(hfsmp, lockflags);
		if (error) {
			/* End-of-tree conditions are a normal termination. */
			if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) {
				error = 0;
			}
			break;
		}
		btree_operation = kBTreeNextRecord;

		/* Only file records carry extents worth checking here. */
		if (filerec.recordType != kHFSPlusFileRecord) {
			continue;
		}

		/* Check if any of the extents require relocation */
		if (hfs_file_extent_overlaps(hfsmp, allocLimit, &filerec) == false) {
			continue;
		}

		/* We want to allow open-unlinked files to be moved, so allow_deleted == 1 */
		if (hfs_vget(hfsmp, filerec.fileID, &vp, 0, 1) != 0) {
			continue;
		}

		/* If data fork exists or item is a directory hard link, relocate blocks */
		datafork = VTOF(vp);
		if ((datafork && datafork->ff_blocks > 0) || vnode_isdir(vp)) {
			error = hfs_reclaim_file(hfsmp, vp, filerec.fileID,
					kHFSDataForkType, allocLimit, context);
			if (error) {
				printf ("hfs_reclaimspace: Error reclaiming datafork blocks of fileid=%u (error=%d)\n", filerec.fileID, error);
				hfs_unlock(VTOC(vp));
				vnode_put(vp);
				break;
			}
		}

		/* If resource fork exists or item is a directory hard link, relocate blocks.
		 * c_blocks minus the data fork's blocks leaves the resource fork's share. */
		if (((VTOC(vp)->c_blocks - (datafork ? datafork->ff_blocks : 0)) > 0) || vnode_isdir(vp)) {
			if (vnode_isdir(vp)) {
				/* Resource fork vnode lookup is invalid for directory hard link.
				 * So we fake data fork vnode as resource fork vnode.
				 */
				rvp = vp;
			} else {
				error = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE, FALSE);
				if (error) {
					printf ("hfs_reclaimspace: Error looking up rvp for fileid=%u (error=%d)\n", filerec.fileID, error);
					hfs_unlock(VTOC(vp));
					vnode_put(vp);
					break;
				}
				/* Defer releasing the rsrc vnode reference; the flag is
				 * consumed elsewhere -- NOTE(review): confirm against
				 * the C_NEED_RVNODE_PUT handling in this file. */
				VTOC(rvp)->c_flag |= C_NEED_RVNODE_PUT;
			}

			error = hfs_reclaim_file(hfsmp, rvp, filerec.fileID,
					kHFSResourceForkType, allocLimit, context);
			if (error) {
				printf ("hfs_reclaimspace: Error reclaiming rsrcfork blocks of fileid=%u (error=%d)\n", filerec.fileID, error);
				hfs_unlock(VTOC(vp));
				vnode_put(vp);
				break;
			}
		}

		/* The file forks were relocated successfully, now drop the
		 * cnode lock and vnode reference, and continue iterating to
		 * next catalog record.
		 */
		hfs_unlock(VTOC(vp));
		vnode_put(vp);
		files_moved++;
	}

	if (files_moved) {
		printf("hfs_reclaim_filespace: Relocated %u blocks from %u files on \"%s\"\n",
		       (hfsmp->hfs_resize_blocksmoved - prev_blocksmoved),
		       files_moved, hfsmp->vcbVN);
	}

	kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator));
	return error;
}
6474
6475 /*
6476 * Reclaim space at the end of a file system.
6477 *
6478 * Inputs -
6479 * allocLimit - start block of the space being reclaimed
6480 * reclaimblks - number of allocation blocks to reclaim
6481 */
static int
hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t allocLimit, u_int32_t reclaimblks, vfs_context_t context)
{
	int error = 0;

	/*
	 * Preflight the bitmap to find out total number of blocks that need
	 * relocation.
	 *
	 * Note: Since allocLimit is set to the location of new alternate volume
	 * header, the check below does not account for blocks allocated for old
	 * alternate volume header.
	 */
	error = hfs_count_allocated(hfsmp, allocLimit, reclaimblks, &(hfsmp->hfs_resize_totalblocks));
	if (error) {
		printf ("hfs_reclaimspace: Unable to determine total blocks to reclaim error=%d\n", error);
		return error;
	}
	if (hfs_resize_debug) {
		printf ("hfs_reclaimspace: Total number of blocks to reclaim = %u\n", hfsmp->hfs_resize_totalblocks);
	}

	/*
	 * The relocation order below is deliberate: the special/system files
	 * (allocation, extents, catalog, attributes, startup) are moved first,
	 * then the journal is flushed before the journal file and journal info
	 * block are moved, and finally ordinary file and xattr extents are
	 * reclaimed.  Any error aborts the whole sequence and is returned to
	 * the caller.
	 */

	/* Relocate extents of the Allocation file if they're in the way. */
	error = hfs_reclaim_file(hfsmp, hfsmp->hfs_allocation_vp, kHFSAllocationFileID,
			kHFSDataForkType, allocLimit, context);
	if (error) {
		printf("hfs_reclaimspace: reclaim allocation file returned %d\n", error);
		return error;
	}

	/* Relocate extents of the Extents B-tree if they're in the way. */
	error = hfs_reclaim_file(hfsmp, hfsmp->hfs_extents_vp, kHFSExtentsFileID,
			kHFSDataForkType, allocLimit, context);
	if (error) {
		printf("hfs_reclaimspace: reclaim extents b-tree returned %d\n", error);
		return error;
	}

	/* Relocate extents of the Catalog B-tree if they're in the way. */
	error = hfs_reclaim_file(hfsmp, hfsmp->hfs_catalog_vp, kHFSCatalogFileID,
			kHFSDataForkType, allocLimit, context);
	if (error) {
		printf("hfs_reclaimspace: reclaim catalog b-tree returned %d\n", error);
		return error;
	}

	/* Relocate extents of the Attributes B-tree if they're in the way. */
	error = hfs_reclaim_file(hfsmp, hfsmp->hfs_attribute_vp, kHFSAttributesFileID,
			kHFSDataForkType, allocLimit, context);
	if (error) {
		printf("hfs_reclaimspace: reclaim attribute b-tree returned %d\n", error);
		return error;
	}

	/* Relocate extents of the Startup File if there is one and they're in the way. */
	error = hfs_reclaim_file(hfsmp, hfsmp->hfs_startup_vp, kHFSStartupFileID,
			kHFSDataForkType, allocLimit, context);
	if (error) {
		printf("hfs_reclaimspace: reclaim startup file returned %d\n", error);
		return error;
	}

	/*
	 * We need to make sure the alternate volume header gets flushed if we moved
	 * any extents in the volume header.  But we need to do that before
	 * shrinking the size of the volume, or else the journal code will panic
	 * with an invalid (too large) block number.
	 *
	 * Note that blks_moved will be set if ANY extent was moved, even
	 * if it was just an overflow extent.  In this case, the journal_flush isn't
	 * strictly required, but shouldn't hurt.
	 */
	if (hfsmp->hfs_resize_blocksmoved) {
		hfs_journal_flush(hfsmp, FALSE);
	}

	/* Relocate journal file blocks if they're in the way. */
	error = hfs_reclaim_journal_file(hfsmp, allocLimit, context);
	if (error) {
		printf("hfs_reclaimspace: hfs_reclaim_journal_file failed (%d)\n", error);
		return error;
	}

	/* Relocate journal info block blocks if they're in the way. */
	error = hfs_reclaim_journal_info_block(hfsmp, allocLimit, context);
	if (error) {
		printf("hfs_reclaimspace: hfs_reclaim_journal_info_block failed (%d)\n", error);
		return error;
	}

	/* Reclaim extents from catalog file records */
	error = hfs_reclaim_filespace(hfsmp, allocLimit, context);
	if (error) {
		printf ("hfs_reclaimspace: hfs_reclaim_filespace returned error=%d\n", error);
		return error;
	}

	/* Reclaim extents from extent-based extended attributes, if any */
	error = hfs_reclaim_xattrspace(hfsmp, allocLimit, context);
	if (error) {
		printf ("hfs_reclaimspace: hfs_reclaim_xattrspace returned error=%d\n", error);
		return error;
	}

	return error;
}
6588
6589
6590 /*
6591 * Check if there are any extents (including overflow extents) that overlap
6592 * into the disk space that is being reclaimed.
6593 *
6594 * Output -
6595 * true - One of the extents need to be relocated
6596 * false - No overflow extents need to be relocated, or there was an error
6597 */
static int
hfs_file_extent_overlaps(struct hfsmount *hfsmp, u_int32_t allocLimit, struct HFSPlusCatalogFile *filerec)
{
	struct BTreeIterator * iterator = NULL;
	struct FSBufferDescriptor btdata;
	HFSPlusExtentRecord extrec;
	HFSPlusExtentKey *extkeyptr;
	FCB *fcb;
	int overlapped = false;
	int i, j;
	int error;
	int lockflags = 0;
	u_int32_t endblock;

	/* Check if data fork overlaps the target space */
	for (i = 0; i < kHFSPlusExtentDensity; ++i) {
		if (filerec->dataFork.extents[i].blockCount == 0) {
			break;
		}
		endblock = filerec->dataFork.extents[i].startBlock +
			filerec->dataFork.extents[i].blockCount;
		/* An extent that ends past allocLimit lies (at least partly)
		 * in the space being reclaimed.
		 */
		if (endblock > allocLimit) {
			overlapped = true;
			goto out;
		}
	}

	/* Check if resource fork overlaps the target space */
	for (j = 0; j < kHFSPlusExtentDensity; ++j) {
		if (filerec->resourceFork.extents[j].blockCount == 0) {
			break;
		}
		endblock = filerec->resourceFork.extents[j].startBlock +
			filerec->resourceFork.extents[j].blockCount;
		if (endblock > allocLimit) {
			overlapped = true;
			goto out;
		}
	}

	/* Return back if there are no overflow extents for this file.
	 * A fork can only have overflow extents when all eight in-catalog
	 * extent slots are in use, i.e. when its loop above ran to
	 * completion without hitting an empty slot.
	 */
	if ((i < kHFSPlusExtentDensity) && (j < kHFSPlusExtentDensity)) {
		goto out;
	}

	/* Allocation failure is reported as "no overlap" (0), consistent
	 * with the documented behavior of returning false on error.
	 */
	if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) {
		return 0;
	}
	bzero(iterator, sizeof(*iterator));

	/* Key for the first possible overflow extent of this file:
	 * forkType 0 and startBlock 0 sort before any real record.
	 */
	extkeyptr = (HFSPlusExtentKey *)&iterator->key;
	extkeyptr->keyLength = kHFSPlusExtentKeyMaximumLength;
	extkeyptr->forkType = 0;
	extkeyptr->fileID = filerec->fileID;
	extkeyptr->startBlock = 0;

	btdata.bufferAddress = &extrec;
	btdata.itemSize = sizeof(extrec);
	btdata.itemCount = 1;

	fcb = VTOF(hfsmp->hfs_extents_vp);

	lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_SHARED_LOCK);

	/* This will position the iterator just before the first overflow
	 * extent record for given fileID.  It will always return btNotFound,
	 * so we special case the error code.
	 */
	error = BTSearchRecord(fcb, iterator, &btdata, NULL, iterator);
	if (error && (error != btNotFound)) {
		goto out;
	}

	/* BTIterateRecord() might return error if the btree is empty, and
	 * therefore we return that the extent does not overflow to the caller
	 */
	error = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL);
	while (error == 0) {
		/* Stop when we encounter a different file. */
		if (extkeyptr->fileID != filerec->fileID) {
			break;
		}
		/* Check if any of the forks exist in the target space. */
		for (i = 0; i < kHFSPlusExtentDensity; ++i) {
			if (extrec[i].blockCount == 0) {
				break;
			}
			endblock = extrec[i].startBlock + extrec[i].blockCount;
			if (endblock > allocLimit) {
				overlapped = true;
				goto out;
			}
		}
		/* Look for more records. */
		error = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL);
	}

out:
	/* Common exit: drop the extents-tree lock and free the iterator
	 * if they were acquired.
	 */
	if (lockflags) {
		hfs_systemfile_unlock(hfsmp, lockflags);
	}
	if (iterator) {
		kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator));
	}
	return overlapped;
}
6703
6704
6705 /*
6706 * Calculate the progress of a file system resize operation.
6707 */
6708 __private_extern__
6709 int
6710 hfs_resize_progress(struct hfsmount *hfsmp, u_int32_t *progress)
6711 {
6712 if ((hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) == 0) {
6713 return (ENXIO);
6714 }
6715
6716 if (hfsmp->hfs_resize_totalblocks > 0) {
6717 *progress = (u_int32_t)((hfsmp->hfs_resize_blocksmoved * 100ULL) / hfsmp->hfs_resize_totalblocks);
6718 } else {
6719 *progress = 0;
6720 }
6721
6722 return (0);
6723 }
6724
6725
6726 /*
6727 * Creates a UUID from a unique "name" in the HFS UUID Name space.
6728 * See version 3 UUID.
6729 */
6730 static void
6731 hfs_getvoluuid(struct hfsmount *hfsmp, uuid_t result)
6732 {
6733 MD5_CTX md5c;
6734 uint8_t rawUUID[8];
6735
6736 ((uint32_t *)rawUUID)[0] = hfsmp->vcbFndrInfo[6];
6737 ((uint32_t *)rawUUID)[1] = hfsmp->vcbFndrInfo[7];
6738
6739 MD5Init( &md5c );
6740 MD5Update( &md5c, HFS_UUID_NAMESPACE_ID, sizeof( uuid_t ) );
6741 MD5Update( &md5c, rawUUID, sizeof (rawUUID) );
6742 MD5Final( result, &md5c );
6743
6744 result[6] = 0x30 | ( result[6] & 0x0F );
6745 result[8] = 0x80 | ( result[8] & 0x3F );
6746 }
6747
6748 /*
6749 * Get file system attributes.
6750 */
static int
hfs_vfs_getattr(struct mount *mp, struct vfs_attr *fsap, __unused vfs_context_t context)
{
/* Attribute masks HFS can report: everything except named-attribute
 * counts/lists (common), fork info (file), and access time (volume).
 */
#define HFS_ATTR_CMN_VALIDMASK (ATTR_CMN_VALIDMASK & ~(ATTR_CMN_NAMEDATTRCOUNT | ATTR_CMN_NAMEDATTRLIST))
#define HFS_ATTR_FILE_VALIDMASK (ATTR_FILE_VALIDMASK & ~(ATTR_FILE_FILETYPE | ATTR_FILE_FORKCOUNT | ATTR_FILE_FORKLIST))
#define HFS_ATTR_CMN_VOL_VALIDMASK (ATTR_CMN_VALIDMASK & ~(ATTR_CMN_NAMEDATTRCOUNT | ATTR_CMN_NAMEDATTRLIST | ATTR_CMN_ACCTIME))

	ExtendedVCB *vcb = VFSTOVCB(mp);
	struct hfsmount *hfsmp = VFSTOHFS(mp);
	u_int32_t freeCNIDs;

	/* Catalog node IDs remaining before the 32-bit CNID space is exhausted. */
	freeCNIDs = (u_int32_t)0xFFFFFFFF - (u_int32_t)hfsmp->vcbNxtCNID;

	/* Object / block / size counters, taken from the in-memory VCB. */
	VFSATTR_RETURN(fsap, f_objcount, (u_int64_t)hfsmp->vcbFilCnt + (u_int64_t)hfsmp->vcbDirCnt);
	VFSATTR_RETURN(fsap, f_filecount, (u_int64_t)hfsmp->vcbFilCnt);
	VFSATTR_RETURN(fsap, f_dircount, (u_int64_t)hfsmp->vcbDirCnt);
	VFSATTR_RETURN(fsap, f_maxobjcount, (u_int64_t)0xFFFFFFFF);
	VFSATTR_RETURN(fsap, f_iosize, (size_t)cluster_max_io_size(mp, 0));
	VFSATTR_RETURN(fsap, f_blocks, (u_int64_t)hfsmp->totalBlocks);
	VFSATTR_RETURN(fsap, f_bfree, (u_int64_t)hfs_freeblks(hfsmp, 0));
	VFSATTR_RETURN(fsap, f_bavail, (u_int64_t)hfs_freeblks(hfsmp, 1));
	VFSATTR_RETURN(fsap, f_bsize, (u_int32_t)vcb->blockSize);
	/* XXX needs clarification */
	VFSATTR_RETURN(fsap, f_bused, hfsmp->totalBlocks - hfs_freeblks(hfsmp, 1));
	/* Maximum files is constrained by total blocks. */
	VFSATTR_RETURN(fsap, f_files, (u_int64_t)(hfsmp->totalBlocks - 2));
	/* Free-file count is the tighter of free CNIDs and free blocks. */
	VFSATTR_RETURN(fsap, f_ffree, MIN((u_int64_t)freeCNIDs, (u_int64_t)hfs_freeblks(hfsmp, 1)));

	fsap->f_fsid.val[0] = hfsmp->hfs_raw_dev;
	fsap->f_fsid.val[1] = vfs_typenum(mp);
	VFSATTR_SET_SUPPORTED(fsap, f_fsid);

	VFSATTR_RETURN(fsap, f_signature, vcb->vcbSigWord);
	VFSATTR_RETURN(fsap, f_carbon_fsid, 0);

	if (VFSATTR_IS_ACTIVE(fsap, f_capabilities)) {
		vol_capabilities_attr_t *cap;
	
		cap = &fsap->f_capabilities;

		/* Plain HFS (standard) volumes support a much smaller
		 * feature set than HFS Plus.
		 */
		if (hfsmp->hfs_flags & HFS_STANDARD) {
			cap->capabilities[VOL_CAPABILITIES_FORMAT] =
				VOL_CAP_FMT_PERSISTENTOBJECTIDS |
				VOL_CAP_FMT_CASE_PRESERVING |
				VOL_CAP_FMT_FAST_STATFS |
				VOL_CAP_FMT_HIDDEN_FILES |
				VOL_CAP_FMT_PATH_FROM_ID;
		} else {
			/* Journal-active and case-sensitivity bits reflect the
			 * current state of this particular mount.
			 */
			cap->capabilities[VOL_CAPABILITIES_FORMAT] =
				VOL_CAP_FMT_PERSISTENTOBJECTIDS |
				VOL_CAP_FMT_SYMBOLICLINKS |
				VOL_CAP_FMT_HARDLINKS |
				VOL_CAP_FMT_JOURNAL |
				VOL_CAP_FMT_ZERO_RUNS |
				(hfsmp->jnl ? VOL_CAP_FMT_JOURNAL_ACTIVE : 0) |
				(hfsmp->hfs_flags & HFS_CASE_SENSITIVE ? VOL_CAP_FMT_CASE_SENSITIVE : 0) |
				VOL_CAP_FMT_CASE_PRESERVING |
				VOL_CAP_FMT_FAST_STATFS | 
				VOL_CAP_FMT_2TB_FILESIZE |
				VOL_CAP_FMT_HIDDEN_FILES |
#if HFS_COMPRESSION
				VOL_CAP_FMT_PATH_FROM_ID |
				VOL_CAP_FMT_DECMPFS_COMPRESSION;
#else
				VOL_CAP_FMT_PATH_FROM_ID;
#endif
		}
		/* Note: VOL_CAP_INT_COPYFILE and VOL_CAP_INT_MANLOCK appear in
		 * the 'valid' mask below but not here, i.e. they are reported
		 * as known-but-unsupported.
		 */
		cap->capabilities[VOL_CAPABILITIES_INTERFACES] =
			VOL_CAP_INT_SEARCHFS |
			VOL_CAP_INT_ATTRLIST |
			VOL_CAP_INT_NFSEXPORT |
			VOL_CAP_INT_READDIRATTR |
			VOL_CAP_INT_EXCHANGEDATA |
			VOL_CAP_INT_ALLOCATE |
			VOL_CAP_INT_VOL_RENAME |
			VOL_CAP_INT_ADVLOCK |
			VOL_CAP_INT_FLOCK |
#if NAMEDSTREAMS
			VOL_CAP_INT_EXTENDED_ATTR |
			VOL_CAP_INT_NAMEDSTREAMS;
#else
			VOL_CAP_INT_EXTENDED_ATTR;
#endif
		cap->capabilities[VOL_CAPABILITIES_RESERVED1] = 0;
		cap->capabilities[VOL_CAPABILITIES_RESERVED2] = 0;

		/* The 'valid' masks advertise which capability bits HFS knows
		 * how to report, independent of whether they are set above.
		 */
		cap->valid[VOL_CAPABILITIES_FORMAT] =
			VOL_CAP_FMT_PERSISTENTOBJECTIDS |
			VOL_CAP_FMT_SYMBOLICLINKS |
			VOL_CAP_FMT_HARDLINKS |
			VOL_CAP_FMT_JOURNAL |
			VOL_CAP_FMT_JOURNAL_ACTIVE |
			VOL_CAP_FMT_NO_ROOT_TIMES |
			VOL_CAP_FMT_SPARSE_FILES |
			VOL_CAP_FMT_ZERO_RUNS |
			VOL_CAP_FMT_CASE_SENSITIVE |
			VOL_CAP_FMT_CASE_PRESERVING |
			VOL_CAP_FMT_FAST_STATFS |
			VOL_CAP_FMT_2TB_FILESIZE |
			VOL_CAP_FMT_OPENDENYMODES |
			VOL_CAP_FMT_HIDDEN_FILES |
#if HFS_COMPRESSION
			VOL_CAP_FMT_PATH_FROM_ID |
			VOL_CAP_FMT_DECMPFS_COMPRESSION;
#else
			VOL_CAP_FMT_PATH_FROM_ID;
#endif
		cap->valid[VOL_CAPABILITIES_INTERFACES] =
			VOL_CAP_INT_SEARCHFS |
			VOL_CAP_INT_ATTRLIST |
			VOL_CAP_INT_NFSEXPORT |
			VOL_CAP_INT_READDIRATTR |
			VOL_CAP_INT_EXCHANGEDATA |
			VOL_CAP_INT_COPYFILE |
			VOL_CAP_INT_ALLOCATE |
			VOL_CAP_INT_VOL_RENAME |
			VOL_CAP_INT_ADVLOCK |
			VOL_CAP_INT_FLOCK |
			VOL_CAP_INT_MANLOCK |
#if NAMEDSTREAMS
			VOL_CAP_INT_EXTENDED_ATTR |
			VOL_CAP_INT_NAMEDSTREAMS;
#else
			VOL_CAP_INT_EXTENDED_ATTR;
#endif
		cap->valid[VOL_CAPABILITIES_RESERVED1] = 0;
		cap->valid[VOL_CAPABILITIES_RESERVED2] = 0;
		VFSATTR_SET_SUPPORTED(fsap, f_capabilities);
	}
	if (VFSATTR_IS_ACTIVE(fsap, f_attributes)) {
		vol_attributes_attr_t *attrp = &fsap->f_attributes;

		/* validattr and nativeattr are identical: everything HFS
		 * reports, it reports natively.
		 */
		attrp->validattr.commonattr = HFS_ATTR_CMN_VOL_VALIDMASK;
		attrp->validattr.volattr = ATTR_VOL_VALIDMASK & ~ATTR_VOL_INFO;
		attrp->validattr.dirattr = ATTR_DIR_VALIDMASK;
		attrp->validattr.fileattr = HFS_ATTR_FILE_VALIDMASK;
		attrp->validattr.forkattr = 0;

		attrp->nativeattr.commonattr = HFS_ATTR_CMN_VOL_VALIDMASK;
		attrp->nativeattr.volattr = ATTR_VOL_VALIDMASK & ~ATTR_VOL_INFO;
		attrp->nativeattr.dirattr = ATTR_DIR_VALIDMASK;
		attrp->nativeattr.fileattr = HFS_ATTR_FILE_VALIDMASK;
		attrp->nativeattr.forkattr = 0;
		VFSATTR_SET_SUPPORTED(fsap, f_attributes);
	}
	/* Timestamps come straight from the mount/VCB fields (whole seconds). */
	fsap->f_create_time.tv_sec = hfsmp->hfs_itime;
	fsap->f_create_time.tv_nsec = 0;
	VFSATTR_SET_SUPPORTED(fsap, f_create_time);
	fsap->f_modify_time.tv_sec = hfsmp->vcbLsMod;
	fsap->f_modify_time.tv_nsec = 0;
	VFSATTR_SET_SUPPORTED(fsap, f_modify_time);

	fsap->f_backup_time.tv_sec = hfsmp->vcbVolBkUp;
	fsap->f_backup_time.tv_nsec = 0;
	VFSATTR_SET_SUPPORTED(fsap, f_backup_time);
	if (VFSATTR_IS_ACTIVE(fsap, f_fssubtype)) {
		u_int16_t subtype = 0;

		/*
		 * Subtypes (flavors) for HFS
		 *   0:   Mac OS Extended
		 *   1:   Mac OS Extended (Journaled)
		 *   2:   Mac OS Extended (Case Sensitive)
		 *   3:   Mac OS Extended (Case Sensitive, Journaled)
		 *   4 - 127:   Reserved
		 *   128:   Mac OS Standard
		 *
		 */
		if (hfsmp->hfs_flags & HFS_STANDARD) {
			subtype = HFS_SUBTYPE_STANDARDHFS;
		} else /* HFS Plus */ {
			if (hfsmp->jnl)
				subtype |= HFS_SUBTYPE_JOURNALED;
			if (hfsmp->hfs_flags & HFS_CASE_SENSITIVE)
				subtype |= HFS_SUBTYPE_CASESENSITIVE;
		}
		fsap->f_fssubtype = subtype;
		VFSATTR_SET_SUPPORTED(fsap, f_fssubtype);
	}

	if (VFSATTR_IS_ACTIVE(fsap, f_vol_name)) {
		strlcpy(fsap->f_vol_name, (char *) hfsmp->vcbVN, MAXPATHLEN);
		VFSATTR_SET_SUPPORTED(fsap, f_vol_name);
	}
	if (VFSATTR_IS_ACTIVE(fsap, f_uuid)) {
		hfs_getvoluuid(hfsmp, fsap->f_uuid);
		VFSATTR_SET_SUPPORTED(fsap, f_uuid);
	}
	return (0);
}
6941
6942 /*
6943 * Perform a volume rename. Requires the FS' root vp.
6944 */
static int
hfs_rename_volume(struct vnode *vp, const char *name, proc_t p)
{
	ExtendedVCB *vcb = VTOVCB(vp);
	struct cnode *cp = VTOC(vp);
	struct hfsmount *hfsmp = VTOHFS(vp);
	struct cat_desc to_desc;
	struct cat_desc todir_desc;
	struct cat_desc new_desc;
	cat_cookie_t cookie;
	int lockflags;
	int error = 0;
	char converted_volname[256];
	size_t volname_length = 0;
	size_t conv_volname_length = 0;


	/*
	 * Ignore attempts to rename a volume to a zero-length name.
	 */
	if (name[0] == 0)
		return(0);

	bzero(&to_desc, sizeof(to_desc));
	bzero(&todir_desc, sizeof(todir_desc));
	bzero(&new_desc, sizeof(new_desc));
	bzero(&cookie, sizeof(cookie));

	/* Destination directory: the (virtual) root parent. */
	todir_desc.cd_parentcnid = kHFSRootParentID;
	todir_desc.cd_cnid = kHFSRootFolderID;
	todir_desc.cd_flags = CD_ISDIR;

	/* Target descriptor: the root folder under its new name. */
	to_desc.cd_nameptr = (const u_int8_t *)name;
	to_desc.cd_namelen = strlen(name);
	to_desc.cd_parentcnid = kHFSRootParentID;
	to_desc.cd_cnid = cp->c_cnid;
	to_desc.cd_flags = CD_ISDIR;

	/* Rename under cnode lock -> transaction -> catalog preflight -> catalog lock. */
	if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK)) == 0) {
		if ((error = hfs_start_transaction(hfsmp)) == 0) {
			if ((error = cat_preflight(hfsmp, CAT_RENAME, &cookie, p)) == 0) {
				lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);

				error = cat_rename(hfsmp, &cp->c_desc, &todir_desc, &to_desc, &new_desc);

				/*
				 * If successful, update the name in the VCB, ensure it's terminated.
				 */
				if (!error) {
					strlcpy((char *)vcb->vcbVN, name, sizeof(vcb->vcbVN));
					volname_length = strlen ((const char*)vcb->vcbVN);
#define DKIOCCSSETLVNAME _IOW('d', 198, char[1024])
					/* Send the volume name down to CoreStorage if necessary */
					error = utf8_normalizestr(vcb->vcbVN, volname_length, (u_int8_t*)converted_volname, &conv_volname_length, 256, UTF_PRECOMPOSED);
					if (error == 0) {
						(void) VNOP_IOCTL (hfsmp->hfs_devvp, DKIOCCSSETLVNAME, converted_volname, 0, vfs_context_current());
					}
					/* The CoreStorage notification is best-effort:
					 * a normalization failure is deliberately
					 * cleared so it does not fail the rename.
					 */
					error = 0;
				}

				hfs_systemfile_unlock(hfsmp, lockflags);
				cat_postflight(hfsmp, &cookie, p);

				/* NOTE(review): the VCB is marked dirty only when
				 * error is non-zero, which looks inverted given
				 * the header flush below -- confirm intent.
				 */
				if (error)
					MarkVCBDirty(vcb);
				(void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
			}
			hfs_end_transaction(hfsmp);
		}
		if (!error) {
			/* Release old allocated name buffer */
			if (cp->c_desc.cd_flags & CD_HASBUF) {
				const char *tmp_name = (const char *)cp->c_desc.cd_nameptr;

				cp->c_desc.cd_nameptr = 0;
				cp->c_desc.cd_namelen = 0;
				cp->c_desc.cd_flags &= ~CD_HASBUF;
				vfs_removename(tmp_name);
			}
			/* Update cnode's catalog descriptor */
			replace_desc(cp, &new_desc);
			vcb->volumeNameEncodingHint = new_desc.cd_encoding;
			cp->c_touch_chgtime = TRUE;
		}

		hfs_unlock(cp);
	}

	return(error);
}
7035
7036 /*
 * Set file system attributes.
7038 */
7039 static int
7040 hfs_vfs_setattr(struct mount *mp, struct vfs_attr *fsap, __unused vfs_context_t context)
7041 {
7042 kauth_cred_t cred = vfs_context_ucred(context);
7043 int error = 0;
7044
7045 /*
7046 * Must be superuser or owner of filesystem to change volume attributes
7047 */
7048 if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(mp)->f_owner))
7049 return(EACCES);
7050
7051 if (VFSATTR_IS_ACTIVE(fsap, f_vol_name)) {
7052 vnode_t root_vp;
7053
7054 error = hfs_vfs_root(mp, &root_vp, context);
7055 if (error)
7056 goto out;
7057
7058 error = hfs_rename_volume(root_vp, fsap->f_vol_name, vfs_context_proc(context));
7059 (void) vnode_put(root_vp);
7060 if (error)
7061 goto out;
7062
7063 VFSATTR_SET_SUPPORTED(fsap, f_vol_name);
7064 }
7065
7066 out:
7067 return error;
7068 }
7069
7070 /* If a runtime corruption is detected, set the volume inconsistent
7071 * bit in the volume attributes. The volume inconsistent bit is a persistent
7072 * bit which represents that the volume is corrupt and needs repair.
7073 * The volume inconsistent bit can be set from the kernel when it detects
7074 * runtime corruption or from file system repair utilities like fsck_hfs when
7075 * a repair operation fails. The bit should be cleared only from file system
7076 * verify/repair utility like fsck_hfs when a verify/repair succeeds.
7077 */
7078 void hfs_mark_volume_inconsistent(struct hfsmount *hfsmp)
7079 {
7080 HFS_MOUNT_LOCK(hfsmp, TRUE);
7081 if ((hfsmp->vcbAtrb & kHFSVolumeInconsistentMask) == 0) {
7082 hfsmp->vcbAtrb |= kHFSVolumeInconsistentMask;
7083 MarkVCBDirty(hfsmp);
7084 }
7085 if ((hfsmp->hfs_flags & HFS_READ_ONLY)==0) {
7086 /* Log information to ASL log */
7087 fslog_fs_corrupt(hfsmp->hfs_mp);
7088 printf("hfs: Runtime corruption detected on %s, fsck will be forced on next mount.\n", hfsmp->vcbVN);
7089 }
7090 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
7091 }
7092
7093 /* Replay the journal on the device node provided. Returns zero if
7094 * journal replay succeeded or no journal was supposed to be replayed.
7095 */
7096 static int hfs_journal_replay(vnode_t devvp, vfs_context_t context)
7097 {
7098 int retval = 0;
7099 struct mount *mp = NULL;
7100 struct hfs_mount_args *args = NULL;
7101
7102 /* Replay allowed only on raw devices */
7103 if (!vnode_ischr(devvp) && !vnode_isblk(devvp)) {
7104 retval = EINVAL;
7105 goto out;
7106 }
7107
7108 /* Create dummy mount structures */
7109 MALLOC(mp, struct mount *, sizeof(struct mount), M_TEMP, M_WAITOK);
7110 if (mp == NULL) {
7111 retval = ENOMEM;
7112 goto out;
7113 }
7114 bzero(mp, sizeof(struct mount));
7115 mount_lock_init(mp);
7116
7117 MALLOC(args, struct hfs_mount_args *, sizeof(struct hfs_mount_args), M_TEMP, M_WAITOK);
7118 if (args == NULL) {
7119 retval = ENOMEM;
7120 goto out;
7121 }
7122 bzero(args, sizeof(struct hfs_mount_args));
7123
7124 retval = hfs_mountfs(devvp, mp, args, 1, context);
7125 buf_flushdirtyblks(devvp, TRUE, 0, "hfs_journal_replay");
7126
7127 /* FSYNC the devnode to be sure all data has been flushed */
7128 retval = VNOP_FSYNC(devvp, MNT_WAIT, context);
7129
7130 out:
7131 if (mp) {
7132 mount_lock_destroy(mp);
7133 FREE(mp, M_TEMP);
7134 }
7135 if (args) {
7136 FREE(args, M_TEMP);
7137 }
7138 return retval;
7139 }
7140
7141 /*
7142 * hfs vfs operations.
7143 */
struct vfsops hfs_vfsops = {
	hfs_mount,		/* vfs_mount */
	hfs_start,		/* vfs_start */
	hfs_unmount,		/* vfs_unmount */
	hfs_vfs_root,		/* vfs_root */
	hfs_quotactl,		/* vfs_quotactl */
	hfs_vfs_getattr,	/* vfs_getattr; was hfs_statfs */
	hfs_sync,		/* vfs_sync */
	hfs_vfs_vget,		/* vfs_vget */
	hfs_fhtovp,		/* vfs_fhtovp */
	hfs_vptofh,		/* vfs_vptofh */
	hfs_init,		/* vfs_init */
	hfs_sysctl,		/* vfs_sysctl */
	hfs_vfs_setattr,	/* vfs_setattr */
	{NULL}			/* remaining (reserved) slots unimplemented */
};