/*
 * Source provenance (from gitweb scrape):
 *   apple/xnu.git, release xnu-1699.22.81, file bsd/hfs/hfs_vfsops.c
 *   (mirror: git.saurik.com)
 */
1 /*
2 * Copyright (c) 1999-2010 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1991, 1993, 1994
30 * The Regents of the University of California. All rights reserved.
31 * (c) UNIX System Laboratories, Inc.
32 * All or some portions of this file are derived from material licensed
33 * to the University of California by American Telephone and Telegraph
34 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35 * the permission of UNIX System Laboratories, Inc.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * hfs_vfsops.c
66 * derived from @(#)ufs_vfsops.c 8.8 (Berkeley) 5/20/95
67 *
68 * (c) Copyright 1997-2002 Apple Computer, Inc. All rights reserved.
69 *
70 * hfs_vfsops.c -- VFS layer for loadable HFS file system.
71 *
72 */
73 #include <sys/param.h>
74 #include <sys/systm.h>
75 #include <sys/kauth.h>
76
77 #include <sys/ubc.h>
78 #include <sys/ubc_internal.h>
79 #include <sys/vnode_internal.h>
80 #include <sys/mount_internal.h>
81 #include <sys/sysctl.h>
82 #include <sys/malloc.h>
83 #include <sys/stat.h>
84 #include <sys/quota.h>
85 #include <sys/disk.h>
86 #include <sys/paths.h>
87 #include <sys/utfconv.h>
88 #include <sys/kdebug.h>
89 #include <sys/fslog.h>
90 #include <sys/ubc.h>
91
92 #include <kern/locks.h>
93
94 #include <vfs/vfs_journal.h>
95
96 #include <miscfs/specfs/specdev.h>
97 #include <hfs/hfs_mount.h>
98
99 #include <libkern/crypto/md5.h>
100 #include <uuid/uuid.h>
101
102 #include "hfs.h"
103 #include "hfs_catalog.h"
104 #include "hfs_cnode.h"
105 #include "hfs_dbg.h"
106 #include "hfs_endian.h"
107 #include "hfs_hotfiles.h"
108 #include "hfs_quota.h"
109
110 #include "hfscommon/headers/FileMgrInternal.h"
111 #include "hfscommon/headers/BTreesInternal.h"
112
113 #if CONFIG_PROTECT
114 #include <sys/cprotect.h>
115 #endif
116
117 #if CONFIG_HFS_ALLOC_RBTREE
118 #include "hfscommon/headers/HybridAllocator.h"
119 #endif
120
/* Non-zero: emit verbose printf diagnostics on mount/unmount error paths. */
#define HFS_MOUNT_DEBUG 1

#if HFS_DIAGNOSTIC
int hfs_dbg_all = 0;
int hfs_dbg_err = 0;
#endif

/* Enable/disable debugging code for live volume resizing */
int hfs_resize_debug = 0;

/* Lock group/attribute globals shared by all HFS locks
 * (presumably initialized in hfs_init() — defined below this chunk; verify). */
lck_grp_attr_t * hfs_group_attr;
lck_attr_t * hfs_lock_attr;
lck_grp_t * hfs_mutex_group;
lck_grp_t * hfs_rwlock_group;
lck_grp_t * hfs_spinlock_group;

/* Vnode operation vectors registered with VFS; defined outside this file. */
extern struct vnodeopv_desc hfs_vnodeop_opv_desc;
extern struct vnodeopv_desc hfs_std_vnodeop_opv_desc;

/* not static so we can re-use in hfs_readwrite.c for build_path calls */
int hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context);

/* Forward declarations for file-local helpers and VFS operation implementations. */
static int hfs_changefs(struct mount *mp, struct hfs_mount_args *args);
static int hfs_fhtovp(struct mount *mp, int fhlen, unsigned char *fhp, struct vnode **vpp, vfs_context_t context);
static int hfs_flushfiles(struct mount *, int, struct proc *);
static int hfs_flushMDB(struct hfsmount *hfsmp, int waitfor, int altflush);
static int hfs_getmountpoint(struct vnode *vp, struct hfsmount **hfsmpp);
static int hfs_init(struct vfsconf *vfsp);
static int hfs_vfs_root(struct mount *mp, struct vnode **vpp, vfs_context_t context);
static int hfs_quotactl(struct mount *, int, uid_t, caddr_t, vfs_context_t context);
static int hfs_start(struct mount *mp, int flags, vfs_context_t context);
static int hfs_vptofh(struct vnode *vp, int *fhlenp, unsigned char *fhp, vfs_context_t context);
static int hfs_file_extent_overlaps(struct hfsmount *hfsmp, u_int32_t allocLimit, struct HFSPlusCatalogFile *filerec);
static int hfs_journal_replay(vnode_t devvp, vfs_context_t context);
static int hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t allocLimit, u_int32_t reclaimblks, vfs_context_t context);

/* Red-black-tree allocator setup/teardown (used when CONFIG_HFS_ALLOC_RBTREE). */
void hfs_initialize_allocator (struct hfsmount *hfsmp);
int hfs_teardown_allocator (struct hfsmount *hfsmp);

/* Non-static entry points shared with other HFS translation units. */
int hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t context);
int hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, int journal_replay_only, vfs_context_t context);
int hfs_reload(struct mount *mp);
int hfs_statfs(struct mount *mp, register struct vfsstatfs *sbp, vfs_context_t context);
int hfs_sync(struct mount *mp, int waitfor, vfs_context_t context);
int hfs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp,
               user_addr_t newp, size_t newlen, vfs_context_t context);
int hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context);
168
169 /*
170 * Called by vfs_mountroot when mounting HFS Plus as root.
171 */
172
173 int
174 hfs_mountroot(mount_t mp, vnode_t rvp, vfs_context_t context)
175 {
176 struct hfsmount *hfsmp;
177 ExtendedVCB *vcb;
178 struct vfsstatfs *vfsp;
179 int error;
180
181 if ((error = hfs_mountfs(rvp, mp, NULL, 0, context))) {
182 if (HFS_MOUNT_DEBUG) {
183 printf("hfs_mountroot: hfs_mountfs returned %d, rvp (%p) name (%s) \n",
184 error, rvp, (rvp->v_name ? rvp->v_name : "unknown device"));
185 }
186 return (error);
187 }
188
189 /* Init hfsmp */
190 hfsmp = VFSTOHFS(mp);
191
192 hfsmp->hfs_uid = UNKNOWNUID;
193 hfsmp->hfs_gid = UNKNOWNGID;
194 hfsmp->hfs_dir_mask = (S_IRWXU | S_IRGRP|S_IXGRP | S_IROTH|S_IXOTH); /* 0755 */
195 hfsmp->hfs_file_mask = (S_IRWXU | S_IRGRP|S_IXGRP | S_IROTH|S_IXOTH); /* 0755 */
196
197 /* Establish the free block reserve. */
198 vcb = HFSTOVCB(hfsmp);
199 vcb->reserveBlocks = ((u_int64_t)vcb->totalBlocks * HFS_MINFREE) / 100;
200 vcb->reserveBlocks = MIN(vcb->reserveBlocks, HFS_MAXRESERVE / vcb->blockSize);
201
202 vfsp = vfs_statfs(mp);
203 (void)hfs_statfs(mp, vfsp, NULL);
204
205 return (0);
206 }
207
208
209 /*
210 * VFS Operations.
211 *
212 * mount system call
213 */
214
/*
 * hfs_mount - VFS mount entry point.
 *
 * Handles three flavors of request, selected by the mount command flags:
 *   MNT_UPDATE + MNT_RELOAD : re-read incore data after fsck (read-only mounts only)
 *   MNT_UPDATE              : rw->ro downgrade, ro->rw upgrade, and/or parameter changes
 *   (no MNT_UPDATE)         : fresh mount via hfs_mountfs()
 *
 * Returns 0 on success or an errno value; on success the cached vfsstatfs
 * for the mount is refreshed.
 */
int
hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t context)
{
	struct proc *p = vfs_context_proc(context);
	struct hfsmount *hfsmp = NULL;
	struct hfs_mount_args args;
	int retval = E_NONE;
	u_int32_t cmdflags;

	/* Copy the user-space mount arguments into the kernel. */
	if ((retval = copyin(data, (caddr_t)&args, sizeof(args)))) {
		if (HFS_MOUNT_DEBUG) {
			printf("hfs_mount: copyin returned %d for fs\n", retval);
		}
		return (retval);
	}
	cmdflags = (u_int32_t)vfs_flags(mp) & MNT_CMDFLAGS;
	if (cmdflags & MNT_UPDATE) {
		hfsmp = VFSTOHFS(mp);

		/* Reload incore data after an fsck. */
		if (cmdflags & MNT_RELOAD) {
			if (vfs_isrdonly(mp)) {
				int error = hfs_reload(mp);
				if (error && HFS_MOUNT_DEBUG) {
					printf("hfs_mount: hfs_reload returned %d on %s \n", error, hfsmp->vcbVN);
				}
				return error;
			}
			else {
				/* Reload is only defined for read-only mounts. */
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: MNT_RELOAD not supported on rdwr filesystem %s\n", hfsmp->vcbVN);
				}
				return (EINVAL);
			}
		}

		/* Change to a read-only file system. */
		if (((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) &&
		    vfs_isrdonly(mp)) {
			int flags;

			/* Set flag to indicate that a downgrade to read-only
			 * is in progress and therefore block any further
			 * modifications to the file system.
			 */
			hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);
			hfsmp->hfs_flags |= HFS_RDONLY_DOWNGRADE;
			hfsmp->hfs_downgrading_proc = current_thread();
			hfs_unlock_global (hfsmp);

			/* use VFS_SYNC to push out System (btree) files */
			retval = VFS_SYNC(mp, MNT_WAIT, context);
			if (retval && ((cmdflags & MNT_FORCE) == 0)) {
				/* Sync failed and the downgrade is not forced: undo the
				 * in-progress markers and abort. */
				hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE;
				hfsmp->hfs_downgrading_proc = NULL;
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: VFS_SYNC returned %d during b-tree sync of %s \n", retval, hfsmp->vcbVN);
				}
				goto out;
			}

			flags = WRITECLOSE;
			if (cmdflags & MNT_FORCE)
				flags |= FORCECLOSE;

			if ((retval = hfs_flushfiles(mp, flags, p))) {
				hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE;
				hfsmp->hfs_downgrading_proc = NULL;
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: hfs_flushfiles returned %d on %s \n", retval, hfsmp->vcbVN);
				}
				goto out;
			}

			/* mark the volume cleanly unmounted */
			hfsmp->vcbAtrb |= kHFSVolumeUnmountedMask;
			retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
			/* NOTE(review): HFS_READ_ONLY is set before retval is
			 * examined; the failure path below clears it again. */
			hfsmp->hfs_flags |= HFS_READ_ONLY;

			/* also get the volume bitmap blocks */
			if (!retval) {
				if (vnode_mount(hfsmp->hfs_devvp) == mp) {
					retval = hfs_fsync(hfsmp->hfs_devvp, MNT_WAIT, 0, p);
				} else {
					/* devvp belongs to another mount; hold a use
					 * count across the fsync. */
					vnode_get(hfsmp->hfs_devvp);
					retval = VNOP_FSYNC(hfsmp->hfs_devvp, MNT_WAIT, context);
					vnode_put(hfsmp->hfs_devvp);
				}
			}
			if (retval) {
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: FSYNC on devvp returned %d for fs %s\n", retval, hfsmp->vcbVN);
				}
				/* Roll back the downgrade markers and the read-only bit. */
				hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE;
				hfsmp->hfs_downgrading_proc = NULL;
				hfsmp->hfs_flags &= ~HFS_READ_ONLY;
				goto out;
			}
			if (hfsmp->jnl) {
				hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);

				journal_close(hfsmp->jnl);
				hfsmp->jnl = NULL;

				// Note: we explicitly don't want to shutdown
				// access to the jvp because we may need
				// it later if we go back to being read-write.

				hfs_unlock_global (hfsmp);
			}

#if CONFIG_HFS_ALLOC_RBTREE
			(void) hfs_teardown_allocator(hfsmp);
#endif
			hfsmp->hfs_downgrading_proc = NULL;
		}

		/* Change to a writable file system. */
		if (vfs_iswriteupgrade(mp)) {
#if CONFIG_HFS_ALLOC_RBTREE
			thread_t allocator_thread;
#endif

			/*
			 * On inconsistent disks, do not allow read-write mount
			 * unless it is the boot volume being mounted.
			 */
			if (!(vfs_flags(mp) & MNT_ROOTFS) &&
			    (hfsmp->vcbAtrb & kHFSVolumeInconsistentMask)) {
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: attempting to mount inconsistent non-root volume %s\n", (hfsmp->vcbVN));
				}
				retval = EINVAL;
				goto out;
			}

			// If the journal was shut-down previously because we were
			// asked to be read-only, let's start it back up again now

			if ( (HFSTOVCB(hfsmp)->vcbAtrb & kHFSVolumeJournaledMask)
			     && hfsmp->jnl == NULL
			     && hfsmp->jvp != NULL) {
				int jflags;

				if (hfsmp->hfs_flags & HFS_NEED_JNL_RESET) {
					jflags = JOURNAL_RESET;
				} else {
					jflags = 0;
				}

				/* journal_open is performed under the global lock so no
				 * other thread observes a half-initialized journal. */
				hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);

				hfsmp->jnl = journal_open(hfsmp->jvp,
						(hfsmp->jnl_start * HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset,
						hfsmp->jnl_size,
						hfsmp->hfs_devvp,
						hfsmp->hfs_logical_block_size,
						jflags,
						0,
						hfs_sync_metadata, hfsmp->hfs_mp);

				/*
				 * Set up the trim callback function so that we can add
				 * recently freed extents to the free extent cache once
				 * the transaction that freed them is written to the
				 * journal on disk.
				 */
				if (hfsmp->jnl)
					journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);

				hfs_unlock_global (hfsmp);

				if (hfsmp->jnl == NULL) {
					if (HFS_MOUNT_DEBUG) {
						printf("hfs_mount: journal_open == NULL; couldn't be opened on %s \n", (hfsmp->vcbVN));
					}
					retval = EINVAL;
					goto out;
				} else {
					hfsmp->hfs_flags &= ~HFS_NEED_JNL_RESET;
				}

			}

			/* See if we need to erase unused Catalog nodes due to <rdar://problem/6947811>. */
			retval = hfs_erase_unused_nodes(hfsmp);
			if (retval != E_NONE) {
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: hfs_erase_unused_nodes returned %d for fs %s\n", retval, hfsmp->vcbVN);
				}
				goto out;
			}

			/* If this mount point was downgraded from read-write
			 * to read-only, clear that information as we are now
			 * moving back to read-write.
			 */
			hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE;
			hfsmp->hfs_downgrading_proc = NULL;

			/* mark the volume dirty (clear clean unmount bit) */
			hfsmp->vcbAtrb &= ~kHFSVolumeUnmountedMask;

			retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
			if (retval != E_NONE) {
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: hfs_flushvolumeheader returned %d for fs %s\n", retval, hfsmp->vcbVN);
				}
				goto out;
			}

			/* Only clear HFS_READ_ONLY after a successful write */
			hfsmp->hfs_flags &= ~HFS_READ_ONLY;


			/* Post-upgrade housekeeping, HFS Plus (non-standard) volumes only. */
			if (!(hfsmp->hfs_flags & (HFS_READ_ONLY | HFS_STANDARD))) {
				/* Setup private/hidden directories for hardlinks. */
				hfs_privatedir_init(hfsmp, FILE_HARDLINKS);
				hfs_privatedir_init(hfsmp, DIR_HARDLINKS);

				hfs_remove_orphans(hfsmp);

				/*
				 * Allow hot file clustering if conditions allow.
				 */
				if ((hfsmp->hfs_flags & HFS_METADATA_ZONE) &&
				    ((hfsmp->hfs_flags & HFS_SSD) == 0)) {
					(void) hfs_recording_init(hfsmp);
				}
				/* Force ACLs on HFS+ file systems. */
				if (vfs_extendedsecurity(HFSTOVFS(hfsmp)) == 0) {
					vfs_setextendedsecurity(HFSTOVFS(hfsmp));
				}
			}

#if CONFIG_HFS_ALLOC_RBTREE
			/*
			 * Like the normal mount case, we need to handle creation of the allocation red-black tree
			 * if we're upgrading from read-only to read-write.
			 *
			 * We spawn a thread to create the pair of red-black trees for this volume.
			 * However, in so doing, we must be careful to ensure that if this thread is still
			 * running after mount has finished, it doesn't interfere with an unmount. Specifically,
			 * we'll need to set a bit that indicates we're in progress building the trees here.
			 * Unmount will check for this bit, and then if it's set, mark a corresponding bit that
			 * notifies the tree generation code that an unmount is waiting.  Also, mark the extent
			 * tree flags that the allocator is enabled for use before we spawn the thread that will start
			 * scanning the RB tree.
			 *
			 * Only do this if we're operating on a read-write mount (we wouldn't care for read-only),
			 * which has not previously encountered a bad error on the red-black tree code.  Also, don't
			 * try to re-build a tree that already exists.
			 */

			if (hfsmp->extent_tree_flags == 0) {
				hfsmp->extent_tree_flags |= (HFS_ALLOC_TREEBUILD_INFLIGHT | HFS_ALLOC_RB_ENABLED);
				/* Initialize EOF counter so that the thread can assume it started at initial values */
				hfsmp->offset_block_end = 0;

				InitTree(hfsmp);

				kernel_thread_start ((thread_continue_t) hfs_initialize_allocator , hfsmp, &allocator_thread);
				thread_deallocate(allocator_thread);
			}

#endif
		}

		/* Update file system parameters. */
		retval = hfs_changefs(mp, &args);
		if (retval && HFS_MOUNT_DEBUG) {
			printf("hfs_mount: hfs_changefs returned %d for %s\n", retval, hfsmp->vcbVN);
		}

	} else /* not an update request */ {

		/* Set the mount flag to indicate that we support volfs */
		vfs_setflags(mp, (u_int64_t)((unsigned int)MNT_DOVOLFS));

		retval = hfs_mountfs(devvp, mp, &args, 0, context);
		if (retval && HFS_MOUNT_DEBUG) {
			printf("hfs_mount: hfs_mountfs returned %d\n", retval);
		}
#if CONFIG_PROTECT
		/*
		 * If above mount call was successful, and this mount is content protection
		 * enabled, then verify the on-disk EA on the root to ensure that the filesystem
		 * is of a suitable vintage to allow the mount to proceed.
		 */
		if ((retval == 0) && (cp_fs_protected (mp))) {
			int err = 0;
			struct cp_root_xattr xattr;
			bzero (&xattr, sizeof(struct cp_root_xattr));
			hfsmp = vfs_fsprivate(mp);

			/* go get the EA to get the version information */
			err = cp_getrootxattr (hfsmp, &xattr);
			/* If there was no EA there, then write one out. */
			if (err == ENOATTR) {
				bzero(&xattr, sizeof(struct cp_root_xattr));
				xattr.major_version = CP_CURRENT_MAJOR_VERS;
				xattr.minor_version = CP_CURRENT_MINOR_VERS;
				xattr.flags = 0;

				err = cp_setrootxattr (hfsmp, &xattr);
			}
			/*
			 * For any other error, including having an out of date CP version in the
			 * EA, or for an error out of cp_setrootxattr, deny the mount
			 * and do not proceed further.
			 */
			if (err || xattr.major_version != CP_CURRENT_MAJOR_VERS) {
				/* Deny the mount and tear down. */
				retval = EPERM;
				(void) hfs_unmount (mp, MNT_FORCE, context);
			}
		}
#endif
	}
out:
	/* On any successful path, refresh the cached statfs data. */
	if (retval == 0) {
		(void)hfs_statfs(mp, vfs_statfs(mp), context);
	}
	return (retval);
}
540
541
/*
 * Per-iteration context handed to hfs_changefs_callback() via vnode_iterate()
 * when hfs_changefs() re-applies mount parameters to live vnodes.
 */
struct hfs_changefs_cargs {
	struct hfsmount *hfsmp;		/* volume being updated */
	int		namefix;	/* non-zero: text encoding changed; fix cached names */
	int		permfix;	/* non-zero: default uid/gid/mask changed */
	int		permswitch;	/* non-zero: MNT_UNKNOWNPERMISSIONS toggled */
};
548
/*
 * vnode_iterate() callback for hfs_changefs(): refresh one cnode's
 * ownership/permissions and/or cached name from its on-disk catalog record.
 * Always returns VNODE_RETURNED so iteration continues.
 */
static int
hfs_changefs_callback(struct vnode *vp, void *cargs)
{
	ExtendedVCB *vcb;
	struct cnode *cp;
	struct cat_desc cndesc;
	struct cat_attr cnattr;
	struct hfs_changefs_cargs *args;
	int lockflags;
	int error;

	args = (struct hfs_changefs_cargs *)cargs;

	cp = VTOC(vp);
	vcb = HFSTOVCB(args->hfsmp);

	/* Re-read this cnode's catalog record under the shared catalog lock. */
	lockflags = hfs_systemfile_lock(args->hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
	error = cat_lookup(args->hfsmp, &cp->c_desc, 0, &cndesc, &cnattr, NULL, NULL);
	hfs_systemfile_unlock(args->hfsmp, lockflags);
	if (error) {
		/*
		 * If we couldn't find this guy skip to the next one
		 */
		if (args->namefix)
			cache_purge(vp);

		return (VNODE_RETURNED);
	}
	/*
	 * Get the real uid/gid and perm mask from disk.
	 */
	if (args->permswitch || args->permfix) {
		cp->c_uid = cnattr.ca_uid;
		cp->c_gid = cnattr.ca_gid;
		cp->c_mode = cnattr.ca_mode;
	}
	/*
	 * If we're switching name converters then...
	 *   Remove the existing entry from the namei cache.
	 *   Update name to one based on new encoder.
	 */
	if (args->namefix) {
		cache_purge(vp);
		/* replace_desc takes ownership of cndesc's name buffer. */
		replace_desc(cp, &cndesc);

		if (cndesc.cd_cnid == kHFSRootFolderID) {
			/* Keep the in-memory volume name in sync with the root folder. */
			strlcpy((char *)vcb->vcbVN, (const char *)cp->c_desc.cd_nameptr, NAME_MAX+1);
			cp->c_desc.cd_encoding = args->hfsmp->hfs_encoding;
		}
	} else {
		/* Not consumed by replace_desc, so release the lookup result. */
		cat_releasedesc(&cndesc);
	}
	return (VNODE_RETURNED);
}
603
/*
 * hfs_changefs - apply updated mount parameters (timezone, default
 * uid/gid/mask, HFS text encoding, unknown-permissions mode) to a mounted
 * volume, then walk all live vnodes to bring their cached state in line.
 * Returns 0 or an errno value.
 */
static int
hfs_changefs(struct mount *mp, struct hfs_mount_args *args)
{
	int retval = 0;
	int namefix, permfix, permswitch;
	struct hfsmount *hfsmp;
	ExtendedVCB *vcb;
	hfs_to_unicode_func_t	get_unicode_func;
	unicode_to_hfs_func_t	get_hfsname_func;
	u_int32_t old_encoding = 0;
	struct hfs_changefs_cargs cargs;
	u_int32_t mount_flags;

	hfsmp = VFSTOHFS(mp);
	vcb = HFSTOVCB(hfsmp);
	mount_flags = (unsigned int)vfs_flags(mp);

	/* Advertise that a parameter change is in progress. */
	hfsmp->hfs_flags |= HFS_IN_CHANGEFS;

	/* Did MNT_UNKNOWNPERMISSIONS flip relative to the current state? */
	permswitch = (((hfsmp->hfs_flags & HFS_UNKNOWN_PERMS) &&
	               ((mount_flags & MNT_UNKNOWNPERMISSIONS) == 0)) ||
	              (((hfsmp->hfs_flags & HFS_UNKNOWN_PERMS) == 0) &&
	               (mount_flags & MNT_UNKNOWNPERMISSIONS)));

	/* The root filesystem must operate with actual permissions: */
	if (permswitch && (mount_flags & MNT_ROOTFS) && (mount_flags & MNT_UNKNOWNPERMISSIONS)) {
		vfs_clearflags(mp, (u_int64_t)((unsigned int)MNT_UNKNOWNPERMISSIONS));	/* Just say "No". */
		retval = EINVAL;
		goto exit;
	}
	if (mount_flags & MNT_UNKNOWNPERMISSIONS)
		hfsmp->hfs_flags |= HFS_UNKNOWN_PERMS;
	else
		hfsmp->hfs_flags &= ~HFS_UNKNOWN_PERMS;

	namefix = permfix = 0;

	/*
	 * Tracking of hot files requires up-to-date access times.  So if
	 * access time updates are disabled, we must also disable hot files.
	 */
	if (mount_flags & MNT_NOATIME) {
		(void) hfs_recording_suspend(hfsmp);
	}

	/* Change the timezone (Note: this affects all hfs volumes and hfs+ volume create dates) */
	if (args->hfs_timezone.tz_minuteswest != VNOVAL) {
		gTimeZone = args->hfs_timezone;
	}

	/* Change the default uid, gid and/or mask */
	if ((args->hfs_uid != (uid_t)VNOVAL) && (hfsmp->hfs_uid != args->hfs_uid)) {
		hfsmp->hfs_uid = args->hfs_uid;
		if (vcb->vcbSigWord == kHFSPlusSigWord)
			++permfix;
	}
	if ((args->hfs_gid != (gid_t)VNOVAL) && (hfsmp->hfs_gid != args->hfs_gid)) {
		hfsmp->hfs_gid = args->hfs_gid;
		if (vcb->vcbSigWord == kHFSPlusSigWord)
			++permfix;
	}
	if (args->hfs_mask != (mode_t)VNOVAL) {
		if (hfsmp->hfs_dir_mask != (args->hfs_mask & ALLPERMS)) {
			hfsmp->hfs_dir_mask = args->hfs_mask & ALLPERMS;
			hfsmp->hfs_file_mask = args->hfs_mask & ALLPERMS;
			/* Optionally strip execute bits from files. */
			if ((args->flags != VNOVAL) && (args->flags & HFSFSMNT_NOXONFILES))
				hfsmp->hfs_file_mask = (args->hfs_mask & DEFFILEMODE);
			if (vcb->vcbSigWord == kHFSPlusSigWord)
				++permfix;
		}
	}

	/* Change the hfs encoding value (hfs only) */
	if ((vcb->vcbSigWord == kHFSSigWord)	&&
	    (args->hfs_encoding != (u_int32_t)VNOVAL) &&
	    (hfsmp->hfs_encoding != args->hfs_encoding)) {

		retval = hfs_getconverter(args->hfs_encoding, &get_unicode_func, &get_hfsname_func);
		if (retval)
			goto exit;

		/*
		 * Connect the new hfs_get_unicode converter but leave
		 * the old hfs_get_hfsname converter in place so that
		 * we can lookup existing vnodes to get their correctly
		 * encoded names.
		 *
		 * When we're all finished, we can then connect the new
		 * hfs_get_hfsname converter and release our interest
		 * in the old converters.
		 */
		hfsmp->hfs_get_unicode = get_unicode_func;
		old_encoding = hfsmp->hfs_encoding;
		hfsmp->hfs_encoding = args->hfs_encoding;
		++namefix;
	}

	/* Nothing changed that requires touching live vnodes. */
	if (!(namefix || permfix || permswitch))
		goto exit;

	/* XXX 3762912 hack to support HFS filesystem 'owner' */
	if (permfix)
		vfs_setowner(mp,
		    hfsmp->hfs_uid == UNKNOWNUID ? KAUTH_UID_NONE : hfsmp->hfs_uid,
		    hfsmp->hfs_gid == UNKNOWNGID ? KAUTH_GID_NONE : hfsmp->hfs_gid);

	/*
	 * For each active vnode fix things that changed
	 *
	 * Note that we can visit a vnode more than once
	 * and we can race with fsync.
	 *
	 * hfs_changefs_callback will be called for each vnode
	 * hung off of this mount point
	 *
	 * The vnode will be properly referenced and unreferenced
	 * around the callback
	 */
	cargs.hfsmp = hfsmp;
	cargs.namefix = namefix;
	cargs.permfix = permfix;
	cargs.permswitch = permswitch;

	vnode_iterate(mp, 0, hfs_changefs_callback, (void *)&cargs);

	/*
	 * If we're switching name converters we can now
	 * connect the new hfs_get_hfsname converter and
	 * release our interest in the old converters.
	 */
	if (namefix) {
		hfsmp->hfs_get_hfsname = get_hfsname_func;
		vcb->volumeNameEncodingHint = args->hfs_encoding;
		(void) hfs_relconverter(old_encoding);
	}
exit:
	hfsmp->hfs_flags &= ~HFS_IN_CHANGEFS;
	return (retval);
}
744
745
/*
 * Per-iteration context handed to hfs_reload_callback() via vnode_iterate();
 * carries the mount and the first error encountered (stops the walk).
 */
struct hfs_reload_cargs {
	struct hfsmount *hfsmp;		/* volume being reloaded */
	int		error;		/* first cat_idlookup failure, or 0 */
};
750
/*
 * vnode_iterate() callback for hfs_reload(): invalidate one vnode's cached
 * buffers and directory hints, then re-read its catalog data by file ID.
 * Returns VNODE_RETURNED to continue, or VNODE_RETURNED_DONE on lookup
 * failure (error recorded in cargs).
 */
static int
hfs_reload_callback(struct vnode *vp, void *cargs)
{
	struct cnode *cp;
	struct hfs_reload_cargs *args;
	int lockflags;

	args = (struct hfs_reload_cargs *)cargs;
	/*
	 * flush all the buffers associated with this node
	 */
	(void) buf_invalidateblks(vp, 0, 0, 0);

	cp = VTOC(vp);
	/*
	 * Remove any directory hints
	 */
	if (vnode_isdir(vp))
		hfs_reldirhints(cp, 0);

	/*
	 * Re-read cnode data for all active vnodes (non-metadata files).
	 */
	if (!vnode_issystem(vp) && !VNODE_IS_RSRC(vp) && (cp->c_fileid >= kHFSFirstUserCatalogNodeID)) {
		struct cat_fork *datafork;
		struct cat_desc desc;

		datafork = cp->c_datafork ? &cp->c_datafork->ff_data : NULL;

		/* lookup by fileID since name could have changed */
		lockflags = hfs_systemfile_lock(args->hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
		args->error = cat_idlookup(args->hfsmp, cp->c_fileid, 0, &desc, &cp->c_attr, datafork);
		hfs_systemfile_unlock(args->hfsmp, lockflags);
		if (args->error) {
			/* Abort the iteration; hfs_reload() reports args->error. */
			return (VNODE_RETURNED_DONE);
		}

		/* update cnode's catalog descriptor */
		(void) replace_desc(cp, &desc);
	}
	return (VNODE_RETURNED);
}
793
/*
 * Reload all incore data for a filesystem (used after running fsck on
 * the root filesystem and finding things to fix).  The filesystem must
 * be mounted read-only.
 *
 * Things to do to update the mount:
 *	invalidate all cached meta-data.
 *	invalidate all inactive vnodes.
 *	invalidate all cached file data.
 *	re-read volume header from disk.
 *	re-load meta-file info (extents, file size).
 *	re-load B-tree header data.
 *	re-read cnode data for all active vnodes.
 */
int
hfs_reload(struct mount *mountp)
{
	register struct vnode *devvp;
	struct buf *bp;
	int error, i;
	struct hfsmount *hfsmp;
	struct HFSPlusVolumeHeader *vhp;
	ExtendedVCB *vcb;
	struct filefork *forkp;
	struct cat_desc cndesc;
	struct hfs_reload_cargs args;
	daddr64_t priIDSector;

	hfsmp = VFSTOHFS(mountp);
	vcb = HFSTOVCB(hfsmp);

	/* Plain (non-Plus) HFS volumes cannot be reloaded this way. */
	if (vcb->vcbSigWord == kHFSSigWord)
		return (EINVAL);	/* rooting from HFS is not supported! */

	/*
	 * Invalidate all cached meta-data.
	 */
	devvp = hfsmp->hfs_devvp;
	if (buf_invalidateblks(devvp, 0, 0, 0))
		panic("hfs_reload: dirty1");

	args.hfsmp = hfsmp;
	args.error = 0;
	/*
	 * hfs_reload_callback will be called for each vnode
	 * hung off of this mount point that can't be recycled...
	 * vnode_iterate will recycle those that it can (the VNODE_RELOAD option)
	 * the vnode will be in an 'unbusy' state (VNODE_WAIT) and
	 * properly referenced and unreferenced around the callback
	 */
	vnode_iterate(mountp, VNODE_RELOAD | VNODE_WAIT, hfs_reload_callback, (void *)&args);

	if (args.error)
		return (args.error);

	/*
	 * Re-read VolumeHeader from disk.
	 */
	priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
			HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size));

	error = (int)buf_meta_bread(hfsmp->hfs_devvp,
			HFS_PHYSBLK_ROUNDDOWN(priIDSector, hfsmp->hfs_log_per_phys),
			hfsmp->hfs_physical_block_size, NOCRED, &bp);
	if (error) {
		if (bp != NULL)
			buf_brelse(bp);
		return (error);
	}

	vhp = (HFSPlusVolumeHeader *) (buf_dataptr(bp) + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));

	/* Do a quick sanity check */
	if ((SWAP_BE16(vhp->signature) != kHFSPlusSigWord &&
	     SWAP_BE16(vhp->signature) != kHFSXSigWord) ||
	    (SWAP_BE16(vhp->version) != kHFSPlusVersion &&
	     SWAP_BE16(vhp->version) != kHFSXVersion) ||
	    SWAP_BE32(vhp->blockSize) != vcb->blockSize) {
		buf_brelse(bp);
		return (EIO);
	}

	/* Copy the (big-endian) on-disk header fields into the in-core VCB. */
	vcb->vcbLsMod		= to_bsd_time(SWAP_BE32(vhp->modifyDate));
	vcb->vcbAtrb		= SWAP_BE32 (vhp->attributes);
	vcb->vcbJinfoBlock  = SWAP_BE32(vhp->journalInfoBlock);
	vcb->vcbClpSiz		= SWAP_BE32 (vhp->rsrcClumpSize);
	vcb->vcbNxtCNID		= SWAP_BE32 (vhp->nextCatalogID);
	vcb->vcbVolBkUp		= to_bsd_time(SWAP_BE32(vhp->backupDate));
	vcb->vcbWrCnt		= SWAP_BE32 (vhp->writeCount);
	vcb->vcbFilCnt		= SWAP_BE32 (vhp->fileCount);
	vcb->vcbDirCnt		= SWAP_BE32 (vhp->folderCount);
	HFS_UPDATE_NEXT_ALLOCATION(vcb, SWAP_BE32 (vhp->nextAllocation));
	vcb->totalBlocks	= SWAP_BE32 (vhp->totalBlocks);
	vcb->freeBlocks		= SWAP_BE32 (vhp->freeBlocks);
	vcb->encodingsBitmap	= SWAP_BE64 (vhp->encodingsBitmap);
	bcopy(vhp->finderInfo, vcb->vcbFndrInfo, sizeof(vhp->finderInfo));
	vcb->localCreateDate	= SWAP_BE32 (vhp->createDate);	/* hfs+ create date is in local time */

	/*
	 * Re-load meta-file vnode data (extent info, file size, etc).
	 */
	forkp = VTOF((struct vnode *)vcb->extentsRefNum);
	for (i = 0; i < kHFSPlusExtentDensity; i++) {
		forkp->ff_extents[i].startBlock =
			SWAP_BE32 (vhp->extentsFile.extents[i].startBlock);
		forkp->ff_extents[i].blockCount =
			SWAP_BE32 (vhp->extentsFile.extents[i].blockCount);
	}
	forkp->ff_size      = SWAP_BE64 (vhp->extentsFile.logicalSize);
	forkp->ff_blocks    = SWAP_BE32 (vhp->extentsFile.totalBlocks);
	forkp->ff_clumpsize = SWAP_BE32 (vhp->extentsFile.clumpSize);


	forkp = VTOF((struct vnode *)vcb->catalogRefNum);
	for (i = 0; i < kHFSPlusExtentDensity; i++) {
		forkp->ff_extents[i].startBlock =
			SWAP_BE32 (vhp->catalogFile.extents[i].startBlock);
		forkp->ff_extents[i].blockCount =
			SWAP_BE32 (vhp->catalogFile.extents[i].blockCount);
	}
	forkp->ff_size      = SWAP_BE64 (vhp->catalogFile.logicalSize);
	forkp->ff_blocks    = SWAP_BE32 (vhp->catalogFile.totalBlocks);
	forkp->ff_clumpsize = SWAP_BE32 (vhp->catalogFile.clumpSize);

	/* Attributes B-tree is optional; only reload it if it is open. */
	if (hfsmp->hfs_attribute_vp) {
		forkp = VTOF(hfsmp->hfs_attribute_vp);
		for (i = 0; i < kHFSPlusExtentDensity; i++) {
			forkp->ff_extents[i].startBlock =
				SWAP_BE32 (vhp->attributesFile.extents[i].startBlock);
			forkp->ff_extents[i].blockCount =
				SWAP_BE32 (vhp->attributesFile.extents[i].blockCount);
		}
		forkp->ff_size      = SWAP_BE64 (vhp->attributesFile.logicalSize);
		forkp->ff_blocks    = SWAP_BE32 (vhp->attributesFile.totalBlocks);
		forkp->ff_clumpsize = SWAP_BE32 (vhp->attributesFile.clumpSize);
	}

	forkp = VTOF((struct vnode *)vcb->allocationsRefNum);
	for (i = 0; i < kHFSPlusExtentDensity; i++) {
		forkp->ff_extents[i].startBlock =
			SWAP_BE32 (vhp->allocationFile.extents[i].startBlock);
		forkp->ff_extents[i].blockCount =
			SWAP_BE32 (vhp->allocationFile.extents[i].blockCount);
	}
	forkp->ff_size      = SWAP_BE64 (vhp->allocationFile.logicalSize);
	forkp->ff_blocks    = SWAP_BE32 (vhp->allocationFile.totalBlocks);
	forkp->ff_clumpsize = SWAP_BE32 (vhp->allocationFile.clumpSize);

	buf_brelse(bp);
	vhp = NULL;

	/*
	 * Re-load B-tree header data
	 */
	forkp = VTOF((struct vnode *)vcb->extentsRefNum);
	if ( (error = MacToVFSError( BTReloadData((FCB*)forkp) )) )
		return (error);

	forkp = VTOF((struct vnode *)vcb->catalogRefNum);
	if ( (error = MacToVFSError( BTReloadData((FCB*)forkp) )) )
		return (error);

	if (hfsmp->hfs_attribute_vp) {
		forkp = VTOF(hfsmp->hfs_attribute_vp);
		if ( (error = MacToVFSError( BTReloadData((FCB*)forkp) )) )
			return (error);
	}

	/* Reload the volume name */
	if ((error = cat_idlookup(hfsmp, kHFSRootFolderID, 0, &cndesc, NULL, NULL)))
		return (error);
	vcb->volumeNameEncodingHint = cndesc.cd_encoding;
	/* NOTE(review): bcopy does not NUL-terminate vcbVN; if the new name is
	 * shorter than the old, stale trailing bytes may remain — verify. */
	bcopy(cndesc.cd_nameptr, vcb->vcbVN, min(255, cndesc.cd_namelen));
	cat_releasedesc(&cndesc);

	/* Re-establish private/hidden directories. */
	hfs_privatedir_init(hfsmp, FILE_HARDLINKS);
	hfs_privatedir_init(hfsmp, DIR_HARDLINKS);

	/* In case any volume information changed to trigger a notification */
	hfs_generate_volume_notifications(hfsmp);

	return (0);
}
978
979
980
981 static void
982 hfs_syncer(void *arg0, void *unused)
983 {
984 #pragma unused(unused)
985
986 struct hfsmount *hfsmp = arg0;
987 clock_sec_t secs;
988 clock_usec_t usecs;
989 uint32_t delay = HFS_META_DELAY;
990 uint64_t now;
991 static int no_max=1;
992
993 clock_get_calendar_microtime(&secs, &usecs);
994 now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs;
995
996 //
997 // If the amount of pending writes is more than our limit, wait
998 // for 2/3 of it to drain and then flush the journal.
999 //
1000 if (hfsmp->hfs_mp->mnt_pending_write_size > hfsmp->hfs_max_pending_io) {
1001 int counter=0;
1002 uint64_t pending_io, start, rate;
1003
1004 no_max = 0;
1005
1006 hfs_start_transaction(hfsmp); // so we hold off any new i/o's
1007
1008 pending_io = hfsmp->hfs_mp->mnt_pending_write_size;
1009
1010 clock_get_calendar_microtime(&secs, &usecs);
1011 start = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs;
1012
1013 while(hfsmp->hfs_mp->mnt_pending_write_size > (pending_io/3) && counter++ < 500) {
1014 tsleep((caddr_t)hfsmp, PRIBIO, "hfs-wait-for-io-to-drain", 10);
1015 }
1016
1017 if (counter >= 500) {
1018 printf("hfs: timed out waiting for io to drain (%lld)\n", (int64_t)hfsmp->hfs_mp->mnt_pending_write_size);
1019 }
1020
1021 if (hfsmp->jnl) {
1022 journal_flush(hfsmp->jnl, FALSE);
1023 } else {
1024 hfs_sync(hfsmp->hfs_mp, MNT_WAIT, vfs_context_kernel());
1025 }
1026
1027 clock_get_calendar_microtime(&secs, &usecs);
1028 now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs;
1029 hfsmp->hfs_last_sync_time = now;
1030 rate = ((pending_io * 1000000ULL) / (now - start)); // yields bytes per second
1031
1032 hfs_end_transaction(hfsmp);
1033
1034 //
1035 // If a reasonable amount of time elapsed then check the
1036 // i/o rate. If it's taking less than 1 second or more
1037 // than 2 seconds, adjust hfs_max_pending_io so that we
1038 // will allow about 1.5 seconds of i/o to queue up.
1039 //
1040 if ((now - start) >= 300000) {
1041 uint64_t scale = (pending_io * 100) / rate;
1042
1043 if (scale < 100 || scale > 200) {
1044 // set it so that it should take about 1.5 seconds to drain
1045 hfsmp->hfs_max_pending_io = (rate * 150ULL) / 100ULL;
1046 }
1047 }
1048
1049 } else if ( ((now - hfsmp->hfs_last_sync_time) >= 5000000ULL)
1050 || (((now - hfsmp->hfs_last_sync_time) >= 100000LL)
1051 && ((now - hfsmp->hfs_last_sync_request_time) >= 100000LL)
1052 && (hfsmp->hfs_active_threads == 0)
1053 && (hfsmp->hfs_global_lock_nesting == 0))) {
1054
1055 //
1056 // Flush the journal if more than 5 seconds elapsed since
1057 // the last sync OR we have not sync'ed recently and the
1058 // last sync request time was more than 100 milliseconds
1059 // ago and no one is in the middle of a transaction right
1060 // now. Else we defer the sync and reschedule it.
1061 //
1062 if (hfsmp->jnl) {
1063 hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
1064
1065 journal_flush(hfsmp->jnl, FALSE);
1066
1067 hfs_unlock_global (hfsmp);
1068 } else {
1069 hfs_sync(hfsmp->hfs_mp, MNT_WAIT, vfs_context_kernel());
1070 }
1071
1072 clock_get_calendar_microtime(&secs, &usecs);
1073 now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs;
1074 hfsmp->hfs_last_sync_time = now;
1075
1076 } else if (hfsmp->hfs_active_threads == 0) {
1077 uint64_t deadline;
1078
1079 clock_interval_to_deadline(delay, HFS_MILLISEC_SCALE, &deadline);
1080 thread_call_enter_delayed(hfsmp->hfs_syncer, deadline);
1081
1082 // note: we intentionally return early here and do not
1083 // decrement the sync_scheduled and sync_incomplete
1084 // variables because we rescheduled the timer.
1085
1086 return;
1087 }
1088
1089 //
1090 // NOTE: we decrement these *after* we're done the journal_flush() since
1091 // it can take a significant amount of time and so we don't want more
1092 // callbacks scheduled until we're done this one.
1093 //
1094 OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_scheduled);
1095 OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_incomplete);
1096 wakeup((caddr_t)&hfsmp->hfs_sync_incomplete);
1097 }
1098
1099
1100 extern int IOBSDIsMediaEjectable( const char *cdev_name );
1101
1102 /*
1103 * Initialization code for Red-Black Tree Allocator
1104 *
1105 * This function will build the two red-black trees necessary for allocating space
1106 * from the metadata zone as well as normal allocations. Currently, we use
1107 * an advisory read to get most of the data into the buffer cache.
1108 * This function is intended to be run in a separate thread so as not to slow down mount.
1109 *
1110 */
1111
/*
 * Build the red-black tree(s) used by the block allocator.
 *
 * Intended to run in its own thread so tree generation does not slow
 * down mount. When CONFIG_HFS_ALLOC_RBTREE is not configured this is a
 * no-op.
 */
void
hfs_initialize_allocator (struct hfsmount *hfsmp) {

#if CONFIG_HFS_ALLOC_RBTREE
	u_int32_t err;
	int lockflags;

	/*
	 * Grab the allocation (bitmap) file lock exclusively; journal
	 * transactions will block until we release it below.
	 */
	lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);

	/*
	 * GenerateTree must be entered with the bitmap lock held.  It may
	 * temporarily drop and re-take the lock to let other allocations
	 * proceed, but it always returns with the lock held.  Only one
	 * tree is maintained, so generation always starts at block 0.
	 */
	err = GenerateTree(hfsmp, hfsmp->totalBlocks, &lockflags, 1);
	if (err == 0) {
		/* The offset tree is now live. */
		hfsmp->extent_tree_flags |= HFS_ALLOC_RB_ACTIVE;
	}

	/*
	 * Whether generation succeeded or failed, clear the in-flight
	 * bit.  The bitmap lock is still held here (see above), so no
	 * re-lock is needed for this update.
	 */
	hfsmp->extent_tree_flags &= ~HFS_ALLOC_TREEBUILD_INFLIGHT;
	if (err != 0) {
		/* Let anyone blocked on the tree build know it is over. */
		wakeup((caddr_t)&hfsmp->extent_tree_flags);
	}

	hfs_systemfile_unlock(hfsmp, lockflags);
#else
#pragma unused (hfsmp)
#endif
}
1154
1155
1156 /*
1157 * Teardown code for the Red-Black Tree allocator.
1158 * This function consolidates the code which serializes with respect
1159 * to a thread that may be potentially still building the tree when we need to begin
1160 * tearing it down. Since the red-black tree may not be live when we enter this function
1161 * we return:
1162 * 1 -> Tree was live.
1163 * 0 -> Tree was not active at time of call.
1164 */
1165
/*
 * Tear down the red-black tree allocator, serializing against a thread
 * that may still be building the tree.
 *
 * Returns 1 if the tree was live at the time of the call, 0 otherwise
 * (always 0 when CONFIG_HFS_ALLOC_RBTREE is not configured).
 */
int
hfs_teardown_allocator (struct hfsmount *hfsmp) {
	int rb_used = 0;

#if CONFIG_HFS_ALLOC_RBTREE
	int lockflags;

	/*
	 * Take the bitmap lock, then wait out any in-progress tree
	 * generation before touching the trees.
	 */
	lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);

	while (hfsmp->extent_tree_flags & HFS_ALLOC_TREEBUILD_INFLIGHT) {
		/* Advertise that a teardown is waiting, then sleep on the
		 * flags word until the builder wakes us. */
		hfsmp->extent_tree_flags |= HFS_ALLOC_TEARDOWN_INFLIGHT;

		lck_rw_sleep(&(VTOC(hfsmp->hfs_allocation_vp))->c_rwlock, LCK_SLEEP_EXCLUSIVE,
		    &hfsmp->extent_tree_flags, THREAD_UNINT);
	}

	if (hfs_isrbtree_active (hfsmp)) {
		/* Tree was live: destroy it while the bitmap is locked. */
		DestroyTrees(hfsmp);
		rb_used = 1;
	}

	hfs_systemfile_unlock(hfsmp, lockflags);
#else
#pragma unused (hfsmp)
#endif
	return rb_used;

}
1204
1205
/*
 * Records whether the root volume was unmounted cleanly before this
 * boot; set during hfs_mountfs (for the root volume only) from the
 * volume header's kHFSVolumeUnmountedMask attribute bit, and exported
 * read-only via sysctl as vfs.generic.root_unmounted_cleanly.
 */
static int hfs_root_unmounted_cleanly = 0;

SYSCTL_DECL(_vfs_generic);
SYSCTL_INT(_vfs_generic, OID_AUTO, root_unmounted_cleanly, CTLFLAG_RD, &hfs_root_unmounted_cleanly, 0, "Root filesystem was unmounted cleanly");
1210
1211 /*
1212 * Common code for mount and mountroot
1213 */
1214 int
1215 hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
1216 int journal_replay_only, vfs_context_t context)
1217 {
1218 struct proc *p = vfs_context_proc(context);
1219 int retval = E_NONE;
1220 struct hfsmount *hfsmp = NULL;
1221 struct buf *bp;
1222 dev_t dev;
1223 HFSMasterDirectoryBlock *mdbp = NULL;
1224 int ronly;
1225 #if QUOTA
1226 int i;
1227 #endif
1228 int mntwrapper;
1229 kauth_cred_t cred;
1230 u_int64_t disksize;
1231 daddr64_t log_blkcnt;
1232 u_int32_t log_blksize;
1233 u_int32_t phys_blksize;
1234 u_int32_t minblksize;
1235 u_int32_t iswritable;
1236 daddr64_t mdb_offset;
1237 int isvirtual = 0;
1238 int isroot = 0;
1239 int isssd;
1240 #if CONFIG_HFS_ALLOC_RBTREE
1241 thread_t allocator_thread;
1242 #endif
1243
1244 if (args == NULL) {
1245 /* only hfs_mountroot passes us NULL as the 'args' argument */
1246 isroot = 1;
1247 }
1248
1249 ronly = vfs_isrdonly(mp);
1250 dev = vnode_specrdev(devvp);
1251 cred = p ? vfs_context_ucred(context) : NOCRED;
1252 mntwrapper = 0;
1253
1254 bp = NULL;
1255 hfsmp = NULL;
1256 mdbp = NULL;
1257 minblksize = kHFSBlockSize;
1258
1259 /* Advisory locking should be handled at the VFS layer */
1260 vfs_setlocklocal(mp);
1261
1262 /* Get the logical block size (treated as physical block size everywhere) */
1263 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&log_blksize, 0, context)) {
1264 if (HFS_MOUNT_DEBUG) {
1265 printf("hfs_mountfs: DKIOCGETBLOCKSIZE failed\n");
1266 }
1267 retval = ENXIO;
1268 goto error_exit;
1269 }
1270 if (log_blksize == 0 || log_blksize > 1024*1024*1024) {
1271 printf("hfs: logical block size 0x%x looks bad. Not mounting.\n", log_blksize);
1272 retval = ENXIO;
1273 goto error_exit;
1274 }
1275
1276 /* Get the physical block size. */
1277 retval = VNOP_IOCTL(devvp, DKIOCGETPHYSICALBLOCKSIZE, (caddr_t)&phys_blksize, 0, context);
1278 if (retval) {
1279 if ((retval != ENOTSUP) && (retval != ENOTTY)) {
1280 if (HFS_MOUNT_DEBUG) {
1281 printf("hfs_mountfs: DKIOCGETPHYSICALBLOCKSIZE failed\n");
1282 }
1283 retval = ENXIO;
1284 goto error_exit;
1285 }
1286 /* If device does not support this ioctl, assume that physical
1287 * block size is same as logical block size
1288 */
1289 phys_blksize = log_blksize;
1290 }
1291 if (phys_blksize == 0 || phys_blksize > 1024*1024*1024) {
1292 printf("hfs: physical block size 0x%x looks bad. Not mounting.\n", phys_blksize);
1293 retval = ENXIO;
1294 goto error_exit;
1295 }
1296
1297 /* Switch to 512 byte sectors (temporarily) */
1298 if (log_blksize > 512) {
1299 u_int32_t size512 = 512;
1300
1301 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&size512, FWRITE, context)) {
1302 if (HFS_MOUNT_DEBUG) {
1303 printf("hfs_mountfs: DKIOCSETBLOCKSIZE failed \n");
1304 }
1305 retval = ENXIO;
1306 goto error_exit;
1307 }
1308 }
1309 /* Get the number of 512 byte physical blocks. */
1310 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) {
1311 /* resetting block size may fail if getting block count did */
1312 (void)VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context);
1313 if (HFS_MOUNT_DEBUG) {
1314 printf("hfs_mountfs: DKIOCGETBLOCKCOUNT failed\n");
1315 }
1316 retval = ENXIO;
1317 goto error_exit;
1318 }
1319 /* Compute an accurate disk size (i.e. within 512 bytes) */
1320 disksize = (u_int64_t)log_blkcnt * (u_int64_t)512;
1321
1322 /*
1323 * On Tiger it is not necessary to switch the device
1324 * block size to be 4k if there are more than 31-bits
1325 * worth of blocks but to insure compatibility with
1326 * pre-Tiger systems we have to do it.
1327 *
1328 * If the device size is not a multiple of 4K (8 * 512), then
1329 * switching the logical block size isn't going to help because
1330 * we will be unable to write the alternate volume header.
1331 * In this case, just leave the logical block size unchanged.
1332 */
1333 if (log_blkcnt > 0x000000007fffffff && (log_blkcnt & 7) == 0) {
1334 minblksize = log_blksize = 4096;
1335 if (phys_blksize < log_blksize)
1336 phys_blksize = log_blksize;
1337 }
1338
1339 /*
1340 * The cluster layer is not currently prepared to deal with a logical
1341 * block size larger than the system's page size. (It can handle
1342 * blocks per page, but not multiple pages per block.) So limit the
1343 * logical block size to the page size.
1344 */
1345 if (log_blksize > PAGE_SIZE)
1346 log_blksize = PAGE_SIZE;
1347
1348 /* Now switch to our preferred physical block size. */
1349 if (log_blksize > 512) {
1350 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context)) {
1351 if (HFS_MOUNT_DEBUG) {
1352 printf("hfs_mountfs: DKIOCSETBLOCKSIZE (2) failed\n");
1353 }
1354 retval = ENXIO;
1355 goto error_exit;
1356 }
1357 /* Get the count of physical blocks. */
1358 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) {
1359 if (HFS_MOUNT_DEBUG) {
1360 printf("hfs_mountfs: DKIOCGETBLOCKCOUNT (2) failed\n");
1361 }
1362 retval = ENXIO;
1363 goto error_exit;
1364 }
1365 }
1366 /*
1367 * At this point:
1368 * minblksize is the minimum physical block size
1369 * log_blksize has our preferred physical block size
1370 * log_blkcnt has the total number of physical blocks
1371 */
1372
1373 mdb_offset = (daddr64_t)HFS_PRI_SECTOR(log_blksize);
1374 if ((retval = (int)buf_meta_bread(devvp,
1375 HFS_PHYSBLK_ROUNDDOWN(mdb_offset, (phys_blksize/log_blksize)),
1376 phys_blksize, cred, &bp))) {
1377 if (HFS_MOUNT_DEBUG) {
1378 printf("hfs_mountfs: buf_meta_bread failed with %d\n", retval);
1379 }
1380 goto error_exit;
1381 }
1382 MALLOC(mdbp, HFSMasterDirectoryBlock *, kMDBSize, M_TEMP, M_WAITOK);
1383 if (mdbp == NULL) {
1384 retval = ENOMEM;
1385 if (HFS_MOUNT_DEBUG) {
1386 printf("hfs_mountfs: MALLOC failed\n");
1387 }
1388 goto error_exit;
1389 }
1390 bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize), mdbp, kMDBSize);
1391 buf_brelse(bp);
1392 bp = NULL;
1393
1394 MALLOC(hfsmp, struct hfsmount *, sizeof(struct hfsmount), M_HFSMNT, M_WAITOK);
1395 if (hfsmp == NULL) {
1396 if (HFS_MOUNT_DEBUG) {
1397 printf("hfs_mountfs: MALLOC (2) failed\n");
1398 }
1399 retval = ENOMEM;
1400 goto error_exit;
1401 }
1402 bzero(hfsmp, sizeof(struct hfsmount));
1403
1404 hfs_chashinit_finish(hfsmp);
1405
1406 /*
1407 * See if the disk is a solid state device. We need this to decide what to do about
1408 * hotfiles.
1409 */
1410 if (VNOP_IOCTL(devvp, DKIOCISSOLIDSTATE, (caddr_t)&isssd, 0, context) == 0) {
1411 if (isssd) {
1412 hfsmp->hfs_flags |= HFS_SSD;
1413 }
1414 }
1415
1416
1417 /*
1418 * Init the volume information structure
1419 */
1420
1421 lck_mtx_init(&hfsmp->hfs_mutex, hfs_mutex_group, hfs_lock_attr);
1422 lck_mtx_init(&hfsmp->hfc_mutex, hfs_mutex_group, hfs_lock_attr);
1423 lck_rw_init(&hfsmp->hfs_global_lock, hfs_rwlock_group, hfs_lock_attr);
1424 lck_rw_init(&hfsmp->hfs_insync, hfs_rwlock_group, hfs_lock_attr);
1425 lck_spin_init(&hfsmp->vcbFreeExtLock, hfs_spinlock_group, hfs_lock_attr);
1426
1427 vfs_setfsprivate(mp, hfsmp);
1428 hfsmp->hfs_mp = mp; /* Make VFSTOHFS work */
1429 hfsmp->hfs_raw_dev = vnode_specrdev(devvp);
1430 hfsmp->hfs_devvp = devvp;
1431 vnode_ref(devvp); /* Hold a ref on the device, dropped when hfsmp is freed. */
1432 hfsmp->hfs_logical_block_size = log_blksize;
1433 hfsmp->hfs_logical_block_count = log_blkcnt;
1434 hfsmp->hfs_physical_block_size = phys_blksize;
1435 hfsmp->hfs_log_per_phys = (phys_blksize / log_blksize);
1436 hfsmp->hfs_flags |= HFS_WRITEABLE_MEDIA;
1437 if (ronly)
1438 hfsmp->hfs_flags |= HFS_READ_ONLY;
1439 if (((unsigned int)vfs_flags(mp)) & MNT_UNKNOWNPERMISSIONS)
1440 hfsmp->hfs_flags |= HFS_UNKNOWN_PERMS;
1441
1442 #if QUOTA
1443 for (i = 0; i < MAXQUOTAS; i++)
1444 dqfileinit(&hfsmp->hfs_qfiles[i]);
1445 #endif
1446
1447 if (args) {
1448 hfsmp->hfs_uid = (args->hfs_uid == (uid_t)VNOVAL) ? UNKNOWNUID : args->hfs_uid;
1449 if (hfsmp->hfs_uid == 0xfffffffd) hfsmp->hfs_uid = UNKNOWNUID;
1450 hfsmp->hfs_gid = (args->hfs_gid == (gid_t)VNOVAL) ? UNKNOWNGID : args->hfs_gid;
1451 if (hfsmp->hfs_gid == 0xfffffffd) hfsmp->hfs_gid = UNKNOWNGID;
1452 vfs_setowner(mp, hfsmp->hfs_uid, hfsmp->hfs_gid); /* tell the VFS */
1453 if (args->hfs_mask != (mode_t)VNOVAL) {
1454 hfsmp->hfs_dir_mask = args->hfs_mask & ALLPERMS;
1455 if (args->flags & HFSFSMNT_NOXONFILES) {
1456 hfsmp->hfs_file_mask = (args->hfs_mask & DEFFILEMODE);
1457 } else {
1458 hfsmp->hfs_file_mask = args->hfs_mask & ALLPERMS;
1459 }
1460 } else {
1461 hfsmp->hfs_dir_mask = UNKNOWNPERMISSIONS & ALLPERMS; /* 0777: rwx---rwx */
1462 hfsmp->hfs_file_mask = UNKNOWNPERMISSIONS & DEFFILEMODE; /* 0666: no --x by default? */
1463 }
1464 if ((args->flags != (int)VNOVAL) && (args->flags & HFSFSMNT_WRAPPER))
1465 mntwrapper = 1;
1466 } else {
1467 /* Even w/o explicit mount arguments, MNT_UNKNOWNPERMISSIONS requires setting up uid, gid, and mask: */
1468 if (((unsigned int)vfs_flags(mp)) & MNT_UNKNOWNPERMISSIONS) {
1469 hfsmp->hfs_uid = UNKNOWNUID;
1470 hfsmp->hfs_gid = UNKNOWNGID;
1471 vfs_setowner(mp, hfsmp->hfs_uid, hfsmp->hfs_gid); /* tell the VFS */
1472 hfsmp->hfs_dir_mask = UNKNOWNPERMISSIONS & ALLPERMS; /* 0777: rwx---rwx */
1473 hfsmp->hfs_file_mask = UNKNOWNPERMISSIONS & DEFFILEMODE; /* 0666: no --x by default? */
1474 }
1475 }
1476
1477 /* Find out if disk media is writable. */
1478 if (VNOP_IOCTL(devvp, DKIOCISWRITABLE, (caddr_t)&iswritable, 0, context) == 0) {
1479 if (iswritable)
1480 hfsmp->hfs_flags |= HFS_WRITEABLE_MEDIA;
1481 else
1482 hfsmp->hfs_flags &= ~HFS_WRITEABLE_MEDIA;
1483 }
1484
1485 // record the current time at which we're mounting this volume
1486 struct timeval tv;
1487 microtime(&tv);
1488 hfsmp->hfs_mount_time = tv.tv_sec;
1489
1490 /* Mount a standard HFS disk */
1491 if ((SWAP_BE16(mdbp->drSigWord) == kHFSSigWord) &&
1492 (mntwrapper || (SWAP_BE16(mdbp->drEmbedSigWord) != kHFSPlusSigWord))) {
1493
1494 /* On 10.6 and beyond, non read-only mounts for HFS standard vols get rejected */
1495 if (vfs_isrdwr(mp)) {
1496 retval = EROFS;
1497 goto error_exit;
1498 }
1499
1500 printf("hfs_mountfs: Mounting HFS Standard volumes was deprecated in Mac OS 10.7 \n");
1501
1502 /* Treat it as if it's read-only and not writeable */
1503 hfsmp->hfs_flags |= HFS_READ_ONLY;
1504 hfsmp->hfs_flags &= ~HFS_WRITEABLE_MEDIA;
1505
1506 /* If only journal replay is requested, exit immediately */
1507 if (journal_replay_only) {
1508 retval = 0;
1509 goto error_exit;
1510 }
1511
1512 if ((vfs_flags(mp) & MNT_ROOTFS)) {
1513 retval = EINVAL; /* Cannot root from HFS standard disks */
1514 goto error_exit;
1515 }
1516 /* HFS disks can only use 512 byte physical blocks */
1517 if (log_blksize > kHFSBlockSize) {
1518 log_blksize = kHFSBlockSize;
1519 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context)) {
1520 retval = ENXIO;
1521 goto error_exit;
1522 }
1523 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) {
1524 retval = ENXIO;
1525 goto error_exit;
1526 }
1527 hfsmp->hfs_logical_block_size = log_blksize;
1528 hfsmp->hfs_logical_block_count = log_blkcnt;
1529 hfsmp->hfs_physical_block_size = log_blksize;
1530 hfsmp->hfs_log_per_phys = 1;
1531 }
1532 if (args) {
1533 hfsmp->hfs_encoding = args->hfs_encoding;
1534 HFSTOVCB(hfsmp)->volumeNameEncodingHint = args->hfs_encoding;
1535
1536 /* establish the timezone */
1537 gTimeZone = args->hfs_timezone;
1538 }
1539
1540 retval = hfs_getconverter(hfsmp->hfs_encoding, &hfsmp->hfs_get_unicode,
1541 &hfsmp->hfs_get_hfsname);
1542 if (retval)
1543 goto error_exit;
1544
1545 retval = hfs_MountHFSVolume(hfsmp, mdbp, p);
1546 if (retval)
1547 (void) hfs_relconverter(hfsmp->hfs_encoding);
1548
1549 } else /* Mount an HFS Plus disk */ {
1550 HFSPlusVolumeHeader *vhp;
1551 off_t embeddedOffset;
1552 int jnl_disable = 0;
1553
1554 /* Get the embedded Volume Header */
1555 if (SWAP_BE16(mdbp->drEmbedSigWord) == kHFSPlusSigWord) {
1556 embeddedOffset = SWAP_BE16(mdbp->drAlBlSt) * kHFSBlockSize;
1557 embeddedOffset += (u_int64_t)SWAP_BE16(mdbp->drEmbedExtent.startBlock) *
1558 (u_int64_t)SWAP_BE32(mdbp->drAlBlkSiz);
1559
1560 /*
1561 * If the embedded volume doesn't start on a block
1562 * boundary, then switch the device to a 512-byte
1563 * block size so everything will line up on a block
1564 * boundary.
1565 */
1566 if ((embeddedOffset % log_blksize) != 0) {
1567 printf("hfs_mountfs: embedded volume offset not"
1568 " a multiple of physical block size (%d);"
1569 " switching to 512\n", log_blksize);
1570 log_blksize = 512;
1571 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE,
1572 (caddr_t)&log_blksize, FWRITE, context)) {
1573
1574 if (HFS_MOUNT_DEBUG) {
1575 printf("hfs_mountfs: DKIOCSETBLOCKSIZE (3) failed\n");
1576 }
1577 retval = ENXIO;
1578 goto error_exit;
1579 }
1580 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT,
1581 (caddr_t)&log_blkcnt, 0, context)) {
1582 if (HFS_MOUNT_DEBUG) {
1583 printf("hfs_mountfs: DKIOCGETBLOCKCOUNT (3) failed\n");
1584 }
1585 retval = ENXIO;
1586 goto error_exit;
1587 }
1588 /* Note: relative block count adjustment */
1589 hfsmp->hfs_logical_block_count *=
1590 hfsmp->hfs_logical_block_size / log_blksize;
1591
1592 /* Update logical /physical block size */
1593 hfsmp->hfs_logical_block_size = log_blksize;
1594 hfsmp->hfs_physical_block_size = log_blksize;
1595 phys_blksize = log_blksize;
1596 hfsmp->hfs_log_per_phys = 1;
1597 }
1598
1599 disksize = (u_int64_t)SWAP_BE16(mdbp->drEmbedExtent.blockCount) *
1600 (u_int64_t)SWAP_BE32(mdbp->drAlBlkSiz);
1601
1602 hfsmp->hfs_logical_block_count = disksize / log_blksize;
1603
1604 mdb_offset = (daddr64_t)((embeddedOffset / log_blksize) + HFS_PRI_SECTOR(log_blksize));
1605 retval = (int)buf_meta_bread(devvp, HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
1606 phys_blksize, cred, &bp);
1607 if (retval) {
1608 if (HFS_MOUNT_DEBUG) {
1609 printf("hfs_mountfs: buf_meta_bread (2) failed with %d\n", retval);
1610 }
1611 goto error_exit;
1612 }
1613 bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize), mdbp, 512);
1614 buf_brelse(bp);
1615 bp = NULL;
1616 vhp = (HFSPlusVolumeHeader*) mdbp;
1617
1618 } else /* pure HFS+ */ {
1619 embeddedOffset = 0;
1620 vhp = (HFSPlusVolumeHeader*) mdbp;
1621 }
1622
1623 if (isroot) {
1624 hfs_root_unmounted_cleanly = (SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) != 0;
1625 }
1626
1627 /*
1628 * On inconsistent disks, do not allow read-write mount
1629 * unless it is the boot volume being mounted. We also
1630 * always want to replay the journal if the journal_replay_only
1631 * flag is set because that will (most likely) get the
1632 * disk into a consistent state before fsck_hfs starts
1633 * looking at it.
1634 */
1635 if ( !(vfs_flags(mp) & MNT_ROOTFS)
1636 && (SWAP_BE32(vhp->attributes) & kHFSVolumeInconsistentMask)
1637 && !journal_replay_only
1638 && !(hfsmp->hfs_flags & HFS_READ_ONLY)) {
1639
1640 if (HFS_MOUNT_DEBUG) {
1641 printf("hfs_mountfs: failed to mount non-root inconsistent disk\n");
1642 }
1643 retval = EINVAL;
1644 goto error_exit;
1645 }
1646
1647
1648 // XXXdbg
1649 //
1650 hfsmp->jnl = NULL;
1651 hfsmp->jvp = NULL;
1652 if (args != NULL && (args->flags & HFSFSMNT_EXTENDED_ARGS) &&
1653 args->journal_disable) {
1654 jnl_disable = 1;
1655 }
1656
1657 //
1658 // We only initialize the journal here if the last person
1659 // to mount this volume was journaling aware. Otherwise
1660 // we delay journal initialization until later at the end
1661 // of hfs_MountHFSPlusVolume() because the last person who
1662 // mounted it could have messed things up behind our back
1663 // (so we need to go find the .journal file, make sure it's
1664 // the right size, re-sync up if it was moved, etc).
1665 //
1666 if ( (SWAP_BE32(vhp->lastMountedVersion) == kHFSJMountVersion)
1667 && (SWAP_BE32(vhp->attributes) & kHFSVolumeJournaledMask)
1668 && !jnl_disable) {
1669
1670 // if we're able to init the journal, mark the mount
1671 // point as journaled.
1672 //
1673 if ((retval = hfs_early_journal_init(hfsmp, vhp, args, embeddedOffset, mdb_offset, mdbp, cred)) == 0) {
1674 vfs_setflags(mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
1675 } else {
1676 if (retval == EROFS) {
1677 // EROFS is a special error code that means the volume has an external
1678 // journal which we couldn't find. in that case we do not want to
1679 // rewrite the volume header - we'll just refuse to mount the volume.
1680 if (HFS_MOUNT_DEBUG) {
1681 printf("hfs_mountfs: hfs_early_journal_init indicated external jnl \n");
1682 }
1683 retval = EINVAL;
1684 goto error_exit;
1685 }
1686
1687 // if the journal failed to open, then set the lastMountedVersion
1688 // to be "FSK!" which fsck_hfs will see and force the fsck instead
1689 // of just bailing out because the volume is journaled.
1690 if (!ronly) {
1691 if (HFS_MOUNT_DEBUG) {
1692 printf("hfs_mountfs: hfs_early_journal_init failed, setting to FSK \n");
1693 }
1694
1695 HFSPlusVolumeHeader *jvhp;
1696
1697 hfsmp->hfs_flags |= HFS_NEED_JNL_RESET;
1698
1699 if (mdb_offset == 0) {
1700 mdb_offset = (daddr64_t)((embeddedOffset / log_blksize) + HFS_PRI_SECTOR(log_blksize));
1701 }
1702
1703 bp = NULL;
1704 retval = (int)buf_meta_bread(devvp,
1705 HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
1706 phys_blksize, cred, &bp);
1707 if (retval == 0) {
1708 jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize));
1709
1710 if (SWAP_BE16(jvhp->signature) == kHFSPlusSigWord || SWAP_BE16(jvhp->signature) == kHFSXSigWord) {
1711 printf ("hfs(1): Journal replay fail. Writing lastMountVersion as FSK!\n");
1712 jvhp->lastMountedVersion = SWAP_BE32(kFSKMountVersion);
1713 buf_bwrite(bp);
1714 } else {
1715 buf_brelse(bp);
1716 }
1717 bp = NULL;
1718 } else if (bp) {
1719 buf_brelse(bp);
1720 // clear this so the error exit path won't try to use it
1721 bp = NULL;
1722 }
1723 }
1724
1725 // if this isn't the root device just bail out.
1726 // If it is the root device we just continue on
1727 // in the hopes that fsck_hfs will be able to
1728 // fix any damage that exists on the volume.
1729 if ( !(vfs_flags(mp) & MNT_ROOTFS)) {
1730 if (HFS_MOUNT_DEBUG) {
1731 printf("hfs_mountfs: hfs_early_journal_init failed, erroring out \n");
1732 }
1733 retval = EINVAL;
1734 goto error_exit;
1735 }
1736 }
1737 }
1738 // XXXdbg
1739
1740 /* Either the journal is replayed successfully, or there
1741 * was nothing to replay, or no journal exists. In any case,
1742 * return success.
1743 */
1744 if (journal_replay_only) {
1745 retval = 0;
1746 goto error_exit;
1747 }
1748
1749 (void) hfs_getconverter(0, &hfsmp->hfs_get_unicode, &hfsmp->hfs_get_hfsname);
1750
1751 retval = hfs_MountHFSPlusVolume(hfsmp, vhp, embeddedOffset, disksize, p, args, cred);
1752 /*
1753 * If the backend didn't like our physical blocksize
1754 * then retry with physical blocksize of 512.
1755 */
1756 if ((retval == ENXIO) && (log_blksize > 512) && (log_blksize != minblksize)) {
1757 printf("hfs_mountfs: could not use physical block size "
1758 "(%d) switching to 512\n", log_blksize);
1759 log_blksize = 512;
1760 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context)) {
1761 if (HFS_MOUNT_DEBUG) {
1762 printf("hfs_mountfs: DKIOCSETBLOCKSIZE (4) failed \n");
1763 }
1764 retval = ENXIO;
1765 goto error_exit;
1766 }
1767 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) {
1768 if (HFS_MOUNT_DEBUG) {
1769 printf("hfs_mountfs: DKIOCGETBLOCKCOUNT (4) failed \n");
1770 }
1771 retval = ENXIO;
1772 goto error_exit;
1773 }
1774 devvp->v_specsize = log_blksize;
1775 /* Note: relative block count adjustment (in case this is an embedded volume). */
1776 hfsmp->hfs_logical_block_count *= hfsmp->hfs_logical_block_size / log_blksize;
1777 hfsmp->hfs_logical_block_size = log_blksize;
1778 hfsmp->hfs_log_per_phys = hfsmp->hfs_physical_block_size / log_blksize;
1779
1780 if (hfsmp->jnl && hfsmp->jvp == devvp) {
1781 // close and re-open this with the new block size
1782 journal_close(hfsmp->jnl);
1783 hfsmp->jnl = NULL;
1784 if (hfs_early_journal_init(hfsmp, vhp, args, embeddedOffset, mdb_offset, mdbp, cred) == 0) {
1785 vfs_setflags(mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
1786 } else {
1787 // if the journal failed to open, then set the lastMountedVersion
1788 // to be "FSK!" which fsck_hfs will see and force the fsck instead
1789 // of just bailing out because the volume is journaled.
1790 if (!ronly) {
1791 if (HFS_MOUNT_DEBUG) {
1792 printf("hfs_mountfs: hfs_early_journal_init (2) resetting.. \n");
1793 }
1794 HFSPlusVolumeHeader *jvhp;
1795
1796 hfsmp->hfs_flags |= HFS_NEED_JNL_RESET;
1797
1798 if (mdb_offset == 0) {
1799 mdb_offset = (daddr64_t)((embeddedOffset / log_blksize) + HFS_PRI_SECTOR(log_blksize));
1800 }
1801
1802 bp = NULL;
1803 retval = (int)buf_meta_bread(devvp, HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
1804 phys_blksize, cred, &bp);
1805 if (retval == 0) {
1806 jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize));
1807
1808 if (SWAP_BE16(jvhp->signature) == kHFSPlusSigWord || SWAP_BE16(jvhp->signature) == kHFSXSigWord) {
1809 printf ("hfs(2): Journal replay fail. Writing lastMountVersion as FSK!\n");
1810 jvhp->lastMountedVersion = SWAP_BE32(kFSKMountVersion);
1811 buf_bwrite(bp);
1812 } else {
1813 buf_brelse(bp);
1814 }
1815 bp = NULL;
1816 } else if (bp) {
1817 buf_brelse(bp);
1818 // clear this so the error exit path won't try to use it
1819 bp = NULL;
1820 }
1821 }
1822
1823 // if this isn't the root device just bail out.
1824 // If it is the root device we just continue on
1825 // in the hopes that fsck_hfs will be able to
1826 // fix any damage that exists on the volume.
1827 if ( !(vfs_flags(mp) & MNT_ROOTFS)) {
1828 if (HFS_MOUNT_DEBUG) {
1829 printf("hfs_mountfs: hfs_early_journal_init (2) failed \n");
1830 }
1831 retval = EINVAL;
1832 goto error_exit;
1833 }
1834 }
1835 }
1836
1837 /* Try again with a smaller block size... */
1838 retval = hfs_MountHFSPlusVolume(hfsmp, vhp, embeddedOffset, disksize, p, args, cred);
1839 if (retval && HFS_MOUNT_DEBUG) {
1840 printf("hfs_MountHFSPlusVolume (late) returned %d\n",retval);
1841 }
1842 }
1843 if (retval)
1844 (void) hfs_relconverter(0);
1845 }
1846
1847 // save off a snapshot of the mtime from the previous mount
1848 // (for matador).
1849 hfsmp->hfs_last_mounted_mtime = hfsmp->hfs_mtime;
1850
1851 if ( retval ) {
1852 if (HFS_MOUNT_DEBUG) {
1853 printf("hfs_mountfs: encountered failure %d \n", retval);
1854 }
1855 goto error_exit;
1856 }
1857
1858 mp->mnt_vfsstat.f_fsid.val[0] = (long)dev;
1859 mp->mnt_vfsstat.f_fsid.val[1] = vfs_typenum(mp);
1860 vfs_setmaxsymlen(mp, 0);
1861
1862 mp->mnt_vtable->vfc_vfsflags |= VFC_VFSNATIVEXATTR;
1863 #if NAMEDSTREAMS
1864 mp->mnt_kern_flag |= MNTK_NAMED_STREAMS;
1865 #endif
1866 if (!(hfsmp->hfs_flags & HFS_STANDARD)) {
1867 /* Tell VFS that we support directory hard links. */
1868 mp->mnt_vtable->vfc_vfsflags |= VFC_VFSDIRLINKS;
1869 } else {
1870 /* HFS standard doesn't support extended readdir! */
1871 mount_set_noreaddirext (mp);
1872 }
1873
1874 if (args) {
1875 /*
1876 * Set the free space warning levels for a non-root volume:
1877 *
1878 * Set the "danger" limit to 1% of the volume size or 100MB, whichever
1879 * is less. Set the "warning" limit to 2% of the volume size or 150MB,
1880 * whichever is less. And last, set the "desired" freespace level to
1881 * to 3% of the volume size or 200MB, whichever is less.
1882 */
1883 hfsmp->hfs_freespace_notify_dangerlimit =
1884 MIN(HFS_VERYLOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize,
1885 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_VERYLOWDISKTRIGGERFRACTION);
1886 hfsmp->hfs_freespace_notify_warninglimit =
1887 MIN(HFS_LOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize,
1888 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_LOWDISKTRIGGERFRACTION);
1889 hfsmp->hfs_freespace_notify_desiredlevel =
1890 MIN(HFS_LOWDISKSHUTOFFLEVEL / HFSTOVCB(hfsmp)->blockSize,
1891 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_LOWDISKSHUTOFFFRACTION);
1892 } else {
1893 /*
1894 * Set the free space warning levels for the root volume:
1895 *
1896 * Set the "danger" limit to 5% of the volume size or 512MB, whichever
1897 * is less. Set the "warning" limit to 10% of the volume size or 1GB,
1898 * whichever is less. And last, set the "desired" freespace level to
1899 * to 11% of the volume size or 1.25GB, whichever is less.
1900 */
1901 hfsmp->hfs_freespace_notify_dangerlimit =
1902 MIN(HFS_ROOTVERYLOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize,
1903 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_ROOTVERYLOWDISKTRIGGERFRACTION);
1904 hfsmp->hfs_freespace_notify_warninglimit =
1905 MIN(HFS_ROOTLOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize,
1906 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_ROOTLOWDISKTRIGGERFRACTION);
1907 hfsmp->hfs_freespace_notify_desiredlevel =
1908 MIN(HFS_ROOTLOWDISKSHUTOFFLEVEL / HFSTOVCB(hfsmp)->blockSize,
1909 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_ROOTLOWDISKSHUTOFFFRACTION);
1910 };
1911
1912 /* Check if the file system exists on virtual device, like disk image */
1913 if (VNOP_IOCTL(devvp, DKIOCISVIRTUAL, (caddr_t)&isvirtual, 0, context) == 0) {
1914 if (isvirtual) {
1915 hfsmp->hfs_flags |= HFS_VIRTUAL_DEVICE;
1916 }
1917 }
1918
1919 /* do not allow ejectability checks on the root device */
1920 if (isroot == 0) {
1921 if ((hfsmp->hfs_flags & HFS_VIRTUAL_DEVICE) == 0 &&
1922 IOBSDIsMediaEjectable(mp->mnt_vfsstat.f_mntfromname)) {
1923 hfsmp->hfs_max_pending_io = 4096*1024; // a reasonable value to start with.
1924 hfsmp->hfs_syncer = thread_call_allocate(hfs_syncer, hfsmp);
1925 if (hfsmp->hfs_syncer == NULL) {
1926 printf("hfs: failed to allocate syncer thread callback for %s (%s)\n",
1927 mp->mnt_vfsstat.f_mntfromname, mp->mnt_vfsstat.f_mntonname);
1928 }
1929 }
1930 }
1931
1932 #if CONFIG_HFS_ALLOC_RBTREE
1933 /*
1934 * We spawn a thread to create the pair of red-black trees for this volume.
1935 * However, in so doing, we must be careful to ensure that if this thread is still
1936 * running after mount has finished, it doesn't interfere with an unmount. Specifically,
1937 * we'll need to set a bit that indicates we're in progress building the trees here.
1938 * Unmount will check for this bit, and then if it's set, mark a corresponding bit that
1939 * notifies the tree generation code that an unmount is waiting. Also mark the bit that
1940 * indicates the tree is live and operating.
1941 *
1942 * Only do this if we're operating on a read-write mount (we wouldn't care for read-only).
1943 */
1944
1945 if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) {
1946 hfsmp->extent_tree_flags |= (HFS_ALLOC_TREEBUILD_INFLIGHT | HFS_ALLOC_RB_ENABLED);
1947
1948 /* Initialize EOF counter so that the thread can assume it started at initial values */
1949 hfsmp->offset_block_end = 0;
1950 InitTree(hfsmp);
1951
1952 kernel_thread_start ((thread_continue_t) hfs_initialize_allocator , hfsmp, &allocator_thread);
1953 thread_deallocate(allocator_thread);
1954 }
1955
1956 #endif
1957
1958 /*
1959 * Start looking for free space to drop below this level and generate a
1960 * warning immediately if needed:
1961 */
1962 hfsmp->hfs_notification_conditions = 0;
1963 hfs_generate_volume_notifications(hfsmp);
1964
1965 if (ronly == 0) {
1966 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
1967 }
1968 FREE(mdbp, M_TEMP);
1969 return (0);
1970
1971 error_exit:
1972 if (bp)
1973 buf_brelse(bp);
1974 if (mdbp)
1975 FREE(mdbp, M_TEMP);
1976
1977 if (hfsmp && hfsmp->jvp && hfsmp->jvp != hfsmp->hfs_devvp) {
1978 vnode_clearmountedon(hfsmp->jvp);
1979 (void)VNOP_CLOSE(hfsmp->jvp, ronly ? FREAD : FREAD|FWRITE, vfs_context_kernel());
1980 hfsmp->jvp = NULL;
1981 }
1982 if (hfsmp) {
1983 if (hfsmp->hfs_devvp) {
1984 vnode_rele(hfsmp->hfs_devvp);
1985 }
1986 hfs_delete_chash(hfsmp);
1987
1988 FREE(hfsmp, M_HFSMNT);
1989 vfs_setfsprivate(mp, NULL);
1990 }
1991 return (retval);
1992 }
1993
1994
1995 /*
1996 * Make a filesystem operational.
1997 * Nothing to do at the moment.
1998 */
1999 /* ARGSUSED */
2000 static int
2001 hfs_start(__unused struct mount *mp, __unused int flags, __unused vfs_context_t context)
2002 {
2003 return (0);
2004 }
2005
2006
2007 /*
2008 * unmount system call
2009 */
2010 int
2011 hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context)
2012 {
2013 struct proc *p = vfs_context_proc(context);
2014 struct hfsmount *hfsmp = VFSTOHFS(mp);
2015 int retval = E_NONE;
2016 int flags;
2017 int force;
2018 int started_tr = 0;
2019 int rb_used = 0;
2020
2021 flags = 0;
2022 force = 0;
2023 if (mntflags & MNT_FORCE) {
2024 flags |= FORCECLOSE;
2025 force = 1;
2026 }
2027
2028 if ((retval = hfs_flushfiles(mp, flags, p)) && !force)
2029 return (retval);
2030
2031 if (hfsmp->hfs_flags & HFS_METADATA_ZONE)
2032 (void) hfs_recording_suspend(hfsmp);
2033
2034 /*
2035 * Cancel any pending timers for this volume. Then wait for any timers
2036 * which have fired, but whose callbacks have not yet completed.
2037 */
2038 if (hfsmp->hfs_syncer)
2039 {
2040 struct timespec ts = {0, 100000000}; /* 0.1 seconds */
2041
2042 /*
2043 * Cancel any timers that have been scheduled, but have not
2044 * fired yet. NOTE: The kernel considers a timer complete as
2045 * soon as it starts your callback, so the kernel does not
2046 * keep track of the number of callbacks in progress.
2047 */
2048 if (thread_call_cancel(hfsmp->hfs_syncer))
2049 OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_incomplete);
2050 thread_call_free(hfsmp->hfs_syncer);
2051 hfsmp->hfs_syncer = NULL;
2052
2053 /*
2054 * This waits for all of the callbacks that were entered before
2055 * we did thread_call_cancel above, but have not completed yet.
2056 */
2057 while(hfsmp->hfs_sync_incomplete > 0)
2058 {
2059 msleep((caddr_t)&hfsmp->hfs_sync_incomplete, NULL, PWAIT, "hfs_unmount", &ts);
2060 }
2061
2062 if (hfsmp->hfs_sync_incomplete < 0)
2063 panic("hfs_unmount: pm_sync_incomplete underflow!\n");
2064 }
2065
2066 #if CONFIG_HFS_ALLOC_RBTREE
2067 rb_used = hfs_teardown_allocator(hfsmp);
2068 #endif
2069
2070 /*
2071 * Flush out the b-trees, volume bitmap and Volume Header
2072 */
2073 if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) {
2074 retval = hfs_start_transaction(hfsmp);
2075 if (retval == 0) {
2076 started_tr = 1;
2077 } else if (!force) {
2078 goto err_exit;
2079 }
2080
2081 if (hfsmp->hfs_startup_vp) {
2082 (void) hfs_lock(VTOC(hfsmp->hfs_startup_vp), HFS_EXCLUSIVE_LOCK);
2083 retval = hfs_fsync(hfsmp->hfs_startup_vp, MNT_WAIT, 0, p);
2084 hfs_unlock(VTOC(hfsmp->hfs_startup_vp));
2085 if (retval && !force)
2086 goto err_exit;
2087 }
2088
2089 if (hfsmp->hfs_attribute_vp) {
2090 (void) hfs_lock(VTOC(hfsmp->hfs_attribute_vp), HFS_EXCLUSIVE_LOCK);
2091 retval = hfs_fsync(hfsmp->hfs_attribute_vp, MNT_WAIT, 0, p);
2092 hfs_unlock(VTOC(hfsmp->hfs_attribute_vp));
2093 if (retval && !force)
2094 goto err_exit;
2095 }
2096
2097 (void) hfs_lock(VTOC(hfsmp->hfs_catalog_vp), HFS_EXCLUSIVE_LOCK);
2098 retval = hfs_fsync(hfsmp->hfs_catalog_vp, MNT_WAIT, 0, p);
2099 hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
2100 if (retval && !force)
2101 goto err_exit;
2102
2103 (void) hfs_lock(VTOC(hfsmp->hfs_extents_vp), HFS_EXCLUSIVE_LOCK);
2104 retval = hfs_fsync(hfsmp->hfs_extents_vp, MNT_WAIT, 0, p);
2105 hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
2106 if (retval && !force)
2107 goto err_exit;
2108
2109 if (hfsmp->hfs_allocation_vp) {
2110 (void) hfs_lock(VTOC(hfsmp->hfs_allocation_vp), HFS_EXCLUSIVE_LOCK);
2111 retval = hfs_fsync(hfsmp->hfs_allocation_vp, MNT_WAIT, 0, p);
2112 hfs_unlock(VTOC(hfsmp->hfs_allocation_vp));
2113 if (retval && !force)
2114 goto err_exit;
2115 }
2116
2117 if (hfsmp->hfc_filevp && vnode_issystem(hfsmp->hfc_filevp)) {
2118 retval = hfs_fsync(hfsmp->hfc_filevp, MNT_WAIT, 0, p);
2119 if (retval && !force)
2120 goto err_exit;
2121 }
2122
2123 /* If runtime corruption was detected, indicate that the volume
2124 * was not unmounted cleanly.
2125 */
2126 if (hfsmp->vcbAtrb & kHFSVolumeInconsistentMask) {
2127 HFSTOVCB(hfsmp)->vcbAtrb &= ~kHFSVolumeUnmountedMask;
2128 } else {
2129 HFSTOVCB(hfsmp)->vcbAtrb |= kHFSVolumeUnmountedMask;
2130 }
2131
2132
2133 if (rb_used) {
2134 /* If the rb-tree was live, just set min_start to 0 */
2135 hfsmp->nextAllocation = 0;
2136 }
2137 else {
2138 if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
2139 int i;
2140 u_int32_t min_start = hfsmp->totalBlocks;
2141
2142 // set the nextAllocation pointer to the smallest free block number
2143 // we've seen so on the next mount we won't rescan unnecessarily
2144 lck_spin_lock(&hfsmp->vcbFreeExtLock);
2145 for(i=0; i < (int)hfsmp->vcbFreeExtCnt; i++) {
2146 if (hfsmp->vcbFreeExt[i].startBlock < min_start) {
2147 min_start = hfsmp->vcbFreeExt[i].startBlock;
2148 }
2149 }
2150 lck_spin_unlock(&hfsmp->vcbFreeExtLock);
2151 if (min_start < hfsmp->nextAllocation) {
2152 hfsmp->nextAllocation = min_start;
2153 }
2154 }
2155 }
2156
2157
2158 retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
2159 if (retval) {
2160 HFSTOVCB(hfsmp)->vcbAtrb &= ~kHFSVolumeUnmountedMask;
2161 if (!force)
2162 goto err_exit; /* could not flush everything */
2163 }
2164
2165 if (started_tr) {
2166 hfs_end_transaction(hfsmp);
2167 started_tr = 0;
2168 }
2169 }
2170
2171 if (hfsmp->jnl) {
2172 hfs_journal_flush(hfsmp, FALSE);
2173 }
2174
2175 /*
2176 * Invalidate our caches and release metadata vnodes
2177 */
2178 (void) hfsUnmount(hfsmp, p);
2179
2180 if (HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord)
2181 (void) hfs_relconverter(hfsmp->hfs_encoding);
2182
2183 // XXXdbg
2184 if (hfsmp->jnl) {
2185 journal_close(hfsmp->jnl);
2186 hfsmp->jnl = NULL;
2187 }
2188
2189 VNOP_FSYNC(hfsmp->hfs_devvp, MNT_WAIT, context);
2190
2191 if (hfsmp->jvp && hfsmp->jvp != hfsmp->hfs_devvp) {
2192 vnode_clearmountedon(hfsmp->jvp);
2193 retval = VNOP_CLOSE(hfsmp->jvp,
2194 hfsmp->hfs_flags & HFS_READ_ONLY ? FREAD : FREAD|FWRITE,
2195 vfs_context_kernel());
2196 vnode_put(hfsmp->jvp);
2197 hfsmp->jvp = NULL;
2198 }
2199 // XXXdbg
2200
2201 /*
2202 * Last chance to dump unreferenced system files.
2203 */
2204 (void) vflush(mp, NULLVP, FORCECLOSE);
2205
2206 #if HFS_SPARSE_DEV
2207 /* Drop our reference on the backing fs (if any). */
2208 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) && hfsmp->hfs_backingfs_rootvp) {
2209 struct vnode * tmpvp;
2210
2211 hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
2212 tmpvp = hfsmp->hfs_backingfs_rootvp;
2213 hfsmp->hfs_backingfs_rootvp = NULLVP;
2214 vnode_rele(tmpvp);
2215 }
2216 #endif /* HFS_SPARSE_DEV */
2217 lck_mtx_destroy(&hfsmp->hfc_mutex, hfs_mutex_group);
2218 lck_spin_destroy(&hfsmp->vcbFreeExtLock, hfs_spinlock_group);
2219 vnode_rele(hfsmp->hfs_devvp);
2220
2221 hfs_delete_chash(hfsmp);
2222 FREE(hfsmp, M_HFSMNT);
2223
2224 return (0);
2225
2226 err_exit:
2227 if (started_tr) {
2228 hfs_end_transaction(hfsmp);
2229 }
2230 return retval;
2231 }
2232
2233
2234 /*
2235 * Return the root of a filesystem.
2236 */
2237 static int
2238 hfs_vfs_root(struct mount *mp, struct vnode **vpp, __unused vfs_context_t context)
2239 {
2240 return hfs_vget(VFSTOHFS(mp), (cnid_t)kHFSRootFolderID, vpp, 1, 0);
2241 }
2242
2243
2244 /*
2245 * Do operations associated with quotas
2246 */
2247 #if !QUOTA
2248 static int
2249 hfs_quotactl(__unused struct mount *mp, __unused int cmds, __unused uid_t uid, __unused caddr_t datap, __unused vfs_context_t context)
2250 {
2251 return (ENOTSUP);
2252 }
2253 #else
2254 static int
2255 hfs_quotactl(struct mount *mp, int cmds, uid_t uid, caddr_t datap, vfs_context_t context)
2256 {
2257 struct proc *p = vfs_context_proc(context);
2258 int cmd, type, error;
2259
2260 if (uid == ~0U)
2261 uid = kauth_cred_getuid(vfs_context_ucred(context));
2262 cmd = cmds >> SUBCMDSHIFT;
2263
2264 switch (cmd) {
2265 case Q_SYNC:
2266 case Q_QUOTASTAT:
2267 break;
2268 case Q_GETQUOTA:
2269 if (uid == kauth_cred_getuid(vfs_context_ucred(context)))
2270 break;
2271 /* fall through */
2272 default:
2273 if ( (error = vfs_context_suser(context)) )
2274 return (error);
2275 }
2276
2277 type = cmds & SUBCMDMASK;
2278 if ((u_int)type >= MAXQUOTAS)
2279 return (EINVAL);
2280 if (vfs_busy(mp, LK_NOWAIT))
2281 return (0);
2282
2283 switch (cmd) {
2284
2285 case Q_QUOTAON:
2286 error = hfs_quotaon(p, mp, type, datap);
2287 break;
2288
2289 case Q_QUOTAOFF:
2290 error = hfs_quotaoff(p, mp, type);
2291 break;
2292
2293 case Q_SETQUOTA:
2294 error = hfs_setquota(mp, uid, type, datap);
2295 break;
2296
2297 case Q_SETUSE:
2298 error = hfs_setuse(mp, uid, type, datap);
2299 break;
2300
2301 case Q_GETQUOTA:
2302 error = hfs_getquota(mp, uid, type, datap);
2303 break;
2304
2305 case Q_SYNC:
2306 error = hfs_qsync(mp);
2307 break;
2308
2309 case Q_QUOTASTAT:
2310 error = hfs_quotastat(mp, type, datap);
2311 break;
2312
2313 default:
2314 error = EINVAL;
2315 break;
2316 }
2317 vfs_unbusy(mp);
2318
2319 return (error);
2320 }
2321 #endif /* QUOTA */
2322
2323 /* Subtype is composite of bits */
2324 #define HFS_SUBTYPE_JOURNALED 0x01
2325 #define HFS_SUBTYPE_CASESENSITIVE 0x02
2326 /* bits 2 - 6 reserved */
2327 #define HFS_SUBTYPE_STANDARDHFS 0x80
2328
2329 /*
2330 * Get file system statistics.
2331 */
2332 int
2333 hfs_statfs(struct mount *mp, register struct vfsstatfs *sbp, __unused vfs_context_t context)
2334 {
2335 ExtendedVCB *vcb = VFSTOVCB(mp);
2336 struct hfsmount *hfsmp = VFSTOHFS(mp);
2337 u_int32_t freeCNIDs;
2338 u_int16_t subtype = 0;
2339
2340 freeCNIDs = (u_int32_t)0xFFFFFFFF - (u_int32_t)vcb->vcbNxtCNID;
2341
2342 sbp->f_bsize = (u_int32_t)vcb->blockSize;
2343 sbp->f_iosize = (size_t)cluster_max_io_size(mp, 0);
2344 sbp->f_blocks = (u_int64_t)((u_int32_t)vcb->totalBlocks);
2345 sbp->f_bfree = (u_int64_t)((u_int32_t )hfs_freeblks(hfsmp, 0));
2346 sbp->f_bavail = (u_int64_t)((u_int32_t )hfs_freeblks(hfsmp, 1));
2347 sbp->f_files = (u_int64_t)((u_int32_t )(vcb->totalBlocks - 2)); /* max files is constrained by total blocks */
2348 sbp->f_ffree = (u_int64_t)((u_int32_t )(MIN(freeCNIDs, sbp->f_bavail)));
2349
2350 /*
2351 * Subtypes (flavors) for HFS
2352 * 0: Mac OS Extended
2353 * 1: Mac OS Extended (Journaled)
2354 * 2: Mac OS Extended (Case Sensitive)
2355 * 3: Mac OS Extended (Case Sensitive, Journaled)
2356 * 4 - 127: Reserved
2357 * 128: Mac OS Standard
2358 *
2359 */
2360 if (hfsmp->hfs_flags & HFS_STANDARD) {
2361 subtype = HFS_SUBTYPE_STANDARDHFS;
2362 } else /* HFS Plus */ {
2363 if (hfsmp->jnl)
2364 subtype |= HFS_SUBTYPE_JOURNALED;
2365 if (hfsmp->hfs_flags & HFS_CASE_SENSITIVE)
2366 subtype |= HFS_SUBTYPE_CASESENSITIVE;
2367 }
2368 sbp->f_fssubtype = subtype;
2369
2370 return (0);
2371 }
2372
2373
2374 //
2375 // XXXdbg -- this is a callback to be used by the journal to
2376 // get meta data blocks flushed out to disk.
2377 //
2378 // XXXdbg -- be smarter and don't flush *every* block on each
2379 // call. try to only flush some so we don't wind up
2380 // being too synchronous.
2381 //
2382 __private_extern__
2383 void
2384 hfs_sync_metadata(void *arg)
2385 {
2386 struct mount *mp = (struct mount *)arg;
2387 struct hfsmount *hfsmp;
2388 ExtendedVCB *vcb;
2389 buf_t bp;
2390 int retval;
2391 daddr64_t priIDSector;
2392 hfsmp = VFSTOHFS(mp);
2393 vcb = HFSTOVCB(hfsmp);
2394
2395 // now make sure the super block is flushed
2396 priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
2397 HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size));
2398
2399 retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
2400 HFS_PHYSBLK_ROUNDDOWN(priIDSector, hfsmp->hfs_log_per_phys),
2401 hfsmp->hfs_physical_block_size, NOCRED, &bp);
2402 if ((retval != 0 ) && (retval != ENXIO)) {
2403 printf("hfs_sync_metadata: can't read volume header at %d! (retval 0x%x)\n",
2404 (int)priIDSector, retval);
2405 }
2406
2407 if (retval == 0 && ((buf_flags(bp) & (B_DELWRI | B_LOCKED)) == B_DELWRI)) {
2408 buf_bwrite(bp);
2409 } else if (bp) {
2410 buf_brelse(bp);
2411 }
2412
2413 // the alternate super block...
2414 // XXXdbg - we probably don't need to do this each and every time.
2415 // hfs_btreeio.c:FlushAlternate() should flag when it was
2416 // written...
2417 if (hfsmp->hfs_alt_id_sector) {
2418 retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
2419 HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys),
2420 hfsmp->hfs_physical_block_size, NOCRED, &bp);
2421 if (retval == 0 && ((buf_flags(bp) & (B_DELWRI | B_LOCKED)) == B_DELWRI)) {
2422 buf_bwrite(bp);
2423 } else if (bp) {
2424 buf_brelse(bp);
2425 }
2426 }
2427 }
2428
2429
/*
 * Per-call context handed from hfs_sync() to hfs_sync_callback() for
 * every vnode visited by vnode_iterate().
 */
struct hfs_sync_cargs {
	kauth_cred_t cred;	/* caller's credentials (set from kauth_cred_get()) */
	struct proc *p;		/* process on whose behalf the sync runs */
	int waitfor;		/* wait flag forwarded to hfs_fsync() */
	int error;		/* sticky error: overwritten on each hfs_fsync() failure */
};
2436
2437
2438 static int
2439 hfs_sync_callback(struct vnode *vp, void *cargs)
2440 {
2441 struct cnode *cp;
2442 struct hfs_sync_cargs *args;
2443 int error;
2444
2445 args = (struct hfs_sync_cargs *)cargs;
2446
2447 if (hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK) != 0) {
2448 return (VNODE_RETURNED);
2449 }
2450 cp = VTOC(vp);
2451
2452 if ((cp->c_flag & C_MODIFIED) ||
2453 (cp->c_touch_acctime | cp->c_touch_chgtime | cp->c_touch_modtime) ||
2454 vnode_hasdirtyblks(vp)) {
2455 error = hfs_fsync(vp, args->waitfor, 0, args->p);
2456
2457 if (error)
2458 args->error = error;
2459 }
2460 hfs_unlock(cp);
2461 return (VNODE_RETURNED);
2462 }
2463
2464
2465
2466 /*
2467 * Go through the disk queues to initiate sandbagged IO;
2468 * go through the inodes to write those that have been modified;
2469 * initiate the writing of the super block if it has been modified.
2470 *
2471 * Note: we are always called with the filesystem marked `MPBUSY'.
2472 */
2473 int
2474 hfs_sync(struct mount *mp, int waitfor, vfs_context_t context)
2475 {
2476 struct proc *p = vfs_context_proc(context);
2477 struct cnode *cp;
2478 struct hfsmount *hfsmp;
2479 ExtendedVCB *vcb;
2480 struct vnode *meta_vp[4];
2481 int i;
2482 int error, allerror = 0;
2483 struct hfs_sync_cargs args;
2484
2485 hfsmp = VFSTOHFS(mp);
2486
2487 /*
2488 * hfs_changefs might be manipulating vnodes so back off
2489 */
2490 if (hfsmp->hfs_flags & HFS_IN_CHANGEFS)
2491 return (0);
2492
2493 if (hfsmp->hfs_flags & HFS_READ_ONLY)
2494 return (EROFS);
2495
2496 /* skip over frozen volumes */
2497 if (!lck_rw_try_lock_shared(&hfsmp->hfs_insync))
2498 return 0;
2499
2500 args.cred = kauth_cred_get();
2501 args.waitfor = waitfor;
2502 args.p = p;
2503 args.error = 0;
2504 /*
2505 * hfs_sync_callback will be called for each vnode
2506 * hung off of this mount point... the vnode will be
2507 * properly referenced and unreferenced around the callback
2508 */
2509 vnode_iterate(mp, 0, hfs_sync_callback, (void *)&args);
2510
2511 if (args.error)
2512 allerror = args.error;
2513
2514 vcb = HFSTOVCB(hfsmp);
2515
2516 meta_vp[0] = vcb->extentsRefNum;
2517 meta_vp[1] = vcb->catalogRefNum;
2518 meta_vp[2] = vcb->allocationsRefNum; /* This is NULL for standard HFS */
2519 meta_vp[3] = hfsmp->hfs_attribute_vp; /* Optional file */
2520
2521 /* Now sync our three metadata files */
2522 for (i = 0; i < 4; ++i) {
2523 struct vnode *btvp;
2524
2525 btvp = meta_vp[i];;
2526 if ((btvp==0) || (vnode_mount(btvp) != mp))
2527 continue;
2528
2529 /* XXX use hfs_systemfile_lock instead ? */
2530 (void) hfs_lock(VTOC(btvp), HFS_EXCLUSIVE_LOCK);
2531 cp = VTOC(btvp);
2532
2533 if (((cp->c_flag & C_MODIFIED) == 0) &&
2534 (cp->c_touch_acctime == 0) &&
2535 (cp->c_touch_chgtime == 0) &&
2536 (cp->c_touch_modtime == 0) &&
2537 vnode_hasdirtyblks(btvp) == 0) {
2538 hfs_unlock(VTOC(btvp));
2539 continue;
2540 }
2541 error = vnode_get(btvp);
2542 if (error) {
2543 hfs_unlock(VTOC(btvp));
2544 continue;
2545 }
2546 if ((error = hfs_fsync(btvp, waitfor, 0, p)))
2547 allerror = error;
2548
2549 hfs_unlock(cp);
2550 vnode_put(btvp);
2551 };
2552
2553 /*
2554 * Force stale file system control information to be flushed.
2555 */
2556 if (vcb->vcbSigWord == kHFSSigWord) {
2557 if ((error = VNOP_FSYNC(hfsmp->hfs_devvp, waitfor, context))) {
2558 allerror = error;
2559 }
2560 }
2561 #if QUOTA
2562 hfs_qsync(mp);
2563 #endif /* QUOTA */
2564
2565 hfs_hotfilesync(hfsmp, vfs_context_kernel());
2566
2567 /*
2568 * Write back modified superblock.
2569 */
2570 if (IsVCBDirty(vcb)) {
2571 error = hfs_flushvolumeheader(hfsmp, waitfor, 0);
2572 if (error)
2573 allerror = error;
2574 }
2575
2576 if (hfsmp->jnl) {
2577 hfs_journal_flush(hfsmp, FALSE);
2578 }
2579
2580 {
2581 clock_sec_t secs;
2582 clock_usec_t usecs;
2583 uint64_t now;
2584
2585 clock_get_calendar_microtime(&secs, &usecs);
2586 now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs;
2587 hfsmp->hfs_last_sync_time = now;
2588 }
2589
2590 lck_rw_unlock_shared(&hfsmp->hfs_insync);
2591 return (allerror);
2592 }
2593
2594
2595 /*
2596 * File handle to vnode
2597 *
2598 * Have to be really careful about stale file handles:
2599 * - check that the cnode id is valid
2600 * - call hfs_vget() to get the locked cnode
2601 * - check for an unallocated cnode (i_mode == 0)
2602 * - check that the given client host has export rights and return
2603 * those rights via. exflagsp and credanonp
2604 */
2605 static int
2606 hfs_fhtovp(struct mount *mp, int fhlen, unsigned char *fhp, struct vnode **vpp, __unused vfs_context_t context)
2607 {
2608 struct hfsfid *hfsfhp;
2609 struct vnode *nvp;
2610 int result;
2611
2612 *vpp = NULL;
2613 hfsfhp = (struct hfsfid *)fhp;
2614
2615 if (fhlen < (int)sizeof(struct hfsfid))
2616 return (EINVAL);
2617
2618 result = hfs_vget(VFSTOHFS(mp), ntohl(hfsfhp->hfsfid_cnid), &nvp, 0, 0);
2619 if (result) {
2620 if (result == ENOENT)
2621 result = ESTALE;
2622 return result;
2623 }
2624
2625 /*
2626 * We used to use the create time as the gen id of the file handle,
2627 * but it is not static enough because it can change at any point
2628 * via system calls. We still don't have another volume ID or other
2629 * unique identifier to use for a generation ID across reboots that
2630 * persists until the file is removed. Using only the CNID exposes
2631 * us to the potential wrap-around case, but as of 2/2008, it would take
2632 * over 2 months to wrap around if the machine did nothing but allocate
2633 * CNIDs. Using some kind of wrap counter would only be effective if
2634 * each file had the wrap counter associated with it. For now,
2635 * we use only the CNID to identify the file as it's good enough.
2636 */
2637
2638 *vpp = nvp;
2639
2640 hfs_unlock(VTOC(nvp));
2641 return (0);
2642 }
2643
2644
2645 /*
2646 * Vnode pointer to File handle
2647 */
2648 /* ARGSUSED */
2649 static int
2650 hfs_vptofh(struct vnode *vp, int *fhlenp, unsigned char *fhp, __unused vfs_context_t context)
2651 {
2652 struct cnode *cp;
2653 struct hfsfid *hfsfhp;
2654
2655 if (ISHFS(VTOVCB(vp)))
2656 return (ENOTSUP); /* hfs standard is not exportable */
2657
2658 if (*fhlenp < (int)sizeof(struct hfsfid))
2659 return (EOVERFLOW);
2660
2661 cp = VTOC(vp);
2662 hfsfhp = (struct hfsfid *)fhp;
2663 /* only the CNID is used to identify the file now */
2664 hfsfhp->hfsfid_cnid = htonl(cp->c_fileid);
2665 hfsfhp->hfsfid_gen = htonl(cp->c_fileid);
2666 *fhlenp = sizeof(struct hfsfid);
2667
2668 return (0);
2669 }
2670
2671
2672 /*
2673 * Initial HFS filesystems, done only once.
2674 */
2675 static int
2676 hfs_init(__unused struct vfsconf *vfsp)
2677 {
2678 static int done = 0;
2679
2680 if (done)
2681 return (0);
2682 done = 1;
2683 hfs_chashinit();
2684 hfs_converterinit();
2685
2686 BTReserveSetup();
2687
2688
2689 hfs_lock_attr = lck_attr_alloc_init();
2690 hfs_group_attr = lck_grp_attr_alloc_init();
2691 hfs_mutex_group = lck_grp_alloc_init("hfs-mutex", hfs_group_attr);
2692 hfs_rwlock_group = lck_grp_alloc_init("hfs-rwlock", hfs_group_attr);
2693 hfs_spinlock_group = lck_grp_alloc_init("hfs-spinlock", hfs_group_attr);
2694
2695 #if HFS_COMPRESSION
2696 decmpfs_init();
2697 #endif
2698
2699 return (0);
2700 }
2701
2702 static int
2703 hfs_getmountpoint(struct vnode *vp, struct hfsmount **hfsmpp)
2704 {
2705 struct hfsmount * hfsmp;
2706 char fstypename[MFSNAMELEN];
2707
2708 if (vp == NULL)
2709 return (EINVAL);
2710
2711 if (!vnode_isvroot(vp))
2712 return (EINVAL);
2713
2714 vnode_vfsname(vp, fstypename);
2715 if (strncmp(fstypename, "hfs", sizeof(fstypename)) != 0)
2716 return (EINVAL);
2717
2718 hfsmp = VTOHFS(vp);
2719
2720 if (HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord)
2721 return (EINVAL);
2722
2723 *hfsmpp = hfsmp;
2724
2725 return (0);
2726 }
2727
2728 // XXXdbg
2729 #include <sys/filedesc.h>
2730
2731 /*
2732 * HFS filesystem related variables.
2733 */
2734 int
2735 hfs_sysctl(int *name, __unused u_int namelen, user_addr_t oldp, size_t *oldlenp,
2736 user_addr_t newp, size_t newlen, vfs_context_t context)
2737 {
2738 struct proc *p = vfs_context_proc(context);
2739 int error;
2740 struct hfsmount *hfsmp;
2741
2742 /* all sysctl names at this level are terminal */
2743
2744 if (name[0] == HFS_ENCODINGBIAS) {
2745 int bias;
2746
2747 bias = hfs_getencodingbias();
2748 error = sysctl_int(oldp, oldlenp, newp, newlen, &bias);
2749 if (error == 0 && newp)
2750 hfs_setencodingbias(bias);
2751 return (error);
2752
2753 } else if (name[0] == HFS_EXTEND_FS) {
2754 u_int64_t newsize;
2755 vnode_t vp = vfs_context_cwd(context);
2756
2757 if (newp == USER_ADDR_NULL || vp == NULLVP)
2758 return (EINVAL);
2759 if ((error = hfs_getmountpoint(vp, &hfsmp)))
2760 return (error);
2761 error = sysctl_quad(oldp, oldlenp, newp, newlen, (quad_t *)&newsize);
2762 if (error)
2763 return (error);
2764
2765 error = hfs_extendfs(hfsmp, newsize, context);
2766 return (error);
2767
2768 } else if (name[0] == HFS_ENCODINGHINT) {
2769 size_t bufsize;
2770 size_t bytes;
2771 u_int32_t hint;
2772 u_int16_t *unicode_name = NULL;
2773 char *filename = NULL;
2774
2775 if ((newlen <= 0) || (newlen > MAXPATHLEN))
2776 return (EINVAL);
2777
2778 bufsize = MAX(newlen * 3, MAXPATHLEN);
2779 MALLOC(filename, char *, newlen, M_TEMP, M_WAITOK);
2780 if (filename == NULL) {
2781 error = ENOMEM;
2782 goto encodinghint_exit;
2783 }
2784 MALLOC(unicode_name, u_int16_t *, bufsize, M_TEMP, M_WAITOK);
2785 if (filename == NULL) {
2786 error = ENOMEM;
2787 goto encodinghint_exit;
2788 }
2789
2790 error = copyin(newp, (caddr_t)filename, newlen);
2791 if (error == 0) {
2792 error = utf8_decodestr((u_int8_t *)filename, newlen - 1, unicode_name,
2793 &bytes, bufsize, 0, UTF_DECOMPOSED);
2794 if (error == 0) {
2795 hint = hfs_pickencoding(unicode_name, bytes / 2);
2796 error = sysctl_int(oldp, oldlenp, USER_ADDR_NULL, 0, (int32_t *)&hint);
2797 }
2798 }
2799
2800 encodinghint_exit:
2801 if (unicode_name)
2802 FREE(unicode_name, M_TEMP);
2803 if (filename)
2804 FREE(filename, M_TEMP);
2805 return (error);
2806
2807 } else if (name[0] == HFS_ENABLE_JOURNALING) {
2808 // make the file system journaled...
2809 vnode_t vp = vfs_context_cwd(context);
2810 vnode_t jvp;
2811 ExtendedVCB *vcb;
2812 struct cat_attr jnl_attr, jinfo_attr;
2813 struct cat_fork jnl_fork, jinfo_fork;
2814 void *jnl = NULL;
2815 int lockflags;
2816
2817 /* Only root can enable journaling */
2818 if (!is_suser()) {
2819 return (EPERM);
2820 }
2821 if (vp == NULLVP)
2822 return EINVAL;
2823
2824 hfsmp = VTOHFS(vp);
2825 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2826 return EROFS;
2827 }
2828 if (HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord) {
2829 printf("hfs: can't make a plain hfs volume journaled.\n");
2830 return EINVAL;
2831 }
2832
2833 if (hfsmp->jnl) {
2834 printf("hfs: volume @ mp %p is already journaled!\n", vnode_mount(vp));
2835 return EAGAIN;
2836 }
2837
2838 vcb = HFSTOVCB(hfsmp);
2839 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_EXTENTS, HFS_EXCLUSIVE_LOCK);
2840 if (BTHasContiguousNodes(VTOF(vcb->catalogRefNum)) == 0 ||
2841 BTHasContiguousNodes(VTOF(vcb->extentsRefNum)) == 0) {
2842
2843 printf("hfs: volume has a btree w/non-contiguous nodes. can not enable journaling.\n");
2844 hfs_systemfile_unlock(hfsmp, lockflags);
2845 return EINVAL;
2846 }
2847 hfs_systemfile_unlock(hfsmp, lockflags);
2848
2849 // make sure these both exist!
2850 if ( GetFileInfo(vcb, kHFSRootFolderID, ".journal_info_block", &jinfo_attr, &jinfo_fork) == 0
2851 || GetFileInfo(vcb, kHFSRootFolderID, ".journal", &jnl_attr, &jnl_fork) == 0) {
2852
2853 return EINVAL;
2854 }
2855
2856 hfs_sync(hfsmp->hfs_mp, MNT_WAIT, context);
2857
2858 printf("hfs: Initializing the journal (joffset 0x%llx sz 0x%llx)...\n",
2859 (off_t)name[2], (off_t)name[3]);
2860
2861 //
2862 // XXXdbg - note that currently (Sept, 08) hfs_util does not support
2863 // enabling the journal on a separate device so it is safe
2864 // to just copy hfs_devvp here. If hfs_util gets the ability
2865 // to dynamically enable the journal on a separate device then
2866 // we will have to do the same thing as hfs_early_journal_init()
2867 // to locate and open the journal device.
2868 //
2869 jvp = hfsmp->hfs_devvp;
2870 jnl = journal_create(jvp,
2871 (off_t)name[2] * (off_t)HFSTOVCB(hfsmp)->blockSize
2872 + HFSTOVCB(hfsmp)->hfsPlusIOPosOffset,
2873 (off_t)((unsigned)name[3]),
2874 hfsmp->hfs_devvp,
2875 hfsmp->hfs_logical_block_size,
2876 0,
2877 0,
2878 hfs_sync_metadata, hfsmp->hfs_mp);
2879
2880 /*
2881 * Set up the trim callback function so that we can add
2882 * recently freed extents to the free extent cache once
2883 * the transaction that freed them is written to the
2884 * journal on disk.
2885 */
2886 if (jnl)
2887 journal_trim_set_callback(jnl, hfs_trim_callback, hfsmp);
2888
2889 if (jnl == NULL) {
2890 printf("hfs: FAILED to create the journal!\n");
2891 if (jvp && jvp != hfsmp->hfs_devvp) {
2892 vnode_clearmountedon(jvp);
2893 VNOP_CLOSE(jvp, hfsmp->hfs_flags & HFS_READ_ONLY ? FREAD : FREAD|FWRITE, vfs_context_kernel());
2894 }
2895 jvp = NULL;
2896
2897 return EINVAL;
2898 }
2899
2900 hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);
2901
2902 /*
2903 * Flush all dirty metadata buffers.
2904 */
2905 buf_flushdirtyblks(hfsmp->hfs_devvp, TRUE, 0, "hfs_sysctl");
2906 buf_flushdirtyblks(hfsmp->hfs_extents_vp, TRUE, 0, "hfs_sysctl");
2907 buf_flushdirtyblks(hfsmp->hfs_catalog_vp, TRUE, 0, "hfs_sysctl");
2908 buf_flushdirtyblks(hfsmp->hfs_allocation_vp, TRUE, 0, "hfs_sysctl");
2909 if (hfsmp->hfs_attribute_vp)
2910 buf_flushdirtyblks(hfsmp->hfs_attribute_vp, TRUE, 0, "hfs_sysctl");
2911
2912 HFSTOVCB(hfsmp)->vcbJinfoBlock = name[1];
2913 HFSTOVCB(hfsmp)->vcbAtrb |= kHFSVolumeJournaledMask;
2914 hfsmp->jvp = jvp;
2915 hfsmp->jnl = jnl;
2916
2917 // save this off for the hack-y check in hfs_remove()
2918 hfsmp->jnl_start = (u_int32_t)name[2];
2919 hfsmp->jnl_size = (off_t)((unsigned)name[3]);
2920 hfsmp->hfs_jnlinfoblkid = jinfo_attr.ca_fileid;
2921 hfsmp->hfs_jnlfileid = jnl_attr.ca_fileid;
2922
2923 vfs_setflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
2924
2925 hfs_unlock_global (hfsmp);
2926 hfs_flushvolumeheader(hfsmp, MNT_WAIT, 1);
2927
2928 {
2929 fsid_t fsid;
2930
2931 fsid.val[0] = (int32_t)hfsmp->hfs_raw_dev;
2932 fsid.val[1] = (int32_t)vfs_typenum(HFSTOVFS(hfsmp));
2933 vfs_event_signal(&fsid, VQ_UPDATE, (intptr_t)NULL);
2934 }
2935 return 0;
2936 } else if (name[0] == HFS_DISABLE_JOURNALING) {
2937 // clear the journaling bit
2938 vnode_t vp = vfs_context_cwd(context);
2939
2940 /* Only root can disable journaling */
2941 if (!is_suser()) {
2942 return (EPERM);
2943 }
2944 if (vp == NULLVP)
2945 return EINVAL;
2946
2947 hfsmp = VTOHFS(vp);
2948
2949 /*
2950 * Disabling journaling is disallowed on volumes with directory hard links
2951 * because we have not tested the relevant code path.
2952 */
2953 if (hfsmp->hfs_private_attr[DIR_HARDLINKS].ca_entries != 0){
2954 printf("hfs: cannot disable journaling on volumes with directory hardlinks\n");
2955 return EPERM;
2956 }
2957
2958 printf("hfs: disabling journaling for mount @ %p\n", vnode_mount(vp));
2959
2960 hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);
2961
2962 // Lights out for you buddy!
2963 journal_close(hfsmp->jnl);
2964 hfsmp->jnl = NULL;
2965
2966 if (hfsmp->jvp && hfsmp->jvp != hfsmp->hfs_devvp) {
2967 vnode_clearmountedon(hfsmp->jvp);
2968 VNOP_CLOSE(hfsmp->jvp, hfsmp->hfs_flags & HFS_READ_ONLY ? FREAD : FREAD|FWRITE, vfs_context_kernel());
2969 vnode_put(hfsmp->jvp);
2970 }
2971 hfsmp->jvp = NULL;
2972 vfs_clearflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
2973 hfsmp->jnl_start = 0;
2974 hfsmp->hfs_jnlinfoblkid = 0;
2975 hfsmp->hfs_jnlfileid = 0;
2976
2977 HFSTOVCB(hfsmp)->vcbAtrb &= ~kHFSVolumeJournaledMask;
2978
2979 hfs_unlock_global (hfsmp);
2980
2981 hfs_flushvolumeheader(hfsmp, MNT_WAIT, 1);
2982
2983 {
2984 fsid_t fsid;
2985
2986 fsid.val[0] = (int32_t)hfsmp->hfs_raw_dev;
2987 fsid.val[1] = (int32_t)vfs_typenum(HFSTOVFS(hfsmp));
2988 vfs_event_signal(&fsid, VQ_UPDATE, (intptr_t)NULL);
2989 }
2990 return 0;
2991 } else if (name[0] == HFS_GET_JOURNAL_INFO) {
2992 vnode_t vp = vfs_context_cwd(context);
2993 off_t jnl_start, jnl_size;
2994
2995 if (vp == NULLVP)
2996 return EINVAL;
2997
2998 /* 64-bit processes won't work with this sysctl -- can't fit a pointer into an int! */
2999 if (proc_is64bit(current_proc()))
3000 return EINVAL;
3001
3002 hfsmp = VTOHFS(vp);
3003 if (hfsmp->jnl == NULL) {
3004 jnl_start = 0;
3005 jnl_size = 0;
3006 } else {
3007 jnl_start = (off_t)(hfsmp->jnl_start * HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset;
3008 jnl_size = (off_t)hfsmp->jnl_size;
3009 }
3010
3011 if ((error = copyout((caddr_t)&jnl_start, CAST_USER_ADDR_T(name[1]), sizeof(off_t))) != 0) {
3012 return error;
3013 }
3014 if ((error = copyout((caddr_t)&jnl_size, CAST_USER_ADDR_T(name[2]), sizeof(off_t))) != 0) {
3015 return error;
3016 }
3017
3018 return 0;
3019 } else if (name[0] == HFS_SET_PKG_EXTENSIONS) {
3020
3021 return set_package_extensions_table((user_addr_t)((unsigned)name[1]), name[2], name[3]);
3022
3023 } else if (name[0] == VFS_CTL_QUERY) {
3024 struct sysctl_req *req;
3025 union union_vfsidctl vc;
3026 struct mount *mp;
3027 struct vfsquery vq;
3028
3029 req = CAST_DOWN(struct sysctl_req *, oldp); /* we're new style vfs sysctl. */
3030
3031 error = SYSCTL_IN(req, &vc, proc_is64bit(p)? sizeof(vc.vc64):sizeof(vc.vc32));
3032 if (error) return (error);
3033
3034 mp = vfs_getvfs(&vc.vc32.vc_fsid); /* works for 32 and 64 */
3035 if (mp == NULL) return (ENOENT);
3036
3037 hfsmp = VFSTOHFS(mp);
3038 bzero(&vq, sizeof(vq));
3039 vq.vq_flags = hfsmp->hfs_notification_conditions;
3040 return SYSCTL_OUT(req, &vq, sizeof(vq));;
3041 } else if (name[0] == HFS_REPLAY_JOURNAL) {
3042 vnode_t devvp = NULL;
3043 int device_fd;
3044 if (namelen != 2) {
3045 return (EINVAL);
3046 }
3047 device_fd = name[1];
3048 error = file_vnode(device_fd, &devvp);
3049 if (error) {
3050 return error;
3051 }
3052 error = vnode_getwithref(devvp);
3053 if (error) {
3054 file_drop(device_fd);
3055 return error;
3056 }
3057 error = hfs_journal_replay(devvp, context);
3058 file_drop(device_fd);
3059 vnode_put(devvp);
3060 return error;
3061 } else if (name[0] == HFS_ENABLE_RESIZE_DEBUG) {
3062 hfs_resize_debug = 1;
3063 printf ("hfs_sysctl: Enabled volume resize debugging.\n");
3064 return 0;
3065 }
3066
3067 return (ENOTSUP);
3068 }
3069
3070 /*
3071 * hfs_vfs_vget is not static since it is used in hfs_readwrite.c to support
3072 * the build_path ioctl. We use it to leverage the code below that updates
3073 * the origin list cache if necessary
3074 */
3075
3076 int
3077 hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, __unused vfs_context_t context)
3078 {
3079 int error;
3080 int lockflags;
3081 struct hfsmount *hfsmp;
3082
3083 hfsmp = VFSTOHFS(mp);
3084
3085 error = hfs_vget(hfsmp, (cnid_t)ino, vpp, 1, 0);
3086 if (error)
3087 return (error);
3088
3089 /*
3090 * ADLs may need to have their origin state updated
3091 * since build_path needs a valid parent. The same is true
3092 * for hardlinked files as well. There isn't a race window here
3093 * in re-acquiring the cnode lock since we aren't pulling any data
3094 * out of the cnode; instead, we're going to the catalog.
3095 */
3096 if ((VTOC(*vpp)->c_flag & C_HARDLINK) &&
3097 (hfs_lock(VTOC(*vpp), HFS_EXCLUSIVE_LOCK) == 0)) {
3098 cnode_t *cp = VTOC(*vpp);
3099 struct cat_desc cdesc;
3100
3101 if (!hfs_haslinkorigin(cp)) {
3102 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
3103 error = cat_findname(hfsmp, (cnid_t)ino, &cdesc);
3104 hfs_systemfile_unlock(hfsmp, lockflags);
3105 if (error == 0) {
3106 if ((cdesc.cd_parentcnid != hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) &&
3107 (cdesc.cd_parentcnid != hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid)) {
3108 hfs_savelinkorigin(cp, cdesc.cd_parentcnid);
3109 }
3110 cat_releasedesc(&cdesc);
3111 }
3112 }
3113 hfs_unlock(cp);
3114 }
3115 return (0);
3116 }
3117
3118
/*
 * Look up an HFS object by ID.
 *
 * The object is returned with an iocount reference and the cnode locked
 * (unless 'skiplock' is non-zero, in which case the cnode is unlocked
 * before returning).  'allow_deleted' is passed through to the cnode
 * hash lookup so callers can retrieve open-unlinked objects.
 *
 * If the object is a file then it will represent the data fork.
 */
int
hfs_vget(struct hfsmount *hfsmp, cnid_t cnid, struct vnode **vpp, int skiplock, int allow_deleted)
{
	struct vnode *vp = NULLVP;
	struct cat_desc cndesc;
	struct cat_attr cnattr;
	struct cat_fork cnfork;
	u_int32_t linkref = 0;
	int error;

	/* Check for cnids that shouldn't be exported. */
	if ((cnid < kHFSFirstUserCatalogNodeID) &&
	    (cnid != kHFSRootFolderID && cnid != kHFSRootParentID)) {
		return (ENOENT);
	}
	/* Don't export our private directories. */
	if (cnid == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid ||
	    cnid == hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) {
		return (ENOENT);
	}
	/*
	 * Check the cnode hash first: a cached vnode avoids a catalog lookup.
	 */
	vp = hfs_chash_getvnode(hfsmp, cnid, 0, skiplock, allow_deleted);
	if (vp) {
		*vpp = vp;
		return(0);
	}

	bzero(&cndesc, sizeof(cndesc));
	bzero(&cnattr, sizeof(cnattr));
	bzero(&cnfork, sizeof(cnfork));

	/*
	 * Not in hash, lookup in catalog.
	 *
	 * kHFSRootParentID has no catalog record, so synthesize a
	 * descriptor/attributes for the root folder instead.
	 */
	if (cnid == kHFSRootParentID) {
		static char hfs_rootname[] = "/";

		cndesc.cd_nameptr = (const u_int8_t *)&hfs_rootname[0];
		cndesc.cd_namelen = 1;
		cndesc.cd_parentcnid = kHFSRootParentID;
		cndesc.cd_cnid = kHFSRootFolderID;
		cndesc.cd_flags = CD_ISDIR;

		cnattr.ca_fileid = kHFSRootFolderID;
		cnattr.ca_linkcount = 1;
		cnattr.ca_entries = 1;
		cnattr.ca_dircount = 1;
		cnattr.ca_mode = (S_IFDIR | S_IRWXU | S_IRWXG | S_IRWXO);
	} else {
		int lockflags;
		cnid_t pid;
		const char *nameptr;

		lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
		error = cat_idlookup(hfsmp, cnid, 0, &cndesc, &cnattr, &cnfork);
		hfs_systemfile_unlock(hfsmp, lockflags);

		if (error) {
			*vpp = NULL;
			return (error);
		}

		/*
		 * Check for a raw hardlink inode and save its linkref.
		 * Raw inodes live in the private metadata directories and are
		 * identified by their name prefix ("iNode", "dir_", "temp").
		 */
		pid = cndesc.cd_parentcnid;
		nameptr = (const char *)cndesc.cd_nameptr;

		if ((pid == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid) &&
		    (bcmp(nameptr, HFS_INODE_PREFIX, HFS_INODE_PREFIX_LEN) == 0)) {
			linkref = strtoul(&nameptr[HFS_INODE_PREFIX_LEN], NULL, 10);

		} else if ((pid == hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) &&
		           (bcmp(nameptr, HFS_DIRINODE_PREFIX, HFS_DIRINODE_PREFIX_LEN) == 0)) {
			linkref = strtoul(&nameptr[HFS_DIRINODE_PREFIX_LEN], NULL, 10);

		} else if ((pid == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid) &&
		           (bcmp(nameptr, HFS_DELETE_PREFIX, HFS_DELETE_PREFIX_LEN) == 0)) {
			*vpp = NULL;
			cat_releasedesc(&cndesc);
			return (ENOENT);	/* open unlinked file */
		}
	}

	/*
	 * Finish initializing cnode descriptor for hardlinks.
	 *
	 * We need a valid name and parent for reverse lookups.
	 */
	if (linkref) {
		cnid_t nextlinkid;
		cnid_t prevlinkid;
		struct cat_desc linkdesc;
		int lockflags;

		cnattr.ca_linkref = linkref;

		/*
		 * Pick up the first link in the chain and get a descriptor for it.
		 * This allows blind volfs paths to work for hardlinks.
		 * On failure the raw-inode descriptor from above is kept.
		 */
		if ((hfs_lookup_siblinglinks(hfsmp, linkref, &prevlinkid, &nextlinkid) == 0) &&
		    (nextlinkid != 0)) {
			lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
			error = cat_findname(hfsmp, nextlinkid, &linkdesc);
			hfs_systemfile_unlock(hfsmp, lockflags);
			if (error == 0) {
				/* Swap in the link's descriptor; ownership of its
				 * name buffer transfers to cndesc via the bcopy. */
				cat_releasedesc(&cndesc);
				bcopy(&linkdesc, &cndesc, sizeof(linkdesc));
			}
		}
	}

	if (linkref) {
		int newvnode_flags = 0;

		error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr,
		                        &cnfork, &vp, &newvnode_flags);
		if (error == 0) {
			VTOC(vp)->c_flag |= C_HARDLINK;
			vnode_setmultipath(vp);
		}
	} else {
		struct componentname cn;
		int newvnode_flags = 0;

		/* Supply hfs_getnewvnode with a component name. */
		MALLOC_ZONE(cn.cn_pnbuf, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK);
		cn.cn_nameiop = LOOKUP;
		cn.cn_flags = ISLASTCN | HASBUF;
		cn.cn_context = NULL;
		cn.cn_pnlen = MAXPATHLEN;
		cn.cn_nameptr = cn.cn_pnbuf;
		cn.cn_namelen = cndesc.cd_namelen;
		cn.cn_hash = 0;
		cn.cn_consume = 0;
		/* +1 copies the catalog name's NUL terminator as well. */
		bcopy(cndesc.cd_nameptr, cn.cn_nameptr, cndesc.cd_namelen + 1);

		error = hfs_getnewvnode(hfsmp, NULLVP, &cn, &cndesc, 0, &cnattr,
		                        &cnfork, &vp, &newvnode_flags);

		if (error == 0 && (VTOC(vp)->c_flag & C_HARDLINK)) {
			hfs_savelinkorigin(VTOC(vp), cndesc.cd_parentcnid);
		}
		FREE_ZONE(cn.cn_pnbuf, cn.cn_pnlen, M_NAMEI);
	}
	cat_releasedesc(&cndesc);

	*vpp = vp;
	if (vp && skiplock) {
		hfs_unlock(VTOC(vp));
	}
	return (error);
}
3282
3283
/*
 * Flush out all the files in a filesystem.
 *
 * Returns 0 on success; EBUSY if vnodes remain in use (e.g. the root
 * directory still has extra references when quotas are enabled), or an
 * error from vflush().  'flags' is passed through to vflush()
 * (e.g. FORCECLOSE).
 */
static int
#if QUOTA
hfs_flushfiles(struct mount *mp, int flags, struct proc *p)
#else
hfs_flushfiles(struct mount *mp, int flags, __unused struct proc *p)
#endif /* QUOTA */
{
	struct hfsmount *hfsmp;
	struct vnode *skipvp = NULLVP;
	int error;
#if QUOTA
	int quotafilecnt;
	int i;
#endif

	hfsmp = VFSTOHFS(mp);

#if QUOTA
	/*
	 * The open quota files have an indirect reference on
	 * the root directory vnode.  We must account for this
	 * extra reference when doing the initial vflush.
	 */
	quotafilecnt = 0;
	if (((unsigned int)vfs_flags(mp)) & MNT_QUOTA) {

		/* Find out how many quota files we have open. */
		for (i = 0; i < MAXQUOTAS; i++) {
			if (hfsmp->hfs_qfiles[i].qf_vp != NULLVP)
				++quotafilecnt;
		}

		/* Obtain the root vnode so we can skip over it. */
		skipvp = hfs_chash_getvnode(hfsmp, kHFSRootFolderID, 0, 0, 0);
	}
#endif /* QUOTA */

	/* First pass: skip system and swap files as well as 'skipvp'. */
	error = vflush(mp, skipvp, SKIPSYSTEM | SKIPSWAP | flags);
	if (error != 0)
		return(error);

	/* Second pass: swap files are no longer skipped. */
	error = vflush(mp, skipvp, SKIPSYSTEM | flags);

#if QUOTA
	if (((unsigned int)vfs_flags(mp)) & MNT_QUOTA) {
		if (skipvp) {
			/*
			 * See if there are additional references on the
			 * root vp besides the ones obtained from the open
			 * quota files and the hfs_chash_getvnode call above.
			 */
			if ((error == 0) &&
			    (vnode_isinuse(skipvp, quotafilecnt))) {
				error = EBUSY;  /* root directory is still open */
			}
			hfs_unlock(VTOC(skipvp));
			vnode_put(skipvp);
		}
		if (error && (flags & FORCECLOSE) == 0)
			return (error);

		/* Shut down quotas, then flush the (formerly skipped) root. */
		for (i = 0; i < MAXQUOTAS; i++) {
			if (hfsmp->hfs_qfiles[i].qf_vp == NULLVP)
				continue;
			hfs_quotaoff(p, mp, i);
		}
		error = vflush(mp, NULLVP, SKIPSYSTEM | flags);
	}
#endif /* QUOTA */

	return (error);
}
3359
3360 /*
3361 * Update volume encoding bitmap (HFS Plus only)
3362 */
3363 __private_extern__
3364 void
3365 hfs_setencodingbits(struct hfsmount *hfsmp, u_int32_t encoding)
3366 {
3367 #define kIndexMacUkrainian 48 /* MacUkrainian encoding is 152 */
3368 #define kIndexMacFarsi 49 /* MacFarsi encoding is 140 */
3369
3370 u_int32_t index;
3371
3372 switch (encoding) {
3373 case kTextEncodingMacUkrainian:
3374 index = kIndexMacUkrainian;
3375 break;
3376 case kTextEncodingMacFarsi:
3377 index = kIndexMacFarsi;
3378 break;
3379 default:
3380 index = encoding;
3381 break;
3382 }
3383
3384 if (index < 64 && (hfsmp->encodingsBitmap & (u_int64_t)(1ULL << index)) == 0) {
3385 HFS_MOUNT_LOCK(hfsmp, TRUE)
3386 hfsmp->encodingsBitmap |= (u_int64_t)(1ULL << index);
3387 MarkVCBDirty(hfsmp);
3388 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
3389 }
3390 }
3391
3392 /*
3393 * Update volume stats
3394 *
3395 * On journal volumes this will cause a volume header flush
3396 */
3397 int
3398 hfs_volupdate(struct hfsmount *hfsmp, enum volop op, int inroot)
3399 {
3400 struct timeval tv;
3401
3402 microtime(&tv);
3403
3404 lck_mtx_lock(&hfsmp->hfs_mutex);
3405
3406 MarkVCBDirty(hfsmp);
3407 hfsmp->hfs_mtime = tv.tv_sec;
3408
3409 switch (op) {
3410 case VOL_UPDATE:
3411 break;
3412 case VOL_MKDIR:
3413 if (hfsmp->hfs_dircount != 0xFFFFFFFF)
3414 ++hfsmp->hfs_dircount;
3415 if (inroot && hfsmp->vcbNmRtDirs != 0xFFFF)
3416 ++hfsmp->vcbNmRtDirs;
3417 break;
3418 case VOL_RMDIR:
3419 if (hfsmp->hfs_dircount != 0)
3420 --hfsmp->hfs_dircount;
3421 if (inroot && hfsmp->vcbNmRtDirs != 0xFFFF)
3422 --hfsmp->vcbNmRtDirs;
3423 break;
3424 case VOL_MKFILE:
3425 if (hfsmp->hfs_filecount != 0xFFFFFFFF)
3426 ++hfsmp->hfs_filecount;
3427 if (inroot && hfsmp->vcbNmFls != 0xFFFF)
3428 ++hfsmp->vcbNmFls;
3429 break;
3430 case VOL_RMFILE:
3431 if (hfsmp->hfs_filecount != 0)
3432 --hfsmp->hfs_filecount;
3433 if (inroot && hfsmp->vcbNmFls != 0xFFFF)
3434 --hfsmp->vcbNmFls;
3435 break;
3436 }
3437
3438 lck_mtx_unlock(&hfsmp->hfs_mutex);
3439
3440 if (hfsmp->jnl) {
3441 hfs_flushvolumeheader(hfsmp, 0, 0);
3442 }
3443
3444 return (0);
3445 }
3446
3447
/*
 * Flush the in-memory VCB state back to the on-disk HFS (standard)
 * Master Directory Block, byte-swapped to big-endian.
 *
 * waitfor  - MNT_WAIT for a synchronous write, anything else async.
 * altflush - non-zero to also copy the MDB to the alternate MDB sector.
 */
static int
hfs_flushMDB(struct hfsmount *hfsmp, int waitfor, int altflush)
{
	ExtendedVCB *vcb = HFSTOVCB(hfsmp);
	struct filefork *fp;
	HFSMasterDirectoryBlock	*mdb;
	struct buf *bp = NULL;
	int retval;
	int sectorsize;
	ByteCount namelen;

	/* Read the sector holding the primary MDB. */
	sectorsize = hfsmp->hfs_logical_block_size;
	retval = (int)buf_bread(hfsmp->hfs_devvp, (daddr64_t)HFS_PRI_SECTOR(sectorsize), sectorsize, NOCRED, &bp);
	if (retval) {
		if (bp)
			buf_brelse(bp);
		return retval;
	}

	lck_mtx_lock(&hfsmp->hfs_mutex);

	mdb = (HFSMasterDirectoryBlock *)(buf_dataptr(bp) + HFS_PRI_OFFSET(sectorsize));

	/* HFS standard stores dates in local time; convert from UTC. */
	mdb->drCrDate	= SWAP_BE32 (UTCToLocal(to_hfs_time(vcb->hfs_itime)));
	mdb->drLsMod	= SWAP_BE32 (UTCToLocal(to_hfs_time(vcb->vcbLsMod)));
	mdb->drAtrb	= SWAP_BE16 (vcb->vcbAtrb);
	mdb->drNmFls	= SWAP_BE16 (vcb->vcbNmFls);
	mdb->drAllocPtr	= SWAP_BE16 (vcb->nextAllocation);
	mdb->drClpSiz	= SWAP_BE32 (vcb->vcbClpSiz);
	mdb->drNxtCNID	= SWAP_BE32 (vcb->vcbNxtCNID);
	/* NOTE(review): 16-bit field — assumes freeBlocks fits an HFS
	 * standard volume's limits. */
	mdb->drFreeBks	= SWAP_BE16 (vcb->freeBlocks);

	/* Volume name: convert UTF-8 to the volume's HFS encoding. */
	namelen = strlen((char *)vcb->vcbVN);
	retval = utf8_to_hfs(vcb, namelen, vcb->vcbVN, mdb->drVN);
	/* Retry with MacRoman in case that's how it was exported. */
	if (retval)
		retval = utf8_to_mac_roman(namelen, vcb->vcbVN, mdb->drVN);

	mdb->drVolBkUp	= SWAP_BE32 (UTCToLocal(to_hfs_time(vcb->vcbVolBkUp)));
	mdb->drWrCnt	= SWAP_BE32 (vcb->vcbWrCnt);
	mdb->drNmRtDirs	= SWAP_BE16 (vcb->vcbNmRtDirs);
	mdb->drFilCnt	= SWAP_BE32 (vcb->vcbFilCnt);
	mdb->drDirCnt	= SWAP_BE32 (vcb->vcbDirCnt);

	bcopy(vcb->vcbFndrInfo, mdb->drFndrInfo, sizeof(mdb->drFndrInfo));

	/* Sync the extents overflow file's first 3 extents and sizes. */
	fp = VTOF(vcb->extentsRefNum);
	mdb->drXTExtRec[0].startBlock = SWAP_BE16 (fp->ff_extents[0].startBlock);
	mdb->drXTExtRec[0].blockCount = SWAP_BE16 (fp->ff_extents[0].blockCount);
	mdb->drXTExtRec[1].startBlock = SWAP_BE16 (fp->ff_extents[1].startBlock);
	mdb->drXTExtRec[1].blockCount = SWAP_BE16 (fp->ff_extents[1].blockCount);
	mdb->drXTExtRec[2].startBlock = SWAP_BE16 (fp->ff_extents[2].startBlock);
	mdb->drXTExtRec[2].blockCount = SWAP_BE16 (fp->ff_extents[2].blockCount);
	mdb->drXTFlSize	= SWAP_BE32 (fp->ff_blocks * vcb->blockSize);
	mdb->drXTClpSiz	= SWAP_BE32 (fp->ff_clumpsize);
	FTOC(fp)->c_flag &= ~C_MODIFIED;

	/* Sync the catalog file's first 3 extents and sizes. */
	fp = VTOF(vcb->catalogRefNum);
	mdb->drCTExtRec[0].startBlock = SWAP_BE16 (fp->ff_extents[0].startBlock);
	mdb->drCTExtRec[0].blockCount = SWAP_BE16 (fp->ff_extents[0].blockCount);
	mdb->drCTExtRec[1].startBlock = SWAP_BE16 (fp->ff_extents[1].startBlock);
	mdb->drCTExtRec[1].blockCount = SWAP_BE16 (fp->ff_extents[1].blockCount);
	mdb->drCTExtRec[2].startBlock = SWAP_BE16 (fp->ff_extents[2].startBlock);
	mdb->drCTExtRec[2].blockCount = SWAP_BE16 (fp->ff_extents[2].blockCount);
	mdb->drCTFlSize	= SWAP_BE32 (fp->ff_blocks * vcb->blockSize);
	mdb->drCTClpSiz	= SWAP_BE32 (fp->ff_clumpsize);
	FTOC(fp)->c_flag &= ~C_MODIFIED;

	MarkVCBClean( vcb );

	lck_mtx_unlock(&hfsmp->hfs_mutex);

	/* If requested, flush out the alternate MDB */
	if (altflush) {
		struct buf *alt_bp = NULL;

		if (buf_meta_bread(hfsmp->hfs_devvp, hfsmp->hfs_alt_id_sector, sectorsize, NOCRED, &alt_bp) == 0) {
			bcopy(mdb, (char *)buf_dataptr(alt_bp) + HFS_ALT_OFFSET(sectorsize), kMDBSize);

			(void) VNOP_BWRITE(alt_bp);
		} else if (alt_bp)
			buf_brelse(alt_bp);
	}

	/* MNT_WAIT: write synchronously and return the result;
	 * otherwise queue an async write. */
	if (waitfor != MNT_WAIT)
		buf_bawrite(bp);
	else
		retval = VNOP_BWRITE(bp);

	return (retval);
}
3539
3540 /*
3541 * Flush any dirty in-memory mount data to the on-disk
3542 * volume header.
3543 *
3544 * Note: the on-disk volume signature is intentionally
3545 * not flushed since the on-disk "H+" and "HX" signatures
3546 * are always stored in-memory as "H+".
3547 */
3548 int
3549 hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush)
3550 {
3551 ExtendedVCB *vcb = HFSTOVCB(hfsmp);
3552 struct filefork *fp;
3553 HFSPlusVolumeHeader *volumeHeader, *altVH;
3554 int retval;
3555 struct buf *bp, *alt_bp;
3556 int i;
3557 daddr64_t priIDSector;
3558 int critical;
3559 u_int16_t signature;
3560 u_int16_t hfsversion;
3561
3562 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
3563 return(0);
3564 }
3565 if (hfsmp->hfs_flags & HFS_STANDARD) {
3566 return hfs_flushMDB(hfsmp, waitfor, altflush);
3567 }
3568 critical = altflush;
3569 priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
3570 HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size));
3571
3572 if (hfs_start_transaction(hfsmp) != 0) {
3573 return EINVAL;
3574 }
3575
3576 bp = NULL;
3577 alt_bp = NULL;
3578
3579 retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
3580 HFS_PHYSBLK_ROUNDDOWN(priIDSector, hfsmp->hfs_log_per_phys),
3581 hfsmp->hfs_physical_block_size, NOCRED, &bp);
3582 if (retval) {
3583 printf("hfs: err %d reading VH blk (%s)\n", retval, vcb->vcbVN);
3584 goto err_exit;
3585 }
3586
3587 volumeHeader = (HFSPlusVolumeHeader *)((char *)buf_dataptr(bp) +
3588 HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));
3589
3590 /*
3591 * Sanity check what we just read. If it's bad, try the alternate
3592 * instead.
3593 */
3594 signature = SWAP_BE16 (volumeHeader->signature);
3595 hfsversion = SWAP_BE16 (volumeHeader->version);
3596 if ((signature != kHFSPlusSigWord && signature != kHFSXSigWord) ||
3597 (hfsversion < kHFSPlusVersion) || (hfsversion > 100) ||
3598 (SWAP_BE32 (volumeHeader->blockSize) != vcb->blockSize)) {
3599 printf("hfs: corrupt VH on %s, sig 0x%04x, ver %d, blksize %d%s\n",
3600 vcb->vcbVN, signature, hfsversion,
3601 SWAP_BE32 (volumeHeader->blockSize),
3602 hfsmp->hfs_alt_id_sector ? "; trying alternate" : "");
3603 hfs_mark_volume_inconsistent(hfsmp);
3604
3605 if (hfsmp->hfs_alt_id_sector) {
3606 retval = buf_meta_bread(hfsmp->hfs_devvp,
3607 HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys),
3608 hfsmp->hfs_physical_block_size, NOCRED, &alt_bp);
3609 if (retval) {
3610 printf("hfs: err %d reading alternate VH (%s)\n", retval, vcb->vcbVN);
3611 goto err_exit;
3612 }
3613
3614 altVH = (HFSPlusVolumeHeader *)((char *)buf_dataptr(alt_bp) +
3615 HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size));
3616 signature = SWAP_BE16(altVH->signature);
3617 hfsversion = SWAP_BE16(altVH->version);
3618
3619 if ((signature != kHFSPlusSigWord && signature != kHFSXSigWord) ||
3620 (hfsversion < kHFSPlusVersion) || (kHFSPlusVersion > 100) ||
3621 (SWAP_BE32(altVH->blockSize) != vcb->blockSize)) {
3622 printf("hfs: corrupt alternate VH on %s, sig 0x%04x, ver %d, blksize %d\n",
3623 vcb->vcbVN, signature, hfsversion,
3624 SWAP_BE32(altVH->blockSize));
3625 retval = EIO;
3626 goto err_exit;
3627 }
3628
3629 /* The alternate is plausible, so use it. */
3630 bcopy(altVH, volumeHeader, kMDBSize);
3631 buf_brelse(alt_bp);
3632 alt_bp = NULL;
3633 } else {
3634 /* No alternate VH, nothing more we can do. */
3635 retval = EIO;
3636 goto err_exit;
3637 }
3638 }
3639
3640 if (hfsmp->jnl) {
3641 journal_modify_block_start(hfsmp->jnl, bp);
3642 }
3643
3644 /*
3645 * For embedded HFS+ volumes, update create date if it changed
3646 * (ie from a setattrlist call)
3647 */
3648 if ((vcb->hfsPlusIOPosOffset != 0) &&
3649 (SWAP_BE32 (volumeHeader->createDate) != vcb->localCreateDate)) {
3650 struct buf *bp2;
3651 HFSMasterDirectoryBlock *mdb;
3652
3653 retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
3654 HFS_PHYSBLK_ROUNDDOWN(HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size), hfsmp->hfs_log_per_phys),
3655 hfsmp->hfs_physical_block_size, NOCRED, &bp2);
3656 if (retval) {
3657 if (bp2)
3658 buf_brelse(bp2);
3659 retval = 0;
3660 } else {
3661 mdb = (HFSMasterDirectoryBlock *)(buf_dataptr(bp2) +
3662 HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));
3663
3664 if ( SWAP_BE32 (mdb->drCrDate) != vcb->localCreateDate )
3665 {
3666 if (hfsmp->jnl) {
3667 journal_modify_block_start(hfsmp->jnl, bp2);
3668 }
3669
3670 mdb->drCrDate = SWAP_BE32 (vcb->localCreateDate); /* pick up the new create date */
3671
3672 if (hfsmp->jnl) {
3673 journal_modify_block_end(hfsmp->jnl, bp2, NULL, NULL);
3674 } else {
3675 (void) VNOP_BWRITE(bp2); /* write out the changes */
3676 }
3677 }
3678 else
3679 {
3680 buf_brelse(bp2); /* just release it */
3681 }
3682 }
3683 }
3684
3685 lck_mtx_lock(&hfsmp->hfs_mutex);
3686
3687 /* Note: only update the lower 16 bits worth of attributes */
3688 volumeHeader->attributes = SWAP_BE32 (vcb->vcbAtrb);
3689 volumeHeader->journalInfoBlock = SWAP_BE32 (vcb->vcbJinfoBlock);
3690 if (hfsmp->jnl) {
3691 volumeHeader->lastMountedVersion = SWAP_BE32 (kHFSJMountVersion);
3692 } else {
3693 volumeHeader->lastMountedVersion = SWAP_BE32 (kHFSPlusMountVersion);
3694 }
3695 volumeHeader->createDate = SWAP_BE32 (vcb->localCreateDate); /* volume create date is in local time */
3696 volumeHeader->modifyDate = SWAP_BE32 (to_hfs_time(vcb->vcbLsMod));
3697 volumeHeader->backupDate = SWAP_BE32 (to_hfs_time(vcb->vcbVolBkUp));
3698 volumeHeader->fileCount = SWAP_BE32 (vcb->vcbFilCnt);
3699 volumeHeader->folderCount = SWAP_BE32 (vcb->vcbDirCnt);
3700 volumeHeader->totalBlocks = SWAP_BE32 (vcb->totalBlocks);
3701 volumeHeader->freeBlocks = SWAP_BE32 (vcb->freeBlocks);
3702 volumeHeader->nextAllocation = SWAP_BE32 (vcb->nextAllocation);
3703 volumeHeader->rsrcClumpSize = SWAP_BE32 (vcb->vcbClpSiz);
3704 volumeHeader->dataClumpSize = SWAP_BE32 (vcb->vcbClpSiz);
3705 volumeHeader->nextCatalogID = SWAP_BE32 (vcb->vcbNxtCNID);
3706 volumeHeader->writeCount = SWAP_BE32 (vcb->vcbWrCnt);
3707 volumeHeader->encodingsBitmap = SWAP_BE64 (vcb->encodingsBitmap);
3708
3709 if (bcmp(vcb->vcbFndrInfo, volumeHeader->finderInfo, sizeof(volumeHeader->finderInfo)) != 0) {
3710 bcopy(vcb->vcbFndrInfo, volumeHeader->finderInfo, sizeof(volumeHeader->finderInfo));
3711 critical = 1;
3712 }
3713
3714 /*
3715 * System files are only dirty when altflush is set.
3716 */
3717 if (altflush == 0) {
3718 goto done;
3719 }
3720
3721 /* Sync Extents over-flow file meta data */
3722 fp = VTOF(vcb->extentsRefNum);
3723 if (FTOC(fp)->c_flag & C_MODIFIED) {
3724 for (i = 0; i < kHFSPlusExtentDensity; i++) {
3725 volumeHeader->extentsFile.extents[i].startBlock =
3726 SWAP_BE32 (fp->ff_extents[i].startBlock);
3727 volumeHeader->extentsFile.extents[i].blockCount =
3728 SWAP_BE32 (fp->ff_extents[i].blockCount);
3729 }
3730 volumeHeader->extentsFile.logicalSize = SWAP_BE64 (fp->ff_size);
3731 volumeHeader->extentsFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
3732 volumeHeader->extentsFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
3733 FTOC(fp)->c_flag &= ~C_MODIFIED;
3734 }
3735
3736 /* Sync Catalog file meta data */
3737 fp = VTOF(vcb->catalogRefNum);
3738 if (FTOC(fp)->c_flag & C_MODIFIED) {
3739 for (i = 0; i < kHFSPlusExtentDensity; i++) {
3740 volumeHeader->catalogFile.extents[i].startBlock =
3741 SWAP_BE32 (fp->ff_extents[i].startBlock);
3742 volumeHeader->catalogFile.extents[i].blockCount =
3743 SWAP_BE32 (fp->ff_extents[i].blockCount);
3744 }
3745 volumeHeader->catalogFile.logicalSize = SWAP_BE64 (fp->ff_size);
3746 volumeHeader->catalogFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
3747 volumeHeader->catalogFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
3748 FTOC(fp)->c_flag &= ~C_MODIFIED;
3749 }
3750
3751 /* Sync Allocation file meta data */
3752 fp = VTOF(vcb->allocationsRefNum);
3753 if (FTOC(fp)->c_flag & C_MODIFIED) {
3754 for (i = 0; i < kHFSPlusExtentDensity; i++) {
3755 volumeHeader->allocationFile.extents[i].startBlock =
3756 SWAP_BE32 (fp->ff_extents[i].startBlock);
3757 volumeHeader->allocationFile.extents[i].blockCount =
3758 SWAP_BE32 (fp->ff_extents[i].blockCount);
3759 }
3760 volumeHeader->allocationFile.logicalSize = SWAP_BE64 (fp->ff_size);
3761 volumeHeader->allocationFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
3762 volumeHeader->allocationFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
3763 FTOC(fp)->c_flag &= ~C_MODIFIED;
3764 }
3765
3766 /* Sync Attribute file meta data */
3767 if (hfsmp->hfs_attribute_vp) {
3768 fp = VTOF(hfsmp->hfs_attribute_vp);
3769 for (i = 0; i < kHFSPlusExtentDensity; i++) {
3770 volumeHeader->attributesFile.extents[i].startBlock =
3771 SWAP_BE32 (fp->ff_extents[i].startBlock);
3772 volumeHeader->attributesFile.extents[i].blockCount =
3773 SWAP_BE32 (fp->ff_extents[i].blockCount);
3774 }
3775 FTOC(fp)->c_flag &= ~C_MODIFIED;
3776 volumeHeader->attributesFile.logicalSize = SWAP_BE64 (fp->ff_size);
3777 volumeHeader->attributesFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
3778 volumeHeader->attributesFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
3779 }
3780
3781 /* Sync Startup file meta data */
3782 if (hfsmp->hfs_startup_vp) {
3783 fp = VTOF(hfsmp->hfs_startup_vp);
3784 if (FTOC(fp)->c_flag & C_MODIFIED) {
3785 for (i = 0; i < kHFSPlusExtentDensity; i++) {
3786 volumeHeader->startupFile.extents[i].startBlock =
3787 SWAP_BE32 (fp->ff_extents[i].startBlock);
3788 volumeHeader->startupFile.extents[i].blockCount =
3789 SWAP_BE32 (fp->ff_extents[i].blockCount);
3790 }
3791 volumeHeader->startupFile.logicalSize = SWAP_BE64 (fp->ff_size);
3792 volumeHeader->startupFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
3793 volumeHeader->startupFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
3794 FTOC(fp)->c_flag &= ~C_MODIFIED;
3795 }
3796 }
3797
3798 done:
3799 MarkVCBClean(hfsmp);
3800 lck_mtx_unlock(&hfsmp->hfs_mutex);
3801
3802 /* If requested, flush out the alternate volume header */
3803 if (altflush && hfsmp->hfs_alt_id_sector) {
3804 if (buf_meta_bread(hfsmp->hfs_devvp,
3805 HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys),
3806 hfsmp->hfs_physical_block_size, NOCRED, &alt_bp) == 0) {
3807 if (hfsmp->jnl) {
3808 journal_modify_block_start(hfsmp->jnl, alt_bp);
3809 }
3810
3811 bcopy(volumeHeader, (char *)buf_dataptr(alt_bp) +
3812 HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size),
3813 kMDBSize);
3814
3815 if (hfsmp->jnl) {
3816 journal_modify_block_end(hfsmp->jnl, alt_bp, NULL, NULL);
3817 } else {
3818 (void) VNOP_BWRITE(alt_bp);
3819 }
3820 } else if (alt_bp)
3821 buf_brelse(alt_bp);
3822 }
3823
3824 if (hfsmp->jnl) {
3825 journal_modify_block_end(hfsmp->jnl, bp, NULL, NULL);
3826 } else {
3827 if (waitfor != MNT_WAIT)
3828 buf_bawrite(bp);
3829 else {
3830 retval = VNOP_BWRITE(bp);
3831 /* When critical data changes, flush the device cache */
3832 if (critical && (retval == 0)) {
3833 (void) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE,
3834 NULL, FWRITE, NULL);
3835 }
3836 }
3837 }
3838 hfs_end_transaction(hfsmp);
3839
3840 return (retval);
3841
3842 err_exit:
3843 if (alt_bp)
3844 buf_brelse(alt_bp);
3845 if (bp)
3846 buf_brelse(bp);
3847 hfs_end_transaction(hfsmp);
3848 return retval;
3849 }
3850
3851
3852 /*
3853 * Extend a file system.
3854 */
3855 int
3856 hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
3857 {
3858 struct proc *p = vfs_context_proc(context);
3859 kauth_cred_t cred = vfs_context_ucred(context);
3860 struct vnode *vp;
3861 struct vnode *devvp;
3862 struct buf *bp;
3863 struct filefork *fp = NULL;
3864 ExtendedVCB *vcb;
3865 struct cat_fork forkdata;
3866 u_int64_t oldsize;
3867 u_int64_t newblkcnt;
3868 u_int64_t prev_phys_block_count;
3869 u_int32_t addblks;
3870 u_int64_t sectorcnt;
3871 u_int32_t sectorsize;
3872 u_int32_t phys_sectorsize;
3873 daddr64_t prev_alt_sector;
3874 daddr_t bitmapblks;
3875 int lockflags = 0;
3876 int error;
3877 int64_t oldBitmapSize;
3878 Boolean usedExtendFileC = false;
3879 int transaction_begun = 0;
3880
3881 devvp = hfsmp->hfs_devvp;
3882 vcb = HFSTOVCB(hfsmp);
3883
3884 /*
3885 * - HFS Plus file systems only.
3886 * - Journaling must be enabled.
3887 * - No embedded volumes.
3888 */
3889 if ((vcb->vcbSigWord == kHFSSigWord) ||
3890 (hfsmp->jnl == NULL) ||
3891 (vcb->hfsPlusIOPosOffset != 0)) {
3892 return (EPERM);
3893 }
3894 /*
3895 * If extending file system by non-root, then verify
3896 * ownership and check permissions.
3897 */
3898 if (suser(cred, NULL)) {
3899 error = hfs_vget(hfsmp, kHFSRootFolderID, &vp, 0, 0);
3900
3901 if (error)
3902 return (error);
3903 error = hfs_owner_rights(hfsmp, VTOC(vp)->c_uid, cred, p, 0);
3904 if (error == 0) {
3905 error = hfs_write_access(vp, cred, p, false);
3906 }
3907 hfs_unlock(VTOC(vp));
3908 vnode_put(vp);
3909 if (error)
3910 return (error);
3911
3912 error = vnode_authorize(devvp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, context);
3913 if (error)
3914 return (error);
3915 }
3916 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&sectorsize, 0, context)) {
3917 return (ENXIO);
3918 }
3919 if (sectorsize != hfsmp->hfs_logical_block_size) {
3920 return (ENXIO);
3921 }
3922 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&sectorcnt, 0, context)) {
3923 return (ENXIO);
3924 }
3925 if ((sectorsize * sectorcnt) < newsize) {
3926 printf("hfs_extendfs: not enough space on device\n");
3927 return (ENOSPC);
3928 }
3929 error = VNOP_IOCTL(devvp, DKIOCGETPHYSICALBLOCKSIZE, (caddr_t)&phys_sectorsize, 0, context);
3930 if (error) {
3931 if ((error != ENOTSUP) && (error != ENOTTY)) {
3932 return (ENXIO);
3933 }
3934 /* If ioctl is not supported, force physical and logical sector size to be same */
3935 phys_sectorsize = sectorsize;
3936 }
3937 oldsize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
3938
3939 /*
3940 * Validate new size.
3941 */
3942 if ((newsize <= oldsize) || (newsize % sectorsize) || (newsize % phys_sectorsize)) {
3943 printf("hfs_extendfs: invalid size\n");
3944 return (EINVAL);
3945 }
3946 newblkcnt = newsize / vcb->blockSize;
3947 if (newblkcnt > (u_int64_t)0xFFFFFFFF)
3948 return (EOVERFLOW);
3949
3950 addblks = newblkcnt - vcb->totalBlocks;
3951
3952 if (hfs_resize_debug) {
3953 printf ("hfs_extendfs: old: size=%qu, blkcnt=%u\n", oldsize, hfsmp->totalBlocks);
3954 printf ("hfs_extendfs: new: size=%qu, blkcnt=%u, addblks=%u\n", newsize, (u_int32_t)newblkcnt, addblks);
3955 }
3956 printf("hfs_extendfs: will extend \"%s\" by %d blocks\n", vcb->vcbVN, addblks);
3957
3958 HFS_MOUNT_LOCK(hfsmp, TRUE);
3959 if (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) {
3960 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
3961 error = EALREADY;
3962 goto out;
3963 }
3964 hfsmp->hfs_flags |= HFS_RESIZE_IN_PROGRESS;
3965 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
3966
3967 /*
3968 * Enclose changes inside a transaction.
3969 */
3970 if (hfs_start_transaction(hfsmp) != 0) {
3971 error = EINVAL;
3972 goto out;
3973 }
3974 transaction_begun = 1;
3975
3976 /*
3977 * Note: we take the attributes lock in case we have an attribute data vnode
3978 * which needs to change size.
3979 */
3980 lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
3981 vp = vcb->allocationsRefNum;
3982 fp = VTOF(vp);
3983 bcopy(&fp->ff_data, &forkdata, sizeof(forkdata));
3984
3985 /*
3986 * Calculate additional space required (if any) by allocation bitmap.
3987 */
3988 oldBitmapSize = fp->ff_size;
3989 bitmapblks = roundup((newblkcnt+7) / 8, vcb->vcbVBMIOSize) / vcb->blockSize;
3990 if (bitmapblks > (daddr_t)fp->ff_blocks)
3991 bitmapblks -= fp->ff_blocks;
3992 else
3993 bitmapblks = 0;
3994
3995 /*
3996 * The allocation bitmap can contain unused bits that are beyond end of
3997 * current volume's allocation blocks. Usually they are supposed to be
3998 * zero'ed out but there can be cases where they might be marked as used.
3999 * After extending the file system, those bits can represent valid
4000 * allocation blocks, so we mark all the bits from the end of current
4001 * volume to end of allocation bitmap as "free".
4002 */
4003 BlockMarkFreeUnused(vcb, vcb->totalBlocks,
4004 (fp->ff_blocks * vcb->blockSize * 8) - vcb->totalBlocks);
4005
4006 if (bitmapblks > 0) {
4007 daddr64_t blkno;
4008 daddr_t blkcnt;
4009 off_t bytesAdded;
4010
4011 /*
4012 * Get the bitmap's current size (in allocation blocks) so we know
4013 * where to start zero filling once the new space is added. We've
4014 * got to do this before the bitmap is grown.
4015 */
4016 blkno = (daddr64_t)fp->ff_blocks;
4017
4018 /*
4019 * Try to grow the allocation file in the normal way, using allocation
4020 * blocks already existing in the file system. This way, we might be
4021 * able to grow the bitmap contiguously, or at least in the metadata
4022 * zone.
4023 */
4024 error = ExtendFileC(vcb, fp, bitmapblks * vcb->blockSize, 0,
4025 kEFAllMask | kEFNoClumpMask | kEFReserveMask
4026 | kEFMetadataMask | kEFContigMask, &bytesAdded);
4027
4028 if (error == 0) {
4029 usedExtendFileC = true;
4030 } else {
4031 /*
4032 * If the above allocation failed, fall back to allocating the new
4033 * extent of the bitmap from the space we're going to add. Since those
4034 * blocks don't yet belong to the file system, we have to update the
4035 * extent list directly, and manually adjust the file size.
4036 */
4037 bytesAdded = 0;
4038 error = AddFileExtent(vcb, fp, vcb->totalBlocks, bitmapblks);
4039 if (error) {
4040 printf("hfs_extendfs: error %d adding extents\n", error);
4041 goto out;
4042 }
4043 fp->ff_blocks += bitmapblks;
4044 VTOC(vp)->c_blocks = fp->ff_blocks;
4045 VTOC(vp)->c_flag |= C_MODIFIED;
4046 }
4047
4048 /*
4049 * Update the allocation file's size to include the newly allocated
4050 * blocks. Note that ExtendFileC doesn't do this, which is why this
4051 * statement is outside the above "if" statement.
4052 */
4053 fp->ff_size += (u_int64_t)bitmapblks * (u_int64_t)vcb->blockSize;
4054
4055 /*
4056 * Zero out the new bitmap blocks.
4057 */
4058 {
4059
4060 bp = NULL;
4061 blkcnt = bitmapblks;
4062 while (blkcnt > 0) {
4063 error = (int)buf_meta_bread(vp, blkno, vcb->blockSize, NOCRED, &bp);
4064 if (error) {
4065 if (bp) {
4066 buf_brelse(bp);
4067 }
4068 break;
4069 }
4070 bzero((char *)buf_dataptr(bp), vcb->blockSize);
4071 buf_markaged(bp);
4072 error = (int)buf_bwrite(bp);
4073 if (error)
4074 break;
4075 --blkcnt;
4076 ++blkno;
4077 }
4078 }
4079 if (error) {
4080 printf("hfs_extendfs: error %d clearing blocks\n", error);
4081 goto out;
4082 }
4083 /*
4084 * Mark the new bitmap space as allocated.
4085 *
4086 * Note that ExtendFileC will have marked any blocks it allocated, so
4087 * this is only needed if we used AddFileExtent. Also note that this
4088 * has to come *after* the zero filling of new blocks in the case where
4089 * we used AddFileExtent (since the part of the bitmap we're touching
4090 * is in those newly allocated blocks).
4091 */
4092 if (!usedExtendFileC) {
4093 error = BlockMarkAllocated(vcb, vcb->totalBlocks, bitmapblks);
4094 if (error) {
4095 printf("hfs_extendfs: error %d setting bitmap\n", error);
4096 goto out;
4097 }
4098 vcb->freeBlocks -= bitmapblks;
4099 }
4100 }
4101 /*
4102 * Mark the new alternate VH as allocated.
4103 */
4104 if (vcb->blockSize == 512)
4105 error = BlockMarkAllocated(vcb, vcb->totalBlocks + addblks - 2, 2);
4106 else
4107 error = BlockMarkAllocated(vcb, vcb->totalBlocks + addblks - 1, 1);
4108 if (error) {
4109 printf("hfs_extendfs: error %d setting bitmap (VH)\n", error);
4110 goto out;
4111 }
4112 /*
4113 * Mark the old alternate VH as free.
4114 */
4115 if (vcb->blockSize == 512)
4116 (void) BlockMarkFree(vcb, vcb->totalBlocks - 2, 2);
4117 else
4118 (void) BlockMarkFree(vcb, vcb->totalBlocks - 1, 1);
4119 /*
4120 * Adjust file system variables for new space.
4121 */
4122 prev_phys_block_count = hfsmp->hfs_logical_block_count;
4123 prev_alt_sector = hfsmp->hfs_alt_id_sector;
4124
4125 vcb->totalBlocks += addblks;
4126 vcb->freeBlocks += addblks;
4127 hfsmp->hfs_logical_block_count = newsize / sectorsize;
4128 hfsmp->hfs_alt_id_sector = (hfsmp->hfsPlusIOPosOffset / sectorsize) +
4129 HFS_ALT_SECTOR(sectorsize, hfsmp->hfs_logical_block_count);
4130 MarkVCBDirty(vcb);
4131 error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
4132 if (error) {
4133 printf("hfs_extendfs: couldn't flush volume headers (%d)", error);
4134 /*
4135 * Restore to old state.
4136 */
4137 if (usedExtendFileC) {
4138 (void) TruncateFileC(vcb, fp, oldBitmapSize, 0, FORK_IS_RSRC(fp),
4139 FTOC(fp)->c_fileid, false);
4140 } else {
4141 fp->ff_blocks -= bitmapblks;
4142 fp->ff_size -= (u_int64_t)bitmapblks * (u_int64_t)vcb->blockSize;
4143 /*
4144 * No need to mark the excess blocks free since those bitmap blocks
4145 * are no longer part of the bitmap. But we do need to undo the
4146 * effect of the "vcb->freeBlocks -= bitmapblks" above.
4147 */
4148 vcb->freeBlocks += bitmapblks;
4149 }
4150 vcb->totalBlocks -= addblks;
4151 vcb->freeBlocks -= addblks;
4152 hfsmp->hfs_logical_block_count = prev_phys_block_count;
4153 hfsmp->hfs_alt_id_sector = prev_alt_sector;
4154 MarkVCBDirty(vcb);
4155 if (vcb->blockSize == 512) {
4156 if (BlockMarkAllocated(vcb, vcb->totalBlocks - 2, 2)) {
4157 hfs_mark_volume_inconsistent(hfsmp);
4158 }
4159 } else {
4160 if (BlockMarkAllocated(vcb, vcb->totalBlocks - 1, 1)) {
4161 hfs_mark_volume_inconsistent(hfsmp);
4162 }
4163 }
4164 goto out;
4165 }
4166 /*
4167 * Invalidate the old alternate volume header.
4168 */
4169 bp = NULL;
4170 if (prev_alt_sector) {
4171 if (buf_meta_bread(hfsmp->hfs_devvp,
4172 HFS_PHYSBLK_ROUNDDOWN(prev_alt_sector, hfsmp->hfs_log_per_phys),
4173 hfsmp->hfs_physical_block_size, NOCRED, &bp) == 0) {
4174 journal_modify_block_start(hfsmp->jnl, bp);
4175
4176 bzero((char *)buf_dataptr(bp) + HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size), kMDBSize);
4177
4178 journal_modify_block_end(hfsmp->jnl, bp, NULL, NULL);
4179 } else if (bp) {
4180 buf_brelse(bp);
4181 }
4182 }
4183
4184 /*
4185 * Update the metadata zone size based on current volume size
4186 */
4187 hfs_metadatazone_init(hfsmp, false);
4188
4189 /*
4190 * Adjust the size of hfsmp->hfs_attrdata_vp
4191 */
4192 if (hfsmp->hfs_attrdata_vp) {
4193 struct cnode *attr_cp;
4194 struct filefork *attr_fp;
4195
4196 if (vnode_get(hfsmp->hfs_attrdata_vp) == 0) {
4197 attr_cp = VTOC(hfsmp->hfs_attrdata_vp);
4198 attr_fp = VTOF(hfsmp->hfs_attrdata_vp);
4199
4200 attr_cp->c_blocks = newblkcnt;
4201 attr_fp->ff_blocks = newblkcnt;
4202 attr_fp->ff_extents[0].blockCount = newblkcnt;
4203 attr_fp->ff_size = (off_t) newblkcnt * hfsmp->blockSize;
4204 ubc_setsize(hfsmp->hfs_attrdata_vp, attr_fp->ff_size);
4205 vnode_put(hfsmp->hfs_attrdata_vp);
4206 }
4207 }
4208
4209 /*
4210 * Update the R/B Tree if necessary. Since we don't have to drop the systemfile
4211 * locks in the middle of these operations like we do in the truncate case
4212 * where we have to relocate files, we can only update the red-black tree
4213 * if there were actual changes made to the bitmap. Also, we can't really scan the
4214 * new portion of the bitmap before it has been allocated. The BlockMarkAllocated
4215 * routines are smart enough to avoid the r/b tree if the portion they are manipulating is
4216 * not currently controlled by the tree.
4217 *
4218 * We only update hfsmp->allocLimit if totalBlocks actually increased.
4219 */
4220
4221 if (error == 0) {
4222 UpdateAllocLimit(hfsmp, hfsmp->totalBlocks);
4223 }
4224
4225 /* Log successful extending */
4226 printf("hfs_extendfs: extended \"%s\" to %d blocks (was %d blocks)\n",
4227 hfsmp->vcbVN, hfsmp->totalBlocks, (u_int32_t)(oldsize/hfsmp->blockSize));
4228
4229 out:
4230 if (error && fp) {
4231 /* Restore allocation fork. */
4232 bcopy(&forkdata, &fp->ff_data, sizeof(forkdata));
4233 VTOC(vp)->c_blocks = fp->ff_blocks;
4234
4235 }
4236
4237 HFS_MOUNT_LOCK(hfsmp, TRUE);
4238 hfsmp->hfs_flags &= ~HFS_RESIZE_IN_PROGRESS;
4239 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
4240 if (lockflags) {
4241 hfs_systemfile_unlock(hfsmp, lockflags);
4242 }
4243 if (transaction_begun) {
4244 hfs_end_transaction(hfsmp);
4245 }
4246
4247 return MacToVFSError(error);
4248 }
4249
/* Smallest volume size (in bytes) that hfs_truncatefs() will shrink to: 32 MiB. */
#define HFS_MIN_SIZE (32LL * 1024LL * 1024LL)
4251
/*
 * Truncate (shrink) a file system while it is still mounted.
 *
 * Only journaled, non-embedded HFS Plus volumes may be shrunk.  Concurrent
 * resizes are serialized via the HFS_RESIZE_IN_PROGRESS flag.  Allocated
 * blocks beyond the new end of volume are relocated by hfs_reclaimspace(),
 * a new alternate volume header is allocated at the new end, the old one
 * is zeroed on disk, and the in-core volume variables are flushed.
 *
 * hfsmp   - mount point being shrunk
 * newsize - desired volume size in bytes; must be >= HFS_MIN_SIZE, strictly
 *           smaller than the current size, and a multiple of both the
 *           logical and physical device block sizes
 * context - caller's VFS context (used for the final cache-sync ioctl)
 *
 * Returns 0 on success, otherwise an errno (EALREADY if a resize is already
 * running, EPERM, EINVAL, ENOSPC, EAGAIN, ...) mapped via MacToVFSError().
 */
int
hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
{
	struct buf *bp = NULL;
	u_int64_t oldsize;
	u_int32_t newblkcnt;
	u_int32_t reclaimblks = 0;
	int lockflags = 0;
	int transaction_begun = 0;
	Boolean updateFreeBlocks = false;
	Boolean disable_sparse = false;
	int error = 0;

	/* Claim the resize "lock": only one resize may run at a time. */
	lck_mtx_lock(&hfsmp->hfs_mutex);
	if (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) {
		lck_mtx_unlock(&hfsmp->hfs_mutex);
		return (EALREADY);
	}
	hfsmp->hfs_flags |= HFS_RESIZE_IN_PROGRESS;
	hfsmp->hfs_resize_blocksmoved = 0;
	hfsmp->hfs_resize_totalblocks = 0;
	hfsmp->hfs_resize_progress = 0;
	lck_mtx_unlock(&hfsmp->hfs_mutex);

	/*
	 * - Journaled HFS Plus volumes only.
	 * - No embedded volumes.
	 */
	if ((hfsmp->jnl == NULL) ||
	    (hfsmp->hfsPlusIOPosOffset != 0)) {
		error = EPERM;
		goto out;
	}
	oldsize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
	newblkcnt = newsize / hfsmp->blockSize;
	/* Number of allocation blocks that fall beyond the new end of volume. */
	reclaimblks = hfsmp->totalBlocks - newblkcnt;

	if (hfs_resize_debug) {
		printf ("hfs_truncatefs: old: size=%qu, blkcnt=%u, freeblks=%u\n", oldsize, hfsmp->totalBlocks, hfs_freeblks(hfsmp, 1));
		printf ("hfs_truncatefs: new: size=%qu, blkcnt=%u, reclaimblks=%u\n", newsize, newblkcnt, reclaimblks);
	}

	/* Make sure new size is valid. */
	if ((newsize < HFS_MIN_SIZE) ||
	    (newsize >= oldsize) ||
	    (newsize % hfsmp->hfs_logical_block_size) ||
	    (newsize % hfsmp->hfs_physical_block_size)) {
		printf ("hfs_truncatefs: invalid size (newsize=%qu, oldsize=%qu)\n", newsize, oldsize);
		error = EINVAL;
		goto out;
	}

	/*
	 * Make sure that the file system has enough free blocks reclaim.
	 *
	 * Before resize, the disk is divided into four zones -
	 *      A. Allocated_Stationary - These are allocated blocks that exist
	 *         before the new end of disk.  These blocks will not be
	 *         relocated or modified during resize.
	 *      B. Free_Stationary - These are free blocks that exist before the
	 *         new end of disk.  These blocks can be used for any new
	 *         allocations during resize, including allocation for relocating
	 *         data from the area of disk being reclaimed.
	 *      C. Allocated_To-Reclaim - These are allocated blocks that exist
	 *         beyond the new end of disk.  These blocks need to be reclaimed
	 *         during resize by allocating equal number of blocks in Free
	 *         Stationary zone and copying the data.
	 *      D. Free_To-Reclaim - These are free blocks that exist beyond the
	 *         new end of disk.  Nothing special needs to be done to reclaim
	 *         them.
	 *
	 * Total number of blocks on the disk before resize:
	 * ------------------------------------------------
	 * Total Blocks = Allocated_Stationary + Free_Stationary +
	 *                Allocated_To-Reclaim + Free_To-Reclaim
	 *
	 * Total number of blocks that need to be reclaimed:
	 * ------------------------------------------------
	 * Blocks to Reclaim = Allocated_To-Reclaim + Free_To-Reclaim
	 *
	 * Note that the check below also makes sure that we have enough space
	 * to relocate data from Allocated_To-Reclaim to Free_Stationary.
	 * Therefore we do not need to check total number of blocks to relocate
	 * later in the code.
	 *
	 * The condition below gets converted to:
	 *
	 * Allocated To-Reclaim + Free To-Reclaim >= Free Stationary + Free To-Reclaim
	 *
	 * which is equivalent to:
	 *
	 *              Allocated To-Reclaim >= Free Stationary
	 */
	if (reclaimblks >= hfs_freeblks(hfsmp, 1)) {
		printf("hfs_truncatefs: insufficient space (need %u blocks; have %u free blocks)\n", reclaimblks, hfs_freeblks(hfsmp, 1));
		error = ENOSPC;
		goto out;
	}

	/* Start with a clean journal. */
	hfs_journal_flush(hfsmp, TRUE);

	if (hfs_start_transaction(hfsmp) != 0) {
		error = EINVAL;
		goto out;
	}
	transaction_begun = 1;

	/* Take the bitmap lock to update the alloc limit field */
	lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);

	/*
	 * Prevent new allocations from using the part we're trying to truncate.
	 *
	 * NOTE: allocLimit is set to the allocation block number where the new
	 * alternate volume header will be.  That way there will be no files to
	 * interfere with allocating the new alternate volume header, and no files
	 * in the allocation blocks beyond (i.e. the blocks we're trying to
	 * truncate away).
	 *
	 * Also shrink the red-black tree if needed.
	 */
	if (hfsmp->blockSize == 512) {
		/* 512-byte-block volumes keep the 1 KB alternate VH in two blocks. */
		error = UpdateAllocLimit (hfsmp, newblkcnt - 2);
	}
	else {
		error = UpdateAllocLimit (hfsmp, newblkcnt - 1);
	}

	/* Sparse devices use first fit allocation which is not ideal
	 * for volume resize which requires best fit allocation.  If a
	 * sparse device is being truncated, disable the sparse device
	 * property temporarily for the duration of resize.  Also reset
	 * the free extent cache so that it is rebuilt as sorted by
	 * totalBlocks instead of startBlock.
	 *
	 * Note that this will affect all allocations on the volume and
	 * ideal fix would be just to modify resize-related allocations,
	 * but it will result in complexity like handling of two free
	 * extent caches sorted differently, etc.  So we stick to this
	 * solution for now.
	 */
	HFS_MOUNT_LOCK(hfsmp, TRUE);
	if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
		hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
		ResetVCBFreeExtCache(hfsmp);
		disable_sparse = true;
	}

	/*
	 * Update the volume free block count to reflect the total number
	 * of free blocks that will exist after a successful resize.
	 * Relocation of extents will result in no net change in the total
	 * free space on the disk.  Therefore the code that allocates
	 * space for new extent and deallocates the old extent explicitly
	 * prevents updating the volume free block count.  It will also
	 * prevent false disk full error when the number of blocks in
	 * an extent being relocated is more than the free blocks that
	 * will exist after the volume is resized.
	 */
	hfsmp->freeBlocks -= reclaimblks;
	updateFreeBlocks = true;	/* remember so "out:" can undo this on error */
	HFS_MOUNT_UNLOCK(hfsmp, TRUE);

	if (lockflags) {
		hfs_systemfile_unlock(hfsmp, lockflags);
		lockflags = 0;
	}

	/*
	 * Update the metadata zone size to match the new volume size,
	 * and if it is too small, the metadata zone might be disabled.
	 */
	hfs_metadatazone_init(hfsmp, false);

	/*
	 * If some files have blocks at or beyond the location of the
	 * new alternate volume header, recalculate free blocks and
	 * reclaim blocks.  Otherwise just update free blocks count.
	 *
	 * The current allocLimit is set to the location of new alternate
	 * volume header, and reclaimblks are the total number of blocks
	 * that need to be reclaimed.  So the check below is really
	 * ignoring the blocks allocated for old alternate volume header.
	 */
	if (hfs_isallocated(hfsmp, hfsmp->allocLimit, reclaimblks)) {
		/*
		 * hfs_reclaimspace will use separate transactions when
		 * relocating files (so we don't overwhelm the journal).
		 */
		hfs_end_transaction(hfsmp);
		transaction_begun = 0;

		/* Attempt to reclaim some space. */
		error = hfs_reclaimspace(hfsmp, hfsmp->allocLimit, reclaimblks, context);
		if (error != 0) {
			printf("hfs_truncatefs: couldn't reclaim space on %s (error=%d)\n", hfsmp->vcbVN, error);
			error = ENOSPC;
			goto out;
		}
		if (hfs_start_transaction(hfsmp) != 0) {
			error = EINVAL;
			goto out;
		}
		transaction_begun = 1;

		/* Check if we're clear now. */
		error = hfs_isallocated(hfsmp, hfsmp->allocLimit, reclaimblks);
		if (error != 0) {
			printf("hfs_truncatefs: didn't reclaim enough space on %s (error=%d)\n", hfsmp->vcbVN, error);
			error = EAGAIN;	/* tell client to try again */
			goto out;
		}
	}

	/*
	 * Note: we take the attributes lock in case we have an attribute data vnode
	 * which needs to change size.
	 */
	lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);

	/*
	 * Allocate last 1KB for alternate volume header.
	 */
	error = BlockMarkAllocated(hfsmp, hfsmp->allocLimit, (hfsmp->blockSize == 512) ? 2 : 1);
	if (error) {
		printf("hfs_truncatefs: Error %d allocating new alternate volume header\n", error);
		goto out;
	}

	/*
	 * Mark the old alternate volume header as free.
	 * We don't bother shrinking allocation bitmap file.
	 */
	if (hfsmp->blockSize == 512)
		(void) BlockMarkFree(hfsmp, hfsmp->totalBlocks - 2, 2);
	else
		(void) BlockMarkFree(hfsmp, hfsmp->totalBlocks - 1, 1);

	/*
	 * Invalidate the existing alternate volume header.
	 *
	 * Don't include this in a transaction (don't call journal_modify_block)
	 * since this block will be outside of the truncated file system!
	 */
	if (hfsmp->hfs_alt_id_sector) {
		error = buf_meta_bread(hfsmp->hfs_devvp,
				HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys),
				hfsmp->hfs_physical_block_size, NOCRED, &bp);
		if (error == 0) {
			bzero((void*)((char *)buf_dataptr(bp) + HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size)), kMDBSize);
			(void) VNOP_BWRITE(bp);
		} else {
			if (bp) {
				buf_brelse(bp);
			}
		}
		bp = NULL;
	}

	/* Log successful shrinking. */
	printf("hfs_truncatefs: shrank \"%s\" to %d blocks (was %d blocks)\n",
	       hfsmp->vcbVN, newblkcnt, hfsmp->totalBlocks);

	/*
	 * Adjust file system variables and flush them to disk.
	 */
	hfsmp->totalBlocks = newblkcnt;
	hfsmp->hfs_logical_block_count = newsize / hfsmp->hfs_logical_block_size;
	hfsmp->hfs_alt_id_sector = HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, hfsmp->hfs_logical_block_count);
	MarkVCBDirty(hfsmp);
	error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
	if (error)
		/* At this point the on-disk state has been partially committed,
		 * so a failed flush leaves the volume inconsistent; panic. */
		panic("hfs_truncatefs: unexpected error flushing volume header (%d)\n", error);

	/*
	 * Adjust the size of hfsmp->hfs_attrdata_vp
	 */
	if (hfsmp->hfs_attrdata_vp) {
		struct cnode *cp;
		struct filefork *fp;

		if (vnode_get(hfsmp->hfs_attrdata_vp) == 0) {
			cp = VTOC(hfsmp->hfs_attrdata_vp);
			fp = VTOF(hfsmp->hfs_attrdata_vp);

			cp->c_blocks = newblkcnt;
			fp->ff_blocks = newblkcnt;
			fp->ff_extents[0].blockCount = newblkcnt;
			fp->ff_size = (off_t) newblkcnt * hfsmp->blockSize;
			ubc_setsize(hfsmp->hfs_attrdata_vp, fp->ff_size);
			vnode_put(hfsmp->hfs_attrdata_vp);
		}
	}

out:
	/*
	 * Update the allocLimit to acknowledge the last one or two blocks now.
	 * Add it to the tree as well if necessary.
	 */
	UpdateAllocLimit (hfsmp, hfsmp->totalBlocks);

	HFS_MOUNT_LOCK(hfsmp, TRUE);
	if (disable_sparse == true) {
		/* Now that resize is completed, set the volume to be sparse
		 * device again so that all further allocations will be first
		 * fit instead of best fit.  Reset free extent cache so that
		 * it is rebuilt.
		 */
		hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
		ResetVCBFreeExtCache(hfsmp);
	}

	/* Undo the speculative freeBlocks adjustment made above. */
	if (error && (updateFreeBlocks == true)) {
		hfsmp->freeBlocks += reclaimblks;
	}

	if (hfsmp->nextAllocation >= hfsmp->allocLimit) {
		hfsmp->nextAllocation = hfsmp->hfs_metazone_end + 1;
	}
	hfsmp->hfs_flags &= ~HFS_RESIZE_IN_PROGRESS;
	HFS_MOUNT_UNLOCK(hfsmp, TRUE);

	/* On error, reset the metadata zone for original volume size */
	if (error && (updateFreeBlocks == true)) {
		hfs_metadatazone_init(hfsmp, false);
	}

	if (lockflags) {
		hfs_systemfile_unlock(hfsmp, lockflags);
	}
	if (transaction_begun) {
		hfs_end_transaction(hfsmp);
		hfs_journal_flush(hfsmp, FALSE);
		/* Just to be sure, sync all data to the disk */
		(void) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
	}

	return MacToVFSError(error);
}
4595
4596
4597 /*
4598 * Invalidate the physical block numbers associated with buffer cache blocks
4599 * in the given extent of the given vnode.
4600 */
/* Argument block passed to hfs_invalidate_block_numbers_callback(). */
struct hfs_inval_blk_no {
	daddr64_t sectorStart;	/* first device sector of the range to invalidate */
	daddr64_t sectorCount;	/* number of device sectors in the range */
};
4605 static int
4606 hfs_invalidate_block_numbers_callback(buf_t bp, void *args_in)
4607 {
4608 daddr64_t blkno;
4609 struct hfs_inval_blk_no *args;
4610
4611 blkno = buf_blkno(bp);
4612 args = args_in;
4613
4614 if (blkno >= args->sectorStart && blkno < args->sectorStart+args->sectorCount)
4615 buf_setblkno(bp, buf_lblkno(bp));
4616
4617 return BUF_RETURNED;
4618 }
4619 static void
4620 hfs_invalidate_sectors(struct vnode *vp, daddr64_t sectorStart, daddr64_t sectorCount)
4621 {
4622 struct hfs_inval_blk_no args;
4623 args.sectorStart = sectorStart;
4624 args.sectorCount = sectorCount;
4625
4626 buf_iterate(vp, hfs_invalidate_block_numbers_callback, BUF_SCAN_DIRTY|BUF_SCAN_CLEAN, &args);
4627 }
4628
4629
/*
 * Copy the contents of an extent to a new location.  Also invalidates the
 * physical block number of any buffer cache block in the copied extent
 * (so that if the block is written, it will go through VNOP_BLOCKMAP to
 * determine the new physical block number).
 *
 * The copy is performed synchronously through the device vnode, one
 * bounce-buffer-sized chunk at a time (read then write).  The caller must
 * hold the cnode lock on vp, except for the allocation file (see the
 * sanity check below).  Returns 0 on success or an errno from buffer
 * allocation or device I/O.
 */
static int
hfs_copy_extent(
	struct hfsmount *hfsmp,
	struct vnode *vp,		/* The file whose extent is being copied. */
	u_int32_t oldStart,		/* The start of the source extent. */
	u_int32_t newStart,		/* The start of the destination extent. */
	u_int32_t blockCount,	/* The number of allocation blocks to copy. */
	vfs_context_t context)
{
	int err = 0;
	size_t bufferSize;
	void *buffer = NULL;
	struct vfsioattr ioattr;
	buf_t bp = NULL;
	off_t resid;
	size_t ioSize;
	u_int32_t ioSizeSectors;	/* Device sectors in this I/O */
	daddr64_t srcSector, destSector;
	u_int32_t sectorsPerBlock = hfsmp->blockSize / hfsmp->hfs_logical_block_size;
#if CONFIG_PROTECT
	int cpenabled = 0;
#endif

	/*
	 * Sanity check that we have locked the vnode of the file we're copying.
	 *
	 * But since hfs_systemfile_lock() doesn't actually take the lock on
	 * the allocation file if a journal is active, ignore the check if the
	 * file being copied is the allocation file.
	 */
	struct cnode *cp = VTOC(vp);
	if (cp != hfsmp->hfs_allocation_cp && cp->c_lockowner != current_thread())
		panic("hfs_copy_extent: vp=%p (cp=%p) not owned?\n", vp, cp);

#if CONFIG_PROTECT
	/* Prepare the CP blob and get it ready for use */
	if (!vnode_issystem (vp) && vnode_isreg(vp) &&
			cp_fs_protected (hfsmp->hfs_mp)) {
		int cp_err = 0;
		cp_err = cp_handle_relocate (cp);
		if (cp_err) {
			/*
			 * can't copy the file because we couldn't set up keys.
			 * bail out
			 */
			return cp_err;
		}
		else {
			cpenabled = 1;
		}
	}
#endif

	/*
	 * Determine the I/O size to use
	 *
	 * NOTE: Many external drives will result in an ioSize of 128KB.
	 * TODO: Should we use a larger buffer, doing several consecutive
	 * reads, then several consecutive writes?
	 */
	vfs_ioattr(hfsmp->hfs_mp, &ioattr);
	/* Bounce buffer must fit within both the read and write I/O limits. */
	bufferSize = MIN(ioattr.io_maxreadcnt, ioattr.io_maxwritecnt);
	if (kmem_alloc(kernel_map, (vm_offset_t*) &buffer, bufferSize))
		return ENOMEM;

	/* Get a buffer for doing the I/O */
	bp = buf_alloc(hfsmp->hfs_devvp);
	buf_setdataptr(bp, (uintptr_t)buffer);

	/* Convert allocation-block units to byte count and device sectors. */
	resid = (off_t) blockCount * (off_t) hfsmp->blockSize;
	srcSector = (daddr64_t) oldStart * hfsmp->blockSize / hfsmp->hfs_logical_block_size;
	destSector = (daddr64_t) newStart * hfsmp->blockSize / hfsmp->hfs_logical_block_size;
	while (resid > 0) {
		ioSize = MIN(bufferSize, (size_t) resid);
		ioSizeSectors = ioSize / hfsmp->hfs_logical_block_size;

		/* Prepare the buffer for reading */
		buf_reset(bp, B_READ);
		buf_setsize(bp, ioSize);
		buf_setcount(bp, ioSize);
		buf_setblkno(bp, srcSector);
		buf_setlblkno(bp, srcSector);

		/* Attach the CP to the buffer */
#if CONFIG_PROTECT
		if (cpenabled) {
			buf_setcpaddr (bp, cp->c_cpentry);
		}
#endif

		/* Do the read (synchronous: wait for completion). */
		err = VNOP_STRATEGY(bp);
		if (!err)
			err = buf_biowait(bp);
		if (err) {
			printf("hfs_copy_extent: Error %d from VNOP_STRATEGY (read)\n", err);
			break;
		}

		/* Prepare the buffer for writing */
		buf_reset(bp, B_WRITE);
		buf_setsize(bp, ioSize);
		buf_setcount(bp, ioSize);
		buf_setblkno(bp, destSector);
		buf_setlblkno(bp, destSector);
		/* System-file writes use FUA when the journal does, so the data
		 * is durable without a full cache flush afterwards. */
		if (vnode_issystem(vp) && journal_uses_fua(hfsmp->jnl))
			buf_markfua(bp);

#if CONFIG_PROTECT
		/* Attach the CP to the buffer */
		if (cpenabled) {
			buf_setcpaddr (bp, cp->c_cpentry);
		}
#endif

		/* Do the write */
		vnode_startwrite(hfsmp->hfs_devvp);
		err = VNOP_STRATEGY(bp);
		if (!err)
			err = buf_biowait(bp);
		if (err) {
			printf("hfs_copy_extent: Error %d from VNOP_STRATEGY (write)\n", err);
			break;
		}

		resid -= ioSize;
		srcSector += ioSizeSectors;
		destSector += ioSizeSectors;
	}
	if (bp)
		buf_free(bp);
	if (buffer)
		kmem_free(kernel_map, (vm_offset_t)buffer, bufferSize);

	/* Make sure all writes have been flushed to disk. */
	if (vnode_issystem(vp) && !journal_uses_fua(hfsmp->jnl)) {
		err = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
		if (err) {
			printf("hfs_copy_extent: DKIOCSYNCHRONIZECACHE failed (%d)\n", err);
			err = 0;	/* Don't fail the copy. */
		}
	}

	/* Drop stale physical mappings so later writes re-map to newStart. */
	if (!err)
		hfs_invalidate_sectors(vp, (daddr64_t)oldStart*sectorsPerBlock, (daddr64_t)blockCount*sectorsPerBlock);

	return err;
}
4784
4785
/* Structure to store state of reclaiming extents from a
 * given file. hfs_reclaim_file()/hfs_reclaim_xattr()
 * initializes the values in this structure which are then
 * used by code that reclaims and splits the extents.
 */
struct hfs_reclaim_extent_info {
	struct vnode *vp;		/* Vnode of the file whose extents are being reclaimed */
	u_int32_t fileID;		/* Catalog node ID owning the extents */
	u_int8_t forkType;		/* Fork the extents belong to (data vs. resource — confirm against callers) */
	u_int8_t is_dirlink;             /* Extent belongs to directory hard link */
	u_int8_t is_sysfile;             /* Extent belongs to system file */
	u_int8_t is_xattr;               /* Extent belongs to extent-based xattr */
	u_int8_t extent_index;		/* Index of the extent currently being processed within the record */
	int lockflags;                   /* Locks that reclaim and split code should grab before modifying the extent record */
	u_int32_t blocks_relocated;      /* Total blocks relocated for this file till now */
	u_int32_t recStartBlock;         /* File allocation block number (FABN) for current extent record */
	u_int32_t cur_blockCount;        /* Number of allocation blocks that have been checked for reclaim */
	struct filefork *catalog_fp;     /* If non-NULL, extent is from catalog record */
	union record {
		HFSPlusExtentRecord overflow;/* Extent record from overflow extents btree */
		HFSPlusAttrRecord xattr;	/* Attribute record for large EAs */
	} record;
	HFSPlusExtentDescriptor *extents;	/* Pointer to current extent record being processed.
						 * For catalog extent record, points to the correct
						 * extent information in filefork.  For overflow extent
						 * record, or xattr record, points to extent record
						 * in the structure above
						 */
	struct cat_desc *dirlink_desc;	/* Catalog descriptor, used only for directory hard links */
	struct cat_attr *dirlink_attr;	/* Catalog attributes, used only for directory hard links */
	struct filefork *dirlink_fork;	/* For directory hard links, fp points actually to this */
	struct BTreeIterator *iterator;	/* Shared read/write iterator, hfs_reclaim_file/xattr()
					 * use it for reading and hfs_reclaim_extent()/hfs_split_extent()
					 * use it for writing updated extent record
					 */
	struct FSBufferDescriptor btdata;	/* Shared btdata for reading/writing extent record, same as iterator above */
	u_int16_t recordlen;		/* Length of the current btree record (presumably bytes — verify) */
	int overflow_count;		/* For debugging, counter for overflow extent record */
	FCB *fcb;			/* Pointer to the current btree being traversed */
};
4826
4827 /*
4828 * Split the current extent into two extents, with first extent
4829 * to contain given number of allocation blocks. Splitting of
4830 * extent creates one new extent entry which can result in
4831 * shifting of many entries through all the extent records of a
4832 * file, and/or creating a new extent record in the overflow
4833 * extent btree.
4834 *
4835 * Example:
4836 * The diagram below represents two consecutive extent records,
4837 * for simplicity, lets call them record X and X+1 respectively.
4838 * Interesting extent entries have been denoted by letters.
4839 * If the letter is unchanged before and after split, it means
4840 * that the extent entry was not modified during the split.
4841 * A '.' means that the entry remains unchanged after the split
4842 * and is not relevant for our example. A '0' means that the
4843 * extent entry is empty.
4844 *
4845 * If there isn't sufficient contiguous free space to relocate
4846 * an extent (extent "C" below), we will have to break the one
4847 * extent into multiple smaller extents, and relocate each of
4848 * the smaller extents individually. The way we do this is by
4849 * finding the largest contiguous free space that is currently
4850 * available (N allocation blocks), and then convert extent "C"
4851 * into two extents, C1 and C2, that occupy exactly the same
4852 * allocation blocks as extent C. Extent C1 is the first
4853 * N allocation blocks of extent C, and extent C2 is the remainder
4854 * of extent C. Then we can relocate extent C1 since we know
4855 * we have enough contiguous free space to relocate it in its
4856 * entirety. We then repeat the process starting with extent C2.
4857 *
4858 * In record X, only the entries following entry C are shifted, and
4859 * the original entry C is replaced with two entries C1 and C2 which
4860 * are actually two extent entries for contiguous allocation blocks.
4861 *
4862 * Note that the entry E from record X is shifted into record X+1 as
4863 * the new first entry. Since the first entry of record X+1 is updated,
4864 * the FABN will also get updated with the blockCount of entry E.
4865 * This also results in shifting of all extent entries in record X+1.
4866 * Note that the number of empty entries after the split has been
4867 * changed from 3 to 2.
4868 *
4869 * Before:
4870 * record X record X+1
4871 * ---------------------===--------- ---------------------------------
4872 * | A | . | . | . | B | C | D | E | | F | . | . | . | G | 0 | 0 | 0 |
4873 * ---------------------===--------- ---------------------------------
4874 *
4875 * After:
4876 * ---------------------=======----- ---------------------------------
4877 * | A | . | . | . | B | C1| C2| D | | E | F | . | . | . | G | 0 | 0 |
4878 * ---------------------=======----- ---------------------------------
4879 *
4880 * C1.startBlock = C.startBlock
4881 * C1.blockCount = N
4882 *
4883 * C2.startBlock = C.startBlock + N
4884 * C2.blockCount = C.blockCount - N
4885 *
4886 * FABN = old FABN - E.blockCount
4887 *
4888 * Inputs:
4889 * extent_info - This is the structure that contains state about
4890 * the current file, extent, and extent record that
4891 * is being relocated. This structure is shared
4892 * among code that traverses through all the extents
4893 * of the file, code that relocates extents, and
4894 * code that splits the extent.
4895 * Output:
4896 * Zero on success, non-zero on failure.
4897 */
static int
hfs_split_extent(struct hfs_reclaim_extent_info *extent_info, uint32_t newBlockCount)
{
	int error = 0;
	int index = extent_info->extent_index;	/* Index of the extent entry being split */
	int i;
	HFSPlusExtentDescriptor shift_extent;	/* Entry that spills into the following overflow record */
	HFSPlusExtentDescriptor last_extent;
	HFSPlusExtentDescriptor *extents; /* Pointer to current extent record being manipulated */
	HFSPlusExtentRecord *extents_rec = NULL;
	HFSPlusExtentKey *extents_key = NULL;
	HFSPlusAttrRecord *xattr_rec = NULL;
	HFSPlusAttrKey *xattr_key = NULL;
	/* NOTE(review): BTreeIterator is a sizable structure to keep on the
	 * kernel stack; consider heap-allocating it like the record buffers
	 * below — confirm against stack-usage limits for this call path.
	 */
	struct BTreeIterator iterator;
	struct FSBufferDescriptor btdata;
	uint16_t reclen;
	uint32_t read_recStartBlock;	/* Starting allocation block number to read old extent record */
	uint32_t write_recStartBlock;	/* Starting allocation block number to insert newly updated extent record */
	Boolean create_record = false;
	Boolean is_xattr;

	is_xattr = extent_info->is_xattr;
	extents = extent_info->extents;

	if (hfs_resize_debug) {
		printf ("hfs_split_extent: Split record:%u recStartBlock=%u %u:(%u,%u) for %u blocks\n", extent_info->overflow_count, extent_info->recStartBlock, index, extents[index].startBlock, extents[index].blockCount, newBlockCount);
	}

	/* Determine the starting allocation block number for the following
	 * overflow extent record, if any, before the current record
	 * gets modified.
	 */
	read_recStartBlock = extent_info->recStartBlock;
	for (i = 0; i < kHFSPlusExtentDensity; i++) {
		if (extents[i].blockCount == 0) {
			break;
		}
		read_recStartBlock += extents[i].blockCount;
	}

	/* Shift and split */
	if (index == kHFSPlusExtentDensity-1) {
		/* The new extent created after split will go into following overflow extent record */
		shift_extent.startBlock = extents[index].startBlock + newBlockCount;
		shift_extent.blockCount = extents[index].blockCount - newBlockCount;

		/* Last extent in the record will be split, so nothing to shift */
	} else {
		/* Splitting of extents can result in at most one extent
		 * entry being shifted into the following overflow extent
		 * record.  So, store the last extent entry for later.
		 */
		shift_extent = extents[kHFSPlusExtentDensity-1];

		/* Start shifting extent information from the end of the extent
		 * record to the index where we want to insert the new extent.
		 * Note that kHFSPlusExtentDensity-1 is already saved above, and
		 * does not need to be shifted.  The extent entry that is being
		 * split does not get shifted.
		 */
		for (i = kHFSPlusExtentDensity-2; i > index; i--) {
			if (hfs_resize_debug) {
				if (extents[i].blockCount) {
					printf ("hfs_split_extent: Shift %u:(%u,%u) to %u:(%u,%u)\n", i, extents[i].startBlock, extents[i].blockCount, i+1, extents[i].startBlock, extents[i].blockCount);
				}
			}
			extents[i+1] = extents[i];
		}
	}

	if (index == kHFSPlusExtentDensity-1) {
		/* The second half of the extent being split will be the overflow
		 * entry that will go into following overflow extent record.  The
		 * value has been stored in 'shift_extent' above, so there is
		 * nothing to be done here.
		 */
	} else {
		/* Update the values in the second half of the extent being split
		 * before updating the first half of the split.  Note that the
		 * extent to split or first half of the split is at index 'index'
		 * and a new extent or second half of the split will be inserted at
		 * 'index+1' or into following overflow extent record.
		 */
		extents[index+1].startBlock = extents[index].startBlock + newBlockCount;
		extents[index+1].blockCount = extents[index].blockCount - newBlockCount;
	}
	/* Update the extent being split, only the block count will change */
	extents[index].blockCount = newBlockCount;

	if (hfs_resize_debug) {
		printf ("hfs_split_extent: Split %u:(%u,%u) and ", index, extents[index].startBlock, extents[index].blockCount);
		if (index != kHFSPlusExtentDensity-1) {
			printf ("%u:(%u,%u)\n", index+1, extents[index+1].startBlock, extents[index+1].blockCount);
		} else {
			printf ("overflow:(%u,%u)\n", shift_extent.startBlock, shift_extent.blockCount);
		}
	}

	/* If the newly split extent is for large EAs or in overflow extent
	 * record, update it directly in the btree using the iterator
	 * information from the shared extent_info structure.
	 */
	if (extent_info->catalog_fp == NULL) {
		error = BTReplaceRecord(extent_info->fcb, extent_info->iterator,
				&(extent_info->btdata), extent_info->recordlen);
		if (error) {
			printf ("hfs_split_extent: fileID=%u BTReplaceRecord returned error=%d\n", extent_info->fileID, error);
			goto out;
		}
	}

	/* No extent entry to be shifted into another extent overflow record */
	if (shift_extent.blockCount == 0) {
		if (hfs_resize_debug) {
			printf ("hfs_split_extent: No extent entry to be shifted into overflow records\n");
		}
		error = 0;
		goto out;
	}

	/* The overflow extent entry has to be shifted into an extent
	 * overflow record.  This would mean that we have to shift
	 * extent entries from all overflow records by one.  We will
	 * start iteration from the first record to the last record,
	 * and shift the extent entry from one record to another.
	 * We might have to create a new record for the last extent
	 * entry for the file.
	 */

	/* Initialize iterator to search the next record */
	bzero(&iterator, sizeof(iterator));
	if (is_xattr) {
		/* Copy the key from the iterator that was used to update the
		 * modified attribute record.
		 */
		xattr_key = (HFSPlusAttrKey *)&(iterator.key);
		bcopy((HFSPlusAttrKey *)&(extent_info->iterator->key), xattr_key, sizeof(HFSPlusAttrKey));
		/* Note: xattr_key->startBlock will be initialized later in the iteration loop */

		MALLOC(xattr_rec, HFSPlusAttrRecord *,
				sizeof(HFSPlusAttrRecord), M_TEMP, M_WAITOK);
		if (xattr_rec == NULL) {
			error = ENOMEM;
			goto out;
		}
		btdata.bufferAddress = xattr_rec;
		btdata.itemSize = sizeof(HFSPlusAttrRecord);
		btdata.itemCount = 1;
		extents = xattr_rec->overflowExtents.extents;
	} else {
		/* Build an extents-overflow btree key for this file and fork. */
		extents_key = (HFSPlusExtentKey *) &(iterator.key);
		extents_key->keyLength = kHFSPlusExtentKeyMaximumLength;
		extents_key->forkType = extent_info->forkType;
		extents_key->fileID = extent_info->fileID;
		/* Note: extents_key->startBlock will be initialized later in the iteration loop */

		MALLOC(extents_rec, HFSPlusExtentRecord *,
				sizeof(HFSPlusExtentRecord), M_TEMP, M_WAITOK);
		if (extents_rec == NULL) {
			error = ENOMEM;
			goto out;
		}
		btdata.bufferAddress = extents_rec;
		btdata.itemSize = sizeof(HFSPlusExtentRecord);
		btdata.itemCount = 1;
		extents = extents_rec[0];
	}

	/* An extent entry still needs to be shifted into following overflow
	 * extent record.  This will result in the starting allocation block
	 * number of the extent record being changed which is part of the key
	 * for the extent record.  Since the extent record key is changing,
	 * the record can not be updated, instead has to be deleted and
	 * inserted again.
	 */
	while (shift_extent.blockCount) {
		if (hfs_resize_debug) {
			printf ("hfs_split_extent: Will shift (%u,%u) into record with startBlock=%u\n", shift_extent.startBlock, shift_extent.blockCount, read_recStartBlock);
		}

		/* Search if there is any existing overflow extent record.
		 * For this, the logical start block number in the key is
		 * the value calculated based on the logical start block
		 * number of the current extent record and the total number
		 * of blocks existing in the current extent record.
		 */
		if (is_xattr) {
			xattr_key->startBlock = read_recStartBlock;
		} else {
			extents_key->startBlock = read_recStartBlock;
		}
		error = BTSearchRecord(extent_info->fcb, &iterator, &btdata, &reclen, &iterator);
		if (error) {
			if (error != btNotFound) {
				printf ("hfs_split_extent: fileID=%u startBlock=%u BTSearchRecord error=%d\n", extent_info->fileID, read_recStartBlock, error);
				goto out;
			}
			/* No record at this key: a brand-new overflow record will
			 * be created for the spilled entry below.
			 */
			create_record = true;
		}

		/* The extra extent entry from the previous record is being inserted
		 * as the first entry in the current extent record.  This will change
		 * the file allocation block number (FABN) of the current extent
		 * record, which is the startBlock value from the extent record key.
		 * Since one extra entry is being inserted in the record, the new
		 * FABN for the record will be less than old FABN by the number of
		 * blocks in the new extent entry being inserted at the start.  We
		 * have to do this before we update read_recStartBlock to point at
		 * the startBlock of the following record.
		 */
		write_recStartBlock = read_recStartBlock - shift_extent.blockCount;
		if (hfs_resize_debug) {
			if (create_record) {
				printf ("hfs_split_extent: No records found for startBlock=%u, will create new with startBlock=%u\n", read_recStartBlock, write_recStartBlock);
			}
		}

		/* Now update the read_recStartBlock to account for total number
		 * of blocks in this extent record.  It will now point to the
		 * starting allocation block number for the next extent record.
		 * NOTE(review): when the search above failed (create_record),
		 * this loop reads whatever the search left in the record buffer;
		 * the resulting value appears harmless because the loop below
		 * terminates before read_recStartBlock is used again — confirm.
		 */
		for (i = 0; i < kHFSPlusExtentDensity; i++) {
			if (extents[i].blockCount == 0) {
				break;
			}
			read_recStartBlock += extents[i].blockCount;
		}

		if (create_record == true) {
			/* Initialize new record content with only one extent entry */
			bzero(extents, sizeof(HFSPlusExtentRecord));
			/* The new record will contain only one extent entry */
			extents[0] = shift_extent;
			/* There are no more overflow extents to be shifted */
			shift_extent.startBlock = shift_extent.blockCount = 0;

			if (is_xattr) {
				xattr_rec->recordType = kHFSPlusAttrExtents;
				xattr_rec->overflowExtents.reserved = 0;
				reclen = sizeof(HFSPlusAttrExtents);
			} else {
				reclen = sizeof(HFSPlusExtentRecord);
			}
		} else {
			/* The overflow extent entry from previous record will be
			 * the first entry in this extent record.  If the last
			 * extent entry in this record is valid, it will be shifted
			 * into the following extent record as its first entry.  So
			 * save the last entry before shifting entries in current
			 * record.
			 */
			last_extent = extents[kHFSPlusExtentDensity-1];

			/* Shift all entries by one index towards the end */
			for (i = kHFSPlusExtentDensity-2; i >= 0; i--) {
				extents[i+1] = extents[i];
			}

			/* Overflow extent entry saved from previous record
			 * is now the first entry in the current record.
			 */
			extents[0] = shift_extent;

			if (hfs_resize_debug) {
				printf ("hfs_split_extent: Shift overflow=(%u,%u) to record with updated startBlock=%u\n", shift_extent.startBlock, shift_extent.blockCount, write_recStartBlock);
			}

			/* The last entry from current record will be the
			 * overflow entry which will be the first entry for
			 * the following extent record.
			 */
			shift_extent = last_extent;

			/* Since the key->startBlock is being changed for this record,
			 * it should be deleted and inserted with the new key.
			 */
			error = BTDeleteRecord(extent_info->fcb, &iterator);
			if (error) {
				printf ("hfs_split_extent: fileID=%u startBlock=%u BTDeleteRecord error=%d\n", extent_info->fileID, read_recStartBlock, error);
				goto out;
			}
			if (hfs_resize_debug) {
				printf ("hfs_split_extent: Deleted record with startBlock=%u\n", (is_xattr ? xattr_key->startBlock : extents_key->startBlock));
			}
		}

		/* Insert the newly created or modified extent record */
		bzero(&iterator.hint, sizeof(iterator.hint));
		if (is_xattr) {
			xattr_key->startBlock = write_recStartBlock;
		} else {
			extents_key->startBlock = write_recStartBlock;
		}
		error = BTInsertRecord(extent_info->fcb, &iterator, &btdata, reclen);
		if (error) {
			printf ("hfs_split_extent: fileID=%u, startBlock=%u BTInsertRecord error=%d\n", extent_info->fileID, write_recStartBlock, error);
			goto out;
		}
		if (hfs_resize_debug) {
			printf ("hfs_split_extent: Inserted extent record with startBlock=%u\n", write_recStartBlock);
		}
	}
	/* Flush the btree so the delete/insert sequence reaches disk. */
	BTFlushPath(extent_info->fcb);
out:
	if (extents_rec) {
		FREE (extents_rec, M_TEMP);
	}
	if (xattr_rec) {
		FREE (xattr_rec, M_TEMP);
	}
	return error;
}
5208
5209
5210 /*
5211 * Relocate an extent if it lies beyond the expected end of volume.
5212 *
5213 * This function is called for every extent of the file being relocated.
 * It allocates space for relocation, copies the data, deallocates
 * the old extent, and updates the corresponding on-disk extent. If the
 * function does not find contiguous space to relocate an extent, it splits
 * the extent into smaller pieces so that each piece can be relocated out
 * of the area of disk being reclaimed. As an optimization, if an extent lies partially
5219 * in the area of the disk being reclaimed, it is split so that we only
5220 * have to relocate the area that was overlapping with the area of disk
5221 * being reclaimed.
5222 *
5223 * Note that every extent is relocated in its own transaction so that
5224 * they do not overwhelm the journal. This function handles the extent
5225 * record that exists in the catalog record, extent record from overflow
5226 * extents btree, and extents for large EAs.
5227 *
5228 * Inputs:
5229 * extent_info - This is the structure that contains state about
5230 * the current file, extent, and extent record that
5231 * is being relocated. This structure is shared
5232 * among code that traverses through all the extents
5233 * of the file, code that relocates extents, and
5234 * code that splits the extent.
5235 */
5236 static int
5237 hfs_reclaim_extent(struct hfsmount *hfsmp, const u_long allocLimit, struct hfs_reclaim_extent_info *extent_info, vfs_context_t context)
5238 {
5239 int error = 0;
5240 int index;
5241 struct cnode *cp;
5242 u_int32_t oldStartBlock;
5243 u_int32_t oldBlockCount;
5244 u_int32_t newStartBlock;
5245 u_int32_t newBlockCount;
5246 u_int32_t alloc_flags;
5247 int blocks_allocated = false;
5248
5249 index = extent_info->extent_index;
5250 cp = VTOC(extent_info->vp);
5251
5252 oldStartBlock = extent_info->extents[index].startBlock;
5253 oldBlockCount = extent_info->extents[index].blockCount;
5254
5255 if (0 && hfs_resize_debug) {
5256 printf ("hfs_reclaim_extent: Examine record:%u recStartBlock=%u, %u:(%u,%u)\n", extent_info->overflow_count, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount);
5257 }
5258
5259 /* Check if the current extent lies completely within allocLimit */
5260 if ((oldStartBlock + oldBlockCount) <= allocLimit) {
5261 extent_info->cur_blockCount += oldBlockCount;
5262 return error;
5263 }
5264
5265 /* Every extent should be relocated in its own transaction
5266 * to make sure that we don't overflow the journal buffer.
5267 */
5268 error = hfs_start_transaction(hfsmp);
5269 if (error) {
5270 return error;
5271 }
5272 extent_info->lockflags = hfs_systemfile_lock(hfsmp, extent_info->lockflags, HFS_EXCLUSIVE_LOCK);
5273
5274 /* Check if the extent lies partially in the area to reclaim,
5275 * i.e. it starts before allocLimit and ends beyond allocLimit.
5276 * We have already skipped extents that lie completely within
5277 * allocLimit in the check above, so we only check for the
5278 * startBlock. If it lies partially, split it so that we
5279 * only relocate part of the extent.
5280 */
5281 if (oldStartBlock < allocLimit) {
5282 newBlockCount = allocLimit - oldStartBlock;
5283 error = hfs_split_extent(extent_info, newBlockCount);
5284 if (error == 0) {
5285 /* After successful split, the current extent does not
5286 * need relocation, so just return back.
5287 */
5288 goto out;
5289 }
5290 /* Ignore error and try relocating the entire extent instead */
5291 }
5292
5293 alloc_flags = HFS_ALLOC_FORCECONTIG | HFS_ALLOC_SKIPFREEBLKS;
5294 if (extent_info->is_sysfile) {
5295 alloc_flags |= HFS_ALLOC_METAZONE;
5296 }
5297
5298 error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount, alloc_flags,
5299 &newStartBlock, &newBlockCount);
5300 if ((extent_info->is_sysfile == false) &&
5301 ((error == dskFulErr) || (error == ENOSPC))) {
5302 /* For non-system files, try reallocating space in metadata zone */
5303 alloc_flags |= HFS_ALLOC_METAZONE;
5304 error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount,
5305 alloc_flags, &newStartBlock, &newBlockCount);
5306 }
5307 if ((error == dskFulErr) || (error == ENOSPC)) {
5308 /* We did not find desired contiguous space for this extent.
5309 * So try to allocate the maximum contiguous space available.
5310 */
5311 alloc_flags &= ~HFS_ALLOC_FORCECONTIG;
5312
5313 error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount,
5314 alloc_flags, &newStartBlock, &newBlockCount);
5315 if (error) {
5316 printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) BlockAllocate error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error);
5317 goto out;
5318 }
5319 blocks_allocated = true;
5320
5321 error = hfs_split_extent(extent_info, newBlockCount);
5322 if (error) {
5323 printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) split error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error);
5324 goto out;
5325 }
5326 oldBlockCount = newBlockCount;
5327 }
5328 if (error) {
5329 printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) contig BlockAllocate error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error);
5330 goto out;
5331 }
5332 blocks_allocated = true;
5333
5334 /* Copy data from old location to new location */
5335 error = hfs_copy_extent(hfsmp, extent_info->vp, oldStartBlock,
5336 newStartBlock, newBlockCount, context);
5337 if (error) {
5338 printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u)=>(%u,%u) hfs_copy_extent error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, newStartBlock, newBlockCount, error);
5339 goto out;
5340 }
5341
5342 /* Update the extent record with the new start block information */
5343 extent_info->extents[index].startBlock = newStartBlock;
5344
5345 /* Sync the content back to the disk */
5346 if (extent_info->catalog_fp) {
5347 /* Update the extents in catalog record */
5348 if (extent_info->is_dirlink) {
5349 error = cat_update_dirlink(hfsmp, extent_info->forkType,
5350 extent_info->dirlink_desc, extent_info->dirlink_attr,
5351 &(extent_info->dirlink_fork->ff_data));
5352 } else {
5353 cp->c_flag |= C_MODIFIED;
5354 /* If this is a system file, sync volume headers on disk */
5355 if (extent_info->is_sysfile) {
5356 error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
5357 }
5358 }
5359 } else {
5360 /* Replace record for extents overflow or extents-based xattrs */
5361 error = BTReplaceRecord(extent_info->fcb, extent_info->iterator,
5362 &(extent_info->btdata), extent_info->recordlen);
5363 }
5364 if (error) {
5365 printf ("hfs_reclaim_extent: fileID=%u, update record error=%u\n", extent_info->fileID, error);
5366 goto out;
5367 }
5368
5369 /* Deallocate the old extent */
5370 error = BlockDeallocate(hfsmp, oldStartBlock, oldBlockCount, HFS_ALLOC_SKIPFREEBLKS);
5371 if (error) {
5372 printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) BlockDeallocate error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error);
5373 goto out;
5374 }
5375 extent_info->blocks_relocated += newBlockCount;
5376
5377 if (hfs_resize_debug) {
5378 printf ("hfs_reclaim_extent: Relocated record:%u %u:(%u,%u) to (%u,%u)\n", extent_info->overflow_count, index, oldStartBlock, oldBlockCount, newStartBlock, newBlockCount);
5379 }
5380
5381 out:
5382 if (error != 0) {
5383 if (blocks_allocated == true) {
5384 BlockDeallocate(hfsmp, newStartBlock, newBlockCount, HFS_ALLOC_SKIPFREEBLKS);
5385 }
5386 } else {
5387 /* On success, increment the total allocation blocks processed */
5388 extent_info->cur_blockCount += newBlockCount;
5389 }
5390
5391 hfs_systemfile_unlock(hfsmp, extent_info->lockflags);
5392
5393 /* For a non-system file, if an extent entry from catalog record
5394 * was modified, sync the in-memory changes to the catalog record
5395 * on disk before ending the transaction.
5396 */
5397 if ((error == 0) &&
5398 (extent_info->overflow_count < kHFSPlusExtentDensity) &&
5399 (extent_info->is_sysfile == false)) {
5400 (void) hfs_update(extent_info->vp, MNT_WAIT);
5401 }
5402
5403 hfs_end_transaction(hfsmp);
5404
5405 return error;
5406 }
5407
5408 /* Report intermediate progress during volume resize */
5409 static void
5410 hfs_truncatefs_progress(struct hfsmount *hfsmp)
5411 {
5412 u_int32_t cur_progress;
5413
5414 hfs_resize_progress(hfsmp, &cur_progress);
5415 if (cur_progress > (hfsmp->hfs_resize_progress + 9)) {
5416 printf("hfs_truncatefs: %d%% done...\n", cur_progress);
5417 hfsmp->hfs_resize_progress = cur_progress;
5418 }
5419 return;
5420 }
5421
5422 /*
5423 * Reclaim space at the end of a volume for given file and forktype.
5424 *
5425 * This routine attempts to move any extent which contains allocation blocks
5426 * at or after "allocLimit." A separate transaction is used for every extent
5427 * that needs to be moved. If there is not contiguous space available for
5428 * moving an extent, it can be split into smaller extents. The contents of
5429 * any moved extents are read and written via the volume's device vnode --
5430 * NOT via "vp." During the move, moved blocks which are part of a transaction
5431 * have their physical block numbers invalidated so they will eventually be
5432 * written to their new locations.
5433 *
5434 * This function is also called for directory hard links. Directory hard links
5435 * are regular files with no data fork and resource fork that contains alias
5436 * information for backward compatibility with pre-Leopard systems. However
5437 * non-Mac OS X implementation can add/modify data fork or resource fork
5438 * information to directory hard links, so we check, and if required, relocate
5439 * both data fork and resource fork.
5440 *
5441 * Inputs:
5442 * hfsmp The volume being resized.
5443 * vp The vnode for the system file.
5444 * fileID ID of the catalog record that needs to be relocated
5445 * forktype The type of fork that needs relocated,
5446 * kHFSResourceForkType for resource fork,
5447 * kHFSDataForkType for data fork
5448 * allocLimit Allocation limit for the new volume size,
5449 * do not use this block or beyond. All extents
5450 * that use this block or any blocks beyond this limit
5451 * will be relocated.
5452 *
5453 * Side Effects:
5454 * hfsmp->hfs_resize_blocksmoved is incremented by the number of allocation
5455 * blocks that were relocated.
5456 */
5457 static int
5458 hfs_reclaim_file(struct hfsmount *hfsmp, struct vnode *vp, u_int32_t fileID,
5459 u_int8_t forktype, u_long allocLimit, vfs_context_t context)
5460 {
5461 int error = 0;
5462 struct hfs_reclaim_extent_info *extent_info;
5463 int i;
5464 int lockflags = 0;
5465 struct cnode *cp;
5466 struct filefork *fp;
5467 int took_truncate_lock = false;
5468 int release_desc = false;
5469 HFSPlusExtentKey *key;
5470
5471 /* If there is no vnode for this file, then there's nothing to do. */
5472 if (vp == NULL) {
5473 return 0;
5474 }
5475
5476 cp = VTOC(vp);
5477
5478 MALLOC(extent_info, struct hfs_reclaim_extent_info *,
5479 sizeof(struct hfs_reclaim_extent_info), M_TEMP, M_WAITOK);
5480 if (extent_info == NULL) {
5481 return ENOMEM;
5482 }
5483 bzero(extent_info, sizeof(struct hfs_reclaim_extent_info));
5484 extent_info->vp = vp;
5485 extent_info->fileID = fileID;
5486 extent_info->forkType = forktype;
5487 extent_info->is_sysfile = vnode_issystem(vp);
5488 if (vnode_isdir(vp) && (cp->c_flag & C_HARDLINK)) {
5489 extent_info->is_dirlink = true;
5490 }
5491 /* We always need allocation bitmap and extent btree lock */
5492 lockflags = SFL_BITMAP | SFL_EXTENTS;
5493 if ((fileID == kHFSCatalogFileID) || (extent_info->is_dirlink == true)) {
5494 lockflags |= SFL_CATALOG;
5495 } else if (fileID == kHFSAttributesFileID) {
5496 lockflags |= SFL_ATTRIBUTE;
5497 } else if (fileID == kHFSStartupFileID) {
5498 lockflags |= SFL_STARTUP;
5499 }
5500 extent_info->lockflags = lockflags;
5501 extent_info->fcb = VTOF(hfsmp->hfs_extents_vp);
5502
5503 /* Flush data associated with current file on disk.
5504 *
5505 * If the current vnode is directory hard link, no flushing of
5506 * journal or vnode is required. The current kernel does not
5507 * modify data/resource fork of directory hard links, so nothing
5508 * will be in the cache. If a directory hard link is newly created,
5509 * the resource fork data is written directly using devvp and
5510 * the code that actually relocates data (hfs_copy_extent()) also
5511 * uses devvp for its I/O --- so they will see a consistent copy.
5512 */
5513 if (extent_info->is_sysfile) {
5514 /* If the current vnode is system vnode, flush journal
5515 * to make sure that all data is written to the disk.
5516 */
5517 error = hfs_journal_flush(hfsmp, TRUE);
5518 if (error) {
5519 printf ("hfs_reclaim_file: journal_flush returned %d\n", error);
5520 goto out;
5521 }
5522 } else if (extent_info->is_dirlink == false) {
5523 /* Flush all blocks associated with this regular file vnode.
5524 * Normally there should not be buffer cache blocks for regular
5525 * files, but for objects like symlinks, we can have buffer cache
5526 * blocks associated with the vnode. Therefore we call
5527 * buf_flushdirtyblks() also.
5528 */
5529 buf_flushdirtyblks(vp, 0, BUF_SKIP_LOCKED, "hfs_reclaim_file");
5530
5531 hfs_unlock(cp);
5532 hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK);
5533 took_truncate_lock = true;
5534 (void) cluster_push(vp, 0);
5535 error = hfs_lock(cp, HFS_FORCE_LOCK);
5536 if (error) {
5537 goto out;
5538 }
5539
5540 /* If the file no longer exists, nothing left to do */
5541 if (cp->c_flag & C_NOEXISTS) {
5542 error = 0;
5543 goto out;
5544 }
5545
5546 /* Wait for any in-progress writes to this vnode to complete, so that we'll
5547 * be copying consistent bits. (Otherwise, it's possible that an async
5548 * write will complete to the old extent after we read from it. That
5549 * could lead to corruption.)
5550 */
5551 error = vnode_waitforwrites(vp, 0, 0, 0, "hfs_reclaim_file");
5552 if (error) {
5553 goto out;
5554 }
5555 }
5556
5557 if (hfs_resize_debug) {
5558 printf("hfs_reclaim_file: === Start reclaiming %sfork for %sid=%u ===\n", (forktype ? "rsrc" : "data"), (extent_info->is_dirlink ? "dirlink" : "file"), fileID);
5559 }
5560
5561 if (extent_info->is_dirlink) {
5562 MALLOC(extent_info->dirlink_desc, struct cat_desc *,
5563 sizeof(struct cat_desc), M_TEMP, M_WAITOK);
5564 MALLOC(extent_info->dirlink_attr, struct cat_attr *,
5565 sizeof(struct cat_attr), M_TEMP, M_WAITOK);
5566 MALLOC(extent_info->dirlink_fork, struct filefork *,
5567 sizeof(struct filefork), M_TEMP, M_WAITOK);
5568 if ((extent_info->dirlink_desc == NULL) ||
5569 (extent_info->dirlink_attr == NULL) ||
5570 (extent_info->dirlink_fork == NULL)) {
5571 error = ENOMEM;
5572 goto out;
5573 }
5574
5575 /* Lookup catalog record for directory hard link and
5576 * create a fake filefork for the value looked up from
5577 * the disk.
5578 */
5579 fp = extent_info->dirlink_fork;
5580 bzero(extent_info->dirlink_fork, sizeof(struct filefork));
5581 extent_info->dirlink_fork->ff_cp = cp;
5582 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
5583 error = cat_lookup_dirlink(hfsmp, fileID, forktype,
5584 extent_info->dirlink_desc, extent_info->dirlink_attr,
5585 &(extent_info->dirlink_fork->ff_data));
5586 hfs_systemfile_unlock(hfsmp, lockflags);
5587 if (error) {
5588 printf ("hfs_reclaim_file: cat_lookup_dirlink for fileID=%u returned error=%u\n", fileID, error);
5589 goto out;
5590 }
5591 release_desc = true;
5592 } else {
5593 fp = VTOF(vp);
5594 }
5595
5596 extent_info->catalog_fp = fp;
5597 extent_info->recStartBlock = 0;
5598 extent_info->extents = extent_info->catalog_fp->ff_extents;
5599 /* Relocate extents from the catalog record */
5600 for (i = 0; i < kHFSPlusExtentDensity; ++i) {
5601 if (fp->ff_extents[i].blockCount == 0) {
5602 break;
5603 }
5604 extent_info->extent_index = i;
5605 error = hfs_reclaim_extent(hfsmp, allocLimit, extent_info, context);
5606 if (error) {
5607 printf ("hfs_reclaim_file: fileID=%u #%d %u:(%u,%u) hfs_reclaim_extent error=%d\n", fileID, extent_info->overflow_count, i, fp->ff_extents[i].startBlock, fp->ff_extents[i].blockCount, error);
5608 goto out;
5609 }
5610 }
5611
5612 /* If the number of allocation blocks processed for reclaiming
5613 * are less than total number of blocks for the file, continuing
5614 * working on overflow extents record.
5615 */
5616 if (fp->ff_blocks <= extent_info->cur_blockCount) {
5617 if (0 && hfs_resize_debug) {
5618 printf ("hfs_reclaim_file: Nothing more to relocate, offset=%d, ff_blocks=%u, cur_blockCount=%u\n", i, fp->ff_blocks, extent_info->cur_blockCount);
5619 }
5620 goto out;
5621 }
5622
5623 if (hfs_resize_debug) {
5624 printf ("hfs_reclaim_file: Will check overflow records, offset=%d, ff_blocks=%u, cur_blockCount=%u\n", i, fp->ff_blocks, extent_info->cur_blockCount);
5625 }
5626
5627 MALLOC(extent_info->iterator, struct BTreeIterator *, sizeof(struct BTreeIterator), M_TEMP, M_WAITOK);
5628 if (extent_info->iterator == NULL) {
5629 error = ENOMEM;
5630 goto out;
5631 }
5632 bzero(extent_info->iterator, sizeof(struct BTreeIterator));
5633 key = (HFSPlusExtentKey *) &(extent_info->iterator->key);
5634 key->keyLength = kHFSPlusExtentKeyMaximumLength;
5635 key->forkType = forktype;
5636 key->fileID = fileID;
5637 key->startBlock = extent_info->cur_blockCount;
5638
5639 extent_info->btdata.bufferAddress = extent_info->record.overflow;
5640 extent_info->btdata.itemSize = sizeof(HFSPlusExtentRecord);
5641 extent_info->btdata.itemCount = 1;
5642
5643 extent_info->catalog_fp = NULL;
5644
5645 /* Search the first overflow extent with expected startBlock as 'cur_blockCount' */
5646 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
5647 error = BTSearchRecord(extent_info->fcb, extent_info->iterator,
5648 &(extent_info->btdata), &(extent_info->recordlen),
5649 extent_info->iterator);
5650 hfs_systemfile_unlock(hfsmp, lockflags);
5651 while (error == 0) {
5652 extent_info->overflow_count++;
5653 extent_info->recStartBlock = key->startBlock;
5654 extent_info->extents = extent_info->record.overflow;
5655 for (i = 0; i < kHFSPlusExtentDensity; i++) {
5656 if (extent_info->record.overflow[i].blockCount == 0) {
5657 goto out;
5658 }
5659 extent_info->extent_index = i;
5660 error = hfs_reclaim_extent(hfsmp, allocLimit, extent_info, context);
5661 if (error) {
5662 printf ("hfs_reclaim_file: fileID=%u #%d %u:(%u,%u) hfs_reclaim_extent error=%d\n", fileID, extent_info->overflow_count, i, extent_info->record.overflow[i].startBlock, extent_info->record.overflow[i].blockCount, error);
5663 goto out;
5664 }
5665 }
5666
5667 /* Look for more overflow records */
5668 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
5669 error = BTIterateRecord(extent_info->fcb, kBTreeNextRecord,
5670 extent_info->iterator, &(extent_info->btdata),
5671 &(extent_info->recordlen));
5672 hfs_systemfile_unlock(hfsmp, lockflags);
5673 if (error) {
5674 break;
5675 }
5676 /* Stop when we encounter a different file or fork. */
5677 if ((key->fileID != fileID) || (key->forkType != forktype)) {
5678 break;
5679 }
5680 }
5681 if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) {
5682 error = 0;
5683 }
5684
5685 out:
5686 /* If any blocks were relocated, account them and report progress */
5687 if (extent_info->blocks_relocated) {
5688 hfsmp->hfs_resize_blocksmoved += extent_info->blocks_relocated;
5689 hfs_truncatefs_progress(hfsmp);
5690 if (fileID < kHFSFirstUserCatalogNodeID) {
5691 printf ("hfs_reclaim_file: Relocated %u blocks from fileID=%u on \"%s\"\n",
5692 extent_info->blocks_relocated, fileID, hfsmp->vcbVN);
5693 }
5694 }
5695 if (extent_info->iterator) {
5696 FREE(extent_info->iterator, M_TEMP);
5697 }
5698 if (release_desc == true) {
5699 cat_releasedesc(extent_info->dirlink_desc);
5700 }
5701 if (extent_info->dirlink_desc) {
5702 FREE(extent_info->dirlink_desc, M_TEMP);
5703 }
5704 if (extent_info->dirlink_attr) {
5705 FREE(extent_info->dirlink_attr, M_TEMP);
5706 }
5707 if (extent_info->dirlink_fork) {
5708 FREE(extent_info->dirlink_fork, M_TEMP);
5709 }
5710 if ((extent_info->blocks_relocated != 0) && (extent_info->is_sysfile == false)) {
5711 (void) hfs_update(vp, MNT_WAIT);
5712 }
5713 if (took_truncate_lock) {
5714 hfs_unlock_truncate(cp, 0);
5715 }
5716 if (extent_info) {
5717 FREE(extent_info, M_TEMP);
5718 }
5719 if (hfs_resize_debug) {
5720 printf("hfs_reclaim_file: === Finished relocating %sfork for fileid=%u (error=%d) ===\n", (forktype ? "rsrc" : "data"), fileID, error);
5721 }
5722
5723 return error;
5724 }
5725
5726
5727 /*
5728 * This journal_relocate callback updates the journal info block to point
5729 * at the new journal location. This write must NOT be done using the
5730 * transaction. We must write the block immediately. We must also force
5731 * it to get to the media so that the new journal location will be seen by
5732 * the replay code before we can safely let journaled blocks be written
5733 * to their normal locations.
5734 *
5735 * The tests for journal_uses_fua below are mildly hacky. Since the journal
5736 * and the file system are both on the same device, I'm leveraging what
5737 * the journal has decided about FUA.
5738 */
struct hfs_journal_relocate_args {
	struct hfsmount *hfsmp;		/* volume whose journal is being moved */
	vfs_context_t context;		/* caller's context (supplies credentials for the JIB read) */
	u_int32_t newStartBlock;	/* new journal location, in allocation blocks */
};
5744
static errno_t
hfs_journal_relocate_callback(void *_args)
{
	int error;
	struct hfs_journal_relocate_args *args = _args;
	struct hfsmount *hfsmp = args->hfsmp;
	buf_t bp;
	JournalInfoBlock *jibp;

	/*
	 * Read the journal info block directly from the device.
	 * vcbJinfoBlock is in allocation blocks; convert to logical
	 * (device) blocks for the buffer cache.
	 */
	error = buf_meta_bread(hfsmp->hfs_devvp,
		hfsmp->vcbJinfoBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size),
		hfsmp->blockSize, vfs_context_ucred(args->context), &bp);
	if (error) {
		printf("hfs_reclaim_journal_file: failed to read JIB (%d)\n", error);
		if (bp) {
			buf_brelse(bp);
		}
		return error;
	}
	/* Point the JIB at the journal's new location; on-disk fields are big-endian byte offsets/sizes. */
	jibp = (JournalInfoBlock*) buf_dataptr(bp);
	jibp->offset = SWAP_BE64((u_int64_t)args->newStartBlock * hfsmp->blockSize);
	jibp->size = SWAP_BE64(hfsmp->jnl_size);
	if (journal_uses_fua(hfsmp->jnl))
		buf_markfua(bp);
	/* Synchronous write; per the contract above, this must NOT go through the transaction. */
	error = buf_bwrite(bp);
	if (error) {
		printf("hfs_reclaim_journal_file: failed to write JIB (%d)\n", error);
		return error;
	}
	/*
	 * If the device didn't take a FUA write, force the cache to the media
	 * so journal replay will see the new journal location before any
	 * journaled blocks are written to their normal locations.
	 */
	if (!journal_uses_fua(hfsmp->jnl)) {
		error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, args->context);
		if (error) {
			printf("hfs_reclaim_journal_file: DKIOCSYNCHRONIZECACHE failed (%d)\n", error);
			error = 0; /* Don't fail the operation. */
		}
	}

	return error;
}
5784
5785
5786 static int
5787 hfs_reclaim_journal_file(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context)
5788 {
5789 int error;
5790 int journal_err;
5791 int lockflags;
5792 u_int32_t oldStartBlock;
5793 u_int32_t newStartBlock;
5794 u_int32_t oldBlockCount;
5795 u_int32_t newBlockCount;
5796 struct cat_desc journal_desc;
5797 struct cat_attr journal_attr;
5798 struct cat_fork journal_fork;
5799 struct hfs_journal_relocate_args callback_args;
5800
5801 if (hfsmp->jnl_start + (hfsmp->jnl_size / hfsmp->blockSize) <= allocLimit) {
5802 /* The journal does not require relocation */
5803 return 0;
5804 }
5805
5806 error = hfs_start_transaction(hfsmp);
5807 if (error) {
5808 printf("hfs_reclaim_journal_file: hfs_start_transaction returned %d\n", error);
5809 return error;
5810 }
5811 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
5812
5813 oldBlockCount = hfsmp->jnl_size / hfsmp->blockSize;
5814
5815 /* TODO: Allow the journal to change size based on the new volume size. */
5816 error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount,
5817 HFS_ALLOC_METAZONE | HFS_ALLOC_FORCECONTIG | HFS_ALLOC_SKIPFREEBLKS,
5818 &newStartBlock, &newBlockCount);
5819 if (error) {
5820 printf("hfs_reclaim_journal_file: BlockAllocate returned %d\n", error);
5821 goto fail;
5822 }
5823 if (newBlockCount != oldBlockCount) {
5824 printf("hfs_reclaim_journal_file: newBlockCount != oldBlockCount (%u, %u)\n", newBlockCount, oldBlockCount);
5825 goto free_fail;
5826 }
5827
5828 error = BlockDeallocate(hfsmp, hfsmp->jnl_start, oldBlockCount, HFS_ALLOC_SKIPFREEBLKS);
5829 if (error) {
5830 printf("hfs_reclaim_journal_file: BlockDeallocate returned %d\n", error);
5831 goto free_fail;
5832 }
5833
5834 /* Update the catalog record for .journal */
5835 error = cat_idlookup(hfsmp, hfsmp->hfs_jnlfileid, 1, &journal_desc, &journal_attr, &journal_fork);
5836 if (error) {
5837 printf("hfs_reclaim_journal_file: cat_idlookup returned %d\n", error);
5838 goto free_fail;
5839 }
5840 oldStartBlock = journal_fork.cf_extents[0].startBlock;
5841 journal_fork.cf_size = newBlockCount * hfsmp->blockSize;
5842 journal_fork.cf_extents[0].startBlock = newStartBlock;
5843 journal_fork.cf_extents[0].blockCount = newBlockCount;
5844 journal_fork.cf_blocks = newBlockCount;
5845 error = cat_update(hfsmp, &journal_desc, &journal_attr, &journal_fork, NULL);
5846 cat_releasedesc(&journal_desc); /* all done with cat descriptor */
5847 if (error) {
5848 printf("hfs_reclaim_journal_file: cat_update returned %d\n", error);
5849 goto free_fail;
5850 }
5851 callback_args.hfsmp = hfsmp;
5852 callback_args.context = context;
5853 callback_args.newStartBlock = newStartBlock;
5854
5855 error = journal_relocate(hfsmp->jnl, (off_t)newStartBlock*hfsmp->blockSize,
5856 (off_t)newBlockCount*hfsmp->blockSize, 0,
5857 hfs_journal_relocate_callback, &callback_args);
5858 if (error) {
5859 /* NOTE: journal_relocate will mark the journal invalid. */
5860 printf("hfs_reclaim_journal_file: journal_relocate returned %d\n", error);
5861 goto fail;
5862 }
5863 hfsmp->jnl_start = newStartBlock;
5864 hfsmp->jnl_size = (off_t)newBlockCount * hfsmp->blockSize;
5865
5866 hfs_systemfile_unlock(hfsmp, lockflags);
5867 error = hfs_end_transaction(hfsmp);
5868 if (error) {
5869 printf("hfs_reclaim_journal_file: hfs_end_transaction returned %d\n", error);
5870 }
5871
5872 /* Account for the blocks relocated and print progress */
5873 hfsmp->hfs_resize_blocksmoved += oldBlockCount;
5874 hfs_truncatefs_progress(hfsmp);
5875 if (!error) {
5876 printf ("hfs_reclaim_journal_file: Relocated %u blocks from journal on \"%s\"\n",
5877 oldBlockCount, hfsmp->vcbVN);
5878 if (hfs_resize_debug) {
5879 printf ("hfs_reclaim_journal_file: Successfully relocated journal from (%u,%u) to (%u,%u)\n", oldStartBlock, oldBlockCount, newStartBlock, newBlockCount);
5880 }
5881 }
5882 return error;
5883
5884 free_fail:
5885 journal_err = BlockDeallocate(hfsmp, newStartBlock, newBlockCount, HFS_ALLOC_SKIPFREEBLKS);
5886 if (journal_err) {
5887 printf("hfs_reclaim_journal_file: BlockDeallocate returned %d\n", error);
5888 hfs_mark_volume_inconsistent(hfsmp);
5889 }
5890 fail:
5891 hfs_systemfile_unlock(hfsmp, lockflags);
5892 (void) hfs_end_transaction(hfsmp);
5893 if (hfs_resize_debug) {
5894 printf ("hfs_reclaim_journal_file: Error relocating journal file (error=%d)\n", error);
5895 }
5896 return error;
5897 }
5898
5899
5900 /*
5901 * Move the journal info block to a new location. We have to make sure the
5902 * new copy of the journal info block gets to the media first, then change
5903 * the field in the volume header and the catalog record.
5904 */
5905 static int
5906 hfs_reclaim_journal_info_block(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context)
5907 {
5908 int error;
5909 int journal_err;
5910 int lockflags;
5911 u_int32_t oldBlock;
5912 u_int32_t newBlock;
5913 u_int32_t blockCount;
5914 struct cat_desc jib_desc;
5915 struct cat_attr jib_attr;
5916 struct cat_fork jib_fork;
5917 buf_t old_bp, new_bp;
5918
5919 if (hfsmp->vcbJinfoBlock <= allocLimit) {
5920 /* The journal info block does not require relocation */
5921 return 0;
5922 }
5923
5924 error = hfs_start_transaction(hfsmp);
5925 if (error) {
5926 printf("hfs_reclaim_journal_info_block: hfs_start_transaction returned %d\n", error);
5927 return error;
5928 }
5929 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
5930
5931 error = BlockAllocate(hfsmp, 1, 1, 1,
5932 HFS_ALLOC_METAZONE | HFS_ALLOC_FORCECONTIG | HFS_ALLOC_SKIPFREEBLKS,
5933 &newBlock, &blockCount);
5934 if (error) {
5935 printf("hfs_reclaim_journal_info_block: BlockAllocate returned %d\n", error);
5936 goto fail;
5937 }
5938 if (blockCount != 1) {
5939 printf("hfs_reclaim_journal_info_block: blockCount != 1 (%u)\n", blockCount);
5940 goto free_fail;
5941 }
5942 error = BlockDeallocate(hfsmp, hfsmp->vcbJinfoBlock, 1, HFS_ALLOC_SKIPFREEBLKS);
5943 if (error) {
5944 printf("hfs_reclaim_journal_info_block: BlockDeallocate returned %d\n", error);
5945 goto free_fail;
5946 }
5947
5948 /* Copy the old journal info block content to the new location */
5949 error = buf_meta_bread(hfsmp->hfs_devvp,
5950 hfsmp->vcbJinfoBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size),
5951 hfsmp->blockSize, vfs_context_ucred(context), &old_bp);
5952 if (error) {
5953 printf("hfs_reclaim_journal_info_block: failed to read JIB (%d)\n", error);
5954 if (old_bp) {
5955 buf_brelse(old_bp);
5956 }
5957 goto free_fail;
5958 }
5959 new_bp = buf_getblk(hfsmp->hfs_devvp,
5960 newBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size),
5961 hfsmp->blockSize, 0, 0, BLK_META);
5962 bcopy((char*)buf_dataptr(old_bp), (char*)buf_dataptr(new_bp), hfsmp->blockSize);
5963 buf_brelse(old_bp);
5964 if (journal_uses_fua(hfsmp->jnl))
5965 buf_markfua(new_bp);
5966 error = buf_bwrite(new_bp);
5967 if (error) {
5968 printf("hfs_reclaim_journal_info_block: failed to write new JIB (%d)\n", error);
5969 goto free_fail;
5970 }
5971 if (!journal_uses_fua(hfsmp->jnl)) {
5972 error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
5973 if (error) {
5974 printf("hfs_reclaim_journal_info_block: DKIOCSYNCHRONIZECACHE failed (%d)\n", error);
5975 /* Don't fail the operation. */
5976 }
5977 }
5978
5979 /* Update the catalog record for .journal_info_block */
5980 error = cat_idlookup(hfsmp, hfsmp->hfs_jnlinfoblkid, 1, &jib_desc, &jib_attr, &jib_fork);
5981 if (error) {
5982 printf("hfs_reclaim_journal_file: cat_idlookup returned %d\n", error);
5983 goto fail;
5984 }
5985 oldBlock = jib_fork.cf_extents[0].startBlock;
5986 jib_fork.cf_size = hfsmp->blockSize;
5987 jib_fork.cf_extents[0].startBlock = newBlock;
5988 jib_fork.cf_extents[0].blockCount = 1;
5989 jib_fork.cf_blocks = 1;
5990 error = cat_update(hfsmp, &jib_desc, &jib_attr, &jib_fork, NULL);
5991 cat_releasedesc(&jib_desc); /* all done with cat descriptor */
5992 if (error) {
5993 printf("hfs_reclaim_journal_info_block: cat_update returned %d\n", error);
5994 goto fail;
5995 }
5996
5997 /* Update the pointer to the journal info block in the volume header. */
5998 hfsmp->vcbJinfoBlock = newBlock;
5999 error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
6000 if (error) {
6001 printf("hfs_reclaim_journal_info_block: hfs_flushvolumeheader returned %d\n", error);
6002 goto fail;
6003 }
6004 hfs_systemfile_unlock(hfsmp, lockflags);
6005 error = hfs_end_transaction(hfsmp);
6006 if (error) {
6007 printf("hfs_reclaim_journal_info_block: hfs_end_transaction returned %d\n", error);
6008 }
6009 error = hfs_journal_flush(hfsmp, FALSE);
6010 if (error) {
6011 printf("hfs_reclaim_journal_info_block: journal_flush returned %d\n", error);
6012 }
6013
6014 /* Account for the block relocated and print progress */
6015 hfsmp->hfs_resize_blocksmoved += 1;
6016 hfs_truncatefs_progress(hfsmp);
6017 if (!error) {
6018 printf ("hfs_reclaim_journal_info: Relocated 1 block from journal info on \"%s\"\n",
6019 hfsmp->vcbVN);
6020 if (hfs_resize_debug) {
6021 printf ("hfs_reclaim_journal_info_block: Successfully relocated journal info block from (%u,%u) to (%u,%u)\n", oldBlock, blockCount, newBlock, blockCount);
6022 }
6023 }
6024 return error;
6025
6026 free_fail:
6027 journal_err = BlockDeallocate(hfsmp, newBlock, blockCount, HFS_ALLOC_SKIPFREEBLKS);
6028 if (journal_err) {
6029 printf("hfs_reclaim_journal_info_block: BlockDeallocate returned %d\n", error);
6030 hfs_mark_volume_inconsistent(hfsmp);
6031 }
6032
6033 fail:
6034 hfs_systemfile_unlock(hfsmp, lockflags);
6035 (void) hfs_end_transaction(hfsmp);
6036 if (hfs_resize_debug) {
6037 printf ("hfs_reclaim_journal_info_block: Error relocating journal info block (error=%d)\n", error);
6038 }
6039 return error;
6040 }
6041
6042
6043 /*
6044 * This function traverses through all extended attribute records for a given
6045 * fileID, and calls function that reclaims data blocks that exist in the
6046 * area of the disk being reclaimed which in turn is responsible for allocating
6047 * new space, copying extent data, deallocating new space, and if required,
6048 * splitting the extent.
6049 *
6050 * Note: The caller has already acquired the cnode lock on the file. Therefore
6051 * we are assured that no other thread would be creating/deleting/modifying
6052 * extended attributes for this file.
6053 *
6054 * Side Effects:
6055 * hfsmp->hfs_resize_blocksmoved is incremented by the number of allocation
6056 * blocks that were relocated.
6057 *
6058 * Returns:
6059 * 0 on success, non-zero on failure.
6060 */
static int
hfs_reclaim_xattr(struct hfsmount *hfsmp, struct vnode *vp, u_int32_t fileID, u_int32_t allocLimit, vfs_context_t context)
{
	int error = 0;
	struct hfs_reclaim_extent_info *extent_info;
	int i;
	HFSPlusAttrKey *key;
	int *lockflags;		/* alias for extent_info->lockflags, updated on each lock/unlock */

	if (hfs_resize_debug) {
		printf("hfs_reclaim_xattr: === Start reclaiming xattr for id=%u ===\n", fileID);
	}

	/* State shared with hfs_reclaim_extent(); zeroed so the out: path is safe early. */
	MALLOC(extent_info, struct hfs_reclaim_extent_info *,
	       sizeof(struct hfs_reclaim_extent_info), M_TEMP, M_WAITOK);
	if (extent_info == NULL) {
		return ENOMEM;
	}
	bzero(extent_info, sizeof(struct hfs_reclaim_extent_info));
	extent_info->vp = vp;
	extent_info->fileID = fileID;
	extent_info->is_xattr = true;
	extent_info->is_sysfile = vnode_issystem(vp);
	extent_info->fcb = VTOF(hfsmp->hfs_attribute_vp);
	lockflags = &(extent_info->lockflags);
	*lockflags = SFL_ATTRIBUTE | SFL_BITMAP;

	/* Initialize iterator from the extent_info structure */
	MALLOC(extent_info->iterator, struct BTreeIterator *,
	       sizeof(struct BTreeIterator), M_TEMP, M_WAITOK);
	if (extent_info->iterator == NULL) {
		error = ENOMEM;
		goto out;
	}
	bzero(extent_info->iterator, sizeof(struct BTreeIterator));

	/* Build attribute key */
	key = (HFSPlusAttrKey *)&(extent_info->iterator->key);
	error = hfs_buildattrkey(fileID, NULL, key);
	if (error) {
		goto out;
	}

	/* Initialize btdata from extent_info structure.  Note that the
	 * buffer pointer actually points to the xattr record from the
	 * extent_info structure itself.
	 */
	extent_info->btdata.bufferAddress = &(extent_info->record.xattr);
	extent_info->btdata.itemSize = sizeof(HFSPlusAttrRecord);
	extent_info->btdata.itemCount = 1;

	/*
	 * Sync all extent-based attribute data to the disk.
	 *
	 * All extent-based attribute data I/O is performed via cluster
	 * I/O using a virtual file that spans across entire file system
	 * space.
	 */
	hfs_lock_truncate(VTOC(hfsmp->hfs_attrdata_vp), HFS_EXCLUSIVE_LOCK);
	(void)cluster_push(hfsmp->hfs_attrdata_vp, 0);
	error = vnode_waitforwrites(hfsmp->hfs_attrdata_vp, 0, 0, 0, "hfs_reclaim_xattr");
	hfs_unlock_truncate(VTOC(hfsmp->hfs_attrdata_vp), 0);
	if (error) {
		goto out;
	}

	/* Search for extended attribute for current file.  This
	 * will place the iterator before the first matching record.
	 */
	*lockflags = hfs_systemfile_lock(hfsmp, *lockflags, HFS_EXCLUSIVE_LOCK);
	error = BTSearchRecord(extent_info->fcb, extent_info->iterator,
			&(extent_info->btdata), &(extent_info->recordlen),
			extent_info->iterator);
	hfs_systemfile_unlock(hfsmp, *lockflags);
	if (error) {
		if (error != btNotFound) {
			goto out;
		}
		/* btNotFound is expected here, so just mask it */
		error = 0;
	}

	while (1) {
		/* Iterate to the next record */
		*lockflags = hfs_systemfile_lock(hfsmp, *lockflags, HFS_EXCLUSIVE_LOCK);
		error = BTIterateRecord(extent_info->fcb, kBTreeNextRecord,
				extent_info->iterator, &(extent_info->btdata),
				&(extent_info->recordlen));
		hfs_systemfile_unlock(hfsmp, *lockflags);

		/* Stop the iteration if we encounter end of btree or xattr with different fileID */
		if (error || key->fileID != fileID) {
			if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) {
				error = 0;
			}
			break;
		}

		/* We only care about extent-based EAs */
		if ((extent_info->record.xattr.recordType != kHFSPlusAttrForkData) &&
		    (extent_info->record.xattr.recordType != kHFSPlusAttrExtents)) {
			continue;
		}

		/* Point extent_info at the extents embedded in the current record;
		 * overflow_count distinguishes the base fork record from its
		 * overflow extent records in debug output.
		 */
		if (extent_info->record.xattr.recordType == kHFSPlusAttrForkData) {
			extent_info->overflow_count = 0;
			extent_info->extents = extent_info->record.xattr.forkData.theFork.extents;
		} else if (extent_info->record.xattr.recordType == kHFSPlusAttrExtents) {
			extent_info->overflow_count++;
			extent_info->extents = extent_info->record.xattr.overflowExtents.extents;
		}

		extent_info->recStartBlock = key->startBlock;
		/* Relocate each in-use extent of this record; a zero blockCount ends the list. */
		for (i = 0; i < kHFSPlusExtentDensity; i++) {
			if (extent_info->extents[i].blockCount == 0) {
				break;
			}
			extent_info->extent_index = i;
			error = hfs_reclaim_extent(hfsmp, allocLimit, extent_info, context);
			if (error) {
				printf ("hfs_reclaim_xattr: fileID=%u hfs_reclaim_extent error=%d\n", fileID, error);
				goto out;
			}
		}
	}

out:
	/* If any blocks were relocated, account them and report progress */
	if (extent_info->blocks_relocated) {
		hfsmp->hfs_resize_blocksmoved += extent_info->blocks_relocated;
		hfs_truncatefs_progress(hfsmp);
	}
	if (extent_info->iterator) {
		FREE(extent_info->iterator, M_TEMP);
	}
	if (extent_info) {
		FREE(extent_info, M_TEMP);
	}
	if (hfs_resize_debug) {
		printf("hfs_reclaim_xattr: === Finished relocating xattr for fileid=%u (error=%d) ===\n", fileID, error);
	}
	return error;
}
6204
6205 /*
6206 * Reclaim any extent-based extended attributes allocation blocks from
6207 * the area of the disk that is being truncated.
6208 *
6209 * The function traverses the attribute btree to find out the fileIDs
6210 * of the extended attributes that need to be relocated. For every
6211 * file whose large EA requires relocation, it looks up the cnode and
6212 * calls hfs_reclaim_xattr() to do all the work for allocating
6213 * new space, copying data, deallocating old space, and if required,
6214 * splitting the extents.
6215 *
6216 * Inputs:
6217 * allocLimit - starting block of the area being reclaimed
6218 *
6219 * Returns:
6220 * returns 0 on success, non-zero on failure.
6221 */
static int
hfs_reclaim_xattrspace(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context)
{
	int error = 0;
	FCB *fcb;
	struct BTreeIterator *iterator = NULL;
	struct FSBufferDescriptor btdata;
	HFSPlusAttrKey *key;
	HFSPlusAttrRecord rec;
	int lockflags = 0;
	cnid_t prev_fileid = 0;		/* last fileID already processed, to skip duplicates */
	struct vnode *vp;
	int need_relocate;
	int btree_operation;
	u_int32_t files_moved = 0;
	u_int32_t prev_blocksmoved;
	int i;

	fcb = VTOF(hfsmp->hfs_attribute_vp);
	/* Store the value to print total blocks moved by this function in end */
	prev_blocksmoved = hfsmp->hfs_resize_blocksmoved;

	if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) {
		return ENOMEM;
	}
	bzero(iterator, sizeof(*iterator));
	key = (HFSPlusAttrKey *)&iterator->key;
	btdata.bufferAddress = &rec;
	btdata.itemSize = sizeof(rec);
	btdata.itemCount = 1;

	need_relocate = false;
	btree_operation = kBTreeFirstRecord;
	/* Traverse the attribute btree to find extent-based EAs to reclaim.
	 * The attribute btree lock is dropped between iterations so that
	 * hfs_vget/hfs_reclaim_xattr below can take locks in the proper order.
	 */
	while (1) {
		lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE, HFS_SHARED_LOCK);
		error = BTIterateRecord(fcb, btree_operation, iterator, &btdata, NULL);
		hfs_systemfile_unlock(hfsmp, lockflags);
		if (error) {
			/* Normal end of the btree walk is reported as "no more records". */
			if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) {
				error = 0;
			}
			break;
		}
		btree_operation = kBTreeNextRecord;

		/* If the extents of current fileID were already relocated, skip it */
		if (prev_fileid == key->fileID) {
			continue;
		}

		/* Check if any of the extents in the current record need to be relocated.
		 * An extent needs relocation if it extends past allocLimit.
		 */
		need_relocate = false;
		switch(rec.recordType) {
		case kHFSPlusAttrForkData:
			for (i = 0; i < kHFSPlusExtentDensity; i++) {
				if (rec.forkData.theFork.extents[i].blockCount == 0) {
					break;
				}
				if ((rec.forkData.theFork.extents[i].startBlock +
				     rec.forkData.theFork.extents[i].blockCount) > allocLimit) {
					need_relocate = true;
					break;
				}
			}
			break;

		case kHFSPlusAttrExtents:
			for (i = 0; i < kHFSPlusExtentDensity; i++) {
				if (rec.overflowExtents.extents[i].blockCount == 0) {
					break;
				}
				if ((rec.overflowExtents.extents[i].startBlock +
				     rec.overflowExtents.extents[i].blockCount) > allocLimit) {
					need_relocate = true;
					break;
				}
			}
			break;
		};

		/* Continue iterating to next attribute record */
		if (need_relocate == false) {
			continue;
		}

		/* Look up the vnode for corresponding file.  The cnode
		 * will be locked which will ensure that no one modifies
		 * the xattrs when we are relocating them.
		 *
		 * We want to allow open-unlinked files to be moved,
		 * so provide allow_deleted == 1 for hfs_vget().
		 */
		if (hfs_vget(hfsmp, key->fileID, &vp, 0, 1) != 0) {
			continue;
		}

		error = hfs_reclaim_xattr(hfsmp, vp, key->fileID, allocLimit, context);
		hfs_unlock(VTOC(vp));
		vnode_put(vp);
		if (error) {
			printf ("hfs_reclaim_xattrspace: Error relocating xattrs for fileid=%u (error=%d)\n", key->fileID, error);
			break;
		}
		/* hfs_reclaim_xattr handled every extent record of this file at once. */
		prev_fileid = key->fileID;
		files_moved++;
	}

	if (files_moved) {
		printf("hfs_reclaim_xattrspace: Relocated %u xattr blocks from %u files on \"%s\"\n",
				(hfsmp->hfs_resize_blocksmoved - prev_blocksmoved),
				files_moved, hfsmp->vcbVN);
	}

	kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator));
	return error;
}
6339
6340 /*
6341 * Reclaim blocks from regular files.
6342 *
6343 * This function iterates over all the record in catalog btree looking
6344 * for files with extents that overlap into the space we're trying to
6345 * free up. If a file extent requires relocation, it looks up the vnode
6346 * and calls function to relocate the data.
6347 *
6348 * Returns:
6349 * Zero on success, non-zero on failure.
6350 */
static int
hfs_reclaim_filespace(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context)
{
	int error;
	FCB *fcb;
	struct BTreeIterator *iterator = NULL;
	struct FSBufferDescriptor btdata;
	int btree_operation;
	int lockflags;
	struct HFSPlusCatalogFile filerec;
	struct vnode *vp;
	struct vnode *rvp;
	struct filefork *datafork;
	u_int32_t files_moved = 0;
	u_int32_t prev_blocksmoved;

	fcb = VTOF(hfsmp->hfs_catalog_vp);
	/* Store the value to print total blocks moved by this function at the end */
	prev_blocksmoved = hfsmp->hfs_resize_blocksmoved;

	if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) {
		return ENOMEM;
	}
	bzero(iterator, sizeof(*iterator));

	btdata.bufferAddress = &filerec;
	btdata.itemSize = sizeof(filerec);
	btdata.itemCount = 1;

	btree_operation = kBTreeFirstRecord;
	/* Walk every catalog record; the catalog lock is released between
	 * iterations so hfs_vget/hfs_reclaim_file can lock in proper order.
	 */
	while (1) {
		lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
		error = BTIterateRecord(fcb, btree_operation, iterator, &btdata, NULL);
		hfs_systemfile_unlock(hfsmp, lockflags);
		if (error) {
			/* Normal end of the btree walk is reported as "no more records". */
			if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) {
				error = 0;
			}
			break;
		}
		btree_operation = kBTreeNextRecord;

		if (filerec.recordType != kHFSPlusFileRecord) {
			continue;
		}

		/* Check if any of the extents require relocation */
		if (hfs_file_extent_overlaps(hfsmp, allocLimit, &filerec) == false) {
			continue;
		}

		/* We want to allow open-unlinked files to be moved, so allow_deleted == 1 */
		if (hfs_vget(hfsmp, filerec.fileID, &vp, 0, 1) != 0) {
			continue;
		}

		/* If data fork exists or item is a directory hard link, relocate blocks */
		datafork = VTOF(vp);
		if ((datafork && datafork->ff_blocks > 0) || vnode_isdir(vp)) {
			error = hfs_reclaim_file(hfsmp, vp, filerec.fileID,
					kHFSDataForkType, allocLimit, context);
			if (error) {
				printf ("hfs_reclaimspace: Error reclaiming datafork blocks of fileid=%u (error=%d)\n", filerec.fileID, error);
				hfs_unlock(VTOC(vp));
				vnode_put(vp);
				break;
			}
		}

		/* If resource fork exists or item is a directory hard link, relocate blocks.
		 * c_blocks minus the data fork's blocks is the resource fork's block count.
		 */
		if (((VTOC(vp)->c_blocks - (datafork ? datafork->ff_blocks : 0)) > 0) || vnode_isdir(vp)) {
			if (vnode_isdir(vp)) {
				/* Resource fork vnode lookup is invalid for directory hard link.
				 * So we fake data fork vnode as resource fork vnode.
				 */
				rvp = vp;
			} else {
				error = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE, FALSE);
				if (error) {
					printf ("hfs_reclaimspace: Error looking up rvp for fileid=%u (error=%d)\n", filerec.fileID, error);
					hfs_unlock(VTOC(vp));
					vnode_put(vp);
					break;
				}
				/* NOTE(review): C_NEED_RVNODE_PUT appears to defer the rvp
				 * reference drop to cnode reclaim — confirm against hfs_cnode.c.
				 */
				VTOC(rvp)->c_flag |= C_NEED_RVNODE_PUT;
			}

			error = hfs_reclaim_file(hfsmp, rvp, filerec.fileID,
					kHFSResourceForkType, allocLimit, context);
			if (error) {
				printf ("hfs_reclaimspace: Error reclaiming rsrcfork blocks of fileid=%u (error=%d)\n", filerec.fileID, error);
				hfs_unlock(VTOC(vp));
				vnode_put(vp);
				break;
			}
		}

		/* The file forks were relocated successfully, now drop the
		 * cnode lock and vnode reference, and continue iterating to
		 * next catalog record.
		 */
		hfs_unlock(VTOC(vp));
		vnode_put(vp);
		files_moved++;
	}

	if (files_moved) {
		printf("hfs_reclaim_filespace: Relocated %u blocks from %u files on \"%s\"\n",
				(hfsmp->hfs_resize_blocksmoved - prev_blocksmoved),
				files_moved, hfsmp->vcbVN);
	}

	kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator));
	return error;
}
6466
/*
 * Reclaim space at the end of a file system.
 *
 * Part of shrinking an HFS+ volume: every allocation block at or beyond
 * allocLimit must be vacated before the volume size can be reduced.
 * The steps below run in a deliberate order: system files first, then a
 * journal flush, then the journal itself, then user file and extended
 * attribute extents.  Do not reorder them.
 *
 * Inputs -
 * 	allocLimit 	- start block of the space being reclaimed
 * 	reclaimblks	- number of allocation blocks to reclaim
 *
 * Returns 0 on success, or the first non-zero error from any step
 * (each failure is also logged via printf before returning).
 */
static int
hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t allocLimit, u_int32_t reclaimblks, vfs_context_t context)
{
	int error = 0;

	/*
	 * Preflight the bitmap to find out total number of blocks that need
	 * relocation.  The count is stored in hfsmp->hfs_resize_totalblocks,
	 * which hfs_resize_progress() later uses as the denominator for the
	 * percent-complete calculation.
	 *
	 * Note: Since allocLimit is set to the location of new alternate volume
	 * header, the check below does not account for blocks allocated for old
	 * alternate volume header.
	 */
	error = hfs_count_allocated(hfsmp, allocLimit, reclaimblks, &(hfsmp->hfs_resize_totalblocks));
	if (error) {
		printf ("hfs_reclaimspace: Unable to determine total blocks to reclaim error=%d\n", error);
		return error;
	}
	if (hfs_resize_debug) {
		printf ("hfs_reclaimspace: Total number of blocks to reclaim = %u\n", hfsmp->hfs_resize_totalblocks);
	}

	/* Relocate extents of the Allocation file if they're in the way. */
	error = hfs_reclaim_file(hfsmp, hfsmp->hfs_allocation_vp, kHFSAllocationFileID,
			kHFSDataForkType, allocLimit, context);
	if (error) {
		printf("hfs_reclaimspace: reclaim allocation file returned %d\n", error);
		return error;
	}

	/* Relocate extents of the Extents B-tree if they're in the way. */
	error = hfs_reclaim_file(hfsmp, hfsmp->hfs_extents_vp, kHFSExtentsFileID,
			kHFSDataForkType, allocLimit, context);
	if (error) {
		printf("hfs_reclaimspace: reclaim extents b-tree returned %d\n", error);
		return error;
	}

	/* Relocate extents of the Catalog B-tree if they're in the way. */
	error = hfs_reclaim_file(hfsmp, hfsmp->hfs_catalog_vp, kHFSCatalogFileID,
			kHFSDataForkType, allocLimit, context);
	if (error) {
		printf("hfs_reclaimspace: reclaim catalog b-tree returned %d\n", error);
		return error;
	}

	/* Relocate extents of the Attributes B-tree if they're in the way. */
	error = hfs_reclaim_file(hfsmp, hfsmp->hfs_attribute_vp, kHFSAttributesFileID,
			kHFSDataForkType, allocLimit, context);
	if (error) {
		printf("hfs_reclaimspace: reclaim attribute b-tree returned %d\n", error);
		return error;
	}

	/* Relocate extents of the Startup File if there is one and they're in the way. */
	error = hfs_reclaim_file(hfsmp, hfsmp->hfs_startup_vp, kHFSStartupFileID,
			kHFSDataForkType, allocLimit, context);
	if (error) {
		printf("hfs_reclaimspace: reclaim startup file returned %d\n", error);
		return error;
	}

	/*
	 * We need to make sure the alternate volume header gets flushed if we moved
	 * any extents in the volume header. But we need to do that before
	 * shrinking the size of the volume, or else the journal code will panic
	 * with an invalid (too large) block number.
	 *
	 * Note that blks_moved will be set if ANY extent was moved, even
	 * if it was just an overflow extent. In this case, the journal_flush isn't
	 * strictly required, but shouldn't hurt.
	 */
	if (hfsmp->hfs_resize_blocksmoved) {
		hfs_journal_flush(hfsmp, FALSE);
	}

	/* Relocate journal file blocks if they're in the way. */
	error = hfs_reclaim_journal_file(hfsmp, allocLimit, context);
	if (error) {
		printf("hfs_reclaimspace: hfs_reclaim_journal_file failed (%d)\n", error);
		return error;
	}

	/* Relocate journal info block blocks if they're in the way. */
	error = hfs_reclaim_journal_info_block(hfsmp, allocLimit, context);
	if (error) {
		printf("hfs_reclaimspace: hfs_reclaim_journal_info_block failed (%d)\n", error);
		return error;
	}

	/* Reclaim extents from catalog file records */
	error = hfs_reclaim_filespace(hfsmp, allocLimit, context);
	if (error) {
		printf ("hfs_reclaimspace: hfs_reclaim_filespace returned error=%d\n", error);
		return error;
	}

	/* Reclaim extents from extent-based extended attributes, if any */
	error = hfs_reclaim_xattrspace(hfsmp, allocLimit, context);
	if (error) {
		printf ("hfs_reclaimspace: hfs_reclaim_xattrspace returned error=%d\n", error);
		return error;
	}

	return error;
}
6580
6581
/*
 * Check if there are any extents (including overflow extents) that overlap
 * into the disk space that is being reclaimed.
 *
 * Inputs -
 *	allocLimit - first allocation block of the region being reclaimed;
 *	             any extent ending beyond this block must be relocated.
 *	filerec    - catalog file record whose data fork, resource fork,
 *	             and (if present) overflow extents are examined.
 *
 * Output -
 *	true - One of the extents need to be relocated
 *	false - No overflow extents need to be relocated, or there was an error
 *	        (note: allocation or B-tree errors are reported as "no overlap").
 */
static int
hfs_file_extent_overlaps(struct hfsmount *hfsmp, u_int32_t allocLimit, struct HFSPlusCatalogFile *filerec)
{
	struct BTreeIterator * iterator = NULL;
	struct FSBufferDescriptor btdata;
	HFSPlusExtentRecord extrec;
	HFSPlusExtentKey *extkeyptr;
	FCB *fcb;
	int overlapped = false;
	int i, j;
	int error;
	int lockflags = 0;
	u_int32_t endblock;

	/* Check if data fork overlaps the target space */
	for (i = 0; i < kHFSPlusExtentDensity; ++i) {
		/* A zero blockCount terminates the in-record extent list. */
		if (filerec->dataFork.extents[i].blockCount == 0) {
			break;
		}
		endblock = filerec->dataFork.extents[i].startBlock +
			filerec->dataFork.extents[i].blockCount;
		if (endblock > allocLimit) {
			overlapped = true;
			goto out;
		}
	}

	/* Check if resource fork overlaps the target space */
	for (j = 0; j < kHFSPlusExtentDensity; ++j) {
		if (filerec->resourceFork.extents[j].blockCount == 0) {
			break;
		}
		endblock = filerec->resourceFork.extents[j].startBlock +
			filerec->resourceFork.extents[j].blockCount;
		if (endblock > allocLimit) {
			overlapped = true;
			goto out;
		}
	}

	/* Return back if there are no overflow extents for this file.
	 * If either loop above stopped early (hit a zero blockCount), that
	 * fork did not fill all kHFSPlusExtentDensity in-record slots and
	 * therefore cannot have spilled into the extents overflow B-tree.
	 */
	if ((i < kHFSPlusExtentDensity) && (j < kHFSPlusExtentDensity)) {
		goto out;
	}

	/* On allocation failure, report "no overlap" per the header comment. */
	if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) {
		return 0;
	}
	bzero(iterator, sizeof(*iterator));
	/* Build a search key for the first overflow extent of this file:
	 * forkType 0 / startBlock 0 sorts before any real record for fileID.
	 */
	extkeyptr = (HFSPlusExtentKey *)&iterator->key;
	extkeyptr->keyLength = kHFSPlusExtentKeyMaximumLength;
	extkeyptr->forkType = 0;
	extkeyptr->fileID = filerec->fileID;
	extkeyptr->startBlock = 0;

	btdata.bufferAddress = &extrec;
	btdata.itemSize = sizeof(extrec);
	btdata.itemCount = 1;

	fcb = VTOF(hfsmp->hfs_extents_vp);

	lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_SHARED_LOCK);

	/* This will position the iterator just before the first overflow
	 * extent record for given fileID.  It will always return btNotFound,
	 * so we special case the error code.
	 */
	error = BTSearchRecord(fcb, iterator, &btdata, NULL, iterator);
	if (error && (error != btNotFound)) {
		/* overlapped is still false here, matching the error contract. */
		goto out;
	}

	/* BTIterateRecord() might return error if the btree is empty, and
	 * therefore we return that the extent does not overflow to the caller
	 */
	error = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL);
	while (error == 0) {
		/* Stop when we encounter a different file.  The iterator key is
		 * refreshed by BTIterateRecord, so extkeyptr now reflects the
		 * record just read.
		 */
		if (extkeyptr->fileID != filerec->fileID) {
			break;
		}
		/* Check if any of the forks exist in the target space. */
		for (i = 0; i < kHFSPlusExtentDensity; ++i) {
			if (extrec[i].blockCount == 0) {
				break;
			}
			endblock = extrec[i].startBlock + extrec[i].blockCount;
			if (endblock > allocLimit) {
				overlapped = true;
				goto out;
			}
		}
		/* Look for more records. */
		error = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL);
	}

out:
	/* lockflags is only non-zero once the extents B-tree lock was taken. */
	if (lockflags) {
		hfs_systemfile_unlock(hfsmp, lockflags);
	}
	if (iterator) {
		kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator));
	}
	return overlapped;
}
6695
6696
6697 /*
6698 * Calculate the progress of a file system resize operation.
6699 */
6700 __private_extern__
6701 int
6702 hfs_resize_progress(struct hfsmount *hfsmp, u_int32_t *progress)
6703 {
6704 if ((hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) == 0) {
6705 return (ENXIO);
6706 }
6707
6708 if (hfsmp->hfs_resize_totalblocks > 0) {
6709 *progress = (u_int32_t)((hfsmp->hfs_resize_blocksmoved * 100ULL) / hfsmp->hfs_resize_totalblocks);
6710 } else {
6711 *progress = 0;
6712 }
6713
6714 return (0);
6715 }
6716
6717
6718 /*
6719 * Creates a UUID from a unique "name" in the HFS UUID Name space.
6720 * See version 3 UUID.
6721 */
6722 static void
6723 hfs_getvoluuid(struct hfsmount *hfsmp, uuid_t result)
6724 {
6725 MD5_CTX md5c;
6726 uint8_t rawUUID[8];
6727
6728 ((uint32_t *)rawUUID)[0] = hfsmp->vcbFndrInfo[6];
6729 ((uint32_t *)rawUUID)[1] = hfsmp->vcbFndrInfo[7];
6730
6731 MD5Init( &md5c );
6732 MD5Update( &md5c, HFS_UUID_NAMESPACE_ID, sizeof( uuid_t ) );
6733 MD5Update( &md5c, rawUUID, sizeof (rawUUID) );
6734 MD5Final( result, &md5c );
6735
6736 result[6] = 0x30 | ( result[6] & 0x0F );
6737 result[8] = 0x80 | ( result[8] & 0x3F );
6738 }
6739
/*
 * Get file system attributes.
 *
 * VFS getattr entry point: fills in whichever fields of *fsap the caller
 * asked for.  Cheap scalar fields are returned unconditionally via
 * VFSATTR_RETURN; the more expensive aggregates (capabilities, attribute
 * masks, subtype, volume name, UUID) are computed only when active.
 */
static int
hfs_vfs_getattr(struct mount *mp, struct vfs_attr *fsap, __unused vfs_context_t context)
{
/* Attribute masks HFS supports: everything in the generic masks except
 * named-attribute counts/lists, fork enumeration, and (for volumes) access
 * time, which HFS does not maintain at the volume level.
 */
#define HFS_ATTR_CMN_VALIDMASK (ATTR_CMN_VALIDMASK & ~(ATTR_CMN_NAMEDATTRCOUNT | ATTR_CMN_NAMEDATTRLIST))
#define HFS_ATTR_FILE_VALIDMASK (ATTR_FILE_VALIDMASK & ~(ATTR_FILE_FILETYPE | ATTR_FILE_FORKCOUNT | ATTR_FILE_FORKLIST))
#define HFS_ATTR_CMN_VOL_VALIDMASK (ATTR_CMN_VALIDMASK & ~(ATTR_CMN_NAMEDATTRCOUNT | ATTR_CMN_NAMEDATTRLIST | ATTR_CMN_ACCTIME))

	ExtendedVCB *vcb = VFSTOVCB(mp);
	struct hfsmount *hfsmp = VFSTOHFS(mp);
	u_int32_t freeCNIDs;

	/* CNIDs are 32-bit; free count is everything above the next CNID to assign. */
	freeCNIDs = (u_int32_t)0xFFFFFFFF - (u_int32_t)hfsmp->vcbNxtCNID;

	VFSATTR_RETURN(fsap, f_objcount, (u_int64_t)hfsmp->vcbFilCnt + (u_int64_t)hfsmp->vcbDirCnt);
	VFSATTR_RETURN(fsap, f_filecount, (u_int64_t)hfsmp->vcbFilCnt);
	VFSATTR_RETURN(fsap, f_dircount, (u_int64_t)hfsmp->vcbDirCnt);
	VFSATTR_RETURN(fsap, f_maxobjcount, (u_int64_t)0xFFFFFFFF);
	VFSATTR_RETURN(fsap, f_iosize, (size_t)cluster_max_io_size(mp, 0));
	VFSATTR_RETURN(fsap, f_blocks, (u_int64_t)hfsmp->totalBlocks);
	VFSATTR_RETURN(fsap, f_bfree, (u_int64_t)hfs_freeblks(hfsmp, 0));
	VFSATTR_RETURN(fsap, f_bavail, (u_int64_t)hfs_freeblks(hfsmp, 1));
	VFSATTR_RETURN(fsap, f_bsize, (u_int32_t)vcb->blockSize);
	/* XXX needs clarification */
	VFSATTR_RETURN(fsap, f_bused, hfsmp->totalBlocks - hfs_freeblks(hfsmp, 1));
	/* Maximum files is constrained by total blocks. */
	VFSATTR_RETURN(fsap, f_files, (u_int64_t)(hfsmp->totalBlocks - 2));
	VFSATTR_RETURN(fsap, f_ffree, MIN((u_int64_t)freeCNIDs, (u_int64_t)hfs_freeblks(hfsmp, 1)));

	fsap->f_fsid.val[0] = hfsmp->hfs_raw_dev;
	fsap->f_fsid.val[1] = vfs_typenum(mp);
	VFSATTR_SET_SUPPORTED(fsap, f_fsid);

	VFSATTR_RETURN(fsap, f_signature, vcb->vcbSigWord);
	VFSATTR_RETURN(fsap, f_carbon_fsid, 0);

	if (VFSATTR_IS_ACTIVE(fsap, f_capabilities)) {
		vol_capabilities_attr_t *cap;

		cap = &fsap->f_capabilities;

		/* Plain HFS advertises far fewer format capabilities than HFS+. */
		if (hfsmp->hfs_flags & HFS_STANDARD) {
			cap->capabilities[VOL_CAPABILITIES_FORMAT] =
				VOL_CAP_FMT_PERSISTENTOBJECTIDS |
				VOL_CAP_FMT_CASE_PRESERVING |
				VOL_CAP_FMT_FAST_STATFS |
				VOL_CAP_FMT_HIDDEN_FILES |
				VOL_CAP_FMT_PATH_FROM_ID;
		} else {
			/* Journal and case-sensitivity bits reflect this mount's state. */
			cap->capabilities[VOL_CAPABILITIES_FORMAT] =
				VOL_CAP_FMT_PERSISTENTOBJECTIDS |
				VOL_CAP_FMT_SYMBOLICLINKS |
				VOL_CAP_FMT_HARDLINKS |
				VOL_CAP_FMT_JOURNAL |
				VOL_CAP_FMT_ZERO_RUNS |
				(hfsmp->jnl ? VOL_CAP_FMT_JOURNAL_ACTIVE : 0) |
				(hfsmp->hfs_flags & HFS_CASE_SENSITIVE ? VOL_CAP_FMT_CASE_SENSITIVE : 0) |
				VOL_CAP_FMT_CASE_PRESERVING |
				VOL_CAP_FMT_FAST_STATFS |
				VOL_CAP_FMT_2TB_FILESIZE |
				VOL_CAP_FMT_HIDDEN_FILES |
#if HFS_COMPRESSION
				VOL_CAP_FMT_PATH_FROM_ID |
				VOL_CAP_FMT_DECMPFS_COMPRESSION;
#else
				VOL_CAP_FMT_PATH_FROM_ID;
#endif
		}
		cap->capabilities[VOL_CAPABILITIES_INTERFACES] =
			VOL_CAP_INT_SEARCHFS |
			VOL_CAP_INT_ATTRLIST |
			VOL_CAP_INT_NFSEXPORT |
			VOL_CAP_INT_READDIRATTR |
			VOL_CAP_INT_EXCHANGEDATA |
			VOL_CAP_INT_ALLOCATE |
			VOL_CAP_INT_VOL_RENAME |
			VOL_CAP_INT_ADVLOCK |
			VOL_CAP_INT_FLOCK |
#if NAMEDSTREAMS
			VOL_CAP_INT_EXTENDED_ATTR |
			VOL_CAP_INT_NAMEDSTREAMS;
#else
			VOL_CAP_INT_EXTENDED_ATTR;
#endif
		cap->capabilities[VOL_CAPABILITIES_RESERVED1] = 0;
		cap->capabilities[VOL_CAPABILITIES_RESERVED2] = 0;

		/* The "valid" masks record which bits the FS knows how to report,
		 * regardless of whether they are currently set above.
		 */
		cap->valid[VOL_CAPABILITIES_FORMAT] =
			VOL_CAP_FMT_PERSISTENTOBJECTIDS |
			VOL_CAP_FMT_SYMBOLICLINKS |
			VOL_CAP_FMT_HARDLINKS |
			VOL_CAP_FMT_JOURNAL |
			VOL_CAP_FMT_JOURNAL_ACTIVE |
			VOL_CAP_FMT_NO_ROOT_TIMES |
			VOL_CAP_FMT_SPARSE_FILES |
			VOL_CAP_FMT_ZERO_RUNS |
			VOL_CAP_FMT_CASE_SENSITIVE |
			VOL_CAP_FMT_CASE_PRESERVING |
			VOL_CAP_FMT_FAST_STATFS |
			VOL_CAP_FMT_2TB_FILESIZE |
			VOL_CAP_FMT_OPENDENYMODES |
			VOL_CAP_FMT_HIDDEN_FILES |
#if HFS_COMPRESSION
			VOL_CAP_FMT_PATH_FROM_ID |
			VOL_CAP_FMT_DECMPFS_COMPRESSION;
#else
			VOL_CAP_FMT_PATH_FROM_ID;
#endif
		cap->valid[VOL_CAPABILITIES_INTERFACES] =
			VOL_CAP_INT_SEARCHFS |
			VOL_CAP_INT_ATTRLIST |
			VOL_CAP_INT_NFSEXPORT |
			VOL_CAP_INT_READDIRATTR |
			VOL_CAP_INT_EXCHANGEDATA |
			VOL_CAP_INT_COPYFILE |
			VOL_CAP_INT_ALLOCATE |
			VOL_CAP_INT_VOL_RENAME |
			VOL_CAP_INT_ADVLOCK |
			VOL_CAP_INT_FLOCK |
			VOL_CAP_INT_MANLOCK |
#if NAMEDSTREAMS
			VOL_CAP_INT_EXTENDED_ATTR |
			VOL_CAP_INT_NAMEDSTREAMS;
#else
			VOL_CAP_INT_EXTENDED_ATTR;
#endif
		cap->valid[VOL_CAPABILITIES_RESERVED1] = 0;
		cap->valid[VOL_CAPABILITIES_RESERVED2] = 0;
		VFSATTR_SET_SUPPORTED(fsap, f_capabilities);
	}
	if (VFSATTR_IS_ACTIVE(fsap, f_attributes)) {
		vol_attributes_attr_t *attrp = &fsap->f_attributes;

		/* "valid" = attributes getattrlist can return; "native" = those
		 * stored directly by HFS.  For HFS they are the same masks.
		 */
		attrp->validattr.commonattr = HFS_ATTR_CMN_VOL_VALIDMASK;
		attrp->validattr.volattr = ATTR_VOL_VALIDMASK & ~ATTR_VOL_INFO;
		attrp->validattr.dirattr = ATTR_DIR_VALIDMASK;
		attrp->validattr.fileattr = HFS_ATTR_FILE_VALIDMASK;
		attrp->validattr.forkattr = 0;

		attrp->nativeattr.commonattr = HFS_ATTR_CMN_VOL_VALIDMASK;
		attrp->nativeattr.volattr = ATTR_VOL_VALIDMASK & ~ATTR_VOL_INFO;
		attrp->nativeattr.dirattr = ATTR_DIR_VALIDMASK;
		attrp->nativeattr.fileattr = HFS_ATTR_FILE_VALIDMASK;
		attrp->nativeattr.forkattr = 0;
		VFSATTR_SET_SUPPORTED(fsap, f_attributes);
	}
	/* Volume timestamps; HFS keeps whole-second resolution only. */
	fsap->f_create_time.tv_sec = hfsmp->hfs_itime;
	fsap->f_create_time.tv_nsec = 0;
	VFSATTR_SET_SUPPORTED(fsap, f_create_time);
	fsap->f_modify_time.tv_sec = hfsmp->vcbLsMod;
	fsap->f_modify_time.tv_nsec = 0;
	VFSATTR_SET_SUPPORTED(fsap, f_modify_time);

	fsap->f_backup_time.tv_sec = hfsmp->vcbVolBkUp;
	fsap->f_backup_time.tv_nsec = 0;
	VFSATTR_SET_SUPPORTED(fsap, f_backup_time);
	if (VFSATTR_IS_ACTIVE(fsap, f_fssubtype)) {
		u_int16_t subtype = 0;

		/*
		 * Subtypes (flavors) for HFS
		 *	0:	Mac OS Extended
		 *	1:	Mac OS Extended (Journaled)
		 *	2:	Mac OS Extended (Case Sensitive)
		 *	3:	Mac OS Extended (Case Sensitive, Journaled)
		 *	4 - 127:	Reserved
		 *	128:	Mac OS Standard
		 *
		 */
		if (hfsmp->hfs_flags & HFS_STANDARD) {
			subtype = HFS_SUBTYPE_STANDARDHFS;
		} else /* HFS Plus */ {
			if (hfsmp->jnl)
				subtype |= HFS_SUBTYPE_JOURNALED;
			if (hfsmp->hfs_flags & HFS_CASE_SENSITIVE)
				subtype |= HFS_SUBTYPE_CASESENSITIVE;
		}
		fsap->f_fssubtype = subtype;
		VFSATTR_SET_SUPPORTED(fsap, f_fssubtype);
	}

	if (VFSATTR_IS_ACTIVE(fsap, f_vol_name)) {
		strlcpy(fsap->f_vol_name, (char *) hfsmp->vcbVN, MAXPATHLEN);
		VFSATTR_SET_SUPPORTED(fsap, f_vol_name);
	}
	if (VFSATTR_IS_ACTIVE(fsap, f_uuid)) {
		hfs_getvoluuid(hfsmp, fsap->f_uuid);
		VFSATTR_SET_SUPPORTED(fsap, f_uuid);
	}
	return (0);
}
6933
/*
 * Perform a volume rename.  Requires the FS' root vp.
 *
 * Renames the root folder's catalog record to the new name, updates the
 * in-core VCB name, notifies CoreStorage of the new label, and replaces
 * the root cnode's cached descriptor.  A zero-length name is silently
 * accepted as a no-op.  Returns 0 on success or an errno-style error.
 */
static int
hfs_rename_volume(struct vnode *vp, const char *name, proc_t p)
{
	ExtendedVCB *vcb = VTOVCB(vp);
	struct cnode *cp = VTOC(vp);
	struct hfsmount *hfsmp = VTOHFS(vp);
	struct cat_desc to_desc;
	struct cat_desc todir_desc;
	struct cat_desc new_desc;
	cat_cookie_t cookie;
	int lockflags;
	int error = 0;
	char converted_volname[256];
	size_t volname_length = 0;
	size_t conv_volname_length = 0;


	/*
	 * Ignore attempts to rename a volume to a zero-length name.
	 */
	if (name[0] == 0)
		return(0);

	bzero(&to_desc, sizeof(to_desc));
	bzero(&todir_desc, sizeof(todir_desc));
	bzero(&new_desc, sizeof(new_desc));
	bzero(&cookie, sizeof(cookie));

	/* Destination "directory" is the root parent; the rename stays in place. */
	todir_desc.cd_parentcnid = kHFSRootParentID;
	todir_desc.cd_cnid = kHFSRootFolderID;
	todir_desc.cd_flags = CD_ISDIR;

	to_desc.cd_nameptr = (const u_int8_t *)name;
	to_desc.cd_namelen = strlen(name);
	to_desc.cd_parentcnid = kHFSRootParentID;
	to_desc.cd_cnid = cp->c_cnid;
	to_desc.cd_flags = CD_ISDIR;

	/* Nesting order: cnode lock -> transaction -> catalog preflight ->
	 * catalog lock.  Each level is torn down before its parent.
	 */
	if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK)) == 0) {
		if ((error = hfs_start_transaction(hfsmp)) == 0) {
			if ((error = cat_preflight(hfsmp, CAT_RENAME, &cookie, p)) == 0) {
				lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);

				error = cat_rename(hfsmp, &cp->c_desc, &todir_desc, &to_desc, &new_desc);

				/*
				 * If successful, update the name in the VCB, ensure it's terminated.
				 */
				if (!error) {
					strlcpy((char *)vcb->vcbVN, name, sizeof(vcb->vcbVN));
					volname_length = strlen ((const char*)vcb->vcbVN);
#define DKIOCCSSETLVNAME _IOW('d', 198, char[1024])
					/* Send the volume name down to CoreStorage if necessary */
					error = utf8_normalizestr(vcb->vcbVN, volname_length, (u_int8_t*)converted_volname, &conv_volname_length, 256, UTF_PRECOMPOSED);
					if (error == 0) {
						(void) VNOP_IOCTL (hfsmp->hfs_devvp, DKIOCCSSETLVNAME, converted_volname, 0, vfs_context_current());
					}
					/* Normalization/ioctl failure is non-fatal: the
					 * catalog rename already succeeded, so clear error.
					 */
					error = 0;
				}

				hfs_systemfile_unlock(hfsmp, lockflags);
				cat_postflight(hfsmp, &cookie, p);

				/* NOTE(review): the VCB is marked dirty only when
				 * cat_rename failed — verify this isn't an inverted
				 * test; the unconditional flush below writes the
				 * volume header either way.
				 */
				if (error)
					MarkVCBDirty(vcb);
				(void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
			}
			hfs_end_transaction(hfsmp);
		}
		if (!error) {
			/* Release old allocated name buffer */
			if (cp->c_desc.cd_flags & CD_HASBUF) {
				const char *tmp_name = (const char *)cp->c_desc.cd_nameptr;

				cp->c_desc.cd_nameptr = 0;
				cp->c_desc.cd_namelen = 0;
				cp->c_desc.cd_flags &= ~CD_HASBUF;
				vfs_removename(tmp_name);
			}
			/* Update cnode's catalog descriptor */
			replace_desc(cp, &new_desc);
			vcb->volumeNameEncodingHint = new_desc.cd_encoding;
			cp->c_touch_chgtime = TRUE;
		}

		hfs_unlock(cp);
	}

	return(error);
}
7027
7028 /*
7029 * Get file system attributes.
7030 */
7031 static int
7032 hfs_vfs_setattr(struct mount *mp, struct vfs_attr *fsap, __unused vfs_context_t context)
7033 {
7034 kauth_cred_t cred = vfs_context_ucred(context);
7035 int error = 0;
7036
7037 /*
7038 * Must be superuser or owner of filesystem to change volume attributes
7039 */
7040 if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(mp)->f_owner))
7041 return(EACCES);
7042
7043 if (VFSATTR_IS_ACTIVE(fsap, f_vol_name)) {
7044 vnode_t root_vp;
7045
7046 error = hfs_vfs_root(mp, &root_vp, context);
7047 if (error)
7048 goto out;
7049
7050 error = hfs_rename_volume(root_vp, fsap->f_vol_name, vfs_context_proc(context));
7051 (void) vnode_put(root_vp);
7052 if (error)
7053 goto out;
7054
7055 VFSATTR_SET_SUPPORTED(fsap, f_vol_name);
7056 }
7057
7058 out:
7059 return error;
7060 }
7061
7062 /* If a runtime corruption is detected, set the volume inconsistent
7063 * bit in the volume attributes. The volume inconsistent bit is a persistent
7064 * bit which represents that the volume is corrupt and needs repair.
7065 * The volume inconsistent bit can be set from the kernel when it detects
7066 * runtime corruption or from file system repair utilities like fsck_hfs when
7067 * a repair operation fails. The bit should be cleared only from file system
7068 * verify/repair utility like fsck_hfs when a verify/repair succeeds.
7069 */
7070 void hfs_mark_volume_inconsistent(struct hfsmount *hfsmp)
7071 {
7072 HFS_MOUNT_LOCK(hfsmp, TRUE);
7073 if ((hfsmp->vcbAtrb & kHFSVolumeInconsistentMask) == 0) {
7074 hfsmp->vcbAtrb |= kHFSVolumeInconsistentMask;
7075 MarkVCBDirty(hfsmp);
7076 }
7077 if ((hfsmp->hfs_flags & HFS_READ_ONLY)==0) {
7078 /* Log information to ASL log */
7079 fslog_fs_corrupt(hfsmp->hfs_mp);
7080 printf("hfs: Runtime corruption detected on %s, fsck will be forced on next mount.\n", hfsmp->vcbVN);
7081 }
7082 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
7083 }
7084
7085 /* Replay the journal on the device node provided. Returns zero if
7086 * journal replay succeeded or no journal was supposed to be replayed.
7087 */
7088 static int hfs_journal_replay(vnode_t devvp, vfs_context_t context)
7089 {
7090 int retval = 0;
7091 struct mount *mp = NULL;
7092 struct hfs_mount_args *args = NULL;
7093
7094 /* Replay allowed only on raw devices */
7095 if (!vnode_ischr(devvp) && !vnode_isblk(devvp)) {
7096 retval = EINVAL;
7097 goto out;
7098 }
7099
7100 /* Create dummy mount structures */
7101 MALLOC(mp, struct mount *, sizeof(struct mount), M_TEMP, M_WAITOK);
7102 if (mp == NULL) {
7103 retval = ENOMEM;
7104 goto out;
7105 }
7106 bzero(mp, sizeof(struct mount));
7107 mount_lock_init(mp);
7108
7109 MALLOC(args, struct hfs_mount_args *, sizeof(struct hfs_mount_args), M_TEMP, M_WAITOK);
7110 if (args == NULL) {
7111 retval = ENOMEM;
7112 goto out;
7113 }
7114 bzero(args, sizeof(struct hfs_mount_args));
7115
7116 retval = hfs_mountfs(devvp, mp, args, 1, context);
7117 buf_flushdirtyblks(devvp, TRUE, 0, "hfs_journal_replay");
7118
7119 /* FSYNC the devnode to be sure all data has been flushed */
7120 retval = VNOP_FSYNC(devvp, MNT_WAIT, context);
7121
7122 out:
7123 if (mp) {
7124 mount_lock_destroy(mp);
7125 FREE(mp, M_TEMP);
7126 }
7127 if (args) {
7128 FREE(args, M_TEMP);
7129 }
7130 return retval;
7131 }
7132
/*
 * hfs vfs operations.
 *
 * Positional initializer for the VFS operations vector; entries are in
 * struct vfsops declaration order, with the remaining slots zeroed.
 */
struct vfsops hfs_vfsops = {
	hfs_mount,		/* mount */
	hfs_start,		/* start */
	hfs_unmount,		/* unmount */
	hfs_vfs_root,		/* root */
	hfs_quotactl,		/* quotactl */
	hfs_vfs_getattr, 	/* getattr; was hfs_statfs */
	hfs_sync,		/* sync */
	hfs_vfs_vget,		/* vget */
	hfs_fhtovp,		/* fhtovp (NFS file handle -> vnode) */
	hfs_vptofh,		/* vptofh (vnode -> NFS file handle) */
	hfs_init,		/* init */
	hfs_sysctl,		/* sysctl */
	hfs_vfs_setattr,	/* setattr */
	{NULL}			/* remaining slots unused */
};