]> git.saurik.com Git - apple/xnu.git/blob - bsd/hfs/hfs_vfsops.c
xnu-1699.24.8.tar.gz
[apple/xnu.git] / bsd / hfs / hfs_vfsops.c
1 /*
2 * Copyright (c) 1999-2010 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1991, 1993, 1994
30 * The Regents of the University of California. All rights reserved.
31 * (c) UNIX System Laboratories, Inc.
32 * All or some portions of this file are derived from material licensed
33 * to the University of California by American Telephone and Telegraph
34 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35 * the permission of UNIX System Laboratories, Inc.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * hfs_vfsops.c
66 * derived from @(#)ufs_vfsops.c 8.8 (Berkeley) 5/20/95
67 *
68 * (c) Copyright 1997-2002 Apple Computer, Inc. All rights reserved.
69 *
70 * hfs_vfsops.c -- VFS layer for loadable HFS file system.
71 *
72 */
73 #include <sys/param.h>
74 #include <sys/systm.h>
75 #include <sys/kauth.h>
76
77 #include <sys/ubc.h>
78 #include <sys/ubc_internal.h>
79 #include <sys/vnode_internal.h>
80 #include <sys/mount_internal.h>
81 #include <sys/sysctl.h>
82 #include <sys/malloc.h>
83 #include <sys/stat.h>
84 #include <sys/quota.h>
85 #include <sys/disk.h>
86 #include <sys/paths.h>
87 #include <sys/utfconv.h>
88 #include <sys/kdebug.h>
89 #include <sys/fslog.h>
90 #include <sys/ubc.h>
91
92 #include <kern/locks.h>
93
94 #include <vfs/vfs_journal.h>
95
96 #include <miscfs/specfs/specdev.h>
97 #include <hfs/hfs_mount.h>
98
99 #include <libkern/crypto/md5.h>
100 #include <uuid/uuid.h>
101
102 #include "hfs.h"
103 #include "hfs_catalog.h"
104 #include "hfs_cnode.h"
105 #include "hfs_dbg.h"
106 #include "hfs_endian.h"
107 #include "hfs_hotfiles.h"
108 #include "hfs_quota.h"
109
110 #include "hfscommon/headers/FileMgrInternal.h"
111 #include "hfscommon/headers/BTreesInternal.h"
112
113 #if CONFIG_PROTECT
114 #include <sys/cprotect.h>
115 #endif
116
117 #if CONFIG_HFS_ALLOC_RBTREE
118 #include "hfscommon/headers/HybridAllocator.h"
119 #endif
120
/* Non-zero enables verbose printf diagnostics throughout the mount paths. */
#define HFS_MOUNT_DEBUG 1

#if HFS_DIAGNOSTIC
/* Runtime-tunable diagnostic switches (only built under HFS_DIAGNOSTIC). */
int hfs_dbg_all = 0;
int hfs_dbg_err = 0;
#endif

/* Enable/disable debugging code for live volume resizing */
int hfs_resize_debug = 0;

/* Lock attributes and groups shared by every HFS mount (initialized once). */
lck_grp_attr_t * hfs_group_attr;
lck_attr_t * hfs_lock_attr;
lck_grp_t * hfs_mutex_group;
lck_grp_t * hfs_rwlock_group;
lck_grp_t * hfs_spinlock_group;

/* Vnode operation vector descriptors, defined in the vnops source files. */
extern struct vnodeopv_desc hfs_vnodeop_opv_desc;
extern struct vnodeopv_desc hfs_std_vnodeop_opv_desc;

/* not static so we can re-use in hfs_readwrite.c for build_path calls */
int hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context);

/* Forward declarations for file-local helpers. */
static int hfs_changefs(struct mount *mp, struct hfs_mount_args *args);
static int hfs_fhtovp(struct mount *mp, int fhlen, unsigned char *fhp, struct vnode **vpp, vfs_context_t context);
static int hfs_flushfiles(struct mount *, int, struct proc *);
static int hfs_flushMDB(struct hfsmount *hfsmp, int waitfor, int altflush);
static int hfs_getmountpoint(struct vnode *vp, struct hfsmount **hfsmpp);
static int hfs_init(struct vfsconf *vfsp);
static int hfs_vfs_root(struct mount *mp, struct vnode **vpp, vfs_context_t context);
static int hfs_quotactl(struct mount *, int, uid_t, caddr_t, vfs_context_t context);
static int hfs_start(struct mount *mp, int flags, vfs_context_t context);
static int hfs_vptofh(struct vnode *vp, int *fhlenp, unsigned char *fhp, vfs_context_t context);
static int hfs_file_extent_overlaps(struct hfsmount *hfsmp, u_int32_t allocLimit, struct HFSPlusCatalogFile *filerec);
static int hfs_journal_replay(vnode_t devvp, vfs_context_t context);
static int hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t allocLimit, u_int32_t reclaimblks, vfs_context_t context);

/* Red-black allocator tree setup/teardown (see CONFIG_HFS_ALLOC_RBTREE paths). */
void hfs_initialize_allocator (struct hfsmount *hfsmp);
int hfs_teardown_allocator (struct hfsmount *hfsmp);

/* VFS entry points implemented in this file; non-static for external callers. */
int hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t context);
int hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, int journal_replay_only, vfs_context_t context);
int hfs_reload(struct mount *mp);
int hfs_statfs(struct mount *mp, register struct vfsstatfs *sbp, vfs_context_t context);
int hfs_sync(struct mount *mp, int waitfor, vfs_context_t context);
int hfs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp,
               user_addr_t newp, size_t newlen, vfs_context_t context);
int hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context);
169 /*
170 * Called by vfs_mountroot when mounting HFS Plus as root.
171 */
172
173 int
174 hfs_mountroot(mount_t mp, vnode_t rvp, vfs_context_t context)
175 {
176 struct hfsmount *hfsmp;
177 ExtendedVCB *vcb;
178 struct vfsstatfs *vfsp;
179 int error;
180
181 if ((error = hfs_mountfs(rvp, mp, NULL, 0, context))) {
182 if (HFS_MOUNT_DEBUG) {
183 printf("hfs_mountroot: hfs_mountfs returned %d, rvp (%p) name (%s) \n",
184 error, rvp, (rvp->v_name ? rvp->v_name : "unknown device"));
185 }
186 return (error);
187 }
188
189 /* Init hfsmp */
190 hfsmp = VFSTOHFS(mp);
191
192 hfsmp->hfs_uid = UNKNOWNUID;
193 hfsmp->hfs_gid = UNKNOWNGID;
194 hfsmp->hfs_dir_mask = (S_IRWXU | S_IRGRP|S_IXGRP | S_IROTH|S_IXOTH); /* 0755 */
195 hfsmp->hfs_file_mask = (S_IRWXU | S_IRGRP|S_IXGRP | S_IROTH|S_IXOTH); /* 0755 */
196
197 /* Establish the free block reserve. */
198 vcb = HFSTOVCB(hfsmp);
199 vcb->reserveBlocks = ((u_int64_t)vcb->totalBlocks * HFS_MINFREE) / 100;
200 vcb->reserveBlocks = MIN(vcb->reserveBlocks, HFS_MAXRESERVE / vcb->blockSize);
201
202 vfsp = vfs_statfs(mp);
203 (void)hfs_statfs(mp, vfsp, NULL);
204
205 return (0);
206 }
207
208
209 /*
210 * VFS Operations.
211 *
212 * mount system call
213 */
214
/*
 * hfs_mount: VFS mount entry point for HFS/HFS+.
 *
 * Handles both fresh mounts and MNT_UPDATE requests.  Update requests
 * cover four distinct operations:
 *   - MNT_RELOAD: re-read in-core metadata after an fsck (read-only only);
 *   - downgrade from read-write to read-only (flush everything, close
 *     the journal, mark the volume cleanly unmounted);
 *   - upgrade from read-only to read-write (reopen the journal, mark the
 *     volume dirty, re-establish hardlink directories and hot files);
 *   - plain parameter changes, delegated to hfs_changefs().
 *
 * mp      - the mount point being (re)mounted
 * devvp   - device vnode (used only for fresh mounts)
 * data    - user-space pointer to a struct hfs_mount_args
 * context - caller's VFS context
 *
 * Returns 0 on success or an errno value.
 */
int
hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t context)
{
	struct proc *p = vfs_context_proc(context);
	struct hfsmount *hfsmp = NULL;
	struct hfs_mount_args args;
	int retval = E_NONE;
	u_int32_t cmdflags;

	/* Copy the mount arguments in from user space up front. */
	if ((retval = copyin(data, (caddr_t)&args, sizeof(args)))) {
		if (HFS_MOUNT_DEBUG) {
			printf("hfs_mount: copyin returned %d for fs\n", retval);
		}
		return (retval);
	}
	cmdflags = (u_int32_t)vfs_flags(mp) & MNT_CMDFLAGS;
	if (cmdflags & MNT_UPDATE) {
		hfsmp = VFSTOHFS(mp);

		/* Reload incore data after an fsck. */
		if (cmdflags & MNT_RELOAD) {
			if (vfs_isrdonly(mp)) {
				int error = hfs_reload(mp);
				if (error && HFS_MOUNT_DEBUG) {
					printf("hfs_mount: hfs_reload returned %d on %s \n", error, hfsmp->vcbVN);
				}
				return error;
			}
			else {
				/* Reloading a live read-write volume would race writers. */
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: MNT_RELOAD not supported on rdwr filesystem %s\n", hfsmp->vcbVN);
				}
				return (EINVAL);
			}
		}

		/* Change to a read-only file system. */
		if (((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) &&
		    vfs_isrdonly(mp)) {
			int flags;

			/* Set flag to indicate that a downgrade to read-only
			 * is in progress and therefore block any further
			 * modifications to the file system.
			 */
			hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);
			hfsmp->hfs_flags |= HFS_RDONLY_DOWNGRADE;
			hfsmp->hfs_downgrading_proc = current_thread();
			hfs_unlock_global (hfsmp);

			/* use VFS_SYNC to push out System (btree) files */
			retval = VFS_SYNC(mp, MNT_WAIT, context);
			if (retval && ((cmdflags & MNT_FORCE) == 0)) {
				/* Sync failed and the caller didn't force: undo the downgrade. */
				hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE;
				hfsmp->hfs_downgrading_proc = NULL;
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: VFS_SYNC returned %d during b-tree sync of %s \n", retval, hfsmp->vcbVN);
				}
				goto out;
			}

			flags = WRITECLOSE;
			if (cmdflags & MNT_FORCE)
				flags |= FORCECLOSE;

			if ((retval = hfs_flushfiles(mp, flags, p))) {
				/* Could not close out all writable files: undo the downgrade. */
				hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE;
				hfsmp->hfs_downgrading_proc = NULL;
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: hfs_flushfiles returned %d on %s \n", retval, hfsmp->vcbVN);
				}
				goto out;
			}

			/* mark the volume cleanly unmounted */
			hfsmp->vcbAtrb |= kHFSVolumeUnmountedMask;
			retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
			hfsmp->hfs_flags |= HFS_READ_ONLY;

			/* also get the volume bitmap blocks */
			if (!retval) {
				if (vnode_mount(hfsmp->hfs_devvp) == mp) {
					/* Device vnode belongs to us: use our own fsync path. */
					retval = hfs_fsync(hfsmp->hfs_devvp, MNT_WAIT, 0, p);
				} else {
					/* Device vnode belongs to another mount: take an iocount
					 * around the generic VNOP_FSYNC. */
					vnode_get(hfsmp->hfs_devvp);
					retval = VNOP_FSYNC(hfsmp->hfs_devvp, MNT_WAIT, context);
					vnode_put(hfsmp->hfs_devvp);
				}
			}
			if (retval) {
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: FSYNC on devvp returned %d for fs %s\n", retval, hfsmp->vcbVN);
				}
				/* Flush failed: back out both the downgrade markers and
				 * the HFS_READ_ONLY flag set above. */
				hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE;
				hfsmp->hfs_downgrading_proc = NULL;
				hfsmp->hfs_flags &= ~HFS_READ_ONLY;
				goto out;
			}
			if (hfsmp->jnl) {
				hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);

				journal_close(hfsmp->jnl);
				hfsmp->jnl = NULL;

				// Note: we explicitly don't want to shutdown
				//       access to the jvp because we may need
				//       it later if we go back to being read-write.

				hfs_unlock_global (hfsmp);
			}

#if CONFIG_HFS_ALLOC_RBTREE
			(void) hfs_teardown_allocator(hfsmp);
#endif
			hfsmp->hfs_downgrading_proc = NULL;
		}

		/* Change to a writable file system. */
		if (vfs_iswriteupgrade(mp)) {
#if CONFIG_HFS_ALLOC_RBTREE
			thread_t allocator_thread;
#endif

			/*
			 * On inconsistent disks, do not allow read-write mount
			 * unless it is the boot volume being mounted.
			 */
			if (!(vfs_flags(mp) & MNT_ROOTFS) &&
					(hfsmp->vcbAtrb & kHFSVolumeInconsistentMask)) {
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: attempting to mount inconsistent non-root volume %s\n", (hfsmp->vcbVN));
				}
				retval = EINVAL;
				goto out;
			}

			// If the journal was shut-down previously because we were
			// asked to be read-only, let's start it back up again now

			if (   (HFSTOVCB(hfsmp)->vcbAtrb & kHFSVolumeJournaledMask)
			    && hfsmp->jnl == NULL
			    && hfsmp->jvp != NULL) {
				int jflags;

				if (hfsmp->hfs_flags & HFS_NEED_JNL_RESET) {
					jflags = JOURNAL_RESET;
				} else {
					jflags = 0;
				}

				hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);

				/* Journal offset is relative to the embedded HFS+ volume start. */
				hfsmp->jnl = journal_open(hfsmp->jvp,
						(hfsmp->jnl_start * HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset,
						hfsmp->jnl_size,
						hfsmp->hfs_devvp,
						hfsmp->hfs_logical_block_size,
						jflags,
						0,
						hfs_sync_metadata, hfsmp->hfs_mp);

				/*
				 * Set up the trim callback function so that we can add
				 * recently freed extents to the free extent cache once
				 * the transaction that freed them is written to the
				 * journal on disk.
				 */
				if (hfsmp->jnl)
					journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);

				hfs_unlock_global (hfsmp);

				if (hfsmp->jnl == NULL) {
					if (HFS_MOUNT_DEBUG) {
						printf("hfs_mount: journal_open == NULL; couldn't be opened on %s \n", (hfsmp->vcbVN));
					}
					retval = EINVAL;
					goto out;
				} else {
					hfsmp->hfs_flags &= ~HFS_NEED_JNL_RESET;
				}

			}

			/* See if we need to erase unused Catalog nodes due to <rdar://problem/6947811>. */
			retval = hfs_erase_unused_nodes(hfsmp);
			if (retval != E_NONE) {
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: hfs_erase_unused_nodes returned %d for fs %s\n", retval, hfsmp->vcbVN);
				}
				goto out;
			}

			/* If this mount point was downgraded from read-write
			 * to read-only, clear that information as we are now
			 * moving back to read-write.
			 */
			hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE;
			hfsmp->hfs_downgrading_proc = NULL;

			/* mark the volume dirty (clear clean unmount bit) */
			hfsmp->vcbAtrb &= ~kHFSVolumeUnmountedMask;

			retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
			if (retval != E_NONE) {
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: hfs_flushvolumeheader returned %d for fs %s\n", retval, hfsmp->vcbVN);
				}
				goto out;
			}

			/* Only clear HFS_READ_ONLY after a successful write */
			hfsmp->hfs_flags &= ~HFS_READ_ONLY;


			if (!(hfsmp->hfs_flags & (HFS_READ_ONLY | HFS_STANDARD))) {
				/* Setup private/hidden directories for hardlinks. */
				hfs_privatedir_init(hfsmp, FILE_HARDLINKS);
				hfs_privatedir_init(hfsmp, DIR_HARDLINKS);

				/* Remove any leftover unlinked-but-open files from a crash. */
				hfs_remove_orphans(hfsmp);

				/*
				 * Allow hot file clustering if conditions allow.
				 */
				if ((hfsmp->hfs_flags & HFS_METADATA_ZONE) &&
				    ((hfsmp->hfs_flags & HFS_SSD) == 0)) {
					(void) hfs_recording_init(hfsmp);
				}
				/* Force ACLs on HFS+ file systems. */
				if (vfs_extendedsecurity(HFSTOVFS(hfsmp)) == 0) {
					vfs_setextendedsecurity(HFSTOVFS(hfsmp));
				}
			}

#if CONFIG_HFS_ALLOC_RBTREE
			/*
			 * Like the normal mount case, we need to handle creation of the allocation red-black tree
			 * if we're upgrading from read-only to read-write.
			 *
			 * We spawn a thread to create the pair of red-black trees for this volume.
			 * However, in so doing, we must be careful to ensure that if this thread is still
			 * running after mount has finished, it doesn't interfere with an unmount. Specifically,
			 * we'll need to set a bit that indicates we're in progress building the trees here.
			 * Unmount will check for this bit, and then if it's set, mark a corresponding bit that
			 * notifies the tree generation code that an unmount is waiting.  Also, mark the extent
			 * tree flags that the allocator is enabled for use before we spawn the thread that will start
			 * scanning the RB tree.
			 *
			 * Only do this if we're operating on a read-write mount (we wouldn't care for read-only),
			 * which has not previously encountered a bad error on the red-black tree code.  Also, don't
			 * try to re-build a tree that already exists.
			 */

			if (hfsmp->extent_tree_flags == 0) {
				hfsmp->extent_tree_flags |= (HFS_ALLOC_TREEBUILD_INFLIGHT | HFS_ALLOC_RB_ENABLED);
				/* Initialize EOF counter so that the thread can assume it started at initial values */
				hfsmp->offset_block_end = 0;

				InitTree(hfsmp);

				kernel_thread_start ((thread_continue_t) hfs_initialize_allocator , hfsmp, &allocator_thread);
				thread_deallocate(allocator_thread);
			}

#endif
		}

		/* Update file system parameters. */
		retval = hfs_changefs(mp, &args);
		if (retval && HFS_MOUNT_DEBUG) {
			printf("hfs_mount: hfs_changefs returned %d for %s\n", retval, hfsmp->vcbVN);
		}

	} else /* not an update request */ {

		/* Set the mount flag to indicate that we support volfs */
		vfs_setflags(mp, (u_int64_t)((unsigned int)MNT_DOVOLFS));

		retval = hfs_mountfs(devvp, mp, &args, 0, context);
		if (retval && HFS_MOUNT_DEBUG) {
			printf("hfs_mount: hfs_mountfs returned %d\n", retval);
		}
#if CONFIG_PROTECT
		/*
		 * If above mount call was successful, and this mount is content protection
		 * enabled, then verify the on-disk EA on the root to ensure that the filesystem
		 * is of a suitable vintage to allow the mount to proceed.
		 */
		if ((retval == 0) && (cp_fs_protected (mp))) {
			int err = 0;
			struct cp_root_xattr xattr;
			bzero (&xattr, sizeof(struct cp_root_xattr));
			hfsmp = vfs_fsprivate(mp);

			/* go get the EA to get the version information */
			err = cp_getrootxattr (hfsmp, &xattr);
			/* If there was no EA there, then write one out. */
			if (err == ENOATTR) {
				bzero(&xattr, sizeof(struct cp_root_xattr));
				xattr.major_version = CP_CURRENT_MAJOR_VERS;
				xattr.minor_version = CP_CURRENT_MINOR_VERS;
				xattr.flags = 0;

				err = cp_setrootxattr (hfsmp, &xattr);
			}
			/*
			 * For any other error, including having an out of date CP version in the
			 * EA, or for an error out of cp_setrootxattr, deny the mount
			 * and do not proceed further.
			 */
			if (err || xattr.major_version != CP_CURRENT_MAJOR_VERS) {
				/* Deny the mount and tear down. */
				retval = EPERM;
				(void) hfs_unmount (mp, MNT_FORCE, context);
			}
		}
#endif
	}
out:
	/* On success refresh the cached statfs data before returning. */
	if (retval == 0) {
		(void)hfs_statfs(mp, vfs_statfs(mp), context);
	}
	return (retval);
}
540
541
/*
 * Context handed from hfs_changefs() to hfs_changefs_callback()
 * through vnode_iterate(); tells the callback which fixups to apply.
 */
struct hfs_changefs_cargs {
	struct hfsmount *hfsmp;		/* mount whose parameters changed */
	int		namefix;	/* non-zero: text encoding changed, fix names */
	int		permfix;	/* non-zero: default uid/gid/mask changed */
	int		permswitch;	/* non-zero: MNT_UNKNOWNPERMISSIONS toggled */
};
548
/*
 * Per-vnode fixup applied after mount parameters change; invoked for
 * each vnode on the mount via vnode_iterate() from hfs_changefs().
 *
 * Re-reads the cnode's catalog record and, depending on the flags in
 * the hfs_changefs_cargs context, refreshes on-disk ownership/mode
 * and/or re-derives the name under the new text encoding.
 *
 * Always returns VNODE_RETURNED so iteration continues.
 */
static int
hfs_changefs_callback(struct vnode *vp, void *cargs)
{
	ExtendedVCB *vcb;
	struct cnode *cp;
	struct cat_desc cndesc;
	struct cat_attr cnattr;
	struct hfs_changefs_cargs *args;
	int lockflags;
	int error;

	args = (struct hfs_changefs_cargs *)cargs;

	cp = VTOC(vp);
	vcb = HFSTOVCB(args->hfsmp);

	/* Catalog lookup requires the catalog B-tree lock (shared is enough). */
	lockflags = hfs_systemfile_lock(args->hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
	error = cat_lookup(args->hfsmp, &cp->c_desc, 0, &cndesc, &cnattr, NULL, NULL);
	hfs_systemfile_unlock(args->hfsmp, lockflags);
	if (error) {
		/*
		 * If we couldn't find this guy skip to the next one
		 */
		if (args->namefix)
			cache_purge(vp);

		return (VNODE_RETURNED);
	}
	/*
	 * Get the real uid/gid and perm mask from disk.
	 */
	if (args->permswitch || args->permfix) {
		cp->c_uid = cnattr.ca_uid;
		cp->c_gid = cnattr.ca_gid;
		cp->c_mode = cnattr.ca_mode;
	}
	/*
	 * If we're switching name converters then...
	 *   Remove the existing entry from the namei cache.
	 *   Update name to one based on new encoder.
	 */
	if (args->namefix) {
		cache_purge(vp);
		/* replace_desc() takes ownership of cndesc; no release needed here. */
		replace_desc(cp, &cndesc);

		if (cndesc.cd_cnid == kHFSRootFolderID) {
			/* Root folder's name doubles as the volume name. */
			strlcpy((char *)vcb->vcbVN, (const char *)cp->c_desc.cd_nameptr, NAME_MAX+1);
			cp->c_desc.cd_encoding = args->hfsmp->hfs_encoding;
		}
	} else {
		/* Descriptor not consumed; release the lookup's allocation. */
		cat_releasedesc(&cndesc);
	}
	return (VNODE_RETURNED);
}
603
/*
 * Change fs mount parameters (MNT_UPDATE path of hfs_mount).
 *
 * Applies changes from the user-supplied hfs_mount_args: timezone,
 * default uid/gid/umask, unknown-permissions mode, and (HFS standard
 * only) the text encoding.  If anything that affects per-vnode state
 * changed, every active vnode is visited via hfs_changefs_callback().
 *
 * Returns 0 on success or an errno value (EINVAL if the root filesystem
 * is asked to use unknown permissions, or a converter lookup failure).
 */
static int
hfs_changefs(struct mount *mp, struct hfs_mount_args *args)
{
	int retval = 0;
	int namefix, permfix, permswitch;
	struct hfsmount *hfsmp;
	ExtendedVCB *vcb;
	hfs_to_unicode_func_t get_unicode_func;
	unicode_to_hfs_func_t get_hfsname_func;
	u_int32_t old_encoding = 0;
	struct hfs_changefs_cargs cargs;
	u_int32_t mount_flags;

	hfsmp = VFSTOHFS(mp);
	vcb = HFSTOVCB(hfsmp);
	mount_flags = (unsigned int)vfs_flags(mp);

	/* Advertise that a changefs is in progress (cleared at exit). */
	hfsmp->hfs_flags |= HFS_IN_CHANGEFS;

	/* permswitch is set when the HFS_UNKNOWN_PERMS state is toggling. */
	permswitch = (((hfsmp->hfs_flags & HFS_UNKNOWN_PERMS) &&
	               ((mount_flags & MNT_UNKNOWNPERMISSIONS) == 0)) ||
	              (((hfsmp->hfs_flags & HFS_UNKNOWN_PERMS) == 0) &&
	               (mount_flags & MNT_UNKNOWNPERMISSIONS)));

	/* The root filesystem must operate with actual permissions: */
	if (permswitch && (mount_flags & MNT_ROOTFS) && (mount_flags & MNT_UNKNOWNPERMISSIONS)) {
		vfs_clearflags(mp, (u_int64_t)((unsigned int)MNT_UNKNOWNPERMISSIONS));	/* Just say "No". */
		retval = EINVAL;
		goto exit;
	}
	if (mount_flags & MNT_UNKNOWNPERMISSIONS)
		hfsmp->hfs_flags |= HFS_UNKNOWN_PERMS;
	else
		hfsmp->hfs_flags &= ~HFS_UNKNOWN_PERMS;

	namefix = permfix = 0;

	/*
	 * Tracking of hot files requires up-to-date access times.  So if
	 * access time updates are disabled, we must also disable hot files.
	 */
	if (mount_flags & MNT_NOATIME) {
		(void) hfs_recording_suspend(hfsmp);
	}

	/* Change the timezone (Note: this affects all hfs volumes and hfs+ volume create dates) */
	if (args->hfs_timezone.tz_minuteswest != VNOVAL) {
		gTimeZone = args->hfs_timezone;
	}

	/* Change the default uid, gid and/or mask */
	if ((args->hfs_uid != (uid_t)VNOVAL) && (hfsmp->hfs_uid != args->hfs_uid)) {
		hfsmp->hfs_uid = args->hfs_uid;
		if (vcb->vcbSigWord == kHFSPlusSigWord)
			++permfix;
	}
	if ((args->hfs_gid != (gid_t)VNOVAL) && (hfsmp->hfs_gid != args->hfs_gid)) {
		hfsmp->hfs_gid = args->hfs_gid;
		if (vcb->vcbSigWord == kHFSPlusSigWord)
			++permfix;
	}
	if (args->hfs_mask != (mode_t)VNOVAL) {
		if (hfsmp->hfs_dir_mask != (args->hfs_mask & ALLPERMS)) {
			hfsmp->hfs_dir_mask = args->hfs_mask & ALLPERMS;
			hfsmp->hfs_file_mask = args->hfs_mask & ALLPERMS;
			/* NOXONFILES strips execute bits from files (not dirs). */
			if ((args->flags != VNOVAL) && (args->flags & HFSFSMNT_NOXONFILES))
				hfsmp->hfs_file_mask = (args->hfs_mask & DEFFILEMODE);
			if (vcb->vcbSigWord == kHFSPlusSigWord)
				++permfix;
		}
	}

	/* Change the hfs encoding value (hfs only) */
	if ((vcb->vcbSigWord == kHFSSigWord) &&
	    (args->hfs_encoding != (u_int32_t)VNOVAL) &&
	    (hfsmp->hfs_encoding != args->hfs_encoding)) {

		retval = hfs_getconverter(args->hfs_encoding, &get_unicode_func, &get_hfsname_func);
		if (retval)
			goto exit;

		/*
		 * Connect the new hfs_get_unicode converter but leave
		 * the old hfs_get_hfsname converter in place so that
		 * we can lookup existing vnodes to get their correctly
		 * encoded names.
		 *
		 * When we're all finished, we can then connect the new
		 * hfs_get_hfsname converter and release our interest
		 * in the old converters.
		 */
		hfsmp->hfs_get_unicode = get_unicode_func;
		old_encoding = hfsmp->hfs_encoding;
		hfsmp->hfs_encoding = args->hfs_encoding;
		++namefix;
	}

	/* Nothing that affects per-vnode state changed; we're done. */
	if (!(namefix || permfix || permswitch))
		goto exit;

	/* XXX 3762912 hack to support HFS filesystem 'owner' */
	if (permfix)
		vfs_setowner(mp,
		    hfsmp->hfs_uid == UNKNOWNUID ? KAUTH_UID_NONE : hfsmp->hfs_uid,
		    hfsmp->hfs_gid == UNKNOWNGID ? KAUTH_GID_NONE : hfsmp->hfs_gid);

	/*
	 * For each active vnode fix things that changed
	 *
	 * Note that we can visit a vnode more than once
	 * and we can race with fsync.
	 *
	 * hfs_changefs_callback will be called for each vnode
	 * hung off of this mount point
	 *
	 * The vnode will be properly referenced and unreferenced
	 * around the callback
	 */
	cargs.hfsmp = hfsmp;
	cargs.namefix = namefix;
	cargs.permfix = permfix;
	cargs.permswitch = permswitch;

	vnode_iterate(mp, 0, hfs_changefs_callback, (void *)&cargs);

	/*
	 * If we're switching name converters we can now
	 * connect the new hfs_get_hfsname converter and
	 * release our interest in the old converters.
	 */
	if (namefix) {
		hfsmp->hfs_get_hfsname = get_hfsname_func;
		vcb->volumeNameEncodingHint = args->hfs_encoding;
		(void) hfs_relconverter(old_encoding);
	}
exit:
	hfsmp->hfs_flags &= ~HFS_IN_CHANGEFS;
	return (retval);
}
744
745
/*
 * Context handed from hfs_reload() to hfs_reload_callback()
 * through vnode_iterate().
 */
struct hfs_reload_cargs {
	struct hfsmount *hfsmp;		/* mount being reloaded */
	int		error;		/* first error hit by the callback, if any */
};
750
751 static int
752 hfs_reload_callback(struct vnode *vp, void *cargs)
753 {
754 struct cnode *cp;
755 struct hfs_reload_cargs *args;
756 int lockflags;
757
758 args = (struct hfs_reload_cargs *)cargs;
759 /*
760 * flush all the buffers associated with this node
761 */
762 (void) buf_invalidateblks(vp, 0, 0, 0);
763
764 cp = VTOC(vp);
765 /*
766 * Remove any directory hints
767 */
768 if (vnode_isdir(vp))
769 hfs_reldirhints(cp, 0);
770
771 /*
772 * Re-read cnode data for all active vnodes (non-metadata files).
773 */
774 if (!vnode_issystem(vp) && !VNODE_IS_RSRC(vp) && (cp->c_fileid >= kHFSFirstUserCatalogNodeID)) {
775 struct cat_fork *datafork;
776 struct cat_desc desc;
777
778 datafork = cp->c_datafork ? &cp->c_datafork->ff_data : NULL;
779
780 /* lookup by fileID since name could have changed */
781 lockflags = hfs_systemfile_lock(args->hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
782 args->error = cat_idlookup(args->hfsmp, cp->c_fileid, 0, &desc, &cp->c_attr, datafork);
783 hfs_systemfile_unlock(args->hfsmp, lockflags);
784 if (args->error) {
785 return (VNODE_RETURNED_DONE);
786 }
787
788 /* update cnode's catalog descriptor */
789 (void) replace_desc(cp, &desc);
790 }
791 return (VNODE_RETURNED);
792 }
793
794 /*
795 * Reload all incore data for a filesystem (used after running fsck on
796 * the root filesystem and finding things to fix). The filesystem must
797 * be mounted read-only.
798 *
799 * Things to do to update the mount:
800 * invalidate all cached meta-data.
801 * invalidate all inactive vnodes.
802 * invalidate all cached file data.
803 * re-read volume header from disk.
804 * re-load meta-file info (extents, file size).
805 * re-load B-tree header data.
806 * re-read cnode data for all active vnodes.
807 */
/*
 * Reload all in-core metadata for an HFS+ mount (see block comment above).
 * The mount must be read-only; HFS standard volumes are rejected.
 *
 * Returns 0 on success or an errno (EINVAL for HFS standard, EIO if the
 * re-read volume header fails sanity checks, or an error from the
 * per-vnode iteration / B-tree reload / catalog lookup).
 */
int
hfs_reload(struct mount *mountp)
{
	register struct vnode *devvp;
	struct buf *bp;
	int error, i;
	struct hfsmount *hfsmp;
	struct HFSPlusVolumeHeader *vhp;
	ExtendedVCB *vcb;
	struct filefork *forkp;
	struct cat_desc cndesc;
	struct hfs_reload_cargs args;
	daddr64_t priIDSector;

	hfsmp = VFSTOHFS(mountp);
	vcb = HFSTOVCB(hfsmp);

	if (vcb->vcbSigWord == kHFSSigWord)
		return (EINVAL);	/* rooting from HFS is not supported! */

	/*
	 * Invalidate all cached meta-data.
	 */
	devvp = hfsmp->hfs_devvp;
	if (buf_invalidateblks(devvp, 0, 0, 0))
		panic("hfs_reload: dirty1");

	args.hfsmp = hfsmp;
	args.error = 0;
	/*
	 * hfs_reload_callback will be called for each vnode
	 * hung off of this mount point that can't be recycled...
	 * vnode_iterate will recycle those that it can (the VNODE_RELOAD option)
	 * the vnode will be in an 'unbusy' state (VNODE_WAIT) and
	 * properly referenced and unreferenced around the callback
	 */
	vnode_iterate(mountp, VNODE_RELOAD | VNODE_WAIT, hfs_reload_callback, (void *)&args);

	if (args.error)
		return (args.error);

	/*
	 * Re-read VolumeHeader from disk.
	 */
	priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
			HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size));

	error = (int)buf_meta_bread(hfsmp->hfs_devvp,
			HFS_PHYSBLK_ROUNDDOWN(priIDSector, hfsmp->hfs_log_per_phys),
			hfsmp->hfs_physical_block_size, NOCRED, &bp);
	if (error) {
		if (bp != NULL)
			buf_brelse(bp);
		return (error);
	}

	vhp = (HFSPlusVolumeHeader *) (buf_dataptr(bp) + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));

	/* Do a quick sanity check */
	if ((SWAP_BE16(vhp->signature) != kHFSPlusSigWord &&
	     SWAP_BE16(vhp->signature) != kHFSXSigWord) ||
	    (SWAP_BE16(vhp->version) != kHFSPlusVersion &&
	     SWAP_BE16(vhp->version) != kHFSXVersion) ||
	    SWAP_BE32(vhp->blockSize) != vcb->blockSize) {
		buf_brelse(bp);
		return (EIO);
	}

	/* Copy the (big-endian on-disk) header fields into the in-core VCB. */
	vcb->vcbLsMod = to_bsd_time(SWAP_BE32(vhp->modifyDate));
	vcb->vcbAtrb = SWAP_BE32 (vhp->attributes);
	vcb->vcbJinfoBlock = SWAP_BE32(vhp->journalInfoBlock);
	vcb->vcbClpSiz = SWAP_BE32 (vhp->rsrcClumpSize);
	vcb->vcbNxtCNID = SWAP_BE32 (vhp->nextCatalogID);
	vcb->vcbVolBkUp = to_bsd_time(SWAP_BE32(vhp->backupDate));
	vcb->vcbWrCnt = SWAP_BE32 (vhp->writeCount);
	vcb->vcbFilCnt = SWAP_BE32 (vhp->fileCount);
	vcb->vcbDirCnt = SWAP_BE32 (vhp->folderCount);
	HFS_UPDATE_NEXT_ALLOCATION(vcb, SWAP_BE32 (vhp->nextAllocation));
	vcb->totalBlocks = SWAP_BE32 (vhp->totalBlocks);
	vcb->freeBlocks = SWAP_BE32 (vhp->freeBlocks);
	vcb->encodingsBitmap = SWAP_BE64 (vhp->encodingsBitmap);
	bcopy(vhp->finderInfo, vcb->vcbFndrInfo, sizeof(vhp->finderInfo));
	vcb->localCreateDate = SWAP_BE32 (vhp->createDate); /* hfs+ create date is in local time */

	/*
	 * Re-load meta-file vnode data (extent info, file size, etc).
	 */
	forkp = VTOF((struct vnode *)vcb->extentsRefNum);
	for (i = 0; i < kHFSPlusExtentDensity; i++) {
		forkp->ff_extents[i].startBlock =
			SWAP_BE32 (vhp->extentsFile.extents[i].startBlock);
		forkp->ff_extents[i].blockCount =
			SWAP_BE32 (vhp->extentsFile.extents[i].blockCount);
	}
	forkp->ff_size = SWAP_BE64 (vhp->extentsFile.logicalSize);
	forkp->ff_blocks = SWAP_BE32 (vhp->extentsFile.totalBlocks);
	forkp->ff_clumpsize = SWAP_BE32 (vhp->extentsFile.clumpSize);


	forkp = VTOF((struct vnode *)vcb->catalogRefNum);
	for (i = 0; i < kHFSPlusExtentDensity; i++) {
		forkp->ff_extents[i].startBlock =
			SWAP_BE32 (vhp->catalogFile.extents[i].startBlock);
		forkp->ff_extents[i].blockCount =
			SWAP_BE32 (vhp->catalogFile.extents[i].blockCount);
	}
	forkp->ff_size = SWAP_BE64 (vhp->catalogFile.logicalSize);
	forkp->ff_blocks = SWAP_BE32 (vhp->catalogFile.totalBlocks);
	forkp->ff_clumpsize = SWAP_BE32 (vhp->catalogFile.clumpSize);

	/* Attributes file is optional (hfs_attribute_vp may be NULL). */
	if (hfsmp->hfs_attribute_vp) {
		forkp = VTOF(hfsmp->hfs_attribute_vp);
		for (i = 0; i < kHFSPlusExtentDensity; i++) {
			forkp->ff_extents[i].startBlock =
				SWAP_BE32 (vhp->attributesFile.extents[i].startBlock);
			forkp->ff_extents[i].blockCount =
				SWAP_BE32 (vhp->attributesFile.extents[i].blockCount);
		}
		forkp->ff_size = SWAP_BE64 (vhp->attributesFile.logicalSize);
		forkp->ff_blocks = SWAP_BE32 (vhp->attributesFile.totalBlocks);
		forkp->ff_clumpsize = SWAP_BE32 (vhp->attributesFile.clumpSize);
	}

	forkp = VTOF((struct vnode *)vcb->allocationsRefNum);
	for (i = 0; i < kHFSPlusExtentDensity; i++) {
		forkp->ff_extents[i].startBlock =
			SWAP_BE32 (vhp->allocationFile.extents[i].startBlock);
		forkp->ff_extents[i].blockCount =
			SWAP_BE32 (vhp->allocationFile.extents[i].blockCount);
	}
	forkp->ff_size = SWAP_BE64 (vhp->allocationFile.logicalSize);
	forkp->ff_blocks = SWAP_BE32 (vhp->allocationFile.totalBlocks);
	forkp->ff_clumpsize = SWAP_BE32 (vhp->allocationFile.clumpSize);

	buf_brelse(bp);
	vhp = NULL;

	/*
	 * Re-load B-tree header data
	 */
	forkp = VTOF((struct vnode *)vcb->extentsRefNum);
	if ( (error = MacToVFSError( BTReloadData((FCB*)forkp) )) )
		return (error);

	forkp = VTOF((struct vnode *)vcb->catalogRefNum);
	if ( (error = MacToVFSError( BTReloadData((FCB*)forkp) )) )
		return (error);

	if (hfsmp->hfs_attribute_vp) {
		forkp = VTOF(hfsmp->hfs_attribute_vp);
		if ( (error = MacToVFSError( BTReloadData((FCB*)forkp) )) )
			return (error);
	}

	/* Reload the volume name */
	if ((error = cat_idlookup(hfsmp, kHFSRootFolderID, 0, &cndesc, NULL, NULL)))
		return (error);
	vcb->volumeNameEncodingHint = cndesc.cd_encoding;
	bcopy(cndesc.cd_nameptr, vcb->vcbVN, min(255, cndesc.cd_namelen));
	cat_releasedesc(&cndesc);

	/* Re-establish private/hidden directories. */
	hfs_privatedir_init(hfsmp, FILE_HARDLINKS);
	hfs_privatedir_init(hfsmp, DIR_HARDLINKS);

	/* In case any volume information changed to trigger a notification */
	hfs_generate_volume_notifications(hfsmp);

	return (0);
}
978
979
980
/*
 * hfs_syncer - deferred metadata-sync callback for a mounted HFS volume.
 *
 * arg0 is the volume's struct hfsmount; the second parameter is unused.
 * This runs from a thread_call (see the thread_call_enter_delayed() call
 * below, which reschedules onto hfsmp->hfs_syncer).  It either:
 *   1) throttles and flushes when too much write I/O is pending,
 *   2) flushes the journal (or does a full hfs_sync for non-journaled
 *      volumes) when enough idle/elapsed time has passed, or
 *   3) reschedules itself for later and returns early.
 * On completion (cases 1 and 2) it decrements hfs_sync_scheduled and
 * hfs_sync_incomplete and wakes anyone sleeping on hfs_sync_incomplete;
 * case 3 intentionally skips those decrements because the timer is
 * still armed.
 */
static void
hfs_syncer(void *arg0, void *unused)
{
#pragma unused(unused)

    struct hfsmount *hfsmp = arg0;
    clock_sec_t secs;
    clock_usec_t usecs;
    uint32_t delay = HFS_META_DELAY;    /* reschedule interval (milliseconds scale) */
    uint64_t now;                       /* current calendar time in microseconds */
    /* NOTE(review): no_max is written below but never read in this
     * function; presumably consumed elsewhere or vestigial -- confirm. */
    static int no_max=1;

    clock_get_calendar_microtime(&secs, &usecs);
    now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs;

    //
    // If the amount of pending writes is more than our limit, wait
    // for 2/3 of it to drain and then flush the journal.
    //
    if (hfsmp->hfs_mp->mnt_pending_write_size > hfsmp->hfs_max_pending_io) {
        int counter=0;
        uint64_t pending_io, start, rate = 0;

        no_max = 0;

        hfs_start_transaction(hfsmp);   // so we hold off any new i/o's

        pending_io = hfsmp->hfs_mp->mnt_pending_write_size;

        clock_get_calendar_microtime(&secs, &usecs);
        start = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs;

        /* Sleep in 10-tick slices until pending I/O drops below 1/3 of
         * what it was, bounded at 500 iterations so we can't hang here. */
        while(hfsmp->hfs_mp->mnt_pending_write_size > (pending_io/3) && counter++ < 500) {
            tsleep((caddr_t)hfsmp, PRIBIO, "hfs-wait-for-io-to-drain", 10);
        }

        if (counter >= 500) {
            printf("hfs: timed out waiting for io to drain (%lld)\n", (int64_t)hfsmp->hfs_mp->mnt_pending_write_size);
        }

        /* Journaled volumes flush the journal; otherwise fall back to a
         * full synchronous hfs_sync of the mount. */
        if (hfsmp->jnl) {
            journal_flush(hfsmp->jnl, FALSE);
        } else {
            hfs_sync(hfsmp->hfs_mp, MNT_WAIT, vfs_context_kernel());
        }

        clock_get_calendar_microtime(&secs, &usecs);
        now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs;
        hfsmp->hfs_last_sync_time = now;
        if (now != start) {
            /* Guard against division by zero when no time elapsed. */
            rate = ((pending_io * 1000000ULL) / (now - start));     // yields bytes per second
        }

        hfs_end_transaction(hfsmp);

        //
        // If a reasonable amount of time elapsed then check the
        // i/o rate.  If it's taking less than 1 second or more
        // than 2 seconds, adjust hfs_max_pending_io so that we
        // will allow about 1.5 seconds of i/o to queue up.
        //
        if (((now - start) >= 300000) && (rate != 0)) {
            /* scale = drain time in hundredths of a second */
            uint64_t scale = (pending_io * 100) / rate;

            if (scale < 100 || scale > 200) {
                // set it so that it should take about 1.5 seconds to drain
                hfsmp->hfs_max_pending_io = (rate * 150ULL) / 100ULL;
            }
        }

    } else if ( ((now - hfsmp->hfs_last_sync_time) >= 5000000ULL)      // 5 seconds
                || (((now - hfsmp->hfs_last_sync_time) >= 100000LL)    // 100 milliseconds
                    && ((now - hfsmp->hfs_last_sync_request_time) >= 100000LL)
                    && (hfsmp->hfs_active_threads == 0)
                    && (hfsmp->hfs_global_lock_nesting == 0))) {

        //
        // Flush the journal if more than 5 seconds elapsed since
        // the last sync OR we have not sync'ed recently and the
        // last sync request time was more than 100 milliseconds
        // ago and no one is in the middle of a transaction right
        // now.  Else we defer the sync and reschedule it.
        //
        if (hfsmp->jnl) {
            hfs_lock_global (hfsmp, HFS_SHARED_LOCK);

            journal_flush(hfsmp->jnl, FALSE);

            hfs_unlock_global (hfsmp);
        } else {
            hfs_sync(hfsmp->hfs_mp, MNT_WAIT, vfs_context_kernel());
        }

        clock_get_calendar_microtime(&secs, &usecs);
        now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs;
        hfsmp->hfs_last_sync_time = now;

    } else if (hfsmp->hfs_active_threads == 0) {
        /* Volume is quiet but not yet due for a flush: re-arm the timer
         * and bail out without touching the completion counters. */
        uint64_t deadline;

        clock_interval_to_deadline(delay, HFS_MILLISEC_SCALE, &deadline);
        thread_call_enter_delayed(hfsmp->hfs_syncer, deadline);

        // note: we intentionally return early here and do not
        // decrement the sync_scheduled and sync_incomplete
        // variables because we rescheduled the timer.

        return;
    }

    //
    // NOTE: we decrement these *after* we're done the journal_flush() since
    // it can take a significant amount of time and so we don't want more
    // callbacks scheduled until we're done this one.
    //
    OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_scheduled);
    OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_incomplete);
    wakeup((caddr_t)&hfsmp->hfs_sync_incomplete);
}
1100
1101
/* Forward declaration of an IOKit/BSD bridge routine; presumably reports
 * whether the named character device is ejectable media -- defined outside
 * this file. */
extern int IOBSDIsMediaEjectable( const char *cdev_name );
1103
1104 /*
1105 * Initialization code for Red-Black Tree Allocator
1106 *
1107 * This function will build the two red-black trees necessary for allocating space
1108 * from the metadata zone as well as normal allocations. Currently, we use
1109 * an advisory read to get most of the data into the buffer cache.
1110 * This function is intended to be run in a separate thread so as not to slow down mount.
1111 *
1112 */
1113
/*
 * hfs_initialize_allocator - build the red-black extent tree(s) for the
 * volume's allocation bitmap.
 *
 * Takes the allocation-file (bitmap) lock exclusively, runs GenerateTree
 * over the whole bitmap (GenerateTree may drop and re-take the lock
 * internally, but always returns with it held), and on success marks the
 * tree live via HFS_ALLOC_RB_ACTIVE.  Whether or not the build succeeded,
 * the TREEBUILD_INFLIGHT bit is cleared; on failure, any threads parked
 * on extent_tree_flags are woken so they can observe the failure.
 * Compiled out entirely unless CONFIG_HFS_ALLOC_RBTREE is enabled.
 */
void
hfs_initialize_allocator (struct hfsmount *hfsmp) {

#if CONFIG_HFS_ALLOC_RBTREE
	/*
	 * Grab the bitmap lock exclusively; journal transactions will block
	 * until we release it.
	 */
	int lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);

	/*
	 * GenerateTree expects the bitmap lock held on entry and returns with
	 * it held.  Only one tree is maintained, so the start block is
	 * implicitly 0.
	 */
	u_int32_t build_err = GenerateTree(hfsmp, hfsmp->totalBlocks, &lockflags, 1);
	if (build_err == 0) {
		/* Build succeeded: the offset tree is now usable. */
		hfsmp->extent_tree_flags |= HFS_ALLOC_RB_ACTIVE;
	}

	/*
	 * The lock is still held here (see above), so it is safe to update
	 * the in-flight bit without re-acquiring anything.
	 */
	hfsmp->extent_tree_flags &= ~HFS_ALLOC_TREEBUILD_INFLIGHT;
	if (build_err != 0) {
		/* Build failed: wake anyone waiting on the allocation bitmap lock. */
		wakeup((caddr_t)&hfsmp->extent_tree_flags);
	}

	hfs_systemfile_unlock(hfsmp, lockflags);
#else
#pragma unused (hfsmp)
#endif
}
1156
1157
1158 /*
1159 * Teardown code for the Red-Black Tree allocator.
1160 * This function consolidates the code which serializes with respect
1161 * to a thread that may be potentially still building the tree when we need to begin
1162 * tearing it down. Since the red-black tree may not be live when we enter this function
1163 * we return:
1164 * 1 -> Tree was live.
1165 * 0 -> Tree was not active at time of call.
1166 */
1167
/*
 * hfs_teardown_allocator - serialize with a possibly in-flight tree build
 * and then destroy the red-black allocation trees.
 *
 * Returns 1 if the tree was live (and has now been destroyed), 0 if no
 * tree was active at the time of the call.  When CONFIG_HFS_ALLOC_RBTREE
 * is disabled this is a no-op that returns 0.
 */
int
hfs_teardown_allocator (struct hfsmount *hfsmp) {
	int tree_was_live = 0;

#if CONFIG_HFS_ALLOC_RBTREE
	/*
	 * Take the bitmap lock exclusively, then wait out any thread that is
	 * still generating the tree: advertise our intent with
	 * TEARDOWN_INFLIGHT and sleep on extent_tree_flags until the builder
	 * clears its in-flight bit.
	 */
	int lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);

	while (hfsmp->extent_tree_flags & HFS_ALLOC_TREEBUILD_INFLIGHT) {
		hfsmp->extent_tree_flags |= HFS_ALLOC_TEARDOWN_INFLIGHT;

		lck_rw_sleep(&(VTOC(hfsmp->hfs_allocation_vp))->c_rwlock, LCK_SLEEP_EXCLUSIVE,
		             &hfsmp->extent_tree_flags, THREAD_UNINT);
	}

	if (hfs_isrbtree_active (hfsmp)) {
		tree_was_live = 1;

		/* Destroy the RB trees while the bitmap is still locked. */
		DestroyTrees(hfsmp);
	}

	hfs_systemfile_unlock(hfsmp, lockflags);
#else
#pragma unused (hfsmp)
#endif
	return tree_was_live;

}
1206
1207
/*
 * Set during hfs_mountfs() when mounting the root volume: nonzero if the
 * volume header's kHFSVolumeUnmountedMask bit indicated a clean unmount.
 * Exported read-only to userspace as vfs.generic.root_unmounted_cleanly.
 */
static int hfs_root_unmounted_cleanly = 0;

SYSCTL_DECL(_vfs_generic);
SYSCTL_INT(_vfs_generic, OID_AUTO, root_unmounted_cleanly, CTLFLAG_RD, &hfs_root_unmounted_cleanly, 0, "Root filesystem was unmounted cleanly");
1212
1213 /*
1214 * Common code for mount and mountroot
1215 */
1216 int
1217 hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
1218 int journal_replay_only, vfs_context_t context)
1219 {
1220 struct proc *p = vfs_context_proc(context);
1221 int retval = E_NONE;
1222 struct hfsmount *hfsmp = NULL;
1223 struct buf *bp;
1224 dev_t dev;
1225 HFSMasterDirectoryBlock *mdbp = NULL;
1226 int ronly;
1227 #if QUOTA
1228 int i;
1229 #endif
1230 int mntwrapper;
1231 kauth_cred_t cred;
1232 u_int64_t disksize;
1233 daddr64_t log_blkcnt;
1234 u_int32_t log_blksize;
1235 u_int32_t phys_blksize;
1236 u_int32_t minblksize;
1237 u_int32_t iswritable;
1238 daddr64_t mdb_offset;
1239 int isvirtual = 0;
1240 int isroot = 0;
1241 int isssd;
1242 #if CONFIG_HFS_ALLOC_RBTREE
1243 thread_t allocator_thread;
1244 #endif
1245
1246 if (args == NULL) {
1247 /* only hfs_mountroot passes us NULL as the 'args' argument */
1248 isroot = 1;
1249 }
1250
1251 ronly = vfs_isrdonly(mp);
1252 dev = vnode_specrdev(devvp);
1253 cred = p ? vfs_context_ucred(context) : NOCRED;
1254 mntwrapper = 0;
1255
1256 bp = NULL;
1257 hfsmp = NULL;
1258 mdbp = NULL;
1259 minblksize = kHFSBlockSize;
1260
1261 /* Advisory locking should be handled at the VFS layer */
1262 vfs_setlocklocal(mp);
1263
1264 /* Get the logical block size (treated as physical block size everywhere) */
1265 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&log_blksize, 0, context)) {
1266 if (HFS_MOUNT_DEBUG) {
1267 printf("hfs_mountfs: DKIOCGETBLOCKSIZE failed\n");
1268 }
1269 retval = ENXIO;
1270 goto error_exit;
1271 }
1272 if (log_blksize == 0 || log_blksize > 1024*1024*1024) {
1273 printf("hfs: logical block size 0x%x looks bad. Not mounting.\n", log_blksize);
1274 retval = ENXIO;
1275 goto error_exit;
1276 }
1277
1278 /* Get the physical block size. */
1279 retval = VNOP_IOCTL(devvp, DKIOCGETPHYSICALBLOCKSIZE, (caddr_t)&phys_blksize, 0, context);
1280 if (retval) {
1281 if ((retval != ENOTSUP) && (retval != ENOTTY)) {
1282 if (HFS_MOUNT_DEBUG) {
1283 printf("hfs_mountfs: DKIOCGETPHYSICALBLOCKSIZE failed\n");
1284 }
1285 retval = ENXIO;
1286 goto error_exit;
1287 }
1288 /* If device does not support this ioctl, assume that physical
1289 * block size is same as logical block size
1290 */
1291 phys_blksize = log_blksize;
1292 }
1293 if (phys_blksize == 0 || phys_blksize > 1024*1024*1024) {
1294 printf("hfs: physical block size 0x%x looks bad. Not mounting.\n", phys_blksize);
1295 retval = ENXIO;
1296 goto error_exit;
1297 }
1298
1299 /* Switch to 512 byte sectors (temporarily) */
1300 if (log_blksize > 512) {
1301 u_int32_t size512 = 512;
1302
1303 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&size512, FWRITE, context)) {
1304 if (HFS_MOUNT_DEBUG) {
1305 printf("hfs_mountfs: DKIOCSETBLOCKSIZE failed \n");
1306 }
1307 retval = ENXIO;
1308 goto error_exit;
1309 }
1310 }
1311 /* Get the number of 512 byte physical blocks. */
1312 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) {
1313 /* resetting block size may fail if getting block count did */
1314 (void)VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context);
1315 if (HFS_MOUNT_DEBUG) {
1316 printf("hfs_mountfs: DKIOCGETBLOCKCOUNT failed\n");
1317 }
1318 retval = ENXIO;
1319 goto error_exit;
1320 }
1321 /* Compute an accurate disk size (i.e. within 512 bytes) */
1322 disksize = (u_int64_t)log_blkcnt * (u_int64_t)512;
1323
1324 /*
1325 * On Tiger it is not necessary to switch the device
1326 * block size to be 4k if there are more than 31-bits
1327 * worth of blocks but to insure compatibility with
1328 * pre-Tiger systems we have to do it.
1329 *
1330 * If the device size is not a multiple of 4K (8 * 512), then
1331 * switching the logical block size isn't going to help because
1332 * we will be unable to write the alternate volume header.
1333 * In this case, just leave the logical block size unchanged.
1334 */
1335 if (log_blkcnt > 0x000000007fffffff && (log_blkcnt & 7) == 0) {
1336 minblksize = log_blksize = 4096;
1337 if (phys_blksize < log_blksize)
1338 phys_blksize = log_blksize;
1339 }
1340
1341 /*
1342 * The cluster layer is not currently prepared to deal with a logical
1343 * block size larger than the system's page size. (It can handle
1344 * blocks per page, but not multiple pages per block.) So limit the
1345 * logical block size to the page size.
1346 */
1347 if (log_blksize > PAGE_SIZE)
1348 log_blksize = PAGE_SIZE;
1349
1350 /* Now switch to our preferred physical block size. */
1351 if (log_blksize > 512) {
1352 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context)) {
1353 if (HFS_MOUNT_DEBUG) {
1354 printf("hfs_mountfs: DKIOCSETBLOCKSIZE (2) failed\n");
1355 }
1356 retval = ENXIO;
1357 goto error_exit;
1358 }
1359 /* Get the count of physical blocks. */
1360 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) {
1361 if (HFS_MOUNT_DEBUG) {
1362 printf("hfs_mountfs: DKIOCGETBLOCKCOUNT (2) failed\n");
1363 }
1364 retval = ENXIO;
1365 goto error_exit;
1366 }
1367 }
1368 /*
1369 * At this point:
1370 * minblksize is the minimum physical block size
1371 * log_blksize has our preferred physical block size
1372 * log_blkcnt has the total number of physical blocks
1373 */
1374
1375 mdb_offset = (daddr64_t)HFS_PRI_SECTOR(log_blksize);
1376 if ((retval = (int)buf_meta_bread(devvp,
1377 HFS_PHYSBLK_ROUNDDOWN(mdb_offset, (phys_blksize/log_blksize)),
1378 phys_blksize, cred, &bp))) {
1379 if (HFS_MOUNT_DEBUG) {
1380 printf("hfs_mountfs: buf_meta_bread failed with %d\n", retval);
1381 }
1382 goto error_exit;
1383 }
1384 MALLOC(mdbp, HFSMasterDirectoryBlock *, kMDBSize, M_TEMP, M_WAITOK);
1385 if (mdbp == NULL) {
1386 retval = ENOMEM;
1387 if (HFS_MOUNT_DEBUG) {
1388 printf("hfs_mountfs: MALLOC failed\n");
1389 }
1390 goto error_exit;
1391 }
1392 bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize), mdbp, kMDBSize);
1393 buf_brelse(bp);
1394 bp = NULL;
1395
1396 MALLOC(hfsmp, struct hfsmount *, sizeof(struct hfsmount), M_HFSMNT, M_WAITOK);
1397 if (hfsmp == NULL) {
1398 if (HFS_MOUNT_DEBUG) {
1399 printf("hfs_mountfs: MALLOC (2) failed\n");
1400 }
1401 retval = ENOMEM;
1402 goto error_exit;
1403 }
1404 bzero(hfsmp, sizeof(struct hfsmount));
1405
1406 hfs_chashinit_finish(hfsmp);
1407
1408 /*
1409 * See if the disk is a solid state device. We need this to decide what to do about
1410 * hotfiles.
1411 */
1412 if (VNOP_IOCTL(devvp, DKIOCISSOLIDSTATE, (caddr_t)&isssd, 0, context) == 0) {
1413 if (isssd) {
1414 hfsmp->hfs_flags |= HFS_SSD;
1415 }
1416 }
1417
1418
1419 /*
1420 * Init the volume information structure
1421 */
1422
1423 lck_mtx_init(&hfsmp->hfs_mutex, hfs_mutex_group, hfs_lock_attr);
1424 lck_mtx_init(&hfsmp->hfc_mutex, hfs_mutex_group, hfs_lock_attr);
1425 lck_rw_init(&hfsmp->hfs_global_lock, hfs_rwlock_group, hfs_lock_attr);
1426 lck_rw_init(&hfsmp->hfs_insync, hfs_rwlock_group, hfs_lock_attr);
1427 lck_spin_init(&hfsmp->vcbFreeExtLock, hfs_spinlock_group, hfs_lock_attr);
1428
1429 vfs_setfsprivate(mp, hfsmp);
1430 hfsmp->hfs_mp = mp; /* Make VFSTOHFS work */
1431 hfsmp->hfs_raw_dev = vnode_specrdev(devvp);
1432 hfsmp->hfs_devvp = devvp;
1433 vnode_ref(devvp); /* Hold a ref on the device, dropped when hfsmp is freed. */
1434 hfsmp->hfs_logical_block_size = log_blksize;
1435 hfsmp->hfs_logical_block_count = log_blkcnt;
1436 hfsmp->hfs_physical_block_size = phys_blksize;
1437 hfsmp->hfs_log_per_phys = (phys_blksize / log_blksize);
1438 hfsmp->hfs_flags |= HFS_WRITEABLE_MEDIA;
1439 if (ronly)
1440 hfsmp->hfs_flags |= HFS_READ_ONLY;
1441 if (((unsigned int)vfs_flags(mp)) & MNT_UNKNOWNPERMISSIONS)
1442 hfsmp->hfs_flags |= HFS_UNKNOWN_PERMS;
1443
1444 #if QUOTA
1445 for (i = 0; i < MAXQUOTAS; i++)
1446 dqfileinit(&hfsmp->hfs_qfiles[i]);
1447 #endif
1448
1449 if (args) {
1450 hfsmp->hfs_uid = (args->hfs_uid == (uid_t)VNOVAL) ? UNKNOWNUID : args->hfs_uid;
1451 if (hfsmp->hfs_uid == 0xfffffffd) hfsmp->hfs_uid = UNKNOWNUID;
1452 hfsmp->hfs_gid = (args->hfs_gid == (gid_t)VNOVAL) ? UNKNOWNGID : args->hfs_gid;
1453 if (hfsmp->hfs_gid == 0xfffffffd) hfsmp->hfs_gid = UNKNOWNGID;
1454 vfs_setowner(mp, hfsmp->hfs_uid, hfsmp->hfs_gid); /* tell the VFS */
1455 if (args->hfs_mask != (mode_t)VNOVAL) {
1456 hfsmp->hfs_dir_mask = args->hfs_mask & ALLPERMS;
1457 if (args->flags & HFSFSMNT_NOXONFILES) {
1458 hfsmp->hfs_file_mask = (args->hfs_mask & DEFFILEMODE);
1459 } else {
1460 hfsmp->hfs_file_mask = args->hfs_mask & ALLPERMS;
1461 }
1462 } else {
1463 hfsmp->hfs_dir_mask = UNKNOWNPERMISSIONS & ALLPERMS; /* 0777: rwx---rwx */
1464 hfsmp->hfs_file_mask = UNKNOWNPERMISSIONS & DEFFILEMODE; /* 0666: no --x by default? */
1465 }
1466 if ((args->flags != (int)VNOVAL) && (args->flags & HFSFSMNT_WRAPPER))
1467 mntwrapper = 1;
1468 } else {
1469 /* Even w/o explicit mount arguments, MNT_UNKNOWNPERMISSIONS requires setting up uid, gid, and mask: */
1470 if (((unsigned int)vfs_flags(mp)) & MNT_UNKNOWNPERMISSIONS) {
1471 hfsmp->hfs_uid = UNKNOWNUID;
1472 hfsmp->hfs_gid = UNKNOWNGID;
1473 vfs_setowner(mp, hfsmp->hfs_uid, hfsmp->hfs_gid); /* tell the VFS */
1474 hfsmp->hfs_dir_mask = UNKNOWNPERMISSIONS & ALLPERMS; /* 0777: rwx---rwx */
1475 hfsmp->hfs_file_mask = UNKNOWNPERMISSIONS & DEFFILEMODE; /* 0666: no --x by default? */
1476 }
1477 }
1478
1479 /* Find out if disk media is writable. */
1480 if (VNOP_IOCTL(devvp, DKIOCISWRITABLE, (caddr_t)&iswritable, 0, context) == 0) {
1481 if (iswritable)
1482 hfsmp->hfs_flags |= HFS_WRITEABLE_MEDIA;
1483 else
1484 hfsmp->hfs_flags &= ~HFS_WRITEABLE_MEDIA;
1485 }
1486
1487 // record the current time at which we're mounting this volume
1488 struct timeval tv;
1489 microtime(&tv);
1490 hfsmp->hfs_mount_time = tv.tv_sec;
1491
1492 /* Mount a standard HFS disk */
1493 if ((SWAP_BE16(mdbp->drSigWord) == kHFSSigWord) &&
1494 (mntwrapper || (SWAP_BE16(mdbp->drEmbedSigWord) != kHFSPlusSigWord))) {
1495
1496 /* On 10.6 and beyond, non read-only mounts for HFS standard vols get rejected */
1497 if (vfs_isrdwr(mp)) {
1498 retval = EROFS;
1499 goto error_exit;
1500 }
1501
1502 printf("hfs_mountfs: Mounting HFS Standard volumes was deprecated in Mac OS 10.7 \n");
1503
1504 /* Treat it as if it's read-only and not writeable */
1505 hfsmp->hfs_flags |= HFS_READ_ONLY;
1506 hfsmp->hfs_flags &= ~HFS_WRITEABLE_MEDIA;
1507
1508 /* If only journal replay is requested, exit immediately */
1509 if (journal_replay_only) {
1510 retval = 0;
1511 goto error_exit;
1512 }
1513
1514 if ((vfs_flags(mp) & MNT_ROOTFS)) {
1515 retval = EINVAL; /* Cannot root from HFS standard disks */
1516 goto error_exit;
1517 }
1518 /* HFS disks can only use 512 byte physical blocks */
1519 if (log_blksize > kHFSBlockSize) {
1520 log_blksize = kHFSBlockSize;
1521 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context)) {
1522 retval = ENXIO;
1523 goto error_exit;
1524 }
1525 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) {
1526 retval = ENXIO;
1527 goto error_exit;
1528 }
1529 hfsmp->hfs_logical_block_size = log_blksize;
1530 hfsmp->hfs_logical_block_count = log_blkcnt;
1531 hfsmp->hfs_physical_block_size = log_blksize;
1532 hfsmp->hfs_log_per_phys = 1;
1533 }
1534 if (args) {
1535 hfsmp->hfs_encoding = args->hfs_encoding;
1536 HFSTOVCB(hfsmp)->volumeNameEncodingHint = args->hfs_encoding;
1537
1538 /* establish the timezone */
1539 gTimeZone = args->hfs_timezone;
1540 }
1541
1542 retval = hfs_getconverter(hfsmp->hfs_encoding, &hfsmp->hfs_get_unicode,
1543 &hfsmp->hfs_get_hfsname);
1544 if (retval)
1545 goto error_exit;
1546
1547 retval = hfs_MountHFSVolume(hfsmp, mdbp, p);
1548 if (retval)
1549 (void) hfs_relconverter(hfsmp->hfs_encoding);
1550
1551 } else /* Mount an HFS Plus disk */ {
1552 HFSPlusVolumeHeader *vhp;
1553 off_t embeddedOffset;
1554 int jnl_disable = 0;
1555
1556 /* Get the embedded Volume Header */
1557 if (SWAP_BE16(mdbp->drEmbedSigWord) == kHFSPlusSigWord) {
1558 embeddedOffset = SWAP_BE16(mdbp->drAlBlSt) * kHFSBlockSize;
1559 embeddedOffset += (u_int64_t)SWAP_BE16(mdbp->drEmbedExtent.startBlock) *
1560 (u_int64_t)SWAP_BE32(mdbp->drAlBlkSiz);
1561
1562 /*
1563 * If the embedded volume doesn't start on a block
1564 * boundary, then switch the device to a 512-byte
1565 * block size so everything will line up on a block
1566 * boundary.
1567 */
1568 if ((embeddedOffset % log_blksize) != 0) {
1569 printf("hfs_mountfs: embedded volume offset not"
1570 " a multiple of physical block size (%d);"
1571 " switching to 512\n", log_blksize);
1572 log_blksize = 512;
1573 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE,
1574 (caddr_t)&log_blksize, FWRITE, context)) {
1575
1576 if (HFS_MOUNT_DEBUG) {
1577 printf("hfs_mountfs: DKIOCSETBLOCKSIZE (3) failed\n");
1578 }
1579 retval = ENXIO;
1580 goto error_exit;
1581 }
1582 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT,
1583 (caddr_t)&log_blkcnt, 0, context)) {
1584 if (HFS_MOUNT_DEBUG) {
1585 printf("hfs_mountfs: DKIOCGETBLOCKCOUNT (3) failed\n");
1586 }
1587 retval = ENXIO;
1588 goto error_exit;
1589 }
1590 /* Note: relative block count adjustment */
1591 hfsmp->hfs_logical_block_count *=
1592 hfsmp->hfs_logical_block_size / log_blksize;
1593
1594 /* Update logical /physical block size */
1595 hfsmp->hfs_logical_block_size = log_blksize;
1596 hfsmp->hfs_physical_block_size = log_blksize;
1597 phys_blksize = log_blksize;
1598 hfsmp->hfs_log_per_phys = 1;
1599 }
1600
1601 disksize = (u_int64_t)SWAP_BE16(mdbp->drEmbedExtent.blockCount) *
1602 (u_int64_t)SWAP_BE32(mdbp->drAlBlkSiz);
1603
1604 hfsmp->hfs_logical_block_count = disksize / log_blksize;
1605
1606 mdb_offset = (daddr64_t)((embeddedOffset / log_blksize) + HFS_PRI_SECTOR(log_blksize));
1607 retval = (int)buf_meta_bread(devvp, HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
1608 phys_blksize, cred, &bp);
1609 if (retval) {
1610 if (HFS_MOUNT_DEBUG) {
1611 printf("hfs_mountfs: buf_meta_bread (2) failed with %d\n", retval);
1612 }
1613 goto error_exit;
1614 }
1615 bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize), mdbp, 512);
1616 buf_brelse(bp);
1617 bp = NULL;
1618 vhp = (HFSPlusVolumeHeader*) mdbp;
1619
1620 } else /* pure HFS+ */ {
1621 embeddedOffset = 0;
1622 vhp = (HFSPlusVolumeHeader*) mdbp;
1623 }
1624
1625 if (isroot) {
1626 hfs_root_unmounted_cleanly = (SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) != 0;
1627 }
1628
1629 /*
1630 * On inconsistent disks, do not allow read-write mount
1631 * unless it is the boot volume being mounted. We also
1632 * always want to replay the journal if the journal_replay_only
1633 * flag is set because that will (most likely) get the
1634 * disk into a consistent state before fsck_hfs starts
1635 * looking at it.
1636 */
1637 if ( !(vfs_flags(mp) & MNT_ROOTFS)
1638 && (SWAP_BE32(vhp->attributes) & kHFSVolumeInconsistentMask)
1639 && !journal_replay_only
1640 && !(hfsmp->hfs_flags & HFS_READ_ONLY)) {
1641
1642 if (HFS_MOUNT_DEBUG) {
1643 printf("hfs_mountfs: failed to mount non-root inconsistent disk\n");
1644 }
1645 retval = EINVAL;
1646 goto error_exit;
1647 }
1648
1649
1650 // XXXdbg
1651 //
1652 hfsmp->jnl = NULL;
1653 hfsmp->jvp = NULL;
1654 if (args != NULL && (args->flags & HFSFSMNT_EXTENDED_ARGS) &&
1655 args->journal_disable) {
1656 jnl_disable = 1;
1657 }
1658
1659 //
1660 // We only initialize the journal here if the last person
1661 // to mount this volume was journaling aware. Otherwise
1662 // we delay journal initialization until later at the end
1663 // of hfs_MountHFSPlusVolume() because the last person who
1664 // mounted it could have messed things up behind our back
1665 // (so we need to go find the .journal file, make sure it's
1666 // the right size, re-sync up if it was moved, etc).
1667 //
1668 if ( (SWAP_BE32(vhp->lastMountedVersion) == kHFSJMountVersion)
1669 && (SWAP_BE32(vhp->attributes) & kHFSVolumeJournaledMask)
1670 && !jnl_disable) {
1671
1672 // if we're able to init the journal, mark the mount
1673 // point as journaled.
1674 //
1675 if ((retval = hfs_early_journal_init(hfsmp, vhp, args, embeddedOffset, mdb_offset, mdbp, cred)) == 0) {
1676 vfs_setflags(mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
1677 } else {
1678 if (retval == EROFS) {
1679 // EROFS is a special error code that means the volume has an external
1680 // journal which we couldn't find. in that case we do not want to
1681 // rewrite the volume header - we'll just refuse to mount the volume.
1682 if (HFS_MOUNT_DEBUG) {
1683 printf("hfs_mountfs: hfs_early_journal_init indicated external jnl \n");
1684 }
1685 retval = EINVAL;
1686 goto error_exit;
1687 }
1688
1689 // if the journal failed to open, then set the lastMountedVersion
1690 // to be "FSK!" which fsck_hfs will see and force the fsck instead
1691 // of just bailing out because the volume is journaled.
1692 if (!ronly) {
1693 if (HFS_MOUNT_DEBUG) {
1694 printf("hfs_mountfs: hfs_early_journal_init failed, setting to FSK \n");
1695 }
1696
1697 HFSPlusVolumeHeader *jvhp;
1698
1699 hfsmp->hfs_flags |= HFS_NEED_JNL_RESET;
1700
1701 if (mdb_offset == 0) {
1702 mdb_offset = (daddr64_t)((embeddedOffset / log_blksize) + HFS_PRI_SECTOR(log_blksize));
1703 }
1704
1705 bp = NULL;
1706 retval = (int)buf_meta_bread(devvp,
1707 HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
1708 phys_blksize, cred, &bp);
1709 if (retval == 0) {
1710 jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize));
1711
1712 if (SWAP_BE16(jvhp->signature) == kHFSPlusSigWord || SWAP_BE16(jvhp->signature) == kHFSXSigWord) {
1713 printf ("hfs(1): Journal replay fail. Writing lastMountVersion as FSK!\n");
1714 jvhp->lastMountedVersion = SWAP_BE32(kFSKMountVersion);
1715 buf_bwrite(bp);
1716 } else {
1717 buf_brelse(bp);
1718 }
1719 bp = NULL;
1720 } else if (bp) {
1721 buf_brelse(bp);
1722 // clear this so the error exit path won't try to use it
1723 bp = NULL;
1724 }
1725 }
1726
1727 // if this isn't the root device just bail out.
1728 // If it is the root device we just continue on
1729 // in the hopes that fsck_hfs will be able to
1730 // fix any damage that exists on the volume.
1731 if ( !(vfs_flags(mp) & MNT_ROOTFS)) {
1732 if (HFS_MOUNT_DEBUG) {
1733 printf("hfs_mountfs: hfs_early_journal_init failed, erroring out \n");
1734 }
1735 retval = EINVAL;
1736 goto error_exit;
1737 }
1738 }
1739 }
1740 // XXXdbg
1741
1742 /* Either the journal is replayed successfully, or there
1743 * was nothing to replay, or no journal exists. In any case,
1744 * return success.
1745 */
1746 if (journal_replay_only) {
1747 retval = 0;
1748 goto error_exit;
1749 }
1750
1751 (void) hfs_getconverter(0, &hfsmp->hfs_get_unicode, &hfsmp->hfs_get_hfsname);
1752
1753 retval = hfs_MountHFSPlusVolume(hfsmp, vhp, embeddedOffset, disksize, p, args, cred);
1754 /*
1755 * If the backend didn't like our physical blocksize
1756 * then retry with physical blocksize of 512.
1757 */
1758 if ((retval == ENXIO) && (log_blksize > 512) && (log_blksize != minblksize)) {
1759 printf("hfs_mountfs: could not use physical block size "
1760 "(%d) switching to 512\n", log_blksize);
1761 log_blksize = 512;
1762 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context)) {
1763 if (HFS_MOUNT_DEBUG) {
1764 printf("hfs_mountfs: DKIOCSETBLOCKSIZE (4) failed \n");
1765 }
1766 retval = ENXIO;
1767 goto error_exit;
1768 }
1769 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) {
1770 if (HFS_MOUNT_DEBUG) {
1771 printf("hfs_mountfs: DKIOCGETBLOCKCOUNT (4) failed \n");
1772 }
1773 retval = ENXIO;
1774 goto error_exit;
1775 }
1776 devvp->v_specsize = log_blksize;
1777 /* Note: relative block count adjustment (in case this is an embedded volume). */
1778 hfsmp->hfs_logical_block_count *= hfsmp->hfs_logical_block_size / log_blksize;
1779 hfsmp->hfs_logical_block_size = log_blksize;
1780 hfsmp->hfs_log_per_phys = hfsmp->hfs_physical_block_size / log_blksize;
1781
1782 if (hfsmp->jnl && hfsmp->jvp == devvp) {
1783 // close and re-open this with the new block size
1784 journal_close(hfsmp->jnl);
1785 hfsmp->jnl = NULL;
1786 if (hfs_early_journal_init(hfsmp, vhp, args, embeddedOffset, mdb_offset, mdbp, cred) == 0) {
1787 vfs_setflags(mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
1788 } else {
1789 // if the journal failed to open, then set the lastMountedVersion
1790 // to be "FSK!" which fsck_hfs will see and force the fsck instead
1791 // of just bailing out because the volume is journaled.
1792 if (!ronly) {
1793 if (HFS_MOUNT_DEBUG) {
1794 printf("hfs_mountfs: hfs_early_journal_init (2) resetting.. \n");
1795 }
1796 HFSPlusVolumeHeader *jvhp;
1797
1798 hfsmp->hfs_flags |= HFS_NEED_JNL_RESET;
1799
1800 if (mdb_offset == 0) {
1801 mdb_offset = (daddr64_t)((embeddedOffset / log_blksize) + HFS_PRI_SECTOR(log_blksize));
1802 }
1803
1804 bp = NULL;
1805 retval = (int)buf_meta_bread(devvp, HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
1806 phys_blksize, cred, &bp);
1807 if (retval == 0) {
1808 jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize));
1809
1810 if (SWAP_BE16(jvhp->signature) == kHFSPlusSigWord || SWAP_BE16(jvhp->signature) == kHFSXSigWord) {
1811 printf ("hfs(2): Journal replay fail. Writing lastMountVersion as FSK!\n");
1812 jvhp->lastMountedVersion = SWAP_BE32(kFSKMountVersion);
1813 buf_bwrite(bp);
1814 } else {
1815 buf_brelse(bp);
1816 }
1817 bp = NULL;
1818 } else if (bp) {
1819 buf_brelse(bp);
1820 // clear this so the error exit path won't try to use it
1821 bp = NULL;
1822 }
1823 }
1824
1825 // if this isn't the root device just bail out.
1826 // If it is the root device we just continue on
1827 // in the hopes that fsck_hfs will be able to
1828 // fix any damage that exists on the volume.
1829 if ( !(vfs_flags(mp) & MNT_ROOTFS)) {
1830 if (HFS_MOUNT_DEBUG) {
1831 printf("hfs_mountfs: hfs_early_journal_init (2) failed \n");
1832 }
1833 retval = EINVAL;
1834 goto error_exit;
1835 }
1836 }
1837 }
1838
1839 /* Try again with a smaller block size... */
1840 retval = hfs_MountHFSPlusVolume(hfsmp, vhp, embeddedOffset, disksize, p, args, cred);
1841 if (retval && HFS_MOUNT_DEBUG) {
1842 printf("hfs_MountHFSPlusVolume (late) returned %d\n",retval);
1843 }
1844 }
1845 if (retval)
1846 (void) hfs_relconverter(0);
1847 }
1848
1849 // save off a snapshot of the mtime from the previous mount
1850 // (for matador).
1851 hfsmp->hfs_last_mounted_mtime = hfsmp->hfs_mtime;
1852
1853 if ( retval ) {
1854 if (HFS_MOUNT_DEBUG) {
1855 printf("hfs_mountfs: encountered failure %d \n", retval);
1856 }
1857 goto error_exit;
1858 }
1859
1860 mp->mnt_vfsstat.f_fsid.val[0] = (long)dev;
1861 mp->mnt_vfsstat.f_fsid.val[1] = vfs_typenum(mp);
1862 vfs_setmaxsymlen(mp, 0);
1863
1864 mp->mnt_vtable->vfc_vfsflags |= VFC_VFSNATIVEXATTR;
1865 #if NAMEDSTREAMS
1866 mp->mnt_kern_flag |= MNTK_NAMED_STREAMS;
1867 #endif
1868 if (!(hfsmp->hfs_flags & HFS_STANDARD)) {
1869 /* Tell VFS that we support directory hard links. */
1870 mp->mnt_vtable->vfc_vfsflags |= VFC_VFSDIRLINKS;
1871 } else {
1872 /* HFS standard doesn't support extended readdir! */
1873 mount_set_noreaddirext (mp);
1874 }
1875
1876 if (args) {
1877 /*
1878 * Set the free space warning levels for a non-root volume:
1879 *
1880 * Set the "danger" limit to 1% of the volume size or 100MB, whichever
1881 * is less. Set the "warning" limit to 2% of the volume size or 150MB,
1882 * whichever is less. And last, set the "desired" freespace level to
1883 * to 3% of the volume size or 200MB, whichever is less.
1884 */
1885 hfsmp->hfs_freespace_notify_dangerlimit =
1886 MIN(HFS_VERYLOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize,
1887 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_VERYLOWDISKTRIGGERFRACTION);
1888 hfsmp->hfs_freespace_notify_warninglimit =
1889 MIN(HFS_LOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize,
1890 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_LOWDISKTRIGGERFRACTION);
1891 hfsmp->hfs_freespace_notify_desiredlevel =
1892 MIN(HFS_LOWDISKSHUTOFFLEVEL / HFSTOVCB(hfsmp)->blockSize,
1893 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_LOWDISKSHUTOFFFRACTION);
1894 } else {
1895 /*
1896 * Set the free space warning levels for the root volume:
1897 *
1898 * Set the "danger" limit to 5% of the volume size or 512MB, whichever
1899 * is less. Set the "warning" limit to 10% of the volume size or 1GB,
1900 * whichever is less. And last, set the "desired" freespace level to
1901 * to 11% of the volume size or 1.25GB, whichever is less.
1902 */
1903 hfsmp->hfs_freespace_notify_dangerlimit =
1904 MIN(HFS_ROOTVERYLOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize,
1905 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_ROOTVERYLOWDISKTRIGGERFRACTION);
1906 hfsmp->hfs_freespace_notify_warninglimit =
1907 MIN(HFS_ROOTLOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize,
1908 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_ROOTLOWDISKTRIGGERFRACTION);
1909 hfsmp->hfs_freespace_notify_desiredlevel =
1910 MIN(HFS_ROOTLOWDISKSHUTOFFLEVEL / HFSTOVCB(hfsmp)->blockSize,
1911 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_ROOTLOWDISKSHUTOFFFRACTION);
1912 };
1913
1914 /* Check if the file system exists on virtual device, like disk image */
1915 if (VNOP_IOCTL(devvp, DKIOCISVIRTUAL, (caddr_t)&isvirtual, 0, context) == 0) {
1916 if (isvirtual) {
1917 hfsmp->hfs_flags |= HFS_VIRTUAL_DEVICE;
1918 }
1919 }
1920
1921 /* do not allow ejectability checks on the root device */
1922 if (isroot == 0) {
1923 if ((hfsmp->hfs_flags & HFS_VIRTUAL_DEVICE) == 0 &&
1924 IOBSDIsMediaEjectable(mp->mnt_vfsstat.f_mntfromname)) {
1925 hfsmp->hfs_max_pending_io = 4096*1024; // a reasonable value to start with.
1926 hfsmp->hfs_syncer = thread_call_allocate(hfs_syncer, hfsmp);
1927 if (hfsmp->hfs_syncer == NULL) {
1928 printf("hfs: failed to allocate syncer thread callback for %s (%s)\n",
1929 mp->mnt_vfsstat.f_mntfromname, mp->mnt_vfsstat.f_mntonname);
1930 }
1931 }
1932 }
1933
1934 #if CONFIG_HFS_ALLOC_RBTREE
1935 /*
1936 * We spawn a thread to create the pair of red-black trees for this volume.
1937 * However, in so doing, we must be careful to ensure that if this thread is still
1938 * running after mount has finished, it doesn't interfere with an unmount. Specifically,
1939 * we'll need to set a bit that indicates we're in progress building the trees here.
1940 * Unmount will check for this bit, and then if it's set, mark a corresponding bit that
1941 * notifies the tree generation code that an unmount is waiting. Also mark the bit that
1942 * indicates the tree is live and operating.
1943 *
1944 * Only do this if we're operating on a read-write mount (we wouldn't care for read-only).
1945 */
1946
1947 if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) {
1948 hfsmp->extent_tree_flags |= (HFS_ALLOC_TREEBUILD_INFLIGHT | HFS_ALLOC_RB_ENABLED);
1949
1950 /* Initialize EOF counter so that the thread can assume it started at initial values */
1951 hfsmp->offset_block_end = 0;
1952 InitTree(hfsmp);
1953
1954 kernel_thread_start ((thread_continue_t) hfs_initialize_allocator , hfsmp, &allocator_thread);
1955 thread_deallocate(allocator_thread);
1956 }
1957
1958 #endif
1959
1960 /*
1961 * Start looking for free space to drop below this level and generate a
1962 * warning immediately if needed:
1963 */
1964 hfsmp->hfs_notification_conditions = 0;
1965 hfs_generate_volume_notifications(hfsmp);
1966
1967 if (ronly == 0) {
1968 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
1969 }
1970 FREE(mdbp, M_TEMP);
1971 return (0);
1972
1973 error_exit:
1974 if (bp)
1975 buf_brelse(bp);
1976 if (mdbp)
1977 FREE(mdbp, M_TEMP);
1978
1979 if (hfsmp && hfsmp->jvp && hfsmp->jvp != hfsmp->hfs_devvp) {
1980 vnode_clearmountedon(hfsmp->jvp);
1981 (void)VNOP_CLOSE(hfsmp->jvp, ronly ? FREAD : FREAD|FWRITE, vfs_context_kernel());
1982 hfsmp->jvp = NULL;
1983 }
1984 if (hfsmp) {
1985 if (hfsmp->hfs_devvp) {
1986 vnode_rele(hfsmp->hfs_devvp);
1987 }
1988 hfs_delete_chash(hfsmp);
1989
1990 FREE(hfsmp, M_HFSMNT);
1991 vfs_setfsprivate(mp, NULL);
1992 }
1993 return (retval);
1994 }
1995
1996
1997 /*
1998 * Make a filesystem operational.
1999 * Nothing to do at the moment.
2000 */
2001 /* ARGSUSED */
2002 static int
2003 hfs_start(__unused struct mount *mp, __unused int flags, __unused vfs_context_t context)
2004 {
2005 return (0);
2006 }
2007
2008
2009 /*
2010 * unmount system call
2011 */
/*
 * hfs_unmount
 *
 * VFS unmount entry point for HFS.  Flushes all open files, stops the
 * background syncer, tears down allocator state, writes back the B-trees,
 * volume bitmap and volume header, closes the journal, and frees the
 * hfsmount.
 *
 * With MNT_FORCE set, flush failures are ignored and teardown proceeds
 * anyway; otherwise the first flush failure aborts the unmount and the
 * error is returned to the caller.
 */
int
hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context)
{
	struct proc *p = vfs_context_proc(context);
	struct hfsmount *hfsmp = VFSTOHFS(mp);
	int retval = E_NONE;
	int flags;		/* flags handed to hfs_flushfiles() */
	int force;		/* non-zero for a forced (MNT_FORCE) unmount */
	int started_tr = 0;	/* non-zero once a journal transaction is open */
	int rb_used = 0;	/* non-zero if the red-black allocator tree was live */

	flags = 0;
	force = 0;
	if (mntflags & MNT_FORCE) {
		flags |= FORCECLOSE;
		force = 1;
	}

	if ((retval = hfs_flushfiles(mp, flags, p)) && !force)
		return (retval);

	if (hfsmp->hfs_flags & HFS_METADATA_ZONE)
		(void) hfs_recording_suspend(hfsmp);

	/*
	 * Cancel any pending timers for this volume.  Then wait for any timers
	 * which have fired, but whose callbacks have not yet completed.
	 */
	if (hfsmp->hfs_syncer)
	{
		struct timespec ts = {0, 100000000};	/* 0.1 seconds */

		/*
		 * Cancel any timers that have been scheduled, but have not
		 * fired yet.  NOTE: The kernel considers a timer complete as
		 * soon as it starts your callback, so the kernel does not
		 * keep track of the number of callbacks in progress.
		 */
		if (thread_call_cancel(hfsmp->hfs_syncer))
			OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_incomplete);
		thread_call_free(hfsmp->hfs_syncer);
		hfsmp->hfs_syncer = NULL;

		/*
		 * This waits for all of the callbacks that were entered before
		 * we did thread_call_cancel above, but have not completed yet.
		 */
		while(hfsmp->hfs_sync_incomplete > 0)
		{
			/* No mutex is passed; sleep on the counter's address with a
			 * 0.1s timeout so completion is re-checked periodically. */
			msleep((caddr_t)&hfsmp->hfs_sync_incomplete, NULL, PWAIT, "hfs_unmount", &ts);
		}

		if (hfsmp->hfs_sync_incomplete < 0)
			panic("hfs_unmount: pm_sync_incomplete underflow!\n");
	}

#if CONFIG_HFS_ALLOC_RBTREE
	rb_used = hfs_teardown_allocator(hfsmp);
#endif

	/*
	 * Flush out the b-trees, volume bitmap and Volume Header
	 */
	if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) {
		retval = hfs_start_transaction(hfsmp);
		if (retval == 0) {
			started_tr = 1;
		} else if (!force) {
			goto err_exit;
		}

		if (hfsmp->hfs_startup_vp) {
			(void) hfs_lock(VTOC(hfsmp->hfs_startup_vp), HFS_EXCLUSIVE_LOCK);
			retval = hfs_fsync(hfsmp->hfs_startup_vp, MNT_WAIT, 0, p);
			hfs_unlock(VTOC(hfsmp->hfs_startup_vp));
			if (retval && !force)
				goto err_exit;
		}

		if (hfsmp->hfs_attribute_vp) {
			(void) hfs_lock(VTOC(hfsmp->hfs_attribute_vp), HFS_EXCLUSIVE_LOCK);
			retval = hfs_fsync(hfsmp->hfs_attribute_vp, MNT_WAIT, 0, p);
			hfs_unlock(VTOC(hfsmp->hfs_attribute_vp));
			if (retval && !force)
				goto err_exit;
		}

		/* Catalog and extents B-trees always exist; flush unconditionally. */
		(void) hfs_lock(VTOC(hfsmp->hfs_catalog_vp), HFS_EXCLUSIVE_LOCK);
		retval = hfs_fsync(hfsmp->hfs_catalog_vp, MNT_WAIT, 0, p);
		hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
		if (retval && !force)
			goto err_exit;

		(void) hfs_lock(VTOC(hfsmp->hfs_extents_vp), HFS_EXCLUSIVE_LOCK);
		retval = hfs_fsync(hfsmp->hfs_extents_vp, MNT_WAIT, 0, p);
		hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
		if (retval && !force)
			goto err_exit;

		if (hfsmp->hfs_allocation_vp) {
			(void) hfs_lock(VTOC(hfsmp->hfs_allocation_vp), HFS_EXCLUSIVE_LOCK);
			retval = hfs_fsync(hfsmp->hfs_allocation_vp, MNT_WAIT, 0, p);
			hfs_unlock(VTOC(hfsmp->hfs_allocation_vp));
			if (retval && !force)
				goto err_exit;
		}

		if (hfsmp->hfc_filevp && vnode_issystem(hfsmp->hfc_filevp)) {
			retval = hfs_fsync(hfsmp->hfc_filevp, MNT_WAIT, 0, p);
			if (retval && !force)
				goto err_exit;
		}

		/* If runtime corruption was detected, indicate that the volume
		 * was not unmounted cleanly.
		 */
		if (hfsmp->vcbAtrb & kHFSVolumeInconsistentMask) {
			HFSTOVCB(hfsmp)->vcbAtrb &= ~kHFSVolumeUnmountedMask;
		} else {
			HFSTOVCB(hfsmp)->vcbAtrb |= kHFSVolumeUnmountedMask;
		}


		if (rb_used) {
			/* If the rb-tree was live, just set min_start to 0 */
			hfsmp->nextAllocation = 0;
		}
		else {
			if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
				int i;
				u_int32_t min_start = hfsmp->totalBlocks;

				// set the nextAllocation pointer to the smallest free block number
				// we've seen so on the next mount we won't rescan unnecessarily
				lck_spin_lock(&hfsmp->vcbFreeExtLock);
				for(i=0; i < (int)hfsmp->vcbFreeExtCnt; i++) {
					if (hfsmp->vcbFreeExt[i].startBlock < min_start) {
						min_start = hfsmp->vcbFreeExt[i].startBlock;
					}
				}
				lck_spin_unlock(&hfsmp->vcbFreeExtLock);
				if (min_start < hfsmp->nextAllocation) {
					hfsmp->nextAllocation = min_start;
				}
			}
		}


		retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
		if (retval) {
			/* Header write failed: make sure the "cleanly unmounted"
			 * bit is not left set on disk. */
			HFSTOVCB(hfsmp)->vcbAtrb &= ~kHFSVolumeUnmountedMask;
			if (!force)
				goto err_exit;	/* could not flush everything */
		}

		if (started_tr) {
			hfs_end_transaction(hfsmp);
			started_tr = 0;
		}
	}

	if (hfsmp->jnl) {
		hfs_journal_flush(hfsmp, FALSE);
	}

	/*
	 * Invalidate our caches and release metadata vnodes
	 */
	(void) hfsUnmount(hfsmp, p);

	if (HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord)
		(void) hfs_relconverter(hfsmp->hfs_encoding);

	// XXXdbg
	if (hfsmp->jnl) {
		journal_close(hfsmp->jnl);
		hfsmp->jnl = NULL;
	}

	VNOP_FSYNC(hfsmp->hfs_devvp, MNT_WAIT, context);

	/* Release the separate journal device, if one was in use. */
	if (hfsmp->jvp && hfsmp->jvp != hfsmp->hfs_devvp) {
		vnode_clearmountedon(hfsmp->jvp);
		retval = VNOP_CLOSE(hfsmp->jvp,
		                    hfsmp->hfs_flags & HFS_READ_ONLY ? FREAD : FREAD|FWRITE,
		                    vfs_context_kernel());
		vnode_put(hfsmp->jvp);
		hfsmp->jvp = NULL;
	}
	// XXXdbg

	/*
	 * Last chance to dump unreferenced system files.
	 */
	(void) vflush(mp, NULLVP, FORCECLOSE);

#if HFS_SPARSE_DEV
	/* Drop our reference on the backing fs (if any). */
	if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) && hfsmp->hfs_backingfs_rootvp) {
		struct vnode * tmpvp;

		hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
		tmpvp = hfsmp->hfs_backingfs_rootvp;
		hfsmp->hfs_backingfs_rootvp = NULLVP;
		vnode_rele(tmpvp);
	}
#endif /* HFS_SPARSE_DEV */
	lck_mtx_destroy(&hfsmp->hfc_mutex, hfs_mutex_group);
	lck_spin_destroy(&hfsmp->vcbFreeExtLock, hfs_spinlock_group);
	vnode_rele(hfsmp->hfs_devvp);

	hfs_delete_chash(hfsmp);
	FREE(hfsmp, M_HFSMNT);

	return (0);

  err_exit:
	if (started_tr) {
		hfs_end_transaction(hfsmp);
	}
	return retval;
}
2234
2235
2236 /*
2237 * Return the root of a filesystem.
2238 */
2239 static int
2240 hfs_vfs_root(struct mount *mp, struct vnode **vpp, __unused vfs_context_t context)
2241 {
2242 return hfs_vget(VFSTOHFS(mp), (cnid_t)kHFSRootFolderID, vpp, 1, 0);
2243 }
2244
2245
2246 /*
2247 * Do operations associated with quotas
2248 */
#if !QUOTA
/* Quota support compiled out: reject every quota operation. */
static int
hfs_quotactl(__unused struct mount *mp, __unused int cmds, __unused uid_t uid, __unused caddr_t datap, __unused vfs_context_t context)
{
	return (ENOTSUP);
}
#else
/*
 * hfs_quotactl
 *
 * Dispatch a quota control request to the matching hfs_quota routine.
 * 'cmds' packs both the command (upper bits, extracted via SUBCMDSHIFT)
 * and the quota type (lower bits, masked with SUBCMDMASK).  Superuser
 * privilege is required except for Q_SYNC, Q_QUOTASTAT, and Q_GETQUOTA
 * on the caller's own uid.
 */
static int
hfs_quotactl(struct mount *mp, int cmds, uid_t uid, caddr_t datap, vfs_context_t context)
{
	struct proc *p = vfs_context_proc(context);
	int cmd, type, error;

	/* uid of ~0 means "the calling user". */
	if (uid == ~0U)
		uid = kauth_cred_getuid(vfs_context_ucred(context));
	cmd = cmds >> SUBCMDSHIFT;

	/* Privilege check: unprivileged callers may only sync, query
	 * quota status, or read their own quota. */
	switch (cmd) {
	case Q_SYNC:
	case Q_QUOTASTAT:
		break;
	case Q_GETQUOTA:
		if (uid == kauth_cred_getuid(vfs_context_ucred(context)))
			break;
		/* fall through */
	default:
		if ( (error = vfs_context_suser(context)) )
			return (error);
	}

	type = cmds & SUBCMDMASK;
	if ((u_int)type >= MAXQUOTAS)
		return (EINVAL);
	/* If the mount is already busy, return success without doing anything. */
	if (vfs_busy(mp, LK_NOWAIT))
		return (0);

	switch (cmd) {

	case Q_QUOTAON:
		error = hfs_quotaon(p, mp, type, datap);
		break;

	case Q_QUOTAOFF:
		error = hfs_quotaoff(p, mp, type);
		break;

	case Q_SETQUOTA:
		error = hfs_setquota(mp, uid, type, datap);
		break;

	case Q_SETUSE:
		error = hfs_setuse(mp, uid, type, datap);
		break;

	case Q_GETQUOTA:
		error = hfs_getquota(mp, uid, type, datap);
		break;

	case Q_SYNC:
		error = hfs_qsync(mp);
		break;

	case Q_QUOTASTAT:
		error = hfs_quotastat(mp, type, datap);
		break;

	default:
		error = EINVAL;
		break;
	}
	vfs_unbusy(mp);

	return (error);
}
#endif /* QUOTA */
2324
2325 /* Subtype is composite of bits */
2326 #define HFS_SUBTYPE_JOURNALED 0x01
2327 #define HFS_SUBTYPE_CASESENSITIVE 0x02
2328 /* bits 2 - 6 reserved */
2329 #define HFS_SUBTYPE_STANDARDHFS 0x80
2330
2331 /*
2332 * Get file system statistics.
2333 */
2334 int
2335 hfs_statfs(struct mount *mp, register struct vfsstatfs *sbp, __unused vfs_context_t context)
2336 {
2337 ExtendedVCB *vcb = VFSTOVCB(mp);
2338 struct hfsmount *hfsmp = VFSTOHFS(mp);
2339 u_int32_t freeCNIDs;
2340 u_int16_t subtype = 0;
2341
2342 freeCNIDs = (u_int32_t)0xFFFFFFFF - (u_int32_t)vcb->vcbNxtCNID;
2343
2344 sbp->f_bsize = (u_int32_t)vcb->blockSize;
2345 sbp->f_iosize = (size_t)cluster_max_io_size(mp, 0);
2346 sbp->f_blocks = (u_int64_t)((u_int32_t)vcb->totalBlocks);
2347 sbp->f_bfree = (u_int64_t)((u_int32_t )hfs_freeblks(hfsmp, 0));
2348 sbp->f_bavail = (u_int64_t)((u_int32_t )hfs_freeblks(hfsmp, 1));
2349 sbp->f_files = (u_int64_t)((u_int32_t )(vcb->totalBlocks - 2)); /* max files is constrained by total blocks */
2350 sbp->f_ffree = (u_int64_t)((u_int32_t )(MIN(freeCNIDs, sbp->f_bavail)));
2351
2352 /*
2353 * Subtypes (flavors) for HFS
2354 * 0: Mac OS Extended
2355 * 1: Mac OS Extended (Journaled)
2356 * 2: Mac OS Extended (Case Sensitive)
2357 * 3: Mac OS Extended (Case Sensitive, Journaled)
2358 * 4 - 127: Reserved
2359 * 128: Mac OS Standard
2360 *
2361 */
2362 if (hfsmp->hfs_flags & HFS_STANDARD) {
2363 subtype = HFS_SUBTYPE_STANDARDHFS;
2364 } else /* HFS Plus */ {
2365 if (hfsmp->jnl)
2366 subtype |= HFS_SUBTYPE_JOURNALED;
2367 if (hfsmp->hfs_flags & HFS_CASE_SENSITIVE)
2368 subtype |= HFS_SUBTYPE_CASESENSITIVE;
2369 }
2370 sbp->f_fssubtype = subtype;
2371
2372 return (0);
2373 }
2374
2375
2376 //
2377 // XXXdbg -- this is a callback to be used by the journal to
2378 // get meta data blocks flushed out to disk.
2379 //
2380 // XXXdbg -- be smarter and don't flush *every* block on each
2381 // call. try to only flush some so we don't wind up
2382 // being too synchronous.
2383 //
__private_extern__
void
hfs_sync_metadata(void *arg)
{
	/* Journal callback (see comment above): flush the primary and
	 * alternate volume headers to disk if their buffers are dirty. */
	struct mount *mp = (struct mount *)arg;
	struct hfsmount *hfsmp;
	ExtendedVCB *vcb;
	buf_t bp;
	int retval;
	daddr64_t priIDSector;
	hfsmp = VFSTOHFS(mp);
	vcb = HFSTOVCB(hfsmp);

	// now make sure the super block is flushed
	priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
				  HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size));

	retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
			HFS_PHYSBLK_ROUNDDOWN(priIDSector, hfsmp->hfs_log_per_phys),
			hfsmp->hfs_physical_block_size, NOCRED, &bp);
	if ((retval != 0 ) && (retval != ENXIO)) {
		/* ENXIO is deliberately not logged; anything else is unexpected. */
		printf("hfs_sync_metadata: can't read volume header at %d! (retval 0x%x)\n",
		       (int)priIDSector, retval);
	}

	/* Write the buffer only when it is dirty (B_DELWRI) and not locked;
	 * otherwise release it back to the cache untouched. */
	if (retval == 0 && ((buf_flags(bp) & (B_DELWRI | B_LOCKED)) == B_DELWRI)) {
		buf_bwrite(bp);
	} else if (bp) {
		buf_brelse(bp);
	}

	// the alternate super block...
	// XXXdbg - we probably don't need to do this each and every time.
	//          hfs_btreeio.c:FlushAlternate() should flag when it was
	//          written...
	if (hfsmp->hfs_alt_id_sector) {
		retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
				HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys),
				hfsmp->hfs_physical_block_size, NOCRED, &bp);
		/* Same dirty-and-unlocked test as the primary header above. */
		if (retval == 0 && ((buf_flags(bp) & (B_DELWRI | B_LOCKED)) == B_DELWRI)) {
			buf_bwrite(bp);
		} else if (bp) {
			buf_brelse(bp);
		}
	}
}
2430
2431
/*
 * Argument bundle passed from hfs_sync() to hfs_sync_callback()
 * through vnode_iterate().
 */
struct hfs_sync_cargs {
	kauth_cred_t cred;	/* caller's credential (not read by the callback) */
	struct proc *p;		/* process on whose behalf we sync; forwarded to hfs_fsync() */
	int waitfor;		/* MNT_WAIT / MNT_NOWAIT, forwarded to hfs_fsync() */
	int error;		/* out: last error returned by hfs_fsync(), if any */
};
2438
2439
2440 static int
2441 hfs_sync_callback(struct vnode *vp, void *cargs)
2442 {
2443 struct cnode *cp;
2444 struct hfs_sync_cargs *args;
2445 int error;
2446
2447 args = (struct hfs_sync_cargs *)cargs;
2448
2449 if (hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK) != 0) {
2450 return (VNODE_RETURNED);
2451 }
2452 cp = VTOC(vp);
2453
2454 if ((cp->c_flag & C_MODIFIED) ||
2455 (cp->c_touch_acctime | cp->c_touch_chgtime | cp->c_touch_modtime) ||
2456 vnode_hasdirtyblks(vp)) {
2457 error = hfs_fsync(vp, args->waitfor, 0, args->p);
2458
2459 if (error)
2460 args->error = error;
2461 }
2462 hfs_unlock(cp);
2463 return (VNODE_RETURNED);
2464 }
2465
2466
2467
2468 /*
2469 * Go through the disk queues to initiate sandbagged IO;
2470 * go through the inodes to write those that have been modified;
2471 * initiate the writing of the super block if it has been modified.
2472 *
2473 * Note: we are always called with the filesystem marked `MPBUSY'.
2474 */
2475 int
2476 hfs_sync(struct mount *mp, int waitfor, vfs_context_t context)
2477 {
2478 struct proc *p = vfs_context_proc(context);
2479 struct cnode *cp;
2480 struct hfsmount *hfsmp;
2481 ExtendedVCB *vcb;
2482 struct vnode *meta_vp[4];
2483 int i;
2484 int error, allerror = 0;
2485 struct hfs_sync_cargs args;
2486
2487 hfsmp = VFSTOHFS(mp);
2488
2489 /*
2490 * hfs_changefs might be manipulating vnodes so back off
2491 */
2492 if (hfsmp->hfs_flags & HFS_IN_CHANGEFS)
2493 return (0);
2494
2495 if (hfsmp->hfs_flags & HFS_READ_ONLY)
2496 return (EROFS);
2497
2498 /* skip over frozen volumes */
2499 if (!lck_rw_try_lock_shared(&hfsmp->hfs_insync))
2500 return 0;
2501
2502 args.cred = kauth_cred_get();
2503 args.waitfor = waitfor;
2504 args.p = p;
2505 args.error = 0;
2506 /*
2507 * hfs_sync_callback will be called for each vnode
2508 * hung off of this mount point... the vnode will be
2509 * properly referenced and unreferenced around the callback
2510 */
2511 vnode_iterate(mp, 0, hfs_sync_callback, (void *)&args);
2512
2513 if (args.error)
2514 allerror = args.error;
2515
2516 vcb = HFSTOVCB(hfsmp);
2517
2518 meta_vp[0] = vcb->extentsRefNum;
2519 meta_vp[1] = vcb->catalogRefNum;
2520 meta_vp[2] = vcb->allocationsRefNum; /* This is NULL for standard HFS */
2521 meta_vp[3] = hfsmp->hfs_attribute_vp; /* Optional file */
2522
2523 /* Now sync our three metadata files */
2524 for (i = 0; i < 4; ++i) {
2525 struct vnode *btvp;
2526
2527 btvp = meta_vp[i];;
2528 if ((btvp==0) || (vnode_mount(btvp) != mp))
2529 continue;
2530
2531 /* XXX use hfs_systemfile_lock instead ? */
2532 (void) hfs_lock(VTOC(btvp), HFS_EXCLUSIVE_LOCK);
2533 cp = VTOC(btvp);
2534
2535 if (((cp->c_flag & C_MODIFIED) == 0) &&
2536 (cp->c_touch_acctime == 0) &&
2537 (cp->c_touch_chgtime == 0) &&
2538 (cp->c_touch_modtime == 0) &&
2539 vnode_hasdirtyblks(btvp) == 0) {
2540 hfs_unlock(VTOC(btvp));
2541 continue;
2542 }
2543 error = vnode_get(btvp);
2544 if (error) {
2545 hfs_unlock(VTOC(btvp));
2546 continue;
2547 }
2548 if ((error = hfs_fsync(btvp, waitfor, 0, p)))
2549 allerror = error;
2550
2551 hfs_unlock(cp);
2552 vnode_put(btvp);
2553 };
2554
2555 /*
2556 * Force stale file system control information to be flushed.
2557 */
2558 if (vcb->vcbSigWord == kHFSSigWord) {
2559 if ((error = VNOP_FSYNC(hfsmp->hfs_devvp, waitfor, context))) {
2560 allerror = error;
2561 }
2562 }
2563 #if QUOTA
2564 hfs_qsync(mp);
2565 #endif /* QUOTA */
2566
2567 hfs_hotfilesync(hfsmp, vfs_context_kernel());
2568
2569 /*
2570 * Write back modified superblock.
2571 */
2572 if (IsVCBDirty(vcb)) {
2573 error = hfs_flushvolumeheader(hfsmp, waitfor, 0);
2574 if (error)
2575 allerror = error;
2576 }
2577
2578 if (hfsmp->jnl) {
2579 hfs_journal_flush(hfsmp, FALSE);
2580 }
2581
2582 {
2583 clock_sec_t secs;
2584 clock_usec_t usecs;
2585 uint64_t now;
2586
2587 clock_get_calendar_microtime(&secs, &usecs);
2588 now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs;
2589 hfsmp->hfs_last_sync_time = now;
2590 }
2591
2592 lck_rw_unlock_shared(&hfsmp->hfs_insync);
2593 return (allerror);
2594 }
2595
2596
2597 /*
2598 * File handle to vnode
2599 *
2600 * Have to be really careful about stale file handles:
2601 * - check that the cnode id is valid
2602 * - call hfs_vget() to get the locked cnode
2603 * - check for an unallocated cnode (i_mode == 0)
2604 * - check that the given client host has export rights and return
2605 * those rights via. exflagsp and credanonp
2606 */
2607 static int
2608 hfs_fhtovp(struct mount *mp, int fhlen, unsigned char *fhp, struct vnode **vpp, __unused vfs_context_t context)
2609 {
2610 struct hfsfid *hfsfhp;
2611 struct vnode *nvp;
2612 int result;
2613
2614 *vpp = NULL;
2615 hfsfhp = (struct hfsfid *)fhp;
2616
2617 if (fhlen < (int)sizeof(struct hfsfid))
2618 return (EINVAL);
2619
2620 result = hfs_vget(VFSTOHFS(mp), ntohl(hfsfhp->hfsfid_cnid), &nvp, 0, 0);
2621 if (result) {
2622 if (result == ENOENT)
2623 result = ESTALE;
2624 return result;
2625 }
2626
2627 /*
2628 * We used to use the create time as the gen id of the file handle,
2629 * but it is not static enough because it can change at any point
2630 * via system calls. We still don't have another volume ID or other
2631 * unique identifier to use for a generation ID across reboots that
2632 * persists until the file is removed. Using only the CNID exposes
2633 * us to the potential wrap-around case, but as of 2/2008, it would take
2634 * over 2 months to wrap around if the machine did nothing but allocate
2635 * CNIDs. Using some kind of wrap counter would only be effective if
2636 * each file had the wrap counter associated with it. For now,
2637 * we use only the CNID to identify the file as it's good enough.
2638 */
2639
2640 *vpp = nvp;
2641
2642 hfs_unlock(VTOC(nvp));
2643 return (0);
2644 }
2645
2646
2647 /*
2648 * Vnode pointer to File handle
2649 */
2650 /* ARGSUSED */
2651 static int
2652 hfs_vptofh(struct vnode *vp, int *fhlenp, unsigned char *fhp, __unused vfs_context_t context)
2653 {
2654 struct cnode *cp;
2655 struct hfsfid *hfsfhp;
2656
2657 if (ISHFS(VTOVCB(vp)))
2658 return (ENOTSUP); /* hfs standard is not exportable */
2659
2660 if (*fhlenp < (int)sizeof(struct hfsfid))
2661 return (EOVERFLOW);
2662
2663 cp = VTOC(vp);
2664 hfsfhp = (struct hfsfid *)fhp;
2665 /* only the CNID is used to identify the file now */
2666 hfsfhp->hfsfid_cnid = htonl(cp->c_fileid);
2667 hfsfhp->hfsfid_gen = htonl(cp->c_fileid);
2668 *fhlenp = sizeof(struct hfsfid);
2669
2670 return (0);
2671 }
2672
2673
2674 /*
2675 * Initial HFS filesystems, done only once.
2676 */
2677 static int
2678 hfs_init(__unused struct vfsconf *vfsp)
2679 {
2680 static int done = 0;
2681
2682 if (done)
2683 return (0);
2684 done = 1;
2685 hfs_chashinit();
2686 hfs_converterinit();
2687
2688 BTReserveSetup();
2689
2690
2691 hfs_lock_attr = lck_attr_alloc_init();
2692 hfs_group_attr = lck_grp_attr_alloc_init();
2693 hfs_mutex_group = lck_grp_alloc_init("hfs-mutex", hfs_group_attr);
2694 hfs_rwlock_group = lck_grp_alloc_init("hfs-rwlock", hfs_group_attr);
2695 hfs_spinlock_group = lck_grp_alloc_init("hfs-spinlock", hfs_group_attr);
2696
2697 #if HFS_COMPRESSION
2698 decmpfs_init();
2699 #endif
2700
2701 return (0);
2702 }
2703
2704 static int
2705 hfs_getmountpoint(struct vnode *vp, struct hfsmount **hfsmpp)
2706 {
2707 struct hfsmount * hfsmp;
2708 char fstypename[MFSNAMELEN];
2709
2710 if (vp == NULL)
2711 return (EINVAL);
2712
2713 if (!vnode_isvroot(vp))
2714 return (EINVAL);
2715
2716 vnode_vfsname(vp, fstypename);
2717 if (strncmp(fstypename, "hfs", sizeof(fstypename)) != 0)
2718 return (EINVAL);
2719
2720 hfsmp = VTOHFS(vp);
2721
2722 if (HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord)
2723 return (EINVAL);
2724
2725 *hfsmpp = hfsmp;
2726
2727 return (0);
2728 }
2729
2730 // XXXdbg
2731 #include <sys/filedesc.h>
2732
2733 /*
2734 * HFS filesystem related variables.
2735 */
2736 int
2737 hfs_sysctl(int *name, __unused u_int namelen, user_addr_t oldp, size_t *oldlenp,
2738 user_addr_t newp, size_t newlen, vfs_context_t context)
2739 {
2740 struct proc *p = vfs_context_proc(context);
2741 int error;
2742 struct hfsmount *hfsmp;
2743
2744 /* all sysctl names at this level are terminal */
2745
2746 if (name[0] == HFS_ENCODINGBIAS) {
2747 int bias;
2748
2749 bias = hfs_getencodingbias();
2750 error = sysctl_int(oldp, oldlenp, newp, newlen, &bias);
2751 if (error == 0 && newp)
2752 hfs_setencodingbias(bias);
2753 return (error);
2754
2755 } else if (name[0] == HFS_EXTEND_FS) {
2756 u_int64_t newsize;
2757 vnode_t vp = vfs_context_cwd(context);
2758
2759 if (newp == USER_ADDR_NULL || vp == NULLVP)
2760 return (EINVAL);
2761 if ((error = hfs_getmountpoint(vp, &hfsmp)))
2762 return (error);
2763 error = sysctl_quad(oldp, oldlenp, newp, newlen, (quad_t *)&newsize);
2764 if (error)
2765 return (error);
2766
2767 error = hfs_extendfs(hfsmp, newsize, context);
2768 return (error);
2769
2770 } else if (name[0] == HFS_ENCODINGHINT) {
2771 size_t bufsize;
2772 size_t bytes;
2773 u_int32_t hint;
2774 u_int16_t *unicode_name = NULL;
2775 char *filename = NULL;
2776
2777 if ((newlen <= 0) || (newlen > MAXPATHLEN))
2778 return (EINVAL);
2779
2780 bufsize = MAX(newlen * 3, MAXPATHLEN);
2781 MALLOC(filename, char *, newlen, M_TEMP, M_WAITOK);
2782 if (filename == NULL) {
2783 error = ENOMEM;
2784 goto encodinghint_exit;
2785 }
2786 MALLOC(unicode_name, u_int16_t *, bufsize, M_TEMP, M_WAITOK);
2787 if (filename == NULL) {
2788 error = ENOMEM;
2789 goto encodinghint_exit;
2790 }
2791
2792 error = copyin(newp, (caddr_t)filename, newlen);
2793 if (error == 0) {
2794 error = utf8_decodestr((u_int8_t *)filename, newlen - 1, unicode_name,
2795 &bytes, bufsize, 0, UTF_DECOMPOSED);
2796 if (error == 0) {
2797 hint = hfs_pickencoding(unicode_name, bytes / 2);
2798 error = sysctl_int(oldp, oldlenp, USER_ADDR_NULL, 0, (int32_t *)&hint);
2799 }
2800 }
2801
2802 encodinghint_exit:
2803 if (unicode_name)
2804 FREE(unicode_name, M_TEMP);
2805 if (filename)
2806 FREE(filename, M_TEMP);
2807 return (error);
2808
2809 } else if (name[0] == HFS_ENABLE_JOURNALING) {
2810 // make the file system journaled...
2811 vnode_t vp = vfs_context_cwd(context);
2812 vnode_t jvp;
2813 ExtendedVCB *vcb;
2814 struct cat_attr jnl_attr, jinfo_attr;
2815 struct cat_fork jnl_fork, jinfo_fork;
2816 void *jnl = NULL;
2817 int lockflags;
2818
2819 /* Only root can enable journaling */
2820 if (!is_suser()) {
2821 return (EPERM);
2822 }
2823 if (vp == NULLVP)
2824 return EINVAL;
2825
2826 hfsmp = VTOHFS(vp);
2827 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2828 return EROFS;
2829 }
2830 if (HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord) {
2831 printf("hfs: can't make a plain hfs volume journaled.\n");
2832 return EINVAL;
2833 }
2834
2835 if (hfsmp->jnl) {
2836 printf("hfs: volume @ mp %p is already journaled!\n", vnode_mount(vp));
2837 return EAGAIN;
2838 }
2839
2840 vcb = HFSTOVCB(hfsmp);
2841 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_EXTENTS, HFS_EXCLUSIVE_LOCK);
2842 if (BTHasContiguousNodes(VTOF(vcb->catalogRefNum)) == 0 ||
2843 BTHasContiguousNodes(VTOF(vcb->extentsRefNum)) == 0) {
2844
2845 printf("hfs: volume has a btree w/non-contiguous nodes. can not enable journaling.\n");
2846 hfs_systemfile_unlock(hfsmp, lockflags);
2847 return EINVAL;
2848 }
2849 hfs_systemfile_unlock(hfsmp, lockflags);
2850
2851 // make sure these both exist!
2852 if ( GetFileInfo(vcb, kHFSRootFolderID, ".journal_info_block", &jinfo_attr, &jinfo_fork) == 0
2853 || GetFileInfo(vcb, kHFSRootFolderID, ".journal", &jnl_attr, &jnl_fork) == 0) {
2854
2855 return EINVAL;
2856 }
2857
2858 hfs_sync(hfsmp->hfs_mp, MNT_WAIT, context);
2859
2860 printf("hfs: Initializing the journal (joffset 0x%llx sz 0x%llx)...\n",
2861 (off_t)name[2], (off_t)name[3]);
2862
2863 //
2864 // XXXdbg - note that currently (Sept, 08) hfs_util does not support
2865 // enabling the journal on a separate device so it is safe
2866 // to just copy hfs_devvp here. If hfs_util gets the ability
2867 // to dynamically enable the journal on a separate device then
2868 // we will have to do the same thing as hfs_early_journal_init()
2869 // to locate and open the journal device.
2870 //
2871 jvp = hfsmp->hfs_devvp;
2872 jnl = journal_create(jvp,
2873 (off_t)name[2] * (off_t)HFSTOVCB(hfsmp)->blockSize
2874 + HFSTOVCB(hfsmp)->hfsPlusIOPosOffset,
2875 (off_t)((unsigned)name[3]),
2876 hfsmp->hfs_devvp,
2877 hfsmp->hfs_logical_block_size,
2878 0,
2879 0,
2880 hfs_sync_metadata, hfsmp->hfs_mp);
2881
2882 /*
2883 * Set up the trim callback function so that we can add
2884 * recently freed extents to the free extent cache once
2885 * the transaction that freed them is written to the
2886 * journal on disk.
2887 */
2888 if (jnl)
2889 journal_trim_set_callback(jnl, hfs_trim_callback, hfsmp);
2890
2891 if (jnl == NULL) {
2892 printf("hfs: FAILED to create the journal!\n");
2893 if (jvp && jvp != hfsmp->hfs_devvp) {
2894 vnode_clearmountedon(jvp);
2895 VNOP_CLOSE(jvp, hfsmp->hfs_flags & HFS_READ_ONLY ? FREAD : FREAD|FWRITE, vfs_context_kernel());
2896 }
2897 jvp = NULL;
2898
2899 return EINVAL;
2900 }
2901
2902 hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);
2903
2904 /*
2905 * Flush all dirty metadata buffers.
2906 */
2907 buf_flushdirtyblks(hfsmp->hfs_devvp, TRUE, 0, "hfs_sysctl");
2908 buf_flushdirtyblks(hfsmp->hfs_extents_vp, TRUE, 0, "hfs_sysctl");
2909 buf_flushdirtyblks(hfsmp->hfs_catalog_vp, TRUE, 0, "hfs_sysctl");
2910 buf_flushdirtyblks(hfsmp->hfs_allocation_vp, TRUE, 0, "hfs_sysctl");
2911 if (hfsmp->hfs_attribute_vp)
2912 buf_flushdirtyblks(hfsmp->hfs_attribute_vp, TRUE, 0, "hfs_sysctl");
2913
2914 HFSTOVCB(hfsmp)->vcbJinfoBlock = name[1];
2915 HFSTOVCB(hfsmp)->vcbAtrb |= kHFSVolumeJournaledMask;
2916 hfsmp->jvp = jvp;
2917 hfsmp->jnl = jnl;
2918
2919 // save this off for the hack-y check in hfs_remove()
2920 hfsmp->jnl_start = (u_int32_t)name[2];
2921 hfsmp->jnl_size = (off_t)((unsigned)name[3]);
2922 hfsmp->hfs_jnlinfoblkid = jinfo_attr.ca_fileid;
2923 hfsmp->hfs_jnlfileid = jnl_attr.ca_fileid;
2924
2925 vfs_setflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
2926
2927 hfs_unlock_global (hfsmp);
2928 hfs_flushvolumeheader(hfsmp, MNT_WAIT, 1);
2929
2930 {
2931 fsid_t fsid;
2932
2933 fsid.val[0] = (int32_t)hfsmp->hfs_raw_dev;
2934 fsid.val[1] = (int32_t)vfs_typenum(HFSTOVFS(hfsmp));
2935 vfs_event_signal(&fsid, VQ_UPDATE, (intptr_t)NULL);
2936 }
2937 return 0;
2938 } else if (name[0] == HFS_DISABLE_JOURNALING) {
2939 // clear the journaling bit
2940 vnode_t vp = vfs_context_cwd(context);
2941
2942 /* Only root can disable journaling */
2943 if (!is_suser()) {
2944 return (EPERM);
2945 }
2946 if (vp == NULLVP)
2947 return EINVAL;
2948
2949 hfsmp = VTOHFS(vp);
2950
2951 /*
2952 * Disabling journaling is disallowed on volumes with directory hard links
2953 * because we have not tested the relevant code path.
2954 */
2955 if (hfsmp->hfs_private_attr[DIR_HARDLINKS].ca_entries != 0){
2956 printf("hfs: cannot disable journaling on volumes with directory hardlinks\n");
2957 return EPERM;
2958 }
2959
2960 printf("hfs: disabling journaling for mount @ %p\n", vnode_mount(vp));
2961
2962 hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);
2963
2964 // Lights out for you buddy!
2965 journal_close(hfsmp->jnl);
2966 hfsmp->jnl = NULL;
2967
2968 if (hfsmp->jvp && hfsmp->jvp != hfsmp->hfs_devvp) {
2969 vnode_clearmountedon(hfsmp->jvp);
2970 VNOP_CLOSE(hfsmp->jvp, hfsmp->hfs_flags & HFS_READ_ONLY ? FREAD : FREAD|FWRITE, vfs_context_kernel());
2971 vnode_put(hfsmp->jvp);
2972 }
2973 hfsmp->jvp = NULL;
2974 vfs_clearflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
2975 hfsmp->jnl_start = 0;
2976 hfsmp->hfs_jnlinfoblkid = 0;
2977 hfsmp->hfs_jnlfileid = 0;
2978
2979 HFSTOVCB(hfsmp)->vcbAtrb &= ~kHFSVolumeJournaledMask;
2980
2981 hfs_unlock_global (hfsmp);
2982
2983 hfs_flushvolumeheader(hfsmp, MNT_WAIT, 1);
2984
2985 {
2986 fsid_t fsid;
2987
2988 fsid.val[0] = (int32_t)hfsmp->hfs_raw_dev;
2989 fsid.val[1] = (int32_t)vfs_typenum(HFSTOVFS(hfsmp));
2990 vfs_event_signal(&fsid, VQ_UPDATE, (intptr_t)NULL);
2991 }
2992 return 0;
2993 } else if (name[0] == HFS_GET_JOURNAL_INFO) {
2994 vnode_t vp = vfs_context_cwd(context);
2995 off_t jnl_start, jnl_size;
2996
2997 if (vp == NULLVP)
2998 return EINVAL;
2999
3000 /* 64-bit processes won't work with this sysctl -- can't fit a pointer into an int! */
3001 if (proc_is64bit(current_proc()))
3002 return EINVAL;
3003
3004 hfsmp = VTOHFS(vp);
3005 if (hfsmp->jnl == NULL) {
3006 jnl_start = 0;
3007 jnl_size = 0;
3008 } else {
3009 jnl_start = (off_t)(hfsmp->jnl_start * HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset;
3010 jnl_size = (off_t)hfsmp->jnl_size;
3011 }
3012
3013 if ((error = copyout((caddr_t)&jnl_start, CAST_USER_ADDR_T(name[1]), sizeof(off_t))) != 0) {
3014 return error;
3015 }
3016 if ((error = copyout((caddr_t)&jnl_size, CAST_USER_ADDR_T(name[2]), sizeof(off_t))) != 0) {
3017 return error;
3018 }
3019
3020 return 0;
3021 } else if (name[0] == HFS_SET_PKG_EXTENSIONS) {
3022
3023 return set_package_extensions_table((user_addr_t)((unsigned)name[1]), name[2], name[3]);
3024
3025 } else if (name[0] == VFS_CTL_QUERY) {
3026 struct sysctl_req *req;
3027 union union_vfsidctl vc;
3028 struct mount *mp;
3029 struct vfsquery vq;
3030
3031 req = CAST_DOWN(struct sysctl_req *, oldp); /* we're new style vfs sysctl. */
3032
3033 error = SYSCTL_IN(req, &vc, proc_is64bit(p)? sizeof(vc.vc64):sizeof(vc.vc32));
3034 if (error) return (error);
3035
3036 mp = vfs_getvfs(&vc.vc32.vc_fsid); /* works for 32 and 64 */
3037 if (mp == NULL) return (ENOENT);
3038
3039 hfsmp = VFSTOHFS(mp);
3040 bzero(&vq, sizeof(vq));
3041 vq.vq_flags = hfsmp->hfs_notification_conditions;
3042 return SYSCTL_OUT(req, &vq, sizeof(vq));;
3043 } else if (name[0] == HFS_REPLAY_JOURNAL) {
3044 vnode_t devvp = NULL;
3045 int device_fd;
3046 if (namelen != 2) {
3047 return (EINVAL);
3048 }
3049 device_fd = name[1];
3050 error = file_vnode(device_fd, &devvp);
3051 if (error) {
3052 return error;
3053 }
3054 error = vnode_getwithref(devvp);
3055 if (error) {
3056 file_drop(device_fd);
3057 return error;
3058 }
3059 error = hfs_journal_replay(devvp, context);
3060 file_drop(device_fd);
3061 vnode_put(devvp);
3062 return error;
3063 } else if (name[0] == HFS_ENABLE_RESIZE_DEBUG) {
3064 hfs_resize_debug = 1;
3065 printf ("hfs_sysctl: Enabled volume resize debugging.\n");
3066 return 0;
3067 }
3068
3069 return (ENOTSUP);
3070 }
3071
3072 /*
3073 * hfs_vfs_vget is not static since it is used in hfs_readwrite.c to support
3074 * the build_path ioctl. We use it to leverage the code below that updates
3075 * the origin list cache if necessary
3076 */
3077
int
hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, __unused vfs_context_t context)
{
	int error;
	int lockflags;
	struct hfsmount *hfsmp;

	hfsmp = VFSTOHFS(mp);

	/*
	 * Look the object up by catalog node ID.  skiplock=1 so the
	 * cnode comes back unlocked; we re-lock it below only if we
	 * need to refresh hardlink origin state.
	 */
	error = hfs_vget(hfsmp, (cnid_t)ino, vpp, 1, 0);
	if (error)
		return (error);

	/*
	 * ADLs may need to have their origin state updated
	 * since build_path needs a valid parent.  The same is true
	 * for hardlinked files as well.  There isn't a race window here
	 * in re-acquiring the cnode lock since we aren't pulling any data
	 * out of the cnode; instead, we're going to the catalog.
	 */
	if ((VTOC(*vpp)->c_flag & C_HARDLINK) &&
	    (hfs_lock(VTOC(*vpp), HFS_EXCLUSIVE_LOCK) == 0)) {
		cnode_t *cp = VTOC(*vpp);
		struct cat_desc cdesc;

		if (!hfs_haslinkorigin(cp)) {
			lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
			error = cat_findname(hfsmp, (cnid_t)ino, &cdesc);
			hfs_systemfile_unlock(hfsmp, lockflags);
			if (error == 0) {
				/*
				 * Only record the origin if the found parent is a
				 * real directory, not one of the private hardlink
				 * metadata directories.
				 */
				if ((cdesc.cd_parentcnid != hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) &&
					(cdesc.cd_parentcnid != hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid)) {
					hfs_savelinkorigin(cp, cdesc.cd_parentcnid);
				}
				cat_releasedesc(&cdesc);
			}
		}
		hfs_unlock(cp);
	}
	/*
	 * Note: a cat_findname() failure above is deliberately not
	 * propagated -- the vnode was obtained successfully, and the
	 * origin update is best-effort.
	 */
	return (0);
}
3119
3120
3121 /*
3122 * Look up an HFS object by ID.
3123 *
3124 * The object is returned with an iocount reference and the cnode locked.
3125 *
3126 * If the object is a file then it will represent the data fork.
3127 */
int
hfs_vget(struct hfsmount *hfsmp, cnid_t cnid, struct vnode **vpp, int skiplock, int allow_deleted)
{
	struct vnode *vp = NULLVP;
	struct cat_desc cndesc;
	struct cat_attr cnattr;
	struct cat_fork cnfork;
	u_int32_t linkref = 0;	/* non-zero => cnid named a raw hardlink inode */
	int error;

	/* Check for cnids that should't be exported. */
	if ((cnid < kHFSFirstUserCatalogNodeID) &&
	    (cnid != kHFSRootFolderID && cnid != kHFSRootParentID)) {
		return (ENOENT);
	}
	/* Don't export our private directories. */
	if (cnid == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid ||
	    cnid == hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) {
		return (ENOENT);
	}
	/*
	 * Check the hash first: if the cnode is already in memory we can
	 * return it directly without touching the catalog.
	 */
	vp = hfs_chash_getvnode(hfsmp, cnid, 0, skiplock, allow_deleted);
	if (vp) {
		*vpp = vp;
		return(0);
	}

	bzero(&cndesc, sizeof(cndesc));
	bzero(&cnattr, sizeof(cnattr));
	bzero(&cnfork, sizeof(cnfork));

	/*
	 * Not in hash, lookup in catalog
	 */
	if (cnid == kHFSRootParentID) {
		/*
		 * kHFSRootParentID has no catalog record; synthesize a
		 * descriptor/attributes for the root directory instead.
		 */
		static char hfs_rootname[] = "/";

		cndesc.cd_nameptr = (const u_int8_t *)&hfs_rootname[0];
		cndesc.cd_namelen = 1;
		cndesc.cd_parentcnid = kHFSRootParentID;
		cndesc.cd_cnid = kHFSRootFolderID;
		cndesc.cd_flags = CD_ISDIR;

		cnattr.ca_fileid = kHFSRootFolderID;
		cnattr.ca_linkcount = 1;
		cnattr.ca_entries = 1;
		cnattr.ca_dircount = 1;
		cnattr.ca_mode = (S_IFDIR | S_IRWXU | S_IRWXG | S_IRWXO);
	} else {
		int lockflags;
		cnid_t pid;
		const char *nameptr;

		lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
		error = cat_idlookup(hfsmp, cnid, 0, &cndesc, &cnattr, &cnfork);
		hfs_systemfile_unlock(hfsmp, lockflags);

		if (error) {
			*vpp = NULL;
			return (error);
		}

		/*
		 * Check for a raw hardlink inode and save its linkref.
		 * Raw inodes live in the private metadata directories and
		 * are named "<prefix><linkref>"; parse the number out of
		 * the name.
		 */
		pid = cndesc.cd_parentcnid;
		nameptr = (const char *)cndesc.cd_nameptr;

		if ((pid == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid) &&
		    (bcmp(nameptr, HFS_INODE_PREFIX, HFS_INODE_PREFIX_LEN) == 0)) {
			linkref = strtoul(&nameptr[HFS_INODE_PREFIX_LEN], NULL, 10);

		} else if ((pid == hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) &&
		           (bcmp(nameptr, HFS_DIRINODE_PREFIX, HFS_DIRINODE_PREFIX_LEN) == 0)) {
			linkref = strtoul(&nameptr[HFS_DIRINODE_PREFIX_LEN], NULL, 10);

		} else if ((pid == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid) &&
		           (bcmp(nameptr, HFS_DELETE_PREFIX, HFS_DELETE_PREFIX_LEN) == 0)) {
			*vpp = NULL;
			cat_releasedesc(&cndesc);
			return (ENOENT);	/* open unlinked file */
		}
	}

	/*
	 * Finish initializing cnode descriptor for hardlinks.
	 *
	 * We need a valid name and parent for reverse lookups.
	 */
	if (linkref) {
		cnid_t nextlinkid;
		cnid_t prevlinkid;
		struct cat_desc linkdesc;
		int lockflags;

		cnattr.ca_linkref = linkref;

		/*
		 * Pick up the first link in the chain and get a descriptor for it.
		 * This allows blind volfs paths to work for hardlinks.
		 */
		if ((hfs_lookup_siblinglinks(hfsmp, linkref, &prevlinkid, &nextlinkid) == 0) &&
		    (nextlinkid != 0)) {
			lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
			error = cat_findname(hfsmp, nextlinkid, &linkdesc);
			hfs_systemfile_unlock(hfsmp, lockflags);
			if (error == 0) {
				/* Replace the raw-inode descriptor with the link's. */
				cat_releasedesc(&cndesc);
				bcopy(&linkdesc, &cndesc, sizeof(linkdesc));
			}
		}
	}

	if (linkref) {
		int newvnode_flags = 0;

		error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr,
								&cnfork, &vp, &newvnode_flags);
		if (error == 0) {
			VTOC(vp)->c_flag |= C_HARDLINK;
			vnode_setmultipath(vp);
		}
	} else {
		struct componentname cn;
		int newvnode_flags = 0;

		/* Supply hfs_getnewvnode with a component name. */
		MALLOC_ZONE(cn.cn_pnbuf, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK);
		cn.cn_nameiop = LOOKUP;
		cn.cn_flags = ISLASTCN | HASBUF;
		cn.cn_context = NULL;
		cn.cn_pnlen = MAXPATHLEN;
		cn.cn_nameptr = cn.cn_pnbuf;
		cn.cn_namelen = cndesc.cd_namelen;
		cn.cn_hash = 0;
		cn.cn_consume = 0;
		/* +1 to pick up the NUL terminator along with the name. */
		bcopy(cndesc.cd_nameptr, cn.cn_nameptr, cndesc.cd_namelen + 1);

		error = hfs_getnewvnode(hfsmp, NULLVP, &cn, &cndesc, 0, &cnattr,
								&cnfork, &vp, &newvnode_flags);

		if (error == 0 && (VTOC(vp)->c_flag & C_HARDLINK)) {
			hfs_savelinkorigin(VTOC(vp), cndesc.cd_parentcnid);
		}
		FREE_ZONE(cn.cn_pnbuf, cn.cn_pnlen, M_NAMEI);
	}
	cat_releasedesc(&cndesc);

	*vpp = vp;
	/* Caller asked for an unlocked cnode (vnode still has an iocount). */
	if (vp && skiplock) {
		hfs_unlock(VTOC(vp));
	}
	return (error);
}
3284
3285
3286 /*
3287 * Flush out all the files in a filesystem.
3288 */
static int
#if QUOTA
hfs_flushfiles(struct mount *mp, int flags, struct proc *p)
#else
hfs_flushfiles(struct mount *mp, int flags, __unused struct proc *p)
#endif /* QUOTA */
{
	struct hfsmount *hfsmp;
	struct vnode *skipvp = NULLVP;
	int error;
#if QUOTA
	int quotafilecnt;
	int i;
#endif

	hfsmp = VFSTOHFS(mp);

#if QUOTA
	/*
	 * The open quota files have an indirect reference on
	 * the root directory vnode.  We must account for this
	 * extra reference when doing the intial vflush.
	 */
	quotafilecnt = 0;
	if (((unsigned int)vfs_flags(mp)) & MNT_QUOTA) {

		/* Find out how many quota files we have open. */
		for (i = 0; i < MAXQUOTAS; i++) {
			if (hfsmp->hfs_qfiles[i].qf_vp != NULLVP)
				++quotafilecnt;
		}

		/* Obtain the root vnode so we can skip over it. */
		skipvp = hfs_chash_getvnode(hfsmp, kHFSRootFolderID, 0, 0, 0);
	}
#endif /* QUOTA */

	/* First pass: also skip swap files; second pass flushes them too. */
	error = vflush(mp, skipvp, SKIPSYSTEM | SKIPSWAP | flags);
	if (error != 0)
		return(error);

	error = vflush(mp, skipvp, SKIPSYSTEM | flags);

#if QUOTA
	if (((unsigned int)vfs_flags(mp)) & MNT_QUOTA) {
		if (skipvp) {
			/*
			 * See if there are additional references on the
			 * root vp besides the ones obtained from the open
			 * quota files and the hfs_chash_getvnode call above.
			 */
			if ((error == 0) &&
			    (vnode_isinuse(skipvp, quotafilecnt))) {
				error = EBUSY;  /* root directory is still open */
			}
			hfs_unlock(VTOC(skipvp));
			vnode_put(skipvp);
		}
		if (error && (flags & FORCECLOSE) == 0)
			return (error);

		/* Close the quota files, then flush whatever remains. */
		for (i = 0; i < MAXQUOTAS; i++) {
			if (hfsmp->hfs_qfiles[i].qf_vp == NULLVP)
				continue;
			hfs_quotaoff(p, mp, i);
		}
		error = vflush(mp, NULLVP, SKIPSYSTEM | flags);
	}
#endif /* QUOTA */

	return (error);
}
3361
3362 /*
3363 * Update volume encoding bitmap (HFS Plus only)
3364 */
3365 __private_extern__
3366 void
3367 hfs_setencodingbits(struct hfsmount *hfsmp, u_int32_t encoding)
3368 {
3369 #define kIndexMacUkrainian 48 /* MacUkrainian encoding is 152 */
3370 #define kIndexMacFarsi 49 /* MacFarsi encoding is 140 */
3371
3372 u_int32_t index;
3373
3374 switch (encoding) {
3375 case kTextEncodingMacUkrainian:
3376 index = kIndexMacUkrainian;
3377 break;
3378 case kTextEncodingMacFarsi:
3379 index = kIndexMacFarsi;
3380 break;
3381 default:
3382 index = encoding;
3383 break;
3384 }
3385
3386 if (index < 64 && (hfsmp->encodingsBitmap & (u_int64_t)(1ULL << index)) == 0) {
3387 HFS_MOUNT_LOCK(hfsmp, TRUE)
3388 hfsmp->encodingsBitmap |= (u_int64_t)(1ULL << index);
3389 MarkVCBDirty(hfsmp);
3390 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
3391 }
3392 }
3393
3394 /*
3395 * Update volume stats
3396 *
3397 * On journal volumes this will cause a volume header flush
3398 */
3399 int
3400 hfs_volupdate(struct hfsmount *hfsmp, enum volop op, int inroot)
3401 {
3402 struct timeval tv;
3403
3404 microtime(&tv);
3405
3406 lck_mtx_lock(&hfsmp->hfs_mutex);
3407
3408 MarkVCBDirty(hfsmp);
3409 hfsmp->hfs_mtime = tv.tv_sec;
3410
3411 switch (op) {
3412 case VOL_UPDATE:
3413 break;
3414 case VOL_MKDIR:
3415 if (hfsmp->hfs_dircount != 0xFFFFFFFF)
3416 ++hfsmp->hfs_dircount;
3417 if (inroot && hfsmp->vcbNmRtDirs != 0xFFFF)
3418 ++hfsmp->vcbNmRtDirs;
3419 break;
3420 case VOL_RMDIR:
3421 if (hfsmp->hfs_dircount != 0)
3422 --hfsmp->hfs_dircount;
3423 if (inroot && hfsmp->vcbNmRtDirs != 0xFFFF)
3424 --hfsmp->vcbNmRtDirs;
3425 break;
3426 case VOL_MKFILE:
3427 if (hfsmp->hfs_filecount != 0xFFFFFFFF)
3428 ++hfsmp->hfs_filecount;
3429 if (inroot && hfsmp->vcbNmFls != 0xFFFF)
3430 ++hfsmp->vcbNmFls;
3431 break;
3432 case VOL_RMFILE:
3433 if (hfsmp->hfs_filecount != 0)
3434 --hfsmp->hfs_filecount;
3435 if (inroot && hfsmp->vcbNmFls != 0xFFFF)
3436 --hfsmp->vcbNmFls;
3437 break;
3438 }
3439
3440 lck_mtx_unlock(&hfsmp->hfs_mutex);
3441
3442 if (hfsmp->jnl) {
3443 hfs_flushvolumeheader(hfsmp, 0, 0);
3444 }
3445
3446 return (0);
3447 }
3448
3449
/*
 * Flush the in-memory VCB state of a plain (non-HFS Plus) HFS volume
 * out to the on-disk Master Directory Block, and optionally to the
 * alternate MDB.  Dates are converted from UTC to the local-time
 * format HFS stores on disk, and all multi-byte fields are written
 * big-endian.
 */
static int
hfs_flushMDB(struct hfsmount *hfsmp, int waitfor, int altflush)
{
	ExtendedVCB *vcb = HFSTOVCB(hfsmp);
	struct filefork *fp;
	HFSMasterDirectoryBlock	*mdb;
	struct buf *bp = NULL;
	int retval;
	int sectorsize;
	ByteCount namelen;

	/* Read the sector containing the MDB so unrelated bytes survive. */
	sectorsize = hfsmp->hfs_logical_block_size;
	retval = (int)buf_bread(hfsmp->hfs_devvp, (daddr64_t)HFS_PRI_SECTOR(sectorsize), sectorsize, NOCRED, &bp);
	if (retval) {
		if (bp)
			buf_brelse(bp);
		return retval;
	}

	lck_mtx_lock(&hfsmp->hfs_mutex);

	mdb = (HFSMasterDirectoryBlock *)(buf_dataptr(bp) + HFS_PRI_OFFSET(sectorsize));

	/* HFS stores dates in local time on disk; convert from UTC. */
	mdb->drCrDate	= SWAP_BE32 (UTCToLocal(to_hfs_time(vcb->hfs_itime)));
	mdb->drLsMod	= SWAP_BE32 (UTCToLocal(to_hfs_time(vcb->vcbLsMod)));
	mdb->drAtrb	= SWAP_BE16 (vcb->vcbAtrb);
	mdb->drNmFls	= SWAP_BE16 (vcb->vcbNmFls);
	mdb->drAllocPtr	= SWAP_BE16 (vcb->nextAllocation);
	mdb->drClpSiz	= SWAP_BE32 (vcb->vcbClpSiz);
	mdb->drNxtCNID	= SWAP_BE32 (vcb->vcbNxtCNID);
	mdb->drFreeBks	= SWAP_BE16 (vcb->freeBlocks);

	/* Convert the UTF-8 volume name back to the on-disk encoding. */
	namelen = strlen((char *)vcb->vcbVN);
	retval = utf8_to_hfs(vcb, namelen, vcb->vcbVN, mdb->drVN);
	/* Retry with MacRoman in case that's how it was exported. */
	if (retval)
		retval = utf8_to_mac_roman(namelen, vcb->vcbVN, mdb->drVN);

	mdb->drVolBkUp	= SWAP_BE32 (UTCToLocal(to_hfs_time(vcb->vcbVolBkUp)));
	mdb->drWrCnt	= SWAP_BE32 (vcb->vcbWrCnt);
	mdb->drNmRtDirs	= SWAP_BE16 (vcb->vcbNmRtDirs);
	mdb->drFilCnt	= SWAP_BE32 (vcb->vcbFilCnt);
	mdb->drDirCnt	= SWAP_BE32 (vcb->vcbDirCnt);

	bcopy(vcb->vcbFndrInfo, mdb->drFndrInfo, sizeof(mdb->drFndrInfo));

	/* Sync the extents overflow file's first three extents and sizes. */
	fp = VTOF(vcb->extentsRefNum);
	mdb->drXTExtRec[0].startBlock = SWAP_BE16 (fp->ff_extents[0].startBlock);
	mdb->drXTExtRec[0].blockCount = SWAP_BE16 (fp->ff_extents[0].blockCount);
	mdb->drXTExtRec[1].startBlock = SWAP_BE16 (fp->ff_extents[1].startBlock);
	mdb->drXTExtRec[1].blockCount = SWAP_BE16 (fp->ff_extents[1].blockCount);
	mdb->drXTExtRec[2].startBlock = SWAP_BE16 (fp->ff_extents[2].startBlock);
	mdb->drXTExtRec[2].blockCount = SWAP_BE16 (fp->ff_extents[2].blockCount);
	mdb->drXTFlSize	= SWAP_BE32 (fp->ff_blocks * vcb->blockSize);
	mdb->drXTClpSiz	= SWAP_BE32 (fp->ff_clumpsize);
	FTOC(fp)->c_flag &= ~C_MODIFIED;

	/* Sync the catalog file's first three extents and sizes. */
	fp = VTOF(vcb->catalogRefNum);
	mdb->drCTExtRec[0].startBlock = SWAP_BE16 (fp->ff_extents[0].startBlock);
	mdb->drCTExtRec[0].blockCount = SWAP_BE16 (fp->ff_extents[0].blockCount);
	mdb->drCTExtRec[1].startBlock = SWAP_BE16 (fp->ff_extents[1].startBlock);
	mdb->drCTExtRec[1].blockCount = SWAP_BE16 (fp->ff_extents[1].blockCount);
	mdb->drCTExtRec[2].startBlock = SWAP_BE16 (fp->ff_extents[2].startBlock);
	mdb->drCTExtRec[2].blockCount = SWAP_BE16 (fp->ff_extents[2].blockCount);
	mdb->drCTFlSize	= SWAP_BE32 (fp->ff_blocks * vcb->blockSize);
	mdb->drCTClpSiz	= SWAP_BE32 (fp->ff_clumpsize);
	FTOC(fp)->c_flag &= ~C_MODIFIED;

	MarkVCBClean( vcb );

	lck_mtx_unlock(&hfsmp->hfs_mutex);

	/* If requested, flush out the alternate MDB */
	if (altflush) {
		struct buf *alt_bp = NULL;

		if (buf_meta_bread(hfsmp->hfs_devvp, hfsmp->hfs_alt_id_sector, sectorsize, NOCRED, &alt_bp) == 0) {
			bcopy(mdb, (char *)buf_dataptr(alt_bp) + HFS_ALT_OFFSET(sectorsize), kMDBSize);

			(void) VNOP_BWRITE(alt_bp);
		} else if (alt_bp)
			buf_brelse(alt_bp);
	}

	/* MNT_WAIT => synchronous write; otherwise fire-and-forget. */
	if (waitfor != MNT_WAIT)
		buf_bawrite(bp);
	else
		retval = VNOP_BWRITE(bp);

	return (retval);
}
3541
3542 /*
3543 * Flush any dirty in-memory mount data to the on-disk
3544 * volume header.
3545 *
3546 * Note: the on-disk volume signature is intentionally
3547 * not flushed since the on-disk "H+" and "HX" signatures
3548 * are always stored in-memory as "H+".
3549 */
3550 int
3551 hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush)
3552 {
3553 ExtendedVCB *vcb = HFSTOVCB(hfsmp);
3554 struct filefork *fp;
3555 HFSPlusVolumeHeader *volumeHeader, *altVH;
3556 int retval;
3557 struct buf *bp, *alt_bp;
3558 int i;
3559 daddr64_t priIDSector;
3560 int critical;
3561 u_int16_t signature;
3562 u_int16_t hfsversion;
3563
3564 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
3565 return(0);
3566 }
3567 if (hfsmp->hfs_flags & HFS_STANDARD) {
3568 return hfs_flushMDB(hfsmp, waitfor, altflush);
3569 }
3570 critical = altflush;
3571 priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
3572 HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size));
3573
3574 if (hfs_start_transaction(hfsmp) != 0) {
3575 return EINVAL;
3576 }
3577
3578 bp = NULL;
3579 alt_bp = NULL;
3580
3581 retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
3582 HFS_PHYSBLK_ROUNDDOWN(priIDSector, hfsmp->hfs_log_per_phys),
3583 hfsmp->hfs_physical_block_size, NOCRED, &bp);
3584 if (retval) {
3585 printf("hfs: err %d reading VH blk (%s)\n", retval, vcb->vcbVN);
3586 goto err_exit;
3587 }
3588
3589 volumeHeader = (HFSPlusVolumeHeader *)((char *)buf_dataptr(bp) +
3590 HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));
3591
3592 /*
3593 * Sanity check what we just read. If it's bad, try the alternate
3594 * instead.
3595 */
3596 signature = SWAP_BE16 (volumeHeader->signature);
3597 hfsversion = SWAP_BE16 (volumeHeader->version);
3598 if ((signature != kHFSPlusSigWord && signature != kHFSXSigWord) ||
3599 (hfsversion < kHFSPlusVersion) || (hfsversion > 100) ||
3600 (SWAP_BE32 (volumeHeader->blockSize) != vcb->blockSize)) {
3601 printf("hfs: corrupt VH on %s, sig 0x%04x, ver %d, blksize %d%s\n",
3602 vcb->vcbVN, signature, hfsversion,
3603 SWAP_BE32 (volumeHeader->blockSize),
3604 hfsmp->hfs_alt_id_sector ? "; trying alternate" : "");
3605 hfs_mark_volume_inconsistent(hfsmp);
3606
3607 if (hfsmp->hfs_alt_id_sector) {
3608 retval = buf_meta_bread(hfsmp->hfs_devvp,
3609 HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys),
3610 hfsmp->hfs_physical_block_size, NOCRED, &alt_bp);
3611 if (retval) {
3612 printf("hfs: err %d reading alternate VH (%s)\n", retval, vcb->vcbVN);
3613 goto err_exit;
3614 }
3615
3616 altVH = (HFSPlusVolumeHeader *)((char *)buf_dataptr(alt_bp) +
3617 HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size));
3618 signature = SWAP_BE16(altVH->signature);
3619 hfsversion = SWAP_BE16(altVH->version);
3620
3621 if ((signature != kHFSPlusSigWord && signature != kHFSXSigWord) ||
3622 (hfsversion < kHFSPlusVersion) || (kHFSPlusVersion > 100) ||
3623 (SWAP_BE32(altVH->blockSize) != vcb->blockSize)) {
3624 printf("hfs: corrupt alternate VH on %s, sig 0x%04x, ver %d, blksize %d\n",
3625 vcb->vcbVN, signature, hfsversion,
3626 SWAP_BE32(altVH->blockSize));
3627 retval = EIO;
3628 goto err_exit;
3629 }
3630
3631 /* The alternate is plausible, so use it. */
3632 bcopy(altVH, volumeHeader, kMDBSize);
3633 buf_brelse(alt_bp);
3634 alt_bp = NULL;
3635 } else {
3636 /* No alternate VH, nothing more we can do. */
3637 retval = EIO;
3638 goto err_exit;
3639 }
3640 }
3641
3642 if (hfsmp->jnl) {
3643 journal_modify_block_start(hfsmp->jnl, bp);
3644 }
3645
3646 /*
3647 * For embedded HFS+ volumes, update create date if it changed
3648 * (ie from a setattrlist call)
3649 */
3650 if ((vcb->hfsPlusIOPosOffset != 0) &&
3651 (SWAP_BE32 (volumeHeader->createDate) != vcb->localCreateDate)) {
3652 struct buf *bp2;
3653 HFSMasterDirectoryBlock *mdb;
3654
3655 retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
3656 HFS_PHYSBLK_ROUNDDOWN(HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size), hfsmp->hfs_log_per_phys),
3657 hfsmp->hfs_physical_block_size, NOCRED, &bp2);
3658 if (retval) {
3659 if (bp2)
3660 buf_brelse(bp2);
3661 retval = 0;
3662 } else {
3663 mdb = (HFSMasterDirectoryBlock *)(buf_dataptr(bp2) +
3664 HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));
3665
3666 if ( SWAP_BE32 (mdb->drCrDate) != vcb->localCreateDate )
3667 {
3668 if (hfsmp->jnl) {
3669 journal_modify_block_start(hfsmp->jnl, bp2);
3670 }
3671
3672 mdb->drCrDate = SWAP_BE32 (vcb->localCreateDate); /* pick up the new create date */
3673
3674 if (hfsmp->jnl) {
3675 journal_modify_block_end(hfsmp->jnl, bp2, NULL, NULL);
3676 } else {
3677 (void) VNOP_BWRITE(bp2); /* write out the changes */
3678 }
3679 }
3680 else
3681 {
3682 buf_brelse(bp2); /* just release it */
3683 }
3684 }
3685 }
3686
3687 lck_mtx_lock(&hfsmp->hfs_mutex);
3688
3689 /* Note: only update the lower 16 bits worth of attributes */
3690 volumeHeader->attributes = SWAP_BE32 (vcb->vcbAtrb);
3691 volumeHeader->journalInfoBlock = SWAP_BE32 (vcb->vcbJinfoBlock);
3692 if (hfsmp->jnl) {
3693 volumeHeader->lastMountedVersion = SWAP_BE32 (kHFSJMountVersion);
3694 } else {
3695 volumeHeader->lastMountedVersion = SWAP_BE32 (kHFSPlusMountVersion);
3696 }
3697 volumeHeader->createDate = SWAP_BE32 (vcb->localCreateDate); /* volume create date is in local time */
3698 volumeHeader->modifyDate = SWAP_BE32 (to_hfs_time(vcb->vcbLsMod));
3699 volumeHeader->backupDate = SWAP_BE32 (to_hfs_time(vcb->vcbVolBkUp));
3700 volumeHeader->fileCount = SWAP_BE32 (vcb->vcbFilCnt);
3701 volumeHeader->folderCount = SWAP_BE32 (vcb->vcbDirCnt);
3702 volumeHeader->totalBlocks = SWAP_BE32 (vcb->totalBlocks);
3703 volumeHeader->freeBlocks = SWAP_BE32 (vcb->freeBlocks);
3704 volumeHeader->nextAllocation = SWAP_BE32 (vcb->nextAllocation);
3705 volumeHeader->rsrcClumpSize = SWAP_BE32 (vcb->vcbClpSiz);
3706 volumeHeader->dataClumpSize = SWAP_BE32 (vcb->vcbClpSiz);
3707 volumeHeader->nextCatalogID = SWAP_BE32 (vcb->vcbNxtCNID);
3708 volumeHeader->writeCount = SWAP_BE32 (vcb->vcbWrCnt);
3709 volumeHeader->encodingsBitmap = SWAP_BE64 (vcb->encodingsBitmap);
3710
3711 if (bcmp(vcb->vcbFndrInfo, volumeHeader->finderInfo, sizeof(volumeHeader->finderInfo)) != 0) {
3712 bcopy(vcb->vcbFndrInfo, volumeHeader->finderInfo, sizeof(volumeHeader->finderInfo));
3713 critical = 1;
3714 }
3715
3716 /*
3717 * System files are only dirty when altflush is set.
3718 */
3719 if (altflush == 0) {
3720 goto done;
3721 }
3722
3723 /* Sync Extents over-flow file meta data */
3724 fp = VTOF(vcb->extentsRefNum);
3725 if (FTOC(fp)->c_flag & C_MODIFIED) {
3726 for (i = 0; i < kHFSPlusExtentDensity; i++) {
3727 volumeHeader->extentsFile.extents[i].startBlock =
3728 SWAP_BE32 (fp->ff_extents[i].startBlock);
3729 volumeHeader->extentsFile.extents[i].blockCount =
3730 SWAP_BE32 (fp->ff_extents[i].blockCount);
3731 }
3732 volumeHeader->extentsFile.logicalSize = SWAP_BE64 (fp->ff_size);
3733 volumeHeader->extentsFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
3734 volumeHeader->extentsFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
3735 FTOC(fp)->c_flag &= ~C_MODIFIED;
3736 }
3737
3738 /* Sync Catalog file meta data */
3739 fp = VTOF(vcb->catalogRefNum);
3740 if (FTOC(fp)->c_flag & C_MODIFIED) {
3741 for (i = 0; i < kHFSPlusExtentDensity; i++) {
3742 volumeHeader->catalogFile.extents[i].startBlock =
3743 SWAP_BE32 (fp->ff_extents[i].startBlock);
3744 volumeHeader->catalogFile.extents[i].blockCount =
3745 SWAP_BE32 (fp->ff_extents[i].blockCount);
3746 }
3747 volumeHeader->catalogFile.logicalSize = SWAP_BE64 (fp->ff_size);
3748 volumeHeader->catalogFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
3749 volumeHeader->catalogFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
3750 FTOC(fp)->c_flag &= ~C_MODIFIED;
3751 }
3752
3753 /* Sync Allocation file meta data */
3754 fp = VTOF(vcb->allocationsRefNum);
3755 if (FTOC(fp)->c_flag & C_MODIFIED) {
3756 for (i = 0; i < kHFSPlusExtentDensity; i++) {
3757 volumeHeader->allocationFile.extents[i].startBlock =
3758 SWAP_BE32 (fp->ff_extents[i].startBlock);
3759 volumeHeader->allocationFile.extents[i].blockCount =
3760 SWAP_BE32 (fp->ff_extents[i].blockCount);
3761 }
3762 volumeHeader->allocationFile.logicalSize = SWAP_BE64 (fp->ff_size);
3763 volumeHeader->allocationFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
3764 volumeHeader->allocationFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
3765 FTOC(fp)->c_flag &= ~C_MODIFIED;
3766 }
3767
3768 /* Sync Attribute file meta data */
3769 if (hfsmp->hfs_attribute_vp) {
3770 fp = VTOF(hfsmp->hfs_attribute_vp);
3771 for (i = 0; i < kHFSPlusExtentDensity; i++) {
3772 volumeHeader->attributesFile.extents[i].startBlock =
3773 SWAP_BE32 (fp->ff_extents[i].startBlock);
3774 volumeHeader->attributesFile.extents[i].blockCount =
3775 SWAP_BE32 (fp->ff_extents[i].blockCount);
3776 }
3777 FTOC(fp)->c_flag &= ~C_MODIFIED;
3778 volumeHeader->attributesFile.logicalSize = SWAP_BE64 (fp->ff_size);
3779 volumeHeader->attributesFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
3780 volumeHeader->attributesFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
3781 }
3782
3783 /* Sync Startup file meta data */
3784 if (hfsmp->hfs_startup_vp) {
3785 fp = VTOF(hfsmp->hfs_startup_vp);
3786 if (FTOC(fp)->c_flag & C_MODIFIED) {
3787 for (i = 0; i < kHFSPlusExtentDensity; i++) {
3788 volumeHeader->startupFile.extents[i].startBlock =
3789 SWAP_BE32 (fp->ff_extents[i].startBlock);
3790 volumeHeader->startupFile.extents[i].blockCount =
3791 SWAP_BE32 (fp->ff_extents[i].blockCount);
3792 }
3793 volumeHeader->startupFile.logicalSize = SWAP_BE64 (fp->ff_size);
3794 volumeHeader->startupFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
3795 volumeHeader->startupFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
3796 FTOC(fp)->c_flag &= ~C_MODIFIED;
3797 }
3798 }
3799
3800 done:
3801 MarkVCBClean(hfsmp);
3802 lck_mtx_unlock(&hfsmp->hfs_mutex);
3803
3804 /* If requested, flush out the alternate volume header */
3805 if (altflush && hfsmp->hfs_alt_id_sector) {
3806 if (buf_meta_bread(hfsmp->hfs_devvp,
3807 HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys),
3808 hfsmp->hfs_physical_block_size, NOCRED, &alt_bp) == 0) {
3809 if (hfsmp->jnl) {
3810 journal_modify_block_start(hfsmp->jnl, alt_bp);
3811 }
3812
3813 bcopy(volumeHeader, (char *)buf_dataptr(alt_bp) +
3814 HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size),
3815 kMDBSize);
3816
3817 if (hfsmp->jnl) {
3818 journal_modify_block_end(hfsmp->jnl, alt_bp, NULL, NULL);
3819 } else {
3820 (void) VNOP_BWRITE(alt_bp);
3821 }
3822 } else if (alt_bp)
3823 buf_brelse(alt_bp);
3824 }
3825
3826 if (hfsmp->jnl) {
3827 journal_modify_block_end(hfsmp->jnl, bp, NULL, NULL);
3828 } else {
3829 if (waitfor != MNT_WAIT)
3830 buf_bawrite(bp);
3831 else {
3832 retval = VNOP_BWRITE(bp);
3833 /* When critical data changes, flush the device cache */
3834 if (critical && (retval == 0)) {
3835 (void) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE,
3836 NULL, FWRITE, NULL);
3837 }
3838 }
3839 }
3840 hfs_end_transaction(hfsmp);
3841
3842 return (retval);
3843
3844 err_exit:
3845 if (alt_bp)
3846 buf_brelse(alt_bp);
3847 if (bp)
3848 buf_brelse(bp);
3849 hfs_end_transaction(hfsmp);
3850 return retval;
3851 }
3852
3853
3854 /*
3855 * Extend a file system.
3856 */
3857 int
hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
{
	struct proc *p = vfs_context_proc(context);
	kauth_cred_t cred = vfs_context_ucred(context);
	struct vnode *vp;
	struct vnode *devvp;
	struct buf *bp;
	struct filefork *fp = NULL;		/* allocation (bitmap) file fork, set under systemfile lock */
	ExtendedVCB *vcb;
	struct cat_fork forkdata;		/* saved copy of bitmap fork data, restored at "out:" on error */
	u_int64_t oldsize;
	u_int64_t newblkcnt;
	u_int64_t prev_phys_block_count;
	u_int32_t addblks;			/* allocation blocks being added to the volume */
	u_int64_t sectorcnt;
	u_int32_t sectorsize;
	u_int32_t phys_sectorsize;
	daddr64_t prev_alt_sector;		/* old alternate volume header location, for invalidation/rollback */
	daddr_t bitmapblks;			/* *additional* allocation blocks needed by the bitmap file */
	int lockflags = 0;
	int error;
	int64_t oldBitmapSize;
	Boolean usedExtendFileC = false;	/* true: bitmap grown via ExtendFileC; false: via AddFileExtent */
	int transaction_begun = 0;

	devvp = hfsmp->hfs_devvp;
	vcb = HFSTOVCB(hfsmp);

	/*
	 * - HFS Plus file systems only.
	 * - Journaling must be enabled.
	 * - No embedded volumes.
	 */
	if ((vcb->vcbSigWord == kHFSSigWord) ||
	     (hfsmp->jnl == NULL) ||
	     (vcb->hfsPlusIOPosOffset != 0)) {
		return (EPERM);
	}
	/*
	 * If extending file system by non-root, then verify
	 * ownership and check permissions.
	 */
	if (suser(cred, NULL)) {
		error = hfs_vget(hfsmp, kHFSRootFolderID, &vp, 0, 0);

		if (error)
			return (error);
		/* Caller must own the root folder and have write access to it... */
		error = hfs_owner_rights(hfsmp, VTOC(vp)->c_uid, cred, p, 0);
		if (error == 0) {
			error = hfs_write_access(vp, cred, p, false);
		}
		hfs_unlock(VTOC(vp));
		vnode_put(vp);
		if (error)
			return (error);

		/* ...and be allowed to read/write the raw device. */
		error = vnode_authorize(devvp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, context);
		if (error)
			return (error);
	}
	if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&sectorsize, 0, context)) {
		return (ENXIO);
	}
	/* The device's logical block size must not have changed since mount. */
	if (sectorsize != hfsmp->hfs_logical_block_size) {
		return (ENXIO);
	}
	if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&sectorcnt, 0, context)) {
		return (ENXIO);
	}
	/* The underlying device must actually be large enough for the new size. */
	if ((sectorsize * sectorcnt) < newsize) {
		printf("hfs_extendfs: not enough space on device\n");
		return (ENOSPC);
	}
	error = VNOP_IOCTL(devvp, DKIOCGETPHYSICALBLOCKSIZE, (caddr_t)&phys_sectorsize, 0, context);
	if (error) {
		if ((error != ENOTSUP) && (error != ENOTTY)) {
			return (ENXIO);
		}
		/* If ioctl is not supported, force physical and logical sector size to be same */
		phys_sectorsize = sectorsize;
	}
	oldsize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;

	/*
	 * Validate new size: must grow the volume, and must be a multiple of
	 * both the logical and physical sector sizes.
	 */
	if ((newsize <= oldsize) || (newsize % sectorsize) || (newsize % phys_sectorsize)) {
		printf("hfs_extendfs: invalid size\n");
		return (EINVAL);
	}
	/* Allocation block counts are 32-bit on-disk; reject sizes that overflow. */
	newblkcnt = newsize / vcb->blockSize;
	if (newblkcnt > (u_int64_t)0xFFFFFFFF)
		return (EOVERFLOW);

	addblks = newblkcnt - vcb->totalBlocks;

	if (hfs_resize_debug) {
		printf ("hfs_extendfs: old: size=%qu, blkcnt=%u\n", oldsize, hfsmp->totalBlocks);
		printf ("hfs_extendfs: new: size=%qu, blkcnt=%u, addblks=%u\n", newsize, (u_int32_t)newblkcnt, addblks);
	}
	printf("hfs_extendfs: will extend \"%s\" by %d blocks\n", vcb->vcbVN, addblks);

	/* Only one resize (grow or shrink) may be in flight per volume. */
	HFS_MOUNT_LOCK(hfsmp, TRUE);
	if (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) {
		HFS_MOUNT_UNLOCK(hfsmp, TRUE);
		error = EALREADY;
		goto out;
	}
	hfsmp->hfs_flags |= HFS_RESIZE_IN_PROGRESS;
	HFS_MOUNT_UNLOCK(hfsmp, TRUE);

	/*
	 * Enclose changes inside a transaction.
	 */
	if (hfs_start_transaction(hfsmp) != 0) {
		error = EINVAL;
		goto out;
	}
	transaction_begun = 1;

	/*
	 * Note: we take the attributes lock in case we have an attribute data vnode
	 * which needs to change size.
	 */
	lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
	vp = vcb->allocationsRefNum;
	fp = VTOF(vp);
	/* Snapshot the bitmap file's fork so it can be restored if we fail. */
	bcopy(&fp->ff_data, &forkdata, sizeof(forkdata));

	/*
	 * Calculate additional space required (if any) by allocation bitmap.
	 * The bitmap needs one bit per allocation block, rounded up to the
	 * bitmap I/O size; bitmapblks ends up as the number of extra blocks
	 * to add to the bitmap file (0 if the current file is already big enough).
	 */
	oldBitmapSize = fp->ff_size;
	bitmapblks = roundup((newblkcnt+7) / 8, vcb->vcbVBMIOSize) / vcb->blockSize;
	if (bitmapblks > (daddr_t)fp->ff_blocks)
		bitmapblks -= fp->ff_blocks;
	else
		bitmapblks = 0;

	/*
	 * The allocation bitmap can contain unused bits that are beyond end of
	 * current volume's allocation blocks.  Usually they are supposed to be
	 * zero'ed out but there can be cases where they might be marked as used.
	 * After extending the file system, those bits can represent valid
	 * allocation blocks, so we mark all the bits from the end of current
	 * volume to end of allocation bitmap as "free".
	 */
	BlockMarkFreeUnused(vcb, vcb->totalBlocks,
			(fp->ff_blocks * vcb->blockSize * 8) - vcb->totalBlocks);

	if (bitmapblks > 0) {
		daddr64_t blkno;
		daddr_t blkcnt;
		off_t bytesAdded;

		/*
		 * Get the bitmap's current size (in allocation blocks) so we know
		 * where to start zero filling once the new space is added.  We've
		 * got to do this before the bitmap is grown.
		 */
		blkno  = (daddr64_t)fp->ff_blocks;

		/*
		 * Try to grow the allocation file in the normal way, using allocation
		 * blocks already existing in the file system.  This way, we might be
		 * able to grow the bitmap contiguously, or at least in the metadata
		 * zone.
		 */
		error = ExtendFileC(vcb, fp, bitmapblks * vcb->blockSize, 0,
				kEFAllMask | kEFNoClumpMask | kEFReserveMask
				| kEFMetadataMask | kEFContigMask, &bytesAdded);

		if (error == 0) {
			usedExtendFileC = true;
		} else {
			/*
			 * If the above allocation failed, fall back to allocating the new
			 * extent of the bitmap from the space we're going to add.  Since those
			 * blocks don't yet belong to the file system, we have to update the
			 * extent list directly, and manually adjust the file size.
			 */
			bytesAdded = 0;
			error = AddFileExtent(vcb, fp, vcb->totalBlocks, bitmapblks);
			if (error) {
				printf("hfs_extendfs: error %d adding extents\n", error);
				goto out;
			}
			fp->ff_blocks += bitmapblks;
			VTOC(vp)->c_blocks = fp->ff_blocks;
			VTOC(vp)->c_flag |= C_MODIFIED;
		}

		/*
		 * Update the allocation file's size to include the newly allocated
		 * blocks.  Note that ExtendFileC doesn't do this, which is why this
		 * statement is outside the above "if" statement.
		 */
		fp->ff_size += (u_int64_t)bitmapblks * (u_int64_t)vcb->blockSize;

		/*
		 * Zero out the new bitmap blocks, starting at the old end-of-file
		 * (blkno was captured before the bitmap file was grown above).
		 */
		{

			bp = NULL;
			blkcnt = bitmapblks;
			while (blkcnt > 0) {
				error = (int)buf_meta_bread(vp, blkno, vcb->blockSize, NOCRED, &bp);
				if (error) {
					if (bp) {
						buf_brelse(bp);
					}
					break;
				}
				bzero((char *)buf_dataptr(bp), vcb->blockSize);
				/* Aged buffers are recycled quickly; we won't re-read these. */
				buf_markaged(bp);
				error = (int)buf_bwrite(bp);
				if (error)
					break;
				--blkcnt;
				++blkno;
			}
		}
		if (error) {
			printf("hfs_extendfs: error %d clearing blocks\n", error);
			goto out;
		}
		/*
		 * Mark the new bitmap space as allocated.
		 *
		 * Note that ExtendFileC will have marked any blocks it allocated, so
		 * this is only needed if we used AddFileExtent.  Also note that this
		 * has to come *after* the zero filling of new blocks in the case where
		 * we used AddFileExtent (since the part of the bitmap we're touching
		 * is in those newly allocated blocks).
		 */
		if (!usedExtendFileC) {
			error = BlockMarkAllocated(vcb, vcb->totalBlocks, bitmapblks);
			if (error) {
				printf("hfs_extendfs: error %d setting bitmap\n", error);
				goto out;
			}
			vcb->freeBlocks -= bitmapblks;
		}
	}
	/*
	 * Mark the new alternate VH as allocated.  The alternate volume header
	 * occupies the last 1KB of the volume (2 blocks at 512-byte block size,
	 * otherwise 1 block).
	 */
	if (vcb->blockSize == 512)
		error = BlockMarkAllocated(vcb, vcb->totalBlocks + addblks - 2, 2);
	else
		error = BlockMarkAllocated(vcb, vcb->totalBlocks + addblks - 1, 1);
	if (error) {
		printf("hfs_extendfs: error %d setting bitmap (VH)\n", error);
		goto out;
	}
	/*
	 * Mark the old alternate VH as free.
	 */
	if (vcb->blockSize == 512)
		(void) BlockMarkFree(vcb, vcb->totalBlocks - 2, 2);
	else
		(void) BlockMarkFree(vcb, vcb->totalBlocks - 1, 1);
	/*
	 * Adjust file system variables for new space.  Save the old values
	 * first so the on-error path below can roll them back.
	 */
	prev_phys_block_count = hfsmp->hfs_logical_block_count;
	prev_alt_sector = hfsmp->hfs_alt_id_sector;

	vcb->totalBlocks += addblks;
	vcb->freeBlocks += addblks;
	hfsmp->hfs_logical_block_count = newsize / sectorsize;
	hfsmp->hfs_alt_id_sector = (hfsmp->hfsPlusIOPosOffset / sectorsize) +
	                          HFS_ALT_SECTOR(sectorsize, hfsmp->hfs_logical_block_count);
	MarkVCBDirty(vcb);
	error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
	if (error) {
		printf("hfs_extendfs: couldn't flush volume headers (%d)", error);
		/*
		 * Restore to old state: shrink the bitmap file back (by the same
		 * mechanism used to grow it), undo the free/total block accounting,
		 * and re-mark the (still current) alternate VH as allocated.
		 */
		if (usedExtendFileC) {
			(void) TruncateFileC(vcb, fp, oldBitmapSize, 0, FORK_IS_RSRC(fp),
								 FTOC(fp)->c_fileid, false);
		} else {
			fp->ff_blocks -= bitmapblks;
			fp->ff_size -= (u_int64_t)bitmapblks * (u_int64_t)vcb->blockSize;
			/*
			 * No need to mark the excess blocks free since those bitmap blocks
			 * are no longer part of the bitmap.  But we do need to undo the
			 * effect of the "vcb->freeBlocks -= bitmapblks" above.
			 */
			vcb->freeBlocks += bitmapblks;
		}
		vcb->totalBlocks -= addblks;
		vcb->freeBlocks -= addblks;
		hfsmp->hfs_logical_block_count = prev_phys_block_count;
		hfsmp->hfs_alt_id_sector = prev_alt_sector;
		MarkVCBDirty(vcb);
		if (vcb->blockSize == 512) {
			if (BlockMarkAllocated(vcb, vcb->totalBlocks - 2, 2)) {
				hfs_mark_volume_inconsistent(hfsmp);
			}
		} else {
			if (BlockMarkAllocated(vcb, vcb->totalBlocks - 1, 1)) {
				hfs_mark_volume_inconsistent(hfsmp);
			}
		}
		goto out;
	}
	/*
	 * Invalidate the old alternate volume header by zeroing its signature
	 * area on disk (journaled, since it's still inside the old volume).
	 */
	bp = NULL;
	if (prev_alt_sector) {
		if (buf_meta_bread(hfsmp->hfs_devvp,
				HFS_PHYSBLK_ROUNDDOWN(prev_alt_sector, hfsmp->hfs_log_per_phys),
				hfsmp->hfs_physical_block_size, NOCRED, &bp) == 0) {
			journal_modify_block_start(hfsmp->jnl, bp);

			bzero((char *)buf_dataptr(bp) + HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size), kMDBSize);

			journal_modify_block_end(hfsmp->jnl, bp, NULL, NULL);
		} else if (bp) {
			buf_brelse(bp);
		}
	}

	/*
	 * Update the metadata zone size based on current volume size
	 */
	hfs_metadatazone_init(hfsmp, false);

	/*
	 * Adjust the size of hfsmp->hfs_attrdata_vp
	 */
	if (hfsmp->hfs_attrdata_vp) {
		struct cnode *attr_cp;
		struct filefork *attr_fp;

		if (vnode_get(hfsmp->hfs_attrdata_vp) == 0) {
			attr_cp = VTOC(hfsmp->hfs_attrdata_vp);
			attr_fp = VTOF(hfsmp->hfs_attrdata_vp);

			attr_cp->c_blocks = newblkcnt;
			attr_fp->ff_blocks = newblkcnt;
			attr_fp->ff_extents[0].blockCount = newblkcnt;
			attr_fp->ff_size = (off_t) newblkcnt * hfsmp->blockSize;
			ubc_setsize(hfsmp->hfs_attrdata_vp, attr_fp->ff_size);
			vnode_put(hfsmp->hfs_attrdata_vp);
		}
	}

	/*
	 * Update the R/B Tree if necessary.  Since we don't have to drop the systemfile
	 * locks in the middle of these operations like we do in the truncate case
	 * where we have to relocate files, we can only update the red-black tree
	 * if there were actual changes made to the bitmap.  Also, we can't really scan the
	 * new portion of the bitmap before it has been allocated. The BlockMarkAllocated
	 * routines are smart enough to avoid the r/b tree if the portion they are manipulating is
	 * not currently controlled by the tree.
	 *
	 * We only update hfsmp->allocLimit if totalBlocks actually increased.
	 */

	if (error == 0) {
		UpdateAllocLimit(hfsmp, hfsmp->totalBlocks);
	}

	/* Log successful extending */
	printf("hfs_extendfs: extended \"%s\" to %d blocks (was %d blocks)\n",
	       hfsmp->vcbVN, hfsmp->totalBlocks, (u_int32_t)(oldsize/hfsmp->blockSize));

out:
	if (error && fp) {
		/* Restore allocation fork. */
		bcopy(&forkdata, &fp->ff_data, sizeof(forkdata));
		VTOC(vp)->c_blocks = fp->ff_blocks;

	}

	HFS_MOUNT_LOCK(hfsmp, TRUE);
	hfsmp->hfs_flags &= ~HFS_RESIZE_IN_PROGRESS;
	HFS_MOUNT_UNLOCK(hfsmp, TRUE);
	if (lockflags) {
		hfs_systemfile_unlock(hfsmp, lockflags);
	}
	if (transaction_begun) {
		hfs_end_transaction(hfsmp);
	}

	return MacToVFSError(error);
}
4251
4252 #define HFS_MIN_SIZE (32LL * 1024LL * 1024LL)
4253
4254 /*
4255 * Truncate a file system (while still mounted).
4256 */
int
hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
{
	struct buf *bp = NULL;
	u_int64_t oldsize;
	u_int32_t newblkcnt;
	u_int32_t reclaimblks = 0;	/* allocation blocks beyond the new end of volume */
	int lockflags = 0;
	int transaction_begun = 0;
	Boolean updateFreeBlocks = false;	/* true once freeBlocks has been decremented (for rollback) */
	Boolean disable_sparse = false;		/* true if we temporarily cleared HFS_HAS_SPARSE_DEVICE */
	int error = 0;

	/* Only one resize (grow or shrink) may be in flight per volume. */
	lck_mtx_lock(&hfsmp->hfs_mutex);
	if (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) {
		lck_mtx_unlock(&hfsmp->hfs_mutex);
		return (EALREADY);
	}
	hfsmp->hfs_flags |= HFS_RESIZE_IN_PROGRESS;
	/* Reset the progress counters reported to userspace for this resize. */
	hfsmp->hfs_resize_blocksmoved = 0;
	hfsmp->hfs_resize_totalblocks = 0;
	hfsmp->hfs_resize_progress = 0;
	lck_mtx_unlock(&hfsmp->hfs_mutex);

	/*
	 * - Journaled HFS Plus volumes only.
	 * - No embedded volumes.
	 */
	if ((hfsmp->jnl == NULL) ||
	    (hfsmp->hfsPlusIOPosOffset != 0)) {
		error = EPERM;
		goto out;
	}
	oldsize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
	newblkcnt = newsize / hfsmp->blockSize;
	reclaimblks = hfsmp->totalBlocks - newblkcnt;

	if (hfs_resize_debug) {
		printf ("hfs_truncatefs: old: size=%qu, blkcnt=%u, freeblks=%u\n", oldsize, hfsmp->totalBlocks, hfs_freeblks(hfsmp, 1));
		printf ("hfs_truncatefs: new: size=%qu, blkcnt=%u, reclaimblks=%u\n", newsize, newblkcnt, reclaimblks);
	}

	/* Make sure new size is valid. */
	if ((newsize < HFS_MIN_SIZE) ||
	    (newsize >= oldsize) ||
	    (newsize % hfsmp->hfs_logical_block_size) ||
	    (newsize % hfsmp->hfs_physical_block_size)) {
		printf ("hfs_truncatefs: invalid size (newsize=%qu, oldsize=%qu)\n", newsize, oldsize);
		error = EINVAL;
		goto out;
	}

	/*
	 * Make sure that the file system has enough free blocks reclaim.
	 *
	 * Before resize, the disk is divided into four zones -
	 * 	A. Allocated_Stationary - These are allocated blocks that exist
	 * 	   before the new end of disk.  These blocks will not be
	 * 	   relocated or modified during resize.
	 * 	B. Free_Stationary - These are free blocks that exist before the
	 * 	   new end of disk.  These blocks can be used for any new
	 * 	   allocations during resize, including allocation for relocating
	 * 	   data from the area of disk being reclaimed.
	 * 	C. Allocated_To-Reclaim - These are allocated blocks that exist
	 *	   beyond the new end of disk.  These blocks need to be reclaimed
	 * 	   during resize by allocating equal number of blocks in Free
	 * 	   Stationary zone and copying the data.
	 * 	D. Free_To-Reclaim - These are free blocks that exist beyond the
	 * 	   new end of disk.  Nothing special needs to be done to reclaim
	 * 	   them.
	 *
	 * Total number of blocks on the disk before resize:
	 * ------------------------------------------------
	 * 	Total Blocks = Allocated_Stationary + Free_Stationary +
	 * 	               Allocated_To-Reclaim + Free_To-Reclaim
	 *
	 * Total number of blocks that need to be reclaimed:
	 * ------------------------------------------------
	 *	Blocks to Reclaim = Allocated_To-Reclaim + Free_To-Reclaim
	 *
	 * Note that the check below also makes sure that we have enough space
	 * to relocate data from Allocated_To-Reclaim to Free_Stationary.
	 * Therefore we do not need to check total number of blocks to relocate
	 * later in the code.
	 *
	 * The condition below gets converted to:
	 *
	 * Allocated To-Reclaim + Free To-Reclaim >= Free Stationary + Free To-Reclaim
	 *
	 * which is equivalent to:
	 *
	 *              Allocated To-Reclaim >= Free Stationary
	 */
	if (reclaimblks >= hfs_freeblks(hfsmp, 1)) {
		printf("hfs_truncatefs: insufficient space (need %u blocks; have %u free blocks)\n", reclaimblks, hfs_freeblks(hfsmp, 1));
		error = ENOSPC;
		goto out;
	}

	/* Start with a clean journal. */
	hfs_journal_flush(hfsmp, TRUE);

	if (hfs_start_transaction(hfsmp) != 0) {
		error = EINVAL;
		goto out;
	}
	transaction_begun = 1;

	/* Take the bitmap lock to update the alloc limit field */
	lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);

	/*
	 * Prevent new allocations from using the part we're trying to truncate.
	 *
	 * NOTE: allocLimit is set to the allocation block number where the new
	 * alternate volume header will be.  That way there will be no files to
	 * interfere with allocating the new alternate volume header, and no files
	 * in the allocation blocks beyond (i.e. the blocks we're trying to
	 * truncate away).
	 *
	 * Also shrink the red-black tree if needed.
	 */
	if (hfsmp->blockSize == 512) {
		error = UpdateAllocLimit (hfsmp, newblkcnt - 2);
	}
	else {
		error = UpdateAllocLimit (hfsmp, newblkcnt - 1);
	}

	/* Sparse devices use first fit allocation which is not ideal
	 * for volume resize which requires best fit allocation.  If a
	 * sparse device is being truncated, disable the sparse device
	 * property temporarily for the duration of resize.  Also reset
	 * the free extent cache so that it is rebuilt as sorted by
	 * totalBlocks instead of startBlock.
	 *
	 * Note that this will affect all allocations on the volume and
	 * ideal fix would be just to modify resize-related allocations,
	 * but it will result in complexity like handling of two free
	 * extent caches sorted differently, etc.  So we stick to this
	 * solution for now.
	 */
	HFS_MOUNT_LOCK(hfsmp, TRUE);
	if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
		hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
		ResetVCBFreeExtCache(hfsmp);
		disable_sparse = true;
	}

	/*
	 * Update the volume free block count to reflect the total number
	 * of free blocks that will exist after a successful resize.
	 * Relocation of extents will result in no net change in the total
	 * free space on the disk.  Therefore the code that allocates
	 * space for new extent and deallocates the old extent explicitly
	 * prevents updating the volume free block count.  It will also
	 * prevent false disk full error when the number of blocks in
	 * an extent being relocated is more than the free blocks that
	 * will exist after the volume is resized.
	 */
	hfsmp->freeBlocks -= reclaimblks;
	updateFreeBlocks = true;
	HFS_MOUNT_UNLOCK(hfsmp, TRUE);

	if (lockflags) {
		hfs_systemfile_unlock(hfsmp, lockflags);
		lockflags = 0;
	}

	/*
	 * Update the metadata zone size to match the new volume size,
	 * and if the new size is too small, the metadata zone might be
	 * disabled entirely.
	 */
	hfs_metadatazone_init(hfsmp, false);

	/*
	 * If some files have blocks at or beyond the location of the
	 * new alternate volume header, recalculate free blocks and
	 * reclaim blocks.  Otherwise just update free blocks count.
	 *
	 * The current allocLimit is set to the location of new alternate
	 * volume header, and reclaimblks are the total number of blocks
	 * that need to be reclaimed.  So the check below is really
	 * ignoring the blocks allocated for old alternate volume header.
	 */
	if (hfs_isallocated(hfsmp, hfsmp->allocLimit, reclaimblks)) {
		/*
		 * hfs_reclaimspace will use separate transactions when
		 * relocating files (so we don't overwhelm the journal).
		 */
		hfs_end_transaction(hfsmp);
		transaction_begun = 0;

		/* Attempt to reclaim some space. */
		error = hfs_reclaimspace(hfsmp, hfsmp->allocLimit, reclaimblks, context);
		if (error != 0) {
			printf("hfs_truncatefs: couldn't reclaim space on %s (error=%d)\n", hfsmp->vcbVN, error);
			error = ENOSPC;
			goto out;
		}
		if (hfs_start_transaction(hfsmp) != 0) {
			error = EINVAL;
			goto out;
		}
		transaction_begun = 1;

		/* Check if we're clear now. */
		error = hfs_isallocated(hfsmp, hfsmp->allocLimit, reclaimblks);
		if (error != 0) {
			printf("hfs_truncatefs: didn't reclaim enough space on %s (error=%d)\n", hfsmp->vcbVN, error);
			error = EAGAIN;  /* tell client to try again */
			goto out;
		}
	}

	/*
	 * Note: we take the attributes lock in case we have an attribute data vnode
	 * which needs to change size.
	 */
	lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);

	/*
	 * Allocate last 1KB for alternate volume header.
	 */
	error = BlockMarkAllocated(hfsmp, hfsmp->allocLimit, (hfsmp->blockSize == 512) ? 2 : 1);
	if (error) {
		printf("hfs_truncatefs: Error %d allocating new alternate volume header\n", error);
		goto out;
	}

	/*
	 * Mark the old alternate volume header as free.
	 * We don't bother shrinking allocation bitmap file.
	 */
	if (hfsmp->blockSize == 512)
		(void) BlockMarkFree(hfsmp, hfsmp->totalBlocks - 2, 2);
	else
		(void) BlockMarkFree(hfsmp, hfsmp->totalBlocks - 1, 1);

	/*
	 * Invalidate the existing alternate volume header.
	 *
	 * Don't include this in a transaction (don't call journal_modify_block)
	 * since this block will be outside of the truncated file system!
	 */
	if (hfsmp->hfs_alt_id_sector) {
		error = buf_meta_bread(hfsmp->hfs_devvp,
				HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys),
				hfsmp->hfs_physical_block_size, NOCRED, &bp);
		if (error == 0) {
			bzero((void*)((char *)buf_dataptr(bp) + HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size)), kMDBSize);
			(void) VNOP_BWRITE(bp);
		} else {
			if (bp) {
				buf_brelse(bp);
			}
		}
		bp = NULL;
	}

	/* Log successful shrinking. */
	printf("hfs_truncatefs: shrank \"%s\" to %d blocks (was %d blocks)\n",
	       hfsmp->vcbVN, newblkcnt, hfsmp->totalBlocks);

	/*
	 * Adjust file system variables and flush them to disk.
	 */
	hfsmp->totalBlocks = newblkcnt;
	hfsmp->hfs_logical_block_count = newsize / hfsmp->hfs_logical_block_size;
	hfsmp->hfs_alt_id_sector = HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, hfsmp->hfs_logical_block_count);
	MarkVCBDirty(hfsmp);
	error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
	if (error)
		panic("hfs_truncatefs: unexpected error flushing volume header (%d)\n", error);

	/*
	 * Adjust the size of hfsmp->hfs_attrdata_vp
	 */
	if (hfsmp->hfs_attrdata_vp) {
		struct cnode *cp;
		struct filefork *fp;

		if (vnode_get(hfsmp->hfs_attrdata_vp) == 0) {
			cp = VTOC(hfsmp->hfs_attrdata_vp);
			fp = VTOF(hfsmp->hfs_attrdata_vp);

			cp->c_blocks = newblkcnt;
			fp->ff_blocks = newblkcnt;
			fp->ff_extents[0].blockCount = newblkcnt;
			fp->ff_size = (off_t) newblkcnt * hfsmp->blockSize;
			ubc_setsize(hfsmp->hfs_attrdata_vp, fp->ff_size);
			vnode_put(hfsmp->hfs_attrdata_vp);
		}
	}

out:
	/*
	 * Update the allocLimit to acknowledge the last one or two blocks now.
	 * Add it to the tree as well if necessary.
	 */
	UpdateAllocLimit (hfsmp, hfsmp->totalBlocks);

	HFS_MOUNT_LOCK(hfsmp, TRUE);
	if (disable_sparse == true) {
		/* Now that resize is completed, set the volume to be sparse
		 * device again so that all further allocations will be first
		 * fit instead of best fit.  Reset free extent cache so that
		 * it is rebuilt.
		 */
		hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
		ResetVCBFreeExtCache(hfsmp);
	}

	/* Roll back the early freeBlocks decrement if the shrink failed. */
	if (error && (updateFreeBlocks == true)) {
		hfsmp->freeBlocks += reclaimblks;
	}

	/* Keep nextAllocation inside the (possibly smaller) volume. */
	if (hfsmp->nextAllocation >= hfsmp->allocLimit) {
		hfsmp->nextAllocation = hfsmp->hfs_metazone_end + 1;
	}
	hfsmp->hfs_flags &= ~HFS_RESIZE_IN_PROGRESS;
	HFS_MOUNT_UNLOCK(hfsmp, TRUE);

	/* On error, reset the metadata zone for original volume size */
	if (error && (updateFreeBlocks == true)) {
		hfs_metadatazone_init(hfsmp, false);
	}

	if (lockflags) {
		hfs_systemfile_unlock(hfsmp, lockflags);
	}
	if (transaction_begun) {
		hfs_end_transaction(hfsmp);
		hfs_journal_flush(hfsmp, FALSE);
		/* Just to be sure, sync all data to the disk */
		(void) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
	}

	return MacToVFSError(error);
}
4597
4598
4599 /*
4600 * Invalidate the physical block numbers associated with buffer cache blocks
4601 * in the given extent of the given vnode.
4602 */
/* Argument block for hfs_invalidate_block_numbers_callback. */
struct hfs_inval_blk_no {
	daddr64_t sectorStart;	/* first device sector of the range to invalidate */
	daddr64_t sectorCount;	/* number of device sectors in the range */
};
4607 static int
4608 hfs_invalidate_block_numbers_callback(buf_t bp, void *args_in)
4609 {
4610 daddr64_t blkno;
4611 struct hfs_inval_blk_no *args;
4612
4613 blkno = buf_blkno(bp);
4614 args = args_in;
4615
4616 if (blkno >= args->sectorStart && blkno < args->sectorStart+args->sectorCount)
4617 buf_setblkno(bp, buf_lblkno(bp));
4618
4619 return BUF_RETURNED;
4620 }
4621 static void
4622 hfs_invalidate_sectors(struct vnode *vp, daddr64_t sectorStart, daddr64_t sectorCount)
4623 {
4624 struct hfs_inval_blk_no args;
4625 args.sectorStart = sectorStart;
4626 args.sectorCount = sectorCount;
4627
4628 buf_iterate(vp, hfs_invalidate_block_numbers_callback, BUF_SCAN_DIRTY|BUF_SCAN_CLEAN, &args);
4629 }
4630
4631
4632 /*
4633 * Copy the contents of an extent to a new location. Also invalidates the
4634 * physical block number of any buffer cache block in the copied extent
4635 * (so that if the block is written, it will go through VNOP_BLOCKMAP to
4636 * determine the new physical block number).
4637 */
static int
hfs_copy_extent(
	struct hfsmount *hfsmp,
	struct vnode *vp,		/* The file whose extent is being copied. */
	u_int32_t oldStart,		/* The start of the source extent. */
	u_int32_t newStart,		/* The start of the destination extent. */
	u_int32_t blockCount,		/* The number of allocation blocks to copy. */
	vfs_context_t context)
{
	int err = 0;
	size_t bufferSize;		/* capacity of the bounce buffer, per I/O */
	void *buffer = NULL;
	struct vfsioattr ioattr;
	buf_t bp = NULL;
	off_t resid;			/* bytes remaining to copy */
	size_t ioSize;
	u_int32_t ioSizeSectors;	/* Device sectors in this I/O */
	daddr64_t srcSector, destSector;
	u_int32_t sectorsPerBlock = hfsmp->blockSize / hfsmp->hfs_logical_block_size;
#if CONFIG_PROTECT
	int cpenabled = 0;		/* nonzero when content-protection keys are attached to I/Os */
#endif

	/*
	 * Sanity check that we have locked the vnode of the file we're copying.
	 *
	 * But since hfs_systemfile_lock() doesn't actually take the lock on
	 * the allocation file if a journal is active, ignore the check if the
	 * file being copied is the allocation file.
	 */
	struct cnode *cp = VTOC(vp);
	if (cp != hfsmp->hfs_allocation_cp && cp->c_lockowner != current_thread())
		panic("hfs_copy_extent: vp=%p (cp=%p) not owned?\n", vp, cp);

#if CONFIG_PROTECT
	/* Prepare the CP blob and get it ready for use */
	if (!vnode_issystem (vp) && vnode_isreg(vp) &&
			cp_fs_protected (hfsmp->hfs_mp)) {
		int cp_err = 0;
		cp_err = cp_handle_relocate (cp);
		if (cp_err) {
			/*
			 * can't copy the file because we couldn't set up keys.
			 * bail out
			 */
			return cp_err;
		}
		else {
			cpenabled = 1;
		}
	}
#endif

	/*
	 * Determine the I/O size to use
	 *
	 * NOTE: Many external drives will result in an ioSize of 128KB.
	 * TODO: Should we use a larger buffer, doing several consecutive
	 * reads, then several consecutive writes?
	 */
	vfs_ioattr(hfsmp->hfs_mp, &ioattr);
	bufferSize = MIN(ioattr.io_maxreadcnt, ioattr.io_maxwritecnt);
	if (kmem_alloc(kernel_map, (vm_offset_t*) &buffer, bufferSize))
		return ENOMEM;

	/* Get a buffer for doing the I/O */
	bp = buf_alloc(hfsmp->hfs_devvp);
	buf_setdataptr(bp, (uintptr_t)buffer);

	/* Convert allocation-block coordinates to device-sector coordinates. */
	resid = (off_t) blockCount * (off_t) hfsmp->blockSize;
	srcSector = (daddr64_t) oldStart * hfsmp->blockSize / hfsmp->hfs_logical_block_size;
	destSector = (daddr64_t) newStart * hfsmp->blockSize / hfsmp->hfs_logical_block_size;
	while (resid > 0) {
		ioSize = MIN(bufferSize, (size_t) resid);
		ioSizeSectors = ioSize / hfsmp->hfs_logical_block_size;

		/* Prepare the buffer for reading */
		buf_reset(bp, B_READ);
		buf_setsize(bp, ioSize);
		buf_setcount(bp, ioSize);
		buf_setblkno(bp, srcSector);
		buf_setlblkno(bp, srcSector);

		/* Attach the CP to the buffer */
#if CONFIG_PROTECT
		if (cpenabled) {
			buf_setcpaddr (bp, cp->c_cpentry);
		}
#endif

		/* Do the read */
		err = VNOP_STRATEGY(bp);
		if (!err)
			err = buf_biowait(bp);
		if (err) {
			printf("hfs_copy_extent: Error %d from VNOP_STRATEGY (read)\n", err);
			break;
		}

		/* Prepare the buffer for writing */
		buf_reset(bp, B_WRITE);
		buf_setsize(bp, ioSize);
		buf_setcount(bp, ioSize);
		buf_setblkno(bp, destSector);
		buf_setlblkno(bp, destSector);
		/* Force unit access for system-file writes when the journal relies on FUA. */
		if (vnode_issystem(vp) && journal_uses_fua(hfsmp->jnl))
			buf_markfua(bp);

#if CONFIG_PROTECT
		/* Attach the CP to the buffer */
		if (cpenabled) {
			buf_setcpaddr (bp, cp->c_cpentry);
		}
#endif

		/* Do the write */
		vnode_startwrite(hfsmp->hfs_devvp);
		err = VNOP_STRATEGY(bp);
		if (!err)
			err = buf_biowait(bp);
		if (err) {
			printf("hfs_copy_extent: Error %d from VNOP_STRATEGY (write)\n", err);
			break;
		}

		resid -= ioSize;
		srcSector += ioSizeSectors;
		destSector += ioSizeSectors;
	}
	if (bp)
		buf_free(bp);
	if (buffer)
		kmem_free(kernel_map, (vm_offset_t)buffer, bufferSize);

	/* Make sure all writes have been flushed to disk. */
	if (vnode_issystem(vp) && !journal_uses_fua(hfsmp->jnl)) {
		err = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
		if (err) {
			printf("hfs_copy_extent: DKIOCSYNCHRONIZECACHE failed (%d)\n", err);
			err = 0;	/* Don't fail the copy. */
		}
	}

	/* Drop stale physical block numbers so future writes re-map via BLOCKMAP. */
	if (!err)
		hfs_invalidate_sectors(vp, (daddr64_t)oldStart*sectorsPerBlock, (daddr64_t)blockCount*sectorsPerBlock);

	return err;
}
4786
4787
4788 /* Structure to store state of reclaiming extents from a
4789 * given file. hfs_reclaim_file()/hfs_reclaim_xattr()
4790 * initializes the values in this structure which are then
4791 * used by code that reclaims and splits the extents.
4792 */
struct hfs_reclaim_extent_info {
	struct vnode *vp;		/* Vnode of the file whose extents are being reclaimed */
	u_int32_t fileID;		/* Catalog node ID of the file */
	u_int8_t forkType;		/* Fork (data/resource) the extents belong to */
	u_int8_t is_dirlink;		/* Extent belongs to directory hard link */
	u_int8_t is_sysfile;		/* Extent belongs to system file */
	u_int8_t is_xattr;		/* Extent belongs to extent-based xattr */
	u_int8_t extent_index;		/* Index of current extent within the extent record */
	int lockflags;			/* Locks that reclaim and split code should grab before modifying the extent record */
	u_int32_t blocks_relocated;	/* Total blocks relocated for this file till now */
	u_int32_t recStartBlock;	/* File allocation block number (FABN) for current extent record */
	u_int32_t cur_blockCount;	/* Number of allocation blocks that have been checked for reclaim */
	struct filefork *catalog_fp;	/* If non-NULL, extent is from catalog record */
	union record {
		HFSPlusExtentRecord overflow;/* Extent record from overflow extents btree */
		HFSPlusAttrRecord xattr;	/* Attribute record for large EAs */
	} record;
	HFSPlusExtentDescriptor *extents;	/* Pointer to current extent record being processed.
						 * For catalog extent record, points to the correct
						 * extent information in filefork.  For overflow extent
						 * record, or xattr record, points to extent record
						 * in the structure above
						 */
	struct cat_desc *dirlink_desc;	/* Catalog descriptor for a directory hard link target */
	struct cat_attr *dirlink_attr;	/* Catalog attributes for a directory hard link target */
	struct filefork *dirlink_fork;	/* For directory hard links, fp points actually to this */
	struct BTreeIterator *iterator;	/* Shared read/write iterator, hfs_reclaim_file/xattr()
					 * use it for reading and hfs_reclaim_extent()/hfs_split_extent()
					 * use it for writing updated extent record
					 */
	struct FSBufferDescriptor btdata;	/* Shared btdata for reading/writing extent record, same as iterator above */
	u_int16_t recordlen;		/* Length of the B-tree record currently in the iterator */
	int overflow_count;		/* For debugging, counter for overflow extent record */
	FCB *fcb;			/* Pointer to the current btree being traversed */
};
4828
4829 /*
4830 * Split the current extent into two extents, with first extent
4831 * to contain given number of allocation blocks. Splitting of
4832 * extent creates one new extent entry which can result in
4833 * shifting of many entries through all the extent records of a
4834 * file, and/or creating a new extent record in the overflow
4835 * extent btree.
4836 *
4837 * Example:
4838 * The diagram below represents two consecutive extent records,
4839 * for simplicity, lets call them record X and X+1 respectively.
4840 * Interesting extent entries have been denoted by letters.
4841 * If the letter is unchanged before and after split, it means
4842 * that the extent entry was not modified during the split.
4843 * A '.' means that the entry remains unchanged after the split
4844 * and is not relevant for our example. A '0' means that the
4845 * extent entry is empty.
4846 *
4847 * If there isn't sufficient contiguous free space to relocate
4848 * an extent (extent "C" below), we will have to break the one
4849 * extent into multiple smaller extents, and relocate each of
4850 * the smaller extents individually. The way we do this is by
4851 * finding the largest contiguous free space that is currently
4852 * available (N allocation blocks), and then convert extent "C"
4853 * into two extents, C1 and C2, that occupy exactly the same
4854 * allocation blocks as extent C. Extent C1 is the first
4855 * N allocation blocks of extent C, and extent C2 is the remainder
4856 * of extent C. Then we can relocate extent C1 since we know
4857 * we have enough contiguous free space to relocate it in its
4858 * entirety. We then repeat the process starting with extent C2.
4859 *
4860 * In record X, only the entries following entry C are shifted, and
4861 * the original entry C is replaced with two entries C1 and C2 which
4862 * are actually two extent entries for contiguous allocation blocks.
4863 *
4864 * Note that the entry E from record X is shifted into record X+1 as
4865 * the new first entry. Since the first entry of record X+1 is updated,
4866 * the FABN will also get updated with the blockCount of entry E.
4867 * This also results in shifting of all extent entries in record X+1.
4868 * Note that the number of empty entries after the split has been
4869 * changed from 3 to 2.
4870 *
4871 * Before:
4872 * record X record X+1
4873 * ---------------------===--------- ---------------------------------
4874 * | A | . | . | . | B | C | D | E | | F | . | . | . | G | 0 | 0 | 0 |
4875 * ---------------------===--------- ---------------------------------
4876 *
4877 * After:
4878 * ---------------------=======----- ---------------------------------
4879 * | A | . | . | . | B | C1| C2| D | | E | F | . | . | . | G | 0 | 0 |
4880 * ---------------------=======----- ---------------------------------
4881 *
4882 * C1.startBlock = C.startBlock
4883 * C1.blockCount = N
4884 *
4885 * C2.startBlock = C.startBlock + N
4886 * C2.blockCount = C.blockCount - N
4887 *
4888 * FABN = old FABN - E.blockCount
4889 *
4890 * Inputs:
4891 * extent_info - This is the structure that contains state about
4892 * the current file, extent, and extent record that
4893 * is being relocated. This structure is shared
4894 * among code that traverses through all the extents
4895 * of the file, code that relocates extents, and
4896 * code that splits the extent.
4897 * Output:
4898 * Zero on success, non-zero on failure.
4899 */
static int
hfs_split_extent(struct hfs_reclaim_extent_info *extent_info, uint32_t newBlockCount)
{
    int error = 0;
    int index = extent_info->extent_index;  /* Index of the extent entry being split */
    int i;
    HFSPlusExtentDescriptor shift_extent;   /* Entry that overflows into the following record */
    HFSPlusExtentDescriptor last_extent;
    HFSPlusExtentDescriptor *extents; /* Pointer to current extent record being manipulated */
    HFSPlusExtentRecord *extents_rec = NULL;
    HFSPlusExtentKey *extents_key = NULL;
    HFSPlusAttrRecord *xattr_rec = NULL;
    HFSPlusAttrKey *xattr_key = NULL;
    struct BTreeIterator iterator;
    struct FSBufferDescriptor btdata;
    uint16_t reclen;
    uint32_t read_recStartBlock;    /* Starting allocation block number to read old extent record */
    uint32_t write_recStartBlock;   /* Starting allocation block number to insert newly updated extent record */
    Boolean create_record = false;
    Boolean is_xattr;

    is_xattr = extent_info->is_xattr;
    extents = extent_info->extents;

    if (hfs_resize_debug) {
        printf ("hfs_split_extent: Split record:%u recStartBlock=%u %u:(%u,%u) for %u blocks\n", extent_info->overflow_count, extent_info->recStartBlock, index, extents[index].startBlock, extents[index].blockCount, newBlockCount);
    }

    /* Determine the starting allocation block number for the following
     * overflow extent record, if any, before the current record
     * gets modified.
     */
    read_recStartBlock = extent_info->recStartBlock;
    for (i = 0; i < kHFSPlusExtentDensity; i++) {
        if (extents[i].blockCount == 0) {
            break;
        }
        read_recStartBlock += extents[i].blockCount;
    }

    /* Shift and split */
    if (index == kHFSPlusExtentDensity-1) {
        /* The new extent created after split will go into following overflow extent record */
        shift_extent.startBlock = extents[index].startBlock + newBlockCount;
        shift_extent.blockCount = extents[index].blockCount - newBlockCount;

        /* Last extent in the record will be split, so nothing to shift */
    } else {
        /* Splitting of extents can result in at most one
         * extent entry to be shifted into following overflow extent
         * record.  So, store the last extent entry for later.
         */
        shift_extent = extents[kHFSPlusExtentDensity-1];

        /* Start shifting extent information from the end of the extent
         * record to the index where we want to insert the new extent.
         * Note that kHFSPlusExtentDensity-1 is already saved above, and
         * does not need to be shifted.  The extent entry that is being
         * split does not get shifted.
         */
        for (i = kHFSPlusExtentDensity-2; i > index; i--) {
            if (hfs_resize_debug) {
                if (extents[i].blockCount) {
                    printf ("hfs_split_extent: Shift %u:(%u,%u) to %u:(%u,%u)\n", i, extents[i].startBlock, extents[i].blockCount, i+1, extents[i].startBlock, extents[i].blockCount);
                }
            }
            extents[i+1] = extents[i];
        }
    }

    if (index == kHFSPlusExtentDensity-1) {
        /* The second half of the extent being split will be the overflow
         * entry that will go into following overflow extent record.  The
         * value has been stored in 'shift_extent' above, so there is
         * nothing to be done here.
         */
    } else {
        /* Update the values in the second half of the extent being split
         * before updating the first half of the split.  Note that the
         * extent to split or first half of the split is at index 'index'
         * and a new extent or second half of the split will be inserted at
         * 'index+1' or into following overflow extent record.
         */
        extents[index+1].startBlock = extents[index].startBlock + newBlockCount;
        extents[index+1].blockCount = extents[index].blockCount - newBlockCount;
    }
    /* Update the extent being split, only the block count will change */
    extents[index].blockCount = newBlockCount;

    if (hfs_resize_debug) {
        printf ("hfs_split_extent: Split %u:(%u,%u) and ", index, extents[index].startBlock, extents[index].blockCount);
        if (index != kHFSPlusExtentDensity-1) {
            printf ("%u:(%u,%u)\n", index+1, extents[index+1].startBlock, extents[index+1].blockCount);
        } else {
            printf ("overflow:(%u,%u)\n", shift_extent.startBlock, shift_extent.blockCount);
        }
    }

    /* If the newly split extent is for large EAs or is in an overflow extent
     * record, update it directly in the btree using the iterator
     * information from the shared extent_info structure.
     */
    if (extent_info->catalog_fp == NULL) {
        error = BTReplaceRecord(extent_info->fcb, extent_info->iterator,
                &(extent_info->btdata), extent_info->recordlen);
        if (error) {
            printf ("hfs_split_extent: fileID=%u BTReplaceRecord returned error=%d\n", extent_info->fileID, error);
            goto out;
        }
    }

    /* No extent entry to be shifted into another extent overflow record */
    if (shift_extent.blockCount == 0) {
        if (hfs_resize_debug) {
            printf ("hfs_split_extent: No extent entry to be shifted into overflow records\n");
        }
        error = 0;
        goto out;
    }

    /* The overflow extent entry has to be shifted into an extent
     * overflow record.  This would mean that we have to shift
     * extent entries from all overflow records by one.  We will
     * start iteration from the first record to the last record,
     * and shift the extent entry from one record to another.
     * We might have to create a new record for the last extent
     * entry for the file.
     */

    /* Initialize iterator to search the next record */
    bzero(&iterator, sizeof(iterator));
    if (is_xattr) {
        /* Copy the key from the iterator that was used to update the modified attribute record. */
        xattr_key = (HFSPlusAttrKey *)&(iterator.key);
        bcopy((HFSPlusAttrKey *)&(extent_info->iterator->key), xattr_key, sizeof(HFSPlusAttrKey));
        /* Note: xattr_key->startBlock will be initialized later in the iteration loop */

        MALLOC(xattr_rec, HFSPlusAttrRecord *,
                sizeof(HFSPlusAttrRecord), M_TEMP, M_WAITOK);
        if (xattr_rec == NULL) {
            error = ENOMEM;
            goto out;
        }
        btdata.bufferAddress = xattr_rec;
        btdata.itemSize = sizeof(HFSPlusAttrRecord);
        btdata.itemCount = 1;
        extents = xattr_rec->overflowExtents.extents;
    } else {
        extents_key = (HFSPlusExtentKey *) &(iterator.key);
        extents_key->keyLength = kHFSPlusExtentKeyMaximumLength;
        extents_key->forkType = extent_info->forkType;
        extents_key->fileID = extent_info->fileID;
        /* Note: extents_key->startBlock will be initialized later in the iteration loop */

        MALLOC(extents_rec, HFSPlusExtentRecord *,
                sizeof(HFSPlusExtentRecord), M_TEMP, M_WAITOK);
        if (extents_rec == NULL) {
            error = ENOMEM;
            goto out;
        }
        btdata.bufferAddress = extents_rec;
        btdata.itemSize = sizeof(HFSPlusExtentRecord);
        btdata.itemCount = 1;
        extents = extents_rec[0];
    }

    /* An extent entry still needs to be shifted into following overflow
     * extent record.  This will result in the starting allocation block
     * number of the extent record being changed which is part of the key
     * for the extent record.  Since the extent record key is changing,
     * the record can not be updated, instead has to be deleted and
     * inserted again.
     */
    while (shift_extent.blockCount) {
        if (hfs_resize_debug) {
            printf ("hfs_split_extent: Will shift (%u,%u) into record with startBlock=%u\n", shift_extent.startBlock, shift_extent.blockCount, read_recStartBlock);
        }

        /* Search if there is any existing overflow extent record.
         * For this, the logical start block number in the key is
         * the value calculated based on the logical start block
         * number of the current extent record and the total number
         * of blocks existing in the current extent record.
         */
        if (is_xattr) {
            xattr_key->startBlock = read_recStartBlock;
        } else {
            extents_key->startBlock = read_recStartBlock;
        }
        error = BTSearchRecord(extent_info->fcb, &iterator, &btdata, &reclen, &iterator);
        if (error) {
            if (error != btNotFound) {
                printf ("hfs_split_extent: fileID=%u startBlock=%u BTSearchRecord error=%d\n", extent_info->fileID, read_recStartBlock, error);
                goto out;
            }
            /* No record with this key exists; create a new one below.
             * NOTE(review): once set, create_record is never cleared, but a
             * created record always zeroes shift_extent, which terminates
             * the while loop — so it can never leak into a later iteration.
             */
            create_record = true;
        }

        /* The extra extent entry from the previous record is being inserted
         * as the first entry in the current extent record.  This will change
         * the file allocation block number (FABN) of the current extent
         * record, which is the startBlock value from the extent record key.
         * Since one extra entry is being inserted in the record, the new
         * FABN for the record will be less than old FABN by the number of blocks
         * in the new extent entry being inserted at the start.  We have to
         * do this before we update read_recStartBlock to point at the
         * startBlock of the following record.
         */
        write_recStartBlock = read_recStartBlock - shift_extent.blockCount;
        if (hfs_resize_debug) {
            if (create_record) {
                printf ("hfs_split_extent: No records found for startBlock=%u, will create new with startBlock=%u\n", read_recStartBlock, write_recStartBlock);
            }
        }

        /* Now update the read_recStartBlock to account for total number
         * of blocks in this extent record.  It will now point to the
         * starting allocation block number for the next extent record.
         *
         * NOTE(review): when the search above returned btNotFound, 'extents'
         * still holds the previous iteration's record (or the buffer is
         * uninitialized on the first pass), so this loop reads stale data.
         * The value is never used in that case because the create_record
         * branch below zeroes shift_extent and ends the loop — confirm.
         */
        for (i = 0; i < kHFSPlusExtentDensity; i++) {
            if (extents[i].blockCount == 0) {
                break;
            }
            read_recStartBlock += extents[i].blockCount;
        }

        if (create_record == true) {
            /* Initialize new record content with only one extent entry */
            bzero(extents, sizeof(HFSPlusExtentRecord));
            /* The new record will contain only one extent entry */
            extents[0] = shift_extent;
            /* There are no more overflow extents to be shifted */
            shift_extent.startBlock = shift_extent.blockCount = 0;

            if (is_xattr) {
                xattr_rec->recordType = kHFSPlusAttrExtents;
                xattr_rec->overflowExtents.reserved = 0;
                reclen = sizeof(HFSPlusAttrExtents);
            } else {
                reclen = sizeof(HFSPlusExtentRecord);
            }
        } else {
            /* The overflow extent entry from previous record will be
             * the first entry in this extent record.  If the last
             * extent entry in this record is valid, it will be shifted
             * into the following extent record as its first entry.  So
             * save the last entry before shifting entries in current
             * record.
             */
            last_extent = extents[kHFSPlusExtentDensity-1];

            /* Shift all entries by one index towards the end */
            for (i = kHFSPlusExtentDensity-2; i >= 0; i--) {
                extents[i+1] = extents[i];
            }

            /* Overflow extent entry saved from previous record
             * is now the first entry in the current record.
             */
            extents[0] = shift_extent;

            if (hfs_resize_debug) {
                printf ("hfs_split_extent: Shift overflow=(%u,%u) to record with updated startBlock=%u\n", shift_extent.startBlock, shift_extent.blockCount, write_recStartBlock);
            }

            /* The last entry from current record will be the
             * overflow entry which will be the first entry for
             * the following extent record.
             */
            shift_extent = last_extent;

            /* Since the key->startBlock is being changed for this record,
             * it should be deleted and inserted with the new key.
             */
            error = BTDeleteRecord(extent_info->fcb, &iterator);
            if (error) {
                printf ("hfs_split_extent: fileID=%u startBlock=%u BTDeleteRecord error=%d\n", extent_info->fileID, read_recStartBlock, error);
                goto out;
            }
            if (hfs_resize_debug) {
                printf ("hfs_split_extent: Deleted record with startBlock=%u\n", (is_xattr ? xattr_key->startBlock : extents_key->startBlock));
            }
        }

        /* Insert the newly created or modified extent record */
        bzero(&iterator.hint, sizeof(iterator.hint));
        if (is_xattr) {
            xattr_key->startBlock = write_recStartBlock;
        } else {
            extents_key->startBlock = write_recStartBlock;
        }
        error = BTInsertRecord(extent_info->fcb, &iterator, &btdata, reclen);
        if (error) {
            printf ("hfs_split_extent: fileID=%u, startBlock=%u BTInsertRecord error=%d\n", extent_info->fileID, write_recStartBlock, error);
            goto out;
        }
        if (hfs_resize_debug) {
            printf ("hfs_split_extent: Inserted extent record with startBlock=%u\n", write_recStartBlock);
        }
    }
    /* NOTE(review): BTFlushPath's return value is deliberately ignored here;
     * a flush failure does not invalidate the btree updates made above.
     */
    BTFlushPath(extent_info->fcb);
out:
    if (extents_rec) {
        FREE (extents_rec, M_TEMP);
    }
    if (xattr_rec) {
        FREE (xattr_rec, M_TEMP);
    }
    return error;
}
5210
5211
5212 /*
5213 * Relocate an extent if it lies beyond the expected end of volume.
5214 *
5215 * This function is called for every extent of the file being relocated.
5216 * It allocates space for relocation, copies the data, deallocates
5217 * the old extent, and updates the corresponding on-disk extent. If the function
5218 * does not find contiguous space to relocate an extent, it splits the
5219 * extent into smaller extents to be able to relocate it out of the area of
5220 * disk being reclaimed. As an optimization, if an extent lies partially
5221 * in the area of the disk being reclaimed, it is split so that we only
5222 * have to relocate the area that was overlapping with the area of disk
5223 * being reclaimed.
5224 *
5225 * Note that every extent is relocated in its own transaction so that
5226 * they do not overwhelm the journal. This function handles the extent
5227 * record that exists in the catalog record, extent record from overflow
5228 * extents btree, and extents for large EAs.
5229 *
5230 * Inputs:
5231 * extent_info - This is the structure that contains state about
5232 * the current file, extent, and extent record that
5233 * is being relocated. This structure is shared
5234 * among code that traverses through all the extents
5235 * of the file, code that relocates extents, and
5236 * code that splits the extent.
5237 */
5238 static int
5239 hfs_reclaim_extent(struct hfsmount *hfsmp, const u_long allocLimit, struct hfs_reclaim_extent_info *extent_info, vfs_context_t context)
5240 {
5241 int error = 0;
5242 int index;
5243 struct cnode *cp;
5244 u_int32_t oldStartBlock;
5245 u_int32_t oldBlockCount;
5246 u_int32_t newStartBlock;
5247 u_int32_t newBlockCount;
5248 u_int32_t alloc_flags;
5249 int blocks_allocated = false;
5250
5251 index = extent_info->extent_index;
5252 cp = VTOC(extent_info->vp);
5253
5254 oldStartBlock = extent_info->extents[index].startBlock;
5255 oldBlockCount = extent_info->extents[index].blockCount;
5256
5257 if (0 && hfs_resize_debug) {
5258 printf ("hfs_reclaim_extent: Examine record:%u recStartBlock=%u, %u:(%u,%u)\n", extent_info->overflow_count, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount);
5259 }
5260
5261 /* Check if the current extent lies completely within allocLimit */
5262 if ((oldStartBlock + oldBlockCount) <= allocLimit) {
5263 extent_info->cur_blockCount += oldBlockCount;
5264 return error;
5265 }
5266
5267 /* Every extent should be relocated in its own transaction
5268 * to make sure that we don't overflow the journal buffer.
5269 */
5270 error = hfs_start_transaction(hfsmp);
5271 if (error) {
5272 return error;
5273 }
5274 extent_info->lockflags = hfs_systemfile_lock(hfsmp, extent_info->lockflags, HFS_EXCLUSIVE_LOCK);
5275
5276 /* Check if the extent lies partially in the area to reclaim,
5277 * i.e. it starts before allocLimit and ends beyond allocLimit.
5278 * We have already skipped extents that lie completely within
5279 * allocLimit in the check above, so we only check for the
5280 * startBlock. If it lies partially, split it so that we
5281 * only relocate part of the extent.
5282 */
5283 if (oldStartBlock < allocLimit) {
5284 newBlockCount = allocLimit - oldStartBlock;
5285 error = hfs_split_extent(extent_info, newBlockCount);
5286 if (error == 0) {
5287 /* After successful split, the current extent does not
5288 * need relocation, so just return back.
5289 */
5290 goto out;
5291 }
5292 /* Ignore error and try relocating the entire extent instead */
5293 }
5294
5295 alloc_flags = HFS_ALLOC_FORCECONTIG | HFS_ALLOC_SKIPFREEBLKS;
5296 if (extent_info->is_sysfile) {
5297 alloc_flags |= HFS_ALLOC_METAZONE;
5298 }
5299
5300 error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount, alloc_flags,
5301 &newStartBlock, &newBlockCount);
5302 if ((extent_info->is_sysfile == false) &&
5303 ((error == dskFulErr) || (error == ENOSPC))) {
5304 /* For non-system files, try reallocating space in metadata zone */
5305 alloc_flags |= HFS_ALLOC_METAZONE;
5306 error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount,
5307 alloc_flags, &newStartBlock, &newBlockCount);
5308 }
5309 if ((error == dskFulErr) || (error == ENOSPC)) {
5310 /* We did not find desired contiguous space for this extent.
5311 * So try to allocate the maximum contiguous space available.
5312 */
5313 alloc_flags &= ~HFS_ALLOC_FORCECONTIG;
5314
5315 error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount,
5316 alloc_flags, &newStartBlock, &newBlockCount);
5317 if (error) {
5318 printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) BlockAllocate error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error);
5319 goto out;
5320 }
5321 blocks_allocated = true;
5322
5323 error = hfs_split_extent(extent_info, newBlockCount);
5324 if (error) {
5325 printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) split error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error);
5326 goto out;
5327 }
5328 oldBlockCount = newBlockCount;
5329 }
5330 if (error) {
5331 printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) contig BlockAllocate error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error);
5332 goto out;
5333 }
5334 blocks_allocated = true;
5335
5336 /* Copy data from old location to new location */
5337 error = hfs_copy_extent(hfsmp, extent_info->vp, oldStartBlock,
5338 newStartBlock, newBlockCount, context);
5339 if (error) {
5340 printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u)=>(%u,%u) hfs_copy_extent error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, newStartBlock, newBlockCount, error);
5341 goto out;
5342 }
5343
5344 /* Update the extent record with the new start block information */
5345 extent_info->extents[index].startBlock = newStartBlock;
5346
5347 /* Sync the content back to the disk */
5348 if (extent_info->catalog_fp) {
5349 /* Update the extents in catalog record */
5350 if (extent_info->is_dirlink) {
5351 error = cat_update_dirlink(hfsmp, extent_info->forkType,
5352 extent_info->dirlink_desc, extent_info->dirlink_attr,
5353 &(extent_info->dirlink_fork->ff_data));
5354 } else {
5355 cp->c_flag |= C_MODIFIED;
5356 /* If this is a system file, sync volume headers on disk */
5357 if (extent_info->is_sysfile) {
5358 error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
5359 }
5360 }
5361 } else {
5362 /* Replace record for extents overflow or extents-based xattrs */
5363 error = BTReplaceRecord(extent_info->fcb, extent_info->iterator,
5364 &(extent_info->btdata), extent_info->recordlen);
5365 }
5366 if (error) {
5367 printf ("hfs_reclaim_extent: fileID=%u, update record error=%u\n", extent_info->fileID, error);
5368 goto out;
5369 }
5370
5371 /* Deallocate the old extent */
5372 error = BlockDeallocate(hfsmp, oldStartBlock, oldBlockCount, HFS_ALLOC_SKIPFREEBLKS);
5373 if (error) {
5374 printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) BlockDeallocate error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error);
5375 goto out;
5376 }
5377 extent_info->blocks_relocated += newBlockCount;
5378
5379 if (hfs_resize_debug) {
5380 printf ("hfs_reclaim_extent: Relocated record:%u %u:(%u,%u) to (%u,%u)\n", extent_info->overflow_count, index, oldStartBlock, oldBlockCount, newStartBlock, newBlockCount);
5381 }
5382
5383 out:
5384 if (error != 0) {
5385 if (blocks_allocated == true) {
5386 BlockDeallocate(hfsmp, newStartBlock, newBlockCount, HFS_ALLOC_SKIPFREEBLKS);
5387 }
5388 } else {
5389 /* On success, increment the total allocation blocks processed */
5390 extent_info->cur_blockCount += newBlockCount;
5391 }
5392
5393 hfs_systemfile_unlock(hfsmp, extent_info->lockflags);
5394
5395 /* For a non-system file, if an extent entry from catalog record
5396 * was modified, sync the in-memory changes to the catalog record
5397 * on disk before ending the transaction.
5398 */
5399 if ((error == 0) &&
5400 (extent_info->overflow_count < kHFSPlusExtentDensity) &&
5401 (extent_info->is_sysfile == false)) {
5402 (void) hfs_update(extent_info->vp, MNT_WAIT);
5403 }
5404
5405 hfs_end_transaction(hfsmp);
5406
5407 return error;
5408 }
5409
5410 /* Report intermediate progress during volume resize */
5411 static void
5412 hfs_truncatefs_progress(struct hfsmount *hfsmp)
5413 {
5414 u_int32_t cur_progress;
5415
5416 hfs_resize_progress(hfsmp, &cur_progress);
5417 if (cur_progress > (hfsmp->hfs_resize_progress + 9)) {
5418 printf("hfs_truncatefs: %d%% done...\n", cur_progress);
5419 hfsmp->hfs_resize_progress = cur_progress;
5420 }
5421 return;
5422 }
5423
5424 /*
5425 * Reclaim space at the end of a volume for given file and forktype.
5426 *
5427 * This routine attempts to move any extent which contains allocation blocks
5428 * at or after "allocLimit." A separate transaction is used for every extent
5429 * that needs to be moved. If there is not contiguous space available for
5430 * moving an extent, it can be split into smaller extents. The contents of
5431 * any moved extents are read and written via the volume's device vnode --
5432 * NOT via "vp." During the move, moved blocks which are part of a transaction
5433 * have their physical block numbers invalidated so they will eventually be
5434 * written to their new locations.
5435 *
5436 * This function is also called for directory hard links. Directory hard links
5437 * are regular files with no data fork and resource fork that contains alias
5438 * information for backward compatibility with pre-Leopard systems. However
5439 * non-Mac OS X implementation can add/modify data fork or resource fork
5440 * information to directory hard links, so we check, and if required, relocate
5441 * both data fork and resource fork.
5442 *
5443 * Inputs:
5444 * hfsmp The volume being resized.
5445 * vp The vnode for the system file.
5446 * fileID ID of the catalog record that needs to be relocated
5447 * forktype The type of fork that needs to be relocated,
5448 * kHFSResourceForkType for resource fork,
5449 * kHFSDataForkType for data fork
5450 * allocLimit Allocation limit for the new volume size,
5451 * do not use this block or beyond. All extents
5452 * that use this block or any blocks beyond this limit
5453 * will be relocated.
5454 *
5455 * Side Effects:
5456 * hfsmp->hfs_resize_blocksmoved is incremented by the number of allocation
5457 * blocks that were relocated.
5458 */
5459 static int
5460 hfs_reclaim_file(struct hfsmount *hfsmp, struct vnode *vp, u_int32_t fileID,
5461 u_int8_t forktype, u_long allocLimit, vfs_context_t context)
5462 {
5463 int error = 0;
5464 struct hfs_reclaim_extent_info *extent_info;
5465 int i;
5466 int lockflags = 0;
5467 struct cnode *cp;
5468 struct filefork *fp;
5469 int took_truncate_lock = false;
5470 int release_desc = false;
5471 HFSPlusExtentKey *key;
5472
5473 /* If there is no vnode for this file, then there's nothing to do. */
5474 if (vp == NULL) {
5475 return 0;
5476 }
5477
5478 cp = VTOC(vp);
5479
5480 MALLOC(extent_info, struct hfs_reclaim_extent_info *,
5481 sizeof(struct hfs_reclaim_extent_info), M_TEMP, M_WAITOK);
5482 if (extent_info == NULL) {
5483 return ENOMEM;
5484 }
5485 bzero(extent_info, sizeof(struct hfs_reclaim_extent_info));
5486 extent_info->vp = vp;
5487 extent_info->fileID = fileID;
5488 extent_info->forkType = forktype;
5489 extent_info->is_sysfile = vnode_issystem(vp);
5490 if (vnode_isdir(vp) && (cp->c_flag & C_HARDLINK)) {
5491 extent_info->is_dirlink = true;
5492 }
5493 /* We always need allocation bitmap and extent btree lock */
5494 lockflags = SFL_BITMAP | SFL_EXTENTS;
5495 if ((fileID == kHFSCatalogFileID) || (extent_info->is_dirlink == true)) {
5496 lockflags |= SFL_CATALOG;
5497 } else if (fileID == kHFSAttributesFileID) {
5498 lockflags |= SFL_ATTRIBUTE;
5499 } else if (fileID == kHFSStartupFileID) {
5500 lockflags |= SFL_STARTUP;
5501 }
5502 extent_info->lockflags = lockflags;
5503 extent_info->fcb = VTOF(hfsmp->hfs_extents_vp);
5504
5505 /* Flush data associated with current file on disk.
5506 *
5507 * If the current vnode is directory hard link, no flushing of
5508 * journal or vnode is required. The current kernel does not
5509 * modify data/resource fork of directory hard links, so nothing
5510 * will be in the cache. If a directory hard link is newly created,
5511 * the resource fork data is written directly using devvp and
5512 * the code that actually relocates data (hfs_copy_extent()) also
5513 * uses devvp for its I/O --- so they will see a consistent copy.
5514 */
5515 if (extent_info->is_sysfile) {
5516 /* If the current vnode is system vnode, flush journal
5517 * to make sure that all data is written to the disk.
5518 */
5519 error = hfs_journal_flush(hfsmp, TRUE);
5520 if (error) {
5521 printf ("hfs_reclaim_file: journal_flush returned %d\n", error);
5522 goto out;
5523 }
5524 } else if (extent_info->is_dirlink == false) {
5525 /* Flush all blocks associated with this regular file vnode.
5526 * Normally there should not be buffer cache blocks for regular
5527 * files, but for objects like symlinks, we can have buffer cache
5528 * blocks associated with the vnode. Therefore we call
5529 * buf_flushdirtyblks() also.
5530 */
5531 buf_flushdirtyblks(vp, 0, BUF_SKIP_LOCKED, "hfs_reclaim_file");
5532
5533 hfs_unlock(cp);
5534 hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK);
5535 took_truncate_lock = true;
5536 (void) cluster_push(vp, 0);
5537 error = hfs_lock(cp, HFS_FORCE_LOCK);
5538 if (error) {
5539 goto out;
5540 }
5541
5542 /* If the file no longer exists, nothing left to do */
5543 if (cp->c_flag & C_NOEXISTS) {
5544 error = 0;
5545 goto out;
5546 }
5547
5548 /* Wait for any in-progress writes to this vnode to complete, so that we'll
5549 * be copying consistent bits. (Otherwise, it's possible that an async
5550 * write will complete to the old extent after we read from it. That
5551 * could lead to corruption.)
5552 */
5553 error = vnode_waitforwrites(vp, 0, 0, 0, "hfs_reclaim_file");
5554 if (error) {
5555 goto out;
5556 }
5557 }
5558
5559 if (hfs_resize_debug) {
5560 printf("hfs_reclaim_file: === Start reclaiming %sfork for %sid=%u ===\n", (forktype ? "rsrc" : "data"), (extent_info->is_dirlink ? "dirlink" : "file"), fileID);
5561 }
5562
5563 if (extent_info->is_dirlink) {
5564 MALLOC(extent_info->dirlink_desc, struct cat_desc *,
5565 sizeof(struct cat_desc), M_TEMP, M_WAITOK);
5566 MALLOC(extent_info->dirlink_attr, struct cat_attr *,
5567 sizeof(struct cat_attr), M_TEMP, M_WAITOK);
5568 MALLOC(extent_info->dirlink_fork, struct filefork *,
5569 sizeof(struct filefork), M_TEMP, M_WAITOK);
5570 if ((extent_info->dirlink_desc == NULL) ||
5571 (extent_info->dirlink_attr == NULL) ||
5572 (extent_info->dirlink_fork == NULL)) {
5573 error = ENOMEM;
5574 goto out;
5575 }
5576
5577 /* Lookup catalog record for directory hard link and
5578 * create a fake filefork for the value looked up from
5579 * the disk.
5580 */
5581 fp = extent_info->dirlink_fork;
5582 bzero(extent_info->dirlink_fork, sizeof(struct filefork));
5583 extent_info->dirlink_fork->ff_cp = cp;
5584 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
5585 error = cat_lookup_dirlink(hfsmp, fileID, forktype,
5586 extent_info->dirlink_desc, extent_info->dirlink_attr,
5587 &(extent_info->dirlink_fork->ff_data));
5588 hfs_systemfile_unlock(hfsmp, lockflags);
5589 if (error) {
5590 printf ("hfs_reclaim_file: cat_lookup_dirlink for fileID=%u returned error=%u\n", fileID, error);
5591 goto out;
5592 }
5593 release_desc = true;
5594 } else {
5595 fp = VTOF(vp);
5596 }
5597
5598 extent_info->catalog_fp = fp;
5599 extent_info->recStartBlock = 0;
5600 extent_info->extents = extent_info->catalog_fp->ff_extents;
5601 /* Relocate extents from the catalog record */
5602 for (i = 0; i < kHFSPlusExtentDensity; ++i) {
5603 if (fp->ff_extents[i].blockCount == 0) {
5604 break;
5605 }
5606 extent_info->extent_index = i;
5607 error = hfs_reclaim_extent(hfsmp, allocLimit, extent_info, context);
5608 if (error) {
5609 printf ("hfs_reclaim_file: fileID=%u #%d %u:(%u,%u) hfs_reclaim_extent error=%d\n", fileID, extent_info->overflow_count, i, fp->ff_extents[i].startBlock, fp->ff_extents[i].blockCount, error);
5610 goto out;
5611 }
5612 }
5613
5614 /* If the number of allocation blocks processed for reclaiming
5615 * are less than total number of blocks for the file, continuing
5616 * working on overflow extents record.
5617 */
5618 if (fp->ff_blocks <= extent_info->cur_blockCount) {
5619 if (0 && hfs_resize_debug) {
5620 printf ("hfs_reclaim_file: Nothing more to relocate, offset=%d, ff_blocks=%u, cur_blockCount=%u\n", i, fp->ff_blocks, extent_info->cur_blockCount);
5621 }
5622 goto out;
5623 }
5624
5625 if (hfs_resize_debug) {
5626 printf ("hfs_reclaim_file: Will check overflow records, offset=%d, ff_blocks=%u, cur_blockCount=%u\n", i, fp->ff_blocks, extent_info->cur_blockCount);
5627 }
5628
5629 MALLOC(extent_info->iterator, struct BTreeIterator *, sizeof(struct BTreeIterator), M_TEMP, M_WAITOK);
5630 if (extent_info->iterator == NULL) {
5631 error = ENOMEM;
5632 goto out;
5633 }
5634 bzero(extent_info->iterator, sizeof(struct BTreeIterator));
5635 key = (HFSPlusExtentKey *) &(extent_info->iterator->key);
5636 key->keyLength = kHFSPlusExtentKeyMaximumLength;
5637 key->forkType = forktype;
5638 key->fileID = fileID;
5639 key->startBlock = extent_info->cur_blockCount;
5640
5641 extent_info->btdata.bufferAddress = extent_info->record.overflow;
5642 extent_info->btdata.itemSize = sizeof(HFSPlusExtentRecord);
5643 extent_info->btdata.itemCount = 1;
5644
5645 extent_info->catalog_fp = NULL;
5646
5647 /* Search the first overflow extent with expected startBlock as 'cur_blockCount' */
5648 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
5649 error = BTSearchRecord(extent_info->fcb, extent_info->iterator,
5650 &(extent_info->btdata), &(extent_info->recordlen),
5651 extent_info->iterator);
5652 hfs_systemfile_unlock(hfsmp, lockflags);
5653 while (error == 0) {
5654 extent_info->overflow_count++;
5655 extent_info->recStartBlock = key->startBlock;
5656 extent_info->extents = extent_info->record.overflow;
5657 for (i = 0; i < kHFSPlusExtentDensity; i++) {
5658 if (extent_info->record.overflow[i].blockCount == 0) {
5659 goto out;
5660 }
5661 extent_info->extent_index = i;
5662 error = hfs_reclaim_extent(hfsmp, allocLimit, extent_info, context);
5663 if (error) {
5664 printf ("hfs_reclaim_file: fileID=%u #%d %u:(%u,%u) hfs_reclaim_extent error=%d\n", fileID, extent_info->overflow_count, i, extent_info->record.overflow[i].startBlock, extent_info->record.overflow[i].blockCount, error);
5665 goto out;
5666 }
5667 }
5668
5669 /* Look for more overflow records */
5670 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
5671 error = BTIterateRecord(extent_info->fcb, kBTreeNextRecord,
5672 extent_info->iterator, &(extent_info->btdata),
5673 &(extent_info->recordlen));
5674 hfs_systemfile_unlock(hfsmp, lockflags);
5675 if (error) {
5676 break;
5677 }
5678 /* Stop when we encounter a different file or fork. */
5679 if ((key->fileID != fileID) || (key->forkType != forktype)) {
5680 break;
5681 }
5682 }
5683 if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) {
5684 error = 0;
5685 }
5686
5687 out:
5688 /* If any blocks were relocated, account them and report progress */
5689 if (extent_info->blocks_relocated) {
5690 hfsmp->hfs_resize_blocksmoved += extent_info->blocks_relocated;
5691 hfs_truncatefs_progress(hfsmp);
5692 if (fileID < kHFSFirstUserCatalogNodeID) {
5693 printf ("hfs_reclaim_file: Relocated %u blocks from fileID=%u on \"%s\"\n",
5694 extent_info->blocks_relocated, fileID, hfsmp->vcbVN);
5695 }
5696 }
5697 if (extent_info->iterator) {
5698 FREE(extent_info->iterator, M_TEMP);
5699 }
5700 if (release_desc == true) {
5701 cat_releasedesc(extent_info->dirlink_desc);
5702 }
5703 if (extent_info->dirlink_desc) {
5704 FREE(extent_info->dirlink_desc, M_TEMP);
5705 }
5706 if (extent_info->dirlink_attr) {
5707 FREE(extent_info->dirlink_attr, M_TEMP);
5708 }
5709 if (extent_info->dirlink_fork) {
5710 FREE(extent_info->dirlink_fork, M_TEMP);
5711 }
5712 if ((extent_info->blocks_relocated != 0) && (extent_info->is_sysfile == false)) {
5713 (void) hfs_update(vp, MNT_WAIT);
5714 }
5715 if (took_truncate_lock) {
5716 hfs_unlock_truncate(cp, 0);
5717 }
5718 if (extent_info) {
5719 FREE(extent_info, M_TEMP);
5720 }
5721 if (hfs_resize_debug) {
5722 printf("hfs_reclaim_file: === Finished relocating %sfork for fileid=%u (error=%d) ===\n", (forktype ? "rsrc" : "data"), fileID, error);
5723 }
5724
5725 return error;
5726 }
5727
5728
5729 /*
5730 * This journal_relocate callback updates the journal info block to point
5731 * at the new journal location. This write must NOT be done using the
5732 * transaction. We must write the block immediately. We must also force
5733 * it to get to the media so that the new journal location will be seen by
5734 * the replay code before we can safely let journaled blocks be written
5735 * to their normal locations.
5736 *
5737 * The tests for journal_uses_fua below are mildly hacky. Since the journal
5738 * and the file system are both on the same device, I'm leveraging what
5739 * the journal has decided about FUA.
5740 */
/*
 * Argument bundle passed through journal_relocate() to
 * hfs_journal_relocate_callback() below.
 */
struct hfs_journal_relocate_args {
	struct hfsmount *hfsmp;		/* volume whose journal is being moved */
	vfs_context_t context;		/* caller's context (credential for I/O, ioctl) */
	u_int32_t newStartBlock;	/* journal's new start, in allocation blocks */
};
5746
5747 static errno_t
5748 hfs_journal_relocate_callback(void *_args)
5749 {
5750 int error;
5751 struct hfs_journal_relocate_args *args = _args;
5752 struct hfsmount *hfsmp = args->hfsmp;
5753 buf_t bp;
5754 JournalInfoBlock *jibp;
5755
5756 error = buf_meta_bread(hfsmp->hfs_devvp,
5757 hfsmp->vcbJinfoBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size),
5758 hfsmp->blockSize, vfs_context_ucred(args->context), &bp);
5759 if (error) {
5760 printf("hfs_reclaim_journal_file: failed to read JIB (%d)\n", error);
5761 if (bp) {
5762 buf_brelse(bp);
5763 }
5764 return error;
5765 }
5766 jibp = (JournalInfoBlock*) buf_dataptr(bp);
5767 jibp->offset = SWAP_BE64((u_int64_t)args->newStartBlock * hfsmp->blockSize);
5768 jibp->size = SWAP_BE64(hfsmp->jnl_size);
5769 if (journal_uses_fua(hfsmp->jnl))
5770 buf_markfua(bp);
5771 error = buf_bwrite(bp);
5772 if (error) {
5773 printf("hfs_reclaim_journal_file: failed to write JIB (%d)\n", error);
5774 return error;
5775 }
5776 if (!journal_uses_fua(hfsmp->jnl)) {
5777 error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, args->context);
5778 if (error) {
5779 printf("hfs_reclaim_journal_file: DKIOCSYNCHRONIZECACHE failed (%d)\n", error);
5780 error = 0; /* Don't fail the operation. */
5781 }
5782 }
5783
5784 return error;
5785 }
5786
5787
5788 static int
5789 hfs_reclaim_journal_file(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context)
5790 {
5791 int error;
5792 int journal_err;
5793 int lockflags;
5794 u_int32_t oldStartBlock;
5795 u_int32_t newStartBlock;
5796 u_int32_t oldBlockCount;
5797 u_int32_t newBlockCount;
5798 struct cat_desc journal_desc;
5799 struct cat_attr journal_attr;
5800 struct cat_fork journal_fork;
5801 struct hfs_journal_relocate_args callback_args;
5802
5803 if (hfsmp->jnl_start + (hfsmp->jnl_size / hfsmp->blockSize) <= allocLimit) {
5804 /* The journal does not require relocation */
5805 return 0;
5806 }
5807
5808 error = hfs_start_transaction(hfsmp);
5809 if (error) {
5810 printf("hfs_reclaim_journal_file: hfs_start_transaction returned %d\n", error);
5811 return error;
5812 }
5813 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
5814
5815 oldBlockCount = hfsmp->jnl_size / hfsmp->blockSize;
5816
5817 /* TODO: Allow the journal to change size based on the new volume size. */
5818 error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount,
5819 HFS_ALLOC_METAZONE | HFS_ALLOC_FORCECONTIG | HFS_ALLOC_SKIPFREEBLKS,
5820 &newStartBlock, &newBlockCount);
5821 if (error) {
5822 printf("hfs_reclaim_journal_file: BlockAllocate returned %d\n", error);
5823 goto fail;
5824 }
5825 if (newBlockCount != oldBlockCount) {
5826 printf("hfs_reclaim_journal_file: newBlockCount != oldBlockCount (%u, %u)\n", newBlockCount, oldBlockCount);
5827 goto free_fail;
5828 }
5829
5830 error = BlockDeallocate(hfsmp, hfsmp->jnl_start, oldBlockCount, HFS_ALLOC_SKIPFREEBLKS);
5831 if (error) {
5832 printf("hfs_reclaim_journal_file: BlockDeallocate returned %d\n", error);
5833 goto free_fail;
5834 }
5835
5836 /* Update the catalog record for .journal */
5837 error = cat_idlookup(hfsmp, hfsmp->hfs_jnlfileid, 1, &journal_desc, &journal_attr, &journal_fork);
5838 if (error) {
5839 printf("hfs_reclaim_journal_file: cat_idlookup returned %d\n", error);
5840 goto free_fail;
5841 }
5842 oldStartBlock = journal_fork.cf_extents[0].startBlock;
5843 journal_fork.cf_size = newBlockCount * hfsmp->blockSize;
5844 journal_fork.cf_extents[0].startBlock = newStartBlock;
5845 journal_fork.cf_extents[0].blockCount = newBlockCount;
5846 journal_fork.cf_blocks = newBlockCount;
5847 error = cat_update(hfsmp, &journal_desc, &journal_attr, &journal_fork, NULL);
5848 cat_releasedesc(&journal_desc); /* all done with cat descriptor */
5849 if (error) {
5850 printf("hfs_reclaim_journal_file: cat_update returned %d\n", error);
5851 goto free_fail;
5852 }
5853 callback_args.hfsmp = hfsmp;
5854 callback_args.context = context;
5855 callback_args.newStartBlock = newStartBlock;
5856
5857 error = journal_relocate(hfsmp->jnl, (off_t)newStartBlock*hfsmp->blockSize,
5858 (off_t)newBlockCount*hfsmp->blockSize, 0,
5859 hfs_journal_relocate_callback, &callback_args);
5860 if (error) {
5861 /* NOTE: journal_relocate will mark the journal invalid. */
5862 printf("hfs_reclaim_journal_file: journal_relocate returned %d\n", error);
5863 goto fail;
5864 }
5865 hfsmp->jnl_start = newStartBlock;
5866 hfsmp->jnl_size = (off_t)newBlockCount * hfsmp->blockSize;
5867
5868 hfs_systemfile_unlock(hfsmp, lockflags);
5869 error = hfs_end_transaction(hfsmp);
5870 if (error) {
5871 printf("hfs_reclaim_journal_file: hfs_end_transaction returned %d\n", error);
5872 }
5873
5874 /* Account for the blocks relocated and print progress */
5875 hfsmp->hfs_resize_blocksmoved += oldBlockCount;
5876 hfs_truncatefs_progress(hfsmp);
5877 if (!error) {
5878 printf ("hfs_reclaim_journal_file: Relocated %u blocks from journal on \"%s\"\n",
5879 oldBlockCount, hfsmp->vcbVN);
5880 if (hfs_resize_debug) {
5881 printf ("hfs_reclaim_journal_file: Successfully relocated journal from (%u,%u) to (%u,%u)\n", oldStartBlock, oldBlockCount, newStartBlock, newBlockCount);
5882 }
5883 }
5884 return error;
5885
5886 free_fail:
5887 journal_err = BlockDeallocate(hfsmp, newStartBlock, newBlockCount, HFS_ALLOC_SKIPFREEBLKS);
5888 if (journal_err) {
5889 printf("hfs_reclaim_journal_file: BlockDeallocate returned %d\n", error);
5890 hfs_mark_volume_inconsistent(hfsmp);
5891 }
5892 fail:
5893 hfs_systemfile_unlock(hfsmp, lockflags);
5894 (void) hfs_end_transaction(hfsmp);
5895 if (hfs_resize_debug) {
5896 printf ("hfs_reclaim_journal_file: Error relocating journal file (error=%d)\n", error);
5897 }
5898 return error;
5899 }
5900
5901
5902 /*
5903 * Move the journal info block to a new location. We have to make sure the
5904 * new copy of the journal info block gets to the media first, then change
5905 * the field in the volume header and the catalog record.
5906 */
5907 static int
5908 hfs_reclaim_journal_info_block(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context)
5909 {
5910 int error;
5911 int journal_err;
5912 int lockflags;
5913 u_int32_t oldBlock;
5914 u_int32_t newBlock;
5915 u_int32_t blockCount;
5916 struct cat_desc jib_desc;
5917 struct cat_attr jib_attr;
5918 struct cat_fork jib_fork;
5919 buf_t old_bp, new_bp;
5920
5921 if (hfsmp->vcbJinfoBlock <= allocLimit) {
5922 /* The journal info block does not require relocation */
5923 return 0;
5924 }
5925
5926 error = hfs_start_transaction(hfsmp);
5927 if (error) {
5928 printf("hfs_reclaim_journal_info_block: hfs_start_transaction returned %d\n", error);
5929 return error;
5930 }
5931 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
5932
5933 error = BlockAllocate(hfsmp, 1, 1, 1,
5934 HFS_ALLOC_METAZONE | HFS_ALLOC_FORCECONTIG | HFS_ALLOC_SKIPFREEBLKS,
5935 &newBlock, &blockCount);
5936 if (error) {
5937 printf("hfs_reclaim_journal_info_block: BlockAllocate returned %d\n", error);
5938 goto fail;
5939 }
5940 if (blockCount != 1) {
5941 printf("hfs_reclaim_journal_info_block: blockCount != 1 (%u)\n", blockCount);
5942 goto free_fail;
5943 }
5944 error = BlockDeallocate(hfsmp, hfsmp->vcbJinfoBlock, 1, HFS_ALLOC_SKIPFREEBLKS);
5945 if (error) {
5946 printf("hfs_reclaim_journal_info_block: BlockDeallocate returned %d\n", error);
5947 goto free_fail;
5948 }
5949
5950 /* Copy the old journal info block content to the new location */
5951 error = buf_meta_bread(hfsmp->hfs_devvp,
5952 hfsmp->vcbJinfoBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size),
5953 hfsmp->blockSize, vfs_context_ucred(context), &old_bp);
5954 if (error) {
5955 printf("hfs_reclaim_journal_info_block: failed to read JIB (%d)\n", error);
5956 if (old_bp) {
5957 buf_brelse(old_bp);
5958 }
5959 goto free_fail;
5960 }
5961 new_bp = buf_getblk(hfsmp->hfs_devvp,
5962 newBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size),
5963 hfsmp->blockSize, 0, 0, BLK_META);
5964 bcopy((char*)buf_dataptr(old_bp), (char*)buf_dataptr(new_bp), hfsmp->blockSize);
5965 buf_brelse(old_bp);
5966 if (journal_uses_fua(hfsmp->jnl))
5967 buf_markfua(new_bp);
5968 error = buf_bwrite(new_bp);
5969 if (error) {
5970 printf("hfs_reclaim_journal_info_block: failed to write new JIB (%d)\n", error);
5971 goto free_fail;
5972 }
5973 if (!journal_uses_fua(hfsmp->jnl)) {
5974 error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
5975 if (error) {
5976 printf("hfs_reclaim_journal_info_block: DKIOCSYNCHRONIZECACHE failed (%d)\n", error);
5977 /* Don't fail the operation. */
5978 }
5979 }
5980
5981 /* Update the catalog record for .journal_info_block */
5982 error = cat_idlookup(hfsmp, hfsmp->hfs_jnlinfoblkid, 1, &jib_desc, &jib_attr, &jib_fork);
5983 if (error) {
5984 printf("hfs_reclaim_journal_file: cat_idlookup returned %d\n", error);
5985 goto fail;
5986 }
5987 oldBlock = jib_fork.cf_extents[0].startBlock;
5988 jib_fork.cf_size = hfsmp->blockSize;
5989 jib_fork.cf_extents[0].startBlock = newBlock;
5990 jib_fork.cf_extents[0].blockCount = 1;
5991 jib_fork.cf_blocks = 1;
5992 error = cat_update(hfsmp, &jib_desc, &jib_attr, &jib_fork, NULL);
5993 cat_releasedesc(&jib_desc); /* all done with cat descriptor */
5994 if (error) {
5995 printf("hfs_reclaim_journal_info_block: cat_update returned %d\n", error);
5996 goto fail;
5997 }
5998
5999 /* Update the pointer to the journal info block in the volume header. */
6000 hfsmp->vcbJinfoBlock = newBlock;
6001 error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
6002 if (error) {
6003 printf("hfs_reclaim_journal_info_block: hfs_flushvolumeheader returned %d\n", error);
6004 goto fail;
6005 }
6006 hfs_systemfile_unlock(hfsmp, lockflags);
6007 error = hfs_end_transaction(hfsmp);
6008 if (error) {
6009 printf("hfs_reclaim_journal_info_block: hfs_end_transaction returned %d\n", error);
6010 }
6011 error = hfs_journal_flush(hfsmp, FALSE);
6012 if (error) {
6013 printf("hfs_reclaim_journal_info_block: journal_flush returned %d\n", error);
6014 }
6015
6016 /* Account for the block relocated and print progress */
6017 hfsmp->hfs_resize_blocksmoved += 1;
6018 hfs_truncatefs_progress(hfsmp);
6019 if (!error) {
6020 printf ("hfs_reclaim_journal_info: Relocated 1 block from journal info on \"%s\"\n",
6021 hfsmp->vcbVN);
6022 if (hfs_resize_debug) {
6023 printf ("hfs_reclaim_journal_info_block: Successfully relocated journal info block from (%u,%u) to (%u,%u)\n", oldBlock, blockCount, newBlock, blockCount);
6024 }
6025 }
6026 return error;
6027
6028 free_fail:
6029 journal_err = BlockDeallocate(hfsmp, newBlock, blockCount, HFS_ALLOC_SKIPFREEBLKS);
6030 if (journal_err) {
6031 printf("hfs_reclaim_journal_info_block: BlockDeallocate returned %d\n", error);
6032 hfs_mark_volume_inconsistent(hfsmp);
6033 }
6034
6035 fail:
6036 hfs_systemfile_unlock(hfsmp, lockflags);
6037 (void) hfs_end_transaction(hfsmp);
6038 if (hfs_resize_debug) {
6039 printf ("hfs_reclaim_journal_info_block: Error relocating journal info block (error=%d)\n", error);
6040 }
6041 return error;
6042 }
6043
6044
6045 /*
6046 * This function traverses through all extended attribute records for a given
6047 * fileID, and calls function that reclaims data blocks that exist in the
6048 * area of the disk being reclaimed which in turn is responsible for allocating
6049 * new space, copying extent data, deallocating new space, and if required,
6050 * splitting the extent.
6051 *
6052 * Note: The caller has already acquired the cnode lock on the file. Therefore
6053 * we are assured that no other thread would be creating/deleting/modifying
6054 * extended attributes for this file.
6055 *
6056 * Side Effects:
6057 * hfsmp->hfs_resize_blocksmoved is incremented by the number of allocation
6058 * blocks that were relocated.
6059 *
6060 * Returns:
6061 * 0 on success, non-zero on failure.
6062 */
/* See the block comment above for the full contract.  Briefly: relocate
 * all extent-based EA blocks of fileID that lie at or beyond allocLimit.
 * Caller holds the cnode lock on vp.
 */
static int
hfs_reclaim_xattr(struct hfsmount *hfsmp, struct vnode *vp, u_int32_t fileID, u_int32_t allocLimit, vfs_context_t context)
{
	int error = 0;
	struct hfs_reclaim_extent_info *extent_info;
	int i;
	HFSPlusAttrKey *key;
	/* Alias into extent_info->lockflags so the lock state taken here is
	 * visible to hfs_reclaim_extent() through the same structure.
	 */
	int *lockflags;

	if (hfs_resize_debug) {
		printf("hfs_reclaim_xattr: === Start reclaiming xattr for id=%u ===\n", fileID);
	}

	MALLOC(extent_info, struct hfs_reclaim_extent_info *,
	       sizeof(struct hfs_reclaim_extent_info), M_TEMP, M_WAITOK);
	if (extent_info == NULL) {
		return ENOMEM;
	}
	bzero(extent_info, sizeof(struct hfs_reclaim_extent_info));
	extent_info->vp = vp;
	extent_info->fileID = fileID;
	extent_info->is_xattr = true;
	extent_info->is_sysfile = vnode_issystem(vp);
	/* Extent-based EAs live in the attributes B-tree file. */
	extent_info->fcb = VTOF(hfsmp->hfs_attribute_vp);
	lockflags = &(extent_info->lockflags);
	*lockflags = SFL_ATTRIBUTE | SFL_BITMAP;

	/* Initialize iterator from the extent_info structure */
	MALLOC(extent_info->iterator, struct BTreeIterator *,
	       sizeof(struct BTreeIterator), M_TEMP, M_WAITOK);
	if (extent_info->iterator == NULL) {
		error = ENOMEM;
		goto out;
	}
	bzero(extent_info->iterator, sizeof(struct BTreeIterator));

	/* Build attribute key (fileID with NULL attribute name, i.e. the
	 * smallest possible key for this fileID).
	 */
	key = (HFSPlusAttrKey *)&(extent_info->iterator->key);
	error = hfs_buildattrkey(fileID, NULL, key);
	if (error) {
		goto out;
	}

	/* Initialize btdata from extent_info structure.  Note that the
	 * buffer pointer actually points to the xattr record from the
	 * extent_info structure itself.
	 */
	extent_info->btdata.bufferAddress = &(extent_info->record.xattr);
	extent_info->btdata.itemSize = sizeof(HFSPlusAttrRecord);
	extent_info->btdata.itemCount = 1;

	/*
	 * Sync all extent-based attribute data to the disk.
	 *
	 * All extent-based attribute data I/O is performed via cluster
	 * I/O using a virtual file that spans across entire file system
	 * space.
	 */
	hfs_lock_truncate(VTOC(hfsmp->hfs_attrdata_vp), HFS_EXCLUSIVE_LOCK);
	(void)cluster_push(hfsmp->hfs_attrdata_vp, 0);
	error = vnode_waitforwrites(hfsmp->hfs_attrdata_vp, 0, 0, 0, "hfs_reclaim_xattr");
	hfs_unlock_truncate(VTOC(hfsmp->hfs_attrdata_vp), 0);
	if (error) {
		goto out;
	}

	/* Search for extended attribute for current file.  This
	 * will place the iterator before the first matching record.
	 */
	*lockflags = hfs_systemfile_lock(hfsmp, *lockflags, HFS_EXCLUSIVE_LOCK);
	error = BTSearchRecord(extent_info->fcb, extent_info->iterator,
			&(extent_info->btdata), &(extent_info->recordlen),
			extent_info->iterator);
	hfs_systemfile_unlock(hfsmp, *lockflags);
	if (error) {
		if (error != btNotFound) {
			goto out;
		}
		/* btNotFound is expected here, so just mask it */
		error = 0;
	}

	/* Walk this file's attribute records; locks are dropped between
	 * iterations, which is safe because the caller's cnode lock keeps
	 * the file's EAs from changing underneath us.
	 */
	while (1) {
		/* Iterate to the next record */
		*lockflags = hfs_systemfile_lock(hfsmp, *lockflags, HFS_EXCLUSIVE_LOCK);
		error = BTIterateRecord(extent_info->fcb, kBTreeNextRecord,
				extent_info->iterator, &(extent_info->btdata),
				&(extent_info->recordlen));
		hfs_systemfile_unlock(hfsmp, *lockflags);

		/* Stop the iteration if we encounter end of btree or xattr with different fileID */
		if (error || key->fileID != fileID) {
			if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) {
				error = 0;
			}
			break;
		}

		/* We only care about extent-based EAs */
		if ((extent_info->record.xattr.recordType != kHFSPlusAttrForkData) &&
		    (extent_info->record.xattr.recordType != kHFSPlusAttrExtents)) {
			continue;
		}

		/* A fork-data record starts a new fork (reset overflow count);
		 * an extents record continues the previous fork's overflow.
		 */
		if (extent_info->record.xattr.recordType == kHFSPlusAttrForkData) {
			extent_info->overflow_count = 0;
			extent_info->extents = extent_info->record.xattr.forkData.theFork.extents;
		} else if (extent_info->record.xattr.recordType == kHFSPlusAttrExtents) {
			extent_info->overflow_count++;
			extent_info->extents = extent_info->record.xattr.overflowExtents.extents;
		}

		extent_info->recStartBlock = key->startBlock;
		for (i = 0; i < kHFSPlusExtentDensity; i++) {
			/* A zero blockCount terminates the extent list. */
			if (extent_info->extents[i].blockCount == 0) {
				break;
			}
			extent_info->extent_index = i;
			error = hfs_reclaim_extent(hfsmp, allocLimit, extent_info, context);
			if (error) {
				printf ("hfs_reclaim_xattr: fileID=%u hfs_reclaim_extent error=%d\n", fileID, error);
				goto out;
			}
		}
	}

out:
	/* If any blocks were relocated, account them and report progress */
	if (extent_info->blocks_relocated) {
		hfsmp->hfs_resize_blocksmoved += extent_info->blocks_relocated;
		hfs_truncatefs_progress(hfsmp);
	}
	if (extent_info->iterator) {
		FREE(extent_info->iterator, M_TEMP);
	}
	if (extent_info) {
		FREE(extent_info, M_TEMP);
	}
	if (hfs_resize_debug) {
		printf("hfs_reclaim_xattr: === Finished relocating xattr for fileid=%u (error=%d) ===\n", fileID, error);
	}
	return error;
}
6206
6207 /*
6208 * Reclaim any extent-based extended attributes allocation blocks from
6209 * the area of the disk that is being truncated.
6210 *
6211 * The function traverses the attribute btree to find out the fileIDs
6212 * of the extended attributes that need to be relocated. For every
6213 * file whose large EA requires relocation, it looks up the cnode and
6214 * calls hfs_reclaim_xattr() to do all the work for allocating
6215 * new space, copying data, deallocating old space, and if required,
6216 * splitting the extents.
6217 *
6218 * Inputs:
6219 * allocLimit - starting block of the area being reclaimed
6220 *
6221 * Returns:
6222 * returns 0 on success, non-zero on failure.
6223 */
/* See the block comment above for the full contract.  Briefly: scan the
 * attribute B-tree for extent-based EAs that overlap the reclaim area and
 * relocate them per-file via hfs_reclaim_xattr().
 */
static int
hfs_reclaim_xattrspace(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context)
{
	int error = 0;
	FCB *fcb;
	struct BTreeIterator *iterator = NULL;
	struct FSBufferDescriptor btdata;
	HFSPlusAttrKey *key;
	HFSPlusAttrRecord rec;
	int lockflags = 0;
	/* Last fileID whose EAs we already relocated; records are keyed by
	 * fileID so duplicates for the same file arrive consecutively.
	 */
	cnid_t prev_fileid = 0;
	struct vnode *vp;
	int need_relocate;
	int btree_operation;
	u_int32_t files_moved = 0;
	u_int32_t prev_blocksmoved;
	int i;

	fcb = VTOF(hfsmp->hfs_attribute_vp);
	/* Store the value to print total blocks moved by this function in end */
	prev_blocksmoved = hfsmp->hfs_resize_blocksmoved;

	if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) {
		return ENOMEM;
	}
	bzero(iterator, sizeof(*iterator));
	key = (HFSPlusAttrKey *)&iterator->key;
	btdata.bufferAddress = &rec;
	btdata.itemSize = sizeof(rec);
	btdata.itemCount = 1;

	need_relocate = false;
	btree_operation = kBTreeFirstRecord;
	/* Traverse the attribute btree to find extent-based EAs to reclaim */
	while (1) {
		/* Shared lock only for reading; hfs_reclaim_xattr takes its
		 * own exclusive locks when it actually moves blocks.
		 */
		lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE, HFS_SHARED_LOCK);
		error = BTIterateRecord(fcb, btree_operation, iterator, &btdata, NULL);
		hfs_systemfile_unlock(hfsmp, lockflags);
		if (error) {
			if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) {
				/* Normal end of traversal, not a failure. */
				error = 0;
			}
			break;
		}
		btree_operation = kBTreeNextRecord;

		/* If the extents of current fileID were already relocated, skip it */
		if (prev_fileid == key->fileID) {
			continue;
		}

		/* Check if any of the extents in the current record need to be relocated */
		need_relocate = false;
		switch(rec.recordType) {
		case kHFSPlusAttrForkData:
			for (i = 0; i < kHFSPlusExtentDensity; i++) {
				if (rec.forkData.theFork.extents[i].blockCount == 0) {
					break;
				}
				if ((rec.forkData.theFork.extents[i].startBlock +
				     rec.forkData.theFork.extents[i].blockCount) > allocLimit) {
					need_relocate = true;
					break;
				}
			}
			break;

		case kHFSPlusAttrExtents:
			for (i = 0; i < kHFSPlusExtentDensity; i++) {
				if (rec.overflowExtents.extents[i].blockCount == 0) {
					break;
				}
				if ((rec.overflowExtents.extents[i].startBlock +
				     rec.overflowExtents.extents[i].blockCount) > allocLimit) {
					need_relocate = true;
					break;
				}
			}
			break;
		};

		/* Continue iterating to next attribute record */
		if (need_relocate == false) {
			continue;
		}

		/* Look up the vnode for corresponding file.  The cnode
		 * will be locked which will ensure that no one modifies
		 * the xattrs when we are relocating them.
		 *
		 * We want to allow open-unlinked files to be moved,
		 * so provide allow_deleted == 1 for hfs_vget().
		 */
		if (hfs_vget(hfsmp, key->fileID, &vp, 0, 1) != 0) {
			continue;
		}

		error = hfs_reclaim_xattr(hfsmp, vp, key->fileID, allocLimit, context);
		hfs_unlock(VTOC(vp));
		vnode_put(vp);
		if (error) {
			printf ("hfs_reclaim_xattrspace: Error relocating xattrs for fileid=%u (error=%d)\n", key->fileID, error);
			break;
		}
		prev_fileid = key->fileID;
		files_moved++;
	}

	if (files_moved) {
		printf("hfs_reclaim_xattrspace: Relocated %u xattr blocks from %u files on \"%s\"\n",
				(hfsmp->hfs_resize_blocksmoved - prev_blocksmoved),
				files_moved, hfsmp->vcbVN);
	}

	kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator));
	return error;
}
6341
6342 /*
6343 * Reclaim blocks from regular files.
6344 *
6345 * This function iterates over all the record in catalog btree looking
6346 * for files with extents that overlap into the space we're trying to
6347 * free up. If a file extent requires relocation, it looks up the vnode
6348 * and calls function to relocate the data.
6349 *
6350 * Returns:
6351 * Zero on success, non-zero on failure.
6352 */
/* See the block comment above for the full contract.  Briefly: scan the
 * catalog B-tree for file records whose extents overlap the reclaim area
 * and relocate each affected fork via hfs_reclaim_file().
 */
static int
hfs_reclaim_filespace(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context)
{
	int error;
	FCB *fcb;
	struct BTreeIterator *iterator = NULL;
	struct FSBufferDescriptor btdata;
	int btree_operation;
	int lockflags;
	struct HFSPlusCatalogFile filerec;
	struct vnode *vp;
	struct vnode *rvp;
	struct filefork *datafork;
	u_int32_t files_moved = 0;
	u_int32_t prev_blocksmoved;

	fcb = VTOF(hfsmp->hfs_catalog_vp);
	/* Store the value to print total blocks moved by this function at the end */
	prev_blocksmoved = hfsmp->hfs_resize_blocksmoved;

	if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) {
		return ENOMEM;
	}
	bzero(iterator, sizeof(*iterator));

	btdata.bufferAddress = &filerec;
	btdata.itemSize = sizeof(filerec);
	btdata.itemCount = 1;

	btree_operation = kBTreeFirstRecord;
	while (1) {
		/* Shared lock only for reading the catalog record; the
		 * relocation path takes its own exclusive locks.
		 */
		lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
		error = BTIterateRecord(fcb, btree_operation, iterator, &btdata, NULL);
		hfs_systemfile_unlock(hfsmp, lockflags);
		if (error) {
			if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) {
				/* Normal end of traversal, not a failure. */
				error = 0;
			}
			break;
		}
		btree_operation = kBTreeNextRecord;

		if (filerec.recordType != kHFSPlusFileRecord) {
			continue;
		}

		/* Check if any of the extents require relocation */
		if (hfs_file_extent_overlaps(hfsmp, allocLimit, &filerec) == false) {
			continue;
		}

		/* We want to allow open-unlinked files to be moved, so allow_deleted == 1 */
		if (hfs_vget(hfsmp, filerec.fileID, &vp, 0, 1) != 0) {
			continue;
		}

		/* If data fork exists or item is a directory hard link, relocate blocks */
		datafork = VTOF(vp);
		if ((datafork && datafork->ff_blocks > 0) || vnode_isdir(vp)) {
			error = hfs_reclaim_file(hfsmp, vp, filerec.fileID,
					kHFSDataForkType, allocLimit, context);
			if (error) {
				printf ("hfs_reclaimspace: Error reclaiming datafork blocks of fileid=%u (error=%d)\n", filerec.fileID, error);
				hfs_unlock(VTOC(vp));
				vnode_put(vp);
				break;
			}
		}

		/* If resource fork exists or item is a directory hard link, relocate blocks */
		if (((VTOC(vp)->c_blocks - (datafork ? datafork->ff_blocks : 0)) > 0) || vnode_isdir(vp)) {
			if (vnode_isdir(vp)) {
				/* Resource fork vnode lookup is invalid for directory hard link.
				 * So we fake data fork vnode as resource fork vnode.
				 */
				rvp = vp;
			} else {
				error = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE, FALSE);
				if (error) {
					printf ("hfs_reclaimspace: Error looking up rvp for fileid=%u (error=%d)\n", filerec.fileID, error);
					hfs_unlock(VTOC(vp));
					vnode_put(vp);
					break;
				}
				/* Defer the rsrc vnode_put to cnode reclaim. */
				VTOC(rvp)->c_flag |= C_NEED_RVNODE_PUT;
			}

			error = hfs_reclaim_file(hfsmp, rvp, filerec.fileID,
					kHFSResourceForkType, allocLimit, context);
			if (error) {
				printf ("hfs_reclaimspace: Error reclaiming rsrcfork blocks of fileid=%u (error=%d)\n", filerec.fileID, error);
				hfs_unlock(VTOC(vp));
				vnode_put(vp);
				break;
			}
		}

		/* The file forks were relocated successfully, now drop the
		 * cnode lock and vnode reference, and continue iterating to
		 * next catalog record.
		 */
		hfs_unlock(VTOC(vp));
		vnode_put(vp);
		files_moved++;
	}

	if (files_moved) {
		printf("hfs_reclaim_filespace: Relocated %u blocks from %u files on \"%s\"\n",
				(hfsmp->hfs_resize_blocksmoved - prev_blocksmoved),
				files_moved, hfsmp->vcbVN);
	}

	kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator));
	return error;
}
6468
6469 /*
6470 * Reclaim space at the end of a file system.
6471 *
6472 * Inputs -
6473 * allocLimit - start block of the space being reclaimed
6474 * reclaimblks - number of allocation blocks to reclaim
6475 */
static int
hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t allocLimit, u_int32_t reclaimblks, vfs_context_t context)
{
	int error = 0;

	/*
	 * Driver for the shrink path: relocates every allocated block that
	 * lies at or beyond allocLimit.  System files are moved first, then
	 * the journal, then ordinary file extents, then extent-based xattrs.
	 * Any failure aborts the whole operation and is returned to the caller.
	 *
	 * Preflight the bitmap to find out total number of blocks that need
	 * relocation (stored in hfs_resize_totalblocks; used by
	 * hfs_resize_progress() to report percentage complete).
	 *
	 * Note: Since allocLimit is set to the location of new alternate volume
	 * header, the check below does not account for blocks allocated for old
	 * alternate volume header.
	 */
	error = hfs_count_allocated(hfsmp, allocLimit, reclaimblks, &(hfsmp->hfs_resize_totalblocks));
	if (error) {
		printf ("hfs_reclaimspace: Unable to determine total blocks to reclaim error=%d\n", error);
		return error;
	}
	if (hfs_resize_debug) {
		printf ("hfs_reclaimspace: Total number of blocks to reclaim = %u\n", hfsmp->hfs_resize_totalblocks);
	}

	/* Relocate extents of the Allocation file if they're in the way. */
	error = hfs_reclaim_file(hfsmp, hfsmp->hfs_allocation_vp, kHFSAllocationFileID,
				 kHFSDataForkType, allocLimit, context);
	if (error) {
		printf("hfs_reclaimspace: reclaim allocation file returned %d\n", error);
		return error;
	}

	/* Relocate extents of the Extents B-tree if they're in the way. */
	error = hfs_reclaim_file(hfsmp, hfsmp->hfs_extents_vp, kHFSExtentsFileID,
				 kHFSDataForkType, allocLimit, context);
	if (error) {
		printf("hfs_reclaimspace: reclaim extents b-tree returned %d\n", error);
		return error;
	}

	/* Relocate extents of the Catalog B-tree if they're in the way. */
	error = hfs_reclaim_file(hfsmp, hfsmp->hfs_catalog_vp, kHFSCatalogFileID,
				 kHFSDataForkType, allocLimit, context);
	if (error) {
		printf("hfs_reclaimspace: reclaim catalog b-tree returned %d\n", error);
		return error;
	}

	/* Relocate extents of the Attributes B-tree if they're in the way. */
	error = hfs_reclaim_file(hfsmp, hfsmp->hfs_attribute_vp, kHFSAttributesFileID,
				 kHFSDataForkType, allocLimit, context);
	if (error) {
		printf("hfs_reclaimspace: reclaim attribute b-tree returned %d\n", error);
		return error;
	}

	/* Relocate extents of the Startup File if there is one and they're in the way. */
	error = hfs_reclaim_file(hfsmp, hfsmp->hfs_startup_vp, kHFSStartupFileID,
				 kHFSDataForkType, allocLimit, context);
	if (error) {
		printf("hfs_reclaimspace: reclaim startup file returned %d\n", error);
		return error;
	}

	/*
	 * We need to make sure the alternate volume header gets flushed if we moved
	 * any extents in the volume header. But we need to do that before
	 * shrinking the size of the volume, or else the journal code will panic
	 * with an invalid (too large) block number.
	 *
	 * Note that blks_moved will be set if ANY extent was moved, even
	 * if it was just an overflow extent. In this case, the journal_flush isn't
	 * strictly required, but shouldn't hurt.
	 */
	if (hfsmp->hfs_resize_blocksmoved) {
		hfs_journal_flush(hfsmp, FALSE);
	}

	/* Relocate journal file blocks if they're in the way. */
	error = hfs_reclaim_journal_file(hfsmp, allocLimit, context);
	if (error) {
		printf("hfs_reclaimspace: hfs_reclaim_journal_file failed (%d)\n", error);
		return error;
	}

	/* Relocate journal info block blocks if they're in the way. */
	error = hfs_reclaim_journal_info_block(hfsmp, allocLimit, context);
	if (error) {
		printf("hfs_reclaimspace: hfs_reclaim_journal_info_block failed (%d)\n", error);
		return error;
	}

	/* Reclaim extents from catalog file records */
	error = hfs_reclaim_filespace(hfsmp, allocLimit, context);
	if (error) {
		printf ("hfs_reclaimspace: hfs_reclaim_filespace returned error=%d\n", error);
		return error;
	}

	/* Reclaim extents from extent-based extended attributes, if any */
	error = hfs_reclaim_xattrspace(hfsmp, allocLimit, context);
	if (error) {
		printf ("hfs_reclaimspace: hfs_reclaim_xattrspace returned error=%d\n", error);
		return error;
	}

	return error;
}
6582
6583
6584 /*
6585 * Check if there are any extents (including overflow extents) that overlap
6586 * into the disk space that is being reclaimed.
6587 *
6588 * Output -
6589 * true - One of the extents need to be relocated
6590 * false - No overflow extents need to be relocated, or there was an error
6591 */
static int
hfs_file_extent_overlaps(struct hfsmount *hfsmp, u_int32_t allocLimit, struct HFSPlusCatalogFile *filerec)
{
	struct BTreeIterator * iterator = NULL;
	struct FSBufferDescriptor btdata;
	HFSPlusExtentRecord extrec;
	HFSPlusExtentKey *extkeyptr;
	FCB *fcb;
	int overlapped = false;
	int i, j;
	int error;
	int lockflags = 0;
	u_int32_t endblock;

	/*
	 * First scan the eight inline extent descriptors of each fork in the
	 * catalog record.  An extent whose end block exceeds allocLimit lies
	 * (at least partly) in the space being reclaimed.
	 */

	/* Check if data fork overlaps the target space */
	for (i = 0; i < kHFSPlusExtentDensity; ++i) {
		if (filerec->dataFork.extents[i].blockCount == 0) {
			break;
		}
		endblock = filerec->dataFork.extents[i].startBlock +
			filerec->dataFork.extents[i].blockCount;
		if (endblock > allocLimit) {
			overlapped = true;
			goto out;
		}
	}

	/* Check if resource fork overlaps the target space */
	for (j = 0; j < kHFSPlusExtentDensity; ++j) {
		if (filerec->resourceFork.extents[j].blockCount == 0) {
			break;
		}
		endblock = filerec->resourceFork.extents[j].startBlock +
			filerec->resourceFork.extents[j].blockCount;
		if (endblock > allocLimit) {
			overlapped = true;
			goto out;
		}
	}

	/* Return back if there are no overflow extents for this file.
	 * Only a fork whose inline record is completely full (all eight
	 * descriptors used) can have records in the extents overflow B-tree.
	 */
	if ((i < kHFSPlusExtentDensity) && (j < kHFSPlusExtentDensity)) {
		goto out;
	}

	/* NOTE(review): allocation failure is reported as "no overlap",
	 * consistent with this function's contract of returning false on error.
	 */
	if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) {
		return 0;
	}
	bzero(iterator, sizeof(*iterator));

	/* Build a search key for the first overflow extent record of this
	 * file: fork type 0 (data) and startBlock 0 sort before any real key
	 * with the same fileID.
	 */
	extkeyptr = (HFSPlusExtentKey *)&iterator->key;
	extkeyptr->keyLength = kHFSPlusExtentKeyMaximumLength;
	extkeyptr->forkType = 0;
	extkeyptr->fileID = filerec->fileID;
	extkeyptr->startBlock = 0;

	btdata.bufferAddress = &extrec;
	btdata.itemSize = sizeof(extrec);
	btdata.itemCount = 1;

	fcb = VTOF(hfsmp->hfs_extents_vp);

	/* Shared lock is enough: we only read the extents B-tree here. */
	lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_SHARED_LOCK);

	/* This will position the iterator just before the first overflow
	 * extent record for given fileID. It will always return btNotFound,
	 * so we special case the error code.
	 */
	error = BTSearchRecord(fcb, iterator, &btdata, NULL, iterator);
	if (error && (error != btNotFound)) {
		goto out;
	}

	/* BTIterateRecord() might return error if the btree is empty, and
	 * therefore we return that the extent does not overflow to the caller
	 */
	error = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL);
	while (error == 0) {
		/* Stop when we encounter a different file. */
		if (extkeyptr->fileID != filerec->fileID) {
			break;
		}
		/* Check if any of the forks exist in the target space. */
		for (i = 0; i < kHFSPlusExtentDensity; ++i) {
			if (extrec[i].blockCount == 0) {
				break;
			}
			endblock = extrec[i].startBlock + extrec[i].blockCount;
			if (endblock > allocLimit) {
				overlapped = true;
				goto out;
			}
		}
		/* Look for more records. */
		error = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL);
	}

out:
	/* lockflags is 0 unless the systemfile lock was actually taken. */
	if (lockflags) {
		hfs_systemfile_unlock(hfsmp, lockflags);
	}
	if (iterator) {
		kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator));
	}
	return overlapped;
}
6697
6698
6699 /*
6700 * Calculate the progress of a file system resize operation.
6701 */
6702 __private_extern__
6703 int
6704 hfs_resize_progress(struct hfsmount *hfsmp, u_int32_t *progress)
6705 {
6706 if ((hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) == 0) {
6707 return (ENXIO);
6708 }
6709
6710 if (hfsmp->hfs_resize_totalblocks > 0) {
6711 *progress = (u_int32_t)((hfsmp->hfs_resize_blocksmoved * 100ULL) / hfsmp->hfs_resize_totalblocks);
6712 } else {
6713 *progress = 0;
6714 }
6715
6716 return (0);
6717 }
6718
6719
6720 /*
6721 * Creates a UUID from a unique "name" in the HFS UUID Name space.
6722 * See version 3 UUID.
6723 */
6724 static void
6725 hfs_getvoluuid(struct hfsmount *hfsmp, uuid_t result)
6726 {
6727 MD5_CTX md5c;
6728 uint8_t rawUUID[8];
6729
6730 ((uint32_t *)rawUUID)[0] = hfsmp->vcbFndrInfo[6];
6731 ((uint32_t *)rawUUID)[1] = hfsmp->vcbFndrInfo[7];
6732
6733 MD5Init( &md5c );
6734 MD5Update( &md5c, HFS_UUID_NAMESPACE_ID, sizeof( uuid_t ) );
6735 MD5Update( &md5c, rawUUID, sizeof (rawUUID) );
6736 MD5Final( result, &md5c );
6737
6738 result[6] = 0x30 | ( result[6] & 0x0F );
6739 result[8] = 0x80 | ( result[8] & 0x3F );
6740 }
6741
6742 /*
6743 * Get file system attributes.
6744 */
static int
hfs_vfs_getattr(struct mount *mp, struct vfs_attr *fsap, __unused vfs_context_t context)
{
#define HFS_ATTR_CMN_VALIDMASK (ATTR_CMN_VALIDMASK & ~(ATTR_CMN_NAMEDATTRCOUNT | ATTR_CMN_NAMEDATTRLIST))
#define HFS_ATTR_FILE_VALIDMASK (ATTR_FILE_VALIDMASK & ~(ATTR_FILE_FILETYPE | ATTR_FILE_FORKCOUNT | ATTR_FILE_FORKLIST))
#define HFS_ATTR_CMN_VOL_VALIDMASK (ATTR_CMN_VALIDMASK & ~(ATTR_CMN_NAMEDATTRCOUNT | ATTR_CMN_NAMEDATTRLIST | ATTR_CMN_ACCTIME))

	ExtendedVCB *vcb = VFSTOVCB(mp);
	struct hfsmount *hfsmp = VFSTOHFS(mp);
	u_int32_t freeCNIDs;

	/* CNIDs are allocated sequentially; what's left before 32-bit wrap. */
	freeCNIDs = (u_int32_t)0xFFFFFFFF - (u_int32_t)hfsmp->vcbNxtCNID;

	/* Object, block, and size statistics straight from the VCB. */
	VFSATTR_RETURN(fsap, f_objcount, (u_int64_t)hfsmp->vcbFilCnt + (u_int64_t)hfsmp->vcbDirCnt);
	VFSATTR_RETURN(fsap, f_filecount, (u_int64_t)hfsmp->vcbFilCnt);
	VFSATTR_RETURN(fsap, f_dircount, (u_int64_t)hfsmp->vcbDirCnt);
	VFSATTR_RETURN(fsap, f_maxobjcount, (u_int64_t)0xFFFFFFFF);
	VFSATTR_RETURN(fsap, f_iosize, (size_t)cluster_max_io_size(mp, 0));
	VFSATTR_RETURN(fsap, f_blocks, (u_int64_t)hfsmp->totalBlocks);
	VFSATTR_RETURN(fsap, f_bfree, (u_int64_t)hfs_freeblks(hfsmp, 0));
	VFSATTR_RETURN(fsap, f_bavail, (u_int64_t)hfs_freeblks(hfsmp, 1));
	VFSATTR_RETURN(fsap, f_bsize, (u_int32_t)vcb->blockSize);
	/* XXX needs clarification */
	VFSATTR_RETURN(fsap, f_bused, hfsmp->totalBlocks - hfs_freeblks(hfsmp, 1));
	/* Maximum files is constrained by total blocks. */
	VFSATTR_RETURN(fsap, f_files, (u_int64_t)(hfsmp->totalBlocks - 2));
	VFSATTR_RETURN(fsap, f_ffree, MIN((u_int64_t)freeCNIDs, (u_int64_t)hfs_freeblks(hfsmp, 1)));

	fsap->f_fsid.val[0] = hfsmp->hfs_raw_dev;
	fsap->f_fsid.val[1] = vfs_typenum(mp);
	VFSATTR_SET_SUPPORTED(fsap, f_fsid);

	VFSATTR_RETURN(fsap, f_signature, vcb->vcbSigWord);
	VFSATTR_RETURN(fsap, f_carbon_fsid, 0);

	if (VFSATTR_IS_ACTIVE(fsap, f_capabilities)) {
		vol_capabilities_attr_t *cap;
	
		cap = &fsap->f_capabilities;

		/* capabilities[] = what this mounted volume supports now;
		 * valid[] = which bits of capabilities[] are meaningful.
		 */
		if (hfsmp->hfs_flags & HFS_STANDARD) {
			/* Plain (pre-Plus) HFS: no links, journal, etc. */
			cap->capabilities[VOL_CAPABILITIES_FORMAT] =
				VOL_CAP_FMT_PERSISTENTOBJECTIDS |
				VOL_CAP_FMT_CASE_PRESERVING |
				VOL_CAP_FMT_FAST_STATFS |
				VOL_CAP_FMT_HIDDEN_FILES |
				VOL_CAP_FMT_PATH_FROM_ID;
		} else {
			cap->capabilities[VOL_CAPABILITIES_FORMAT] =
				VOL_CAP_FMT_PERSISTENTOBJECTIDS |
				VOL_CAP_FMT_SYMBOLICLINKS |
				VOL_CAP_FMT_HARDLINKS |
				VOL_CAP_FMT_JOURNAL |
				VOL_CAP_FMT_ZERO_RUNS |
				/* Journal/case-sensitivity reported per-mount. */
				(hfsmp->jnl ? VOL_CAP_FMT_JOURNAL_ACTIVE : 0) |
				(hfsmp->hfs_flags & HFS_CASE_SENSITIVE ? VOL_CAP_FMT_CASE_SENSITIVE : 0) |
				VOL_CAP_FMT_CASE_PRESERVING |
				VOL_CAP_FMT_FAST_STATFS |
				VOL_CAP_FMT_2TB_FILESIZE |
				VOL_CAP_FMT_HIDDEN_FILES |
#if HFS_COMPRESSION
				VOL_CAP_FMT_PATH_FROM_ID |
				VOL_CAP_FMT_DECMPFS_COMPRESSION;
#else
				VOL_CAP_FMT_PATH_FROM_ID;
#endif
		}
		cap->capabilities[VOL_CAPABILITIES_INTERFACES] =
			VOL_CAP_INT_SEARCHFS |
			VOL_CAP_INT_ATTRLIST |
			VOL_CAP_INT_NFSEXPORT |
			VOL_CAP_INT_READDIRATTR |
			VOL_CAP_INT_EXCHANGEDATA |
			VOL_CAP_INT_ALLOCATE |
			VOL_CAP_INT_VOL_RENAME |
			VOL_CAP_INT_ADVLOCK |
			VOL_CAP_INT_FLOCK |
#if NAMEDSTREAMS
			VOL_CAP_INT_EXTENDED_ATTR |
			VOL_CAP_INT_NAMEDSTREAMS;
#else
			VOL_CAP_INT_EXTENDED_ATTR;
#endif
		cap->capabilities[VOL_CAPABILITIES_RESERVED1] = 0;
		cap->capabilities[VOL_CAPABILITIES_RESERVED2] = 0;

		cap->valid[VOL_CAPABILITIES_FORMAT] =
			VOL_CAP_FMT_PERSISTENTOBJECTIDS |
			VOL_CAP_FMT_SYMBOLICLINKS |
			VOL_CAP_FMT_HARDLINKS |
			VOL_CAP_FMT_JOURNAL |
			VOL_CAP_FMT_JOURNAL_ACTIVE |
			VOL_CAP_FMT_NO_ROOT_TIMES |
			VOL_CAP_FMT_SPARSE_FILES |
			VOL_CAP_FMT_ZERO_RUNS |
			VOL_CAP_FMT_CASE_SENSITIVE |
			VOL_CAP_FMT_CASE_PRESERVING |
			VOL_CAP_FMT_FAST_STATFS |
			VOL_CAP_FMT_2TB_FILESIZE |
			VOL_CAP_FMT_OPENDENYMODES |
			VOL_CAP_FMT_HIDDEN_FILES |
#if HFS_COMPRESSION
			VOL_CAP_FMT_PATH_FROM_ID |
			VOL_CAP_FMT_DECMPFS_COMPRESSION;
#else
			VOL_CAP_FMT_PATH_FROM_ID;
#endif
		/* COPYFILE and MANLOCK appear in valid[] but not in
		 * capabilities[] above: declared known-but-unsupported.
		 */
		cap->valid[VOL_CAPABILITIES_INTERFACES] =
			VOL_CAP_INT_SEARCHFS |
			VOL_CAP_INT_ATTRLIST |
			VOL_CAP_INT_NFSEXPORT |
			VOL_CAP_INT_READDIRATTR |
			VOL_CAP_INT_EXCHANGEDATA |
			VOL_CAP_INT_COPYFILE |
			VOL_CAP_INT_ALLOCATE |
			VOL_CAP_INT_VOL_RENAME |
			VOL_CAP_INT_ADVLOCK |
			VOL_CAP_INT_FLOCK |
			VOL_CAP_INT_MANLOCK |
#if NAMEDSTREAMS
			VOL_CAP_INT_EXTENDED_ATTR |
			VOL_CAP_INT_NAMEDSTREAMS;
#else
			VOL_CAP_INT_EXTENDED_ATTR;
#endif
		cap->valid[VOL_CAPABILITIES_RESERVED1] = 0;
		cap->valid[VOL_CAPABILITIES_RESERVED2] = 0;
		VFSATTR_SET_SUPPORTED(fsap, f_capabilities);
	}
	if (VFSATTR_IS_ACTIVE(fsap, f_attributes)) {
		vol_attributes_attr_t *attrp = &fsap->f_attributes;

		attrp->validattr.commonattr = HFS_ATTR_CMN_VOL_VALIDMASK;
		attrp->validattr.volattr = ATTR_VOL_VALIDMASK & ~ATTR_VOL_INFO;
		attrp->validattr.dirattr = ATTR_DIR_VALIDMASK;
		attrp->validattr.fileattr = HFS_ATTR_FILE_VALIDMASK;
		attrp->validattr.forkattr = 0;

		attrp->nativeattr.commonattr = HFS_ATTR_CMN_VOL_VALIDMASK;
		attrp->nativeattr.volattr = ATTR_VOL_VALIDMASK & ~ATTR_VOL_INFO;
		attrp->nativeattr.dirattr = ATTR_DIR_VALIDMASK;
		attrp->nativeattr.fileattr = HFS_ATTR_FILE_VALIDMASK;
		attrp->nativeattr.forkattr = 0;
		VFSATTR_SET_SUPPORTED(fsap, f_attributes);
	}
	/* Volume timestamps (seconds resolution only). */
	fsap->f_create_time.tv_sec = hfsmp->hfs_itime;
	fsap->f_create_time.tv_nsec = 0;
	VFSATTR_SET_SUPPORTED(fsap, f_create_time);
	fsap->f_modify_time.tv_sec = hfsmp->vcbLsMod;
	fsap->f_modify_time.tv_nsec = 0;
	VFSATTR_SET_SUPPORTED(fsap, f_modify_time);

	fsap->f_backup_time.tv_sec = hfsmp->vcbVolBkUp;
	fsap->f_backup_time.tv_nsec = 0;
	VFSATTR_SET_SUPPORTED(fsap, f_backup_time);
	if (VFSATTR_IS_ACTIVE(fsap, f_fssubtype)) {
		u_int16_t subtype = 0;

		/*
		 * Subtypes (flavors) for HFS
		 *	0:	Mac OS Extended
		 *	1:	Mac OS Extended (Journaled)
		 *	2:	Mac OS Extended (Case Sensitive)
		 *	3:	Mac OS Extended (Case Sensitive, Journaled)
		 *	4 - 127:	Reserved
		 *	128:	Mac OS Standard
		 *
		 */
		if (hfsmp->hfs_flags & HFS_STANDARD) {
			subtype = HFS_SUBTYPE_STANDARDHFS;
		} else /* HFS Plus */ {
			if (hfsmp->jnl)
				subtype |= HFS_SUBTYPE_JOURNALED;
			if (hfsmp->hfs_flags & HFS_CASE_SENSITIVE)
				subtype |= HFS_SUBTYPE_CASESENSITIVE;
		}
		fsap->f_fssubtype = subtype;
		VFSATTR_SET_SUPPORTED(fsap, f_fssubtype);
	}

	if (VFSATTR_IS_ACTIVE(fsap, f_vol_name)) {
		strlcpy(fsap->f_vol_name, (char *) hfsmp->vcbVN, MAXPATHLEN);
		VFSATTR_SET_SUPPORTED(fsap, f_vol_name);
	}
	if (VFSATTR_IS_ACTIVE(fsap, f_uuid)) {
		hfs_getvoluuid(hfsmp, fsap->f_uuid);
		VFSATTR_SET_SUPPORTED(fsap, f_uuid);
	}
	return (0);
}
6935
6936 /*
6937 * Perform a volume rename. Requires the FS' root vp.
6938 */
static int
hfs_rename_volume(struct vnode *vp, const char *name, proc_t p)
{
	ExtendedVCB *vcb = VTOVCB(vp);
	struct cnode *cp = VTOC(vp);
	struct hfsmount *hfsmp = VTOHFS(vp);
	struct cat_desc to_desc;
	struct cat_desc todir_desc;
	struct cat_desc new_desc;
	cat_cookie_t cookie;
	int lockflags;
	int error = 0;
	char converted_volname[256];
	size_t volname_length = 0;
	size_t conv_volname_length = 0;


	/*
	 * Ignore attempts to rename a volume to a zero-length name.
	 */
	if (name[0] == 0)
		return(0);

	bzero(&to_desc, sizeof(to_desc));
	bzero(&todir_desc, sizeof(todir_desc));
	bzero(&new_desc, sizeof(new_desc));
	bzero(&cookie, sizeof(cookie));

	/* Destination "directory": the root parent, where the volume's
	 * root folder record (whose name IS the volume name) lives.
	 */
	todir_desc.cd_parentcnid = kHFSRootParentID;
	todir_desc.cd_cnid = kHFSRootFolderID;
	todir_desc.cd_flags = CD_ISDIR;

	to_desc.cd_nameptr = (const u_int8_t *)name;
	to_desc.cd_namelen = strlen(name);
	to_desc.cd_parentcnid = kHFSRootParentID;
	to_desc.cd_cnid = cp->c_cnid;
	to_desc.cd_flags = CD_ISDIR;

	/* Lock order: cnode lock -> transaction -> catalog preflight ->
	 * catalog B-tree lock.  Unwind in reverse on every path.
	 */
	if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK)) == 0) {
		if ((error = hfs_start_transaction(hfsmp)) == 0) {
			if ((error = cat_preflight(hfsmp, CAT_RENAME, &cookie, p)) == 0) {
				lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);

				error = cat_rename(hfsmp, &cp->c_desc, &todir_desc, &to_desc, &new_desc);

				/*
				 * If successful, update the name in the VCB, ensure it's terminated.
				 */
				if (!error) {
					strlcpy((char *)vcb->vcbVN, name, sizeof(vcb->vcbVN));
					volname_length = strlen ((const char*)vcb->vcbVN);
#define DKIOCCSSETLVNAME _IOW('d', 198, char[1024])
					/* Send the volume name down to CoreStorage if necessary */
					error = utf8_normalizestr(vcb->vcbVN, volname_length, (u_int8_t*)converted_volname, &conv_volname_length, 256, UTF_PRECOMPOSED);
					if (error == 0) {
						(void) VNOP_IOCTL (hfsmp->hfs_devvp, DKIOCCSSETLVNAME, converted_volname, 0, vfs_context_current());
					}
					/* The CoreStorage notification is best-effort:
					 * deliberately swallow any normalization/ioctl
					 * error so the rename still succeeds.
					 */
					error = 0;
				}

				hfs_systemfile_unlock(hfsmp, lockflags);
				cat_postflight(hfsmp, &cookie, p);

				/* NOTE(review): marking the VCB dirty only on the
				 * *error* path looks inverted (the success path is
				 * the one that changed vcbVN) -- confirm intent.
				 */
				if (error)
					MarkVCBDirty(vcb);
				(void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
			}
			hfs_end_transaction(hfsmp);
		}
		if (!error) {
			/* Release old allocated name buffer */
			if (cp->c_desc.cd_flags & CD_HASBUF) {
				const char *tmp_name = (const char *)cp->c_desc.cd_nameptr;

				cp->c_desc.cd_nameptr = 0;
				cp->c_desc.cd_namelen = 0;
				cp->c_desc.cd_flags &= ~CD_HASBUF;
				vfs_removename(tmp_name);
			}
			/* Update cnode's catalog descriptor */
			replace_desc(cp, &new_desc);
			vcb->volumeNameEncodingHint = new_desc.cd_encoding;
			cp->c_touch_chgtime = TRUE;
		}

		hfs_unlock(cp);
	}

	return(error);
}
7029
7030 /*
 * Set file system attributes.
7032 */
7033 static int
7034 hfs_vfs_setattr(struct mount *mp, struct vfs_attr *fsap, __unused vfs_context_t context)
7035 {
7036 kauth_cred_t cred = vfs_context_ucred(context);
7037 int error = 0;
7038
7039 /*
7040 * Must be superuser or owner of filesystem to change volume attributes
7041 */
7042 if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(mp)->f_owner))
7043 return(EACCES);
7044
7045 if (VFSATTR_IS_ACTIVE(fsap, f_vol_name)) {
7046 vnode_t root_vp;
7047
7048 error = hfs_vfs_root(mp, &root_vp, context);
7049 if (error)
7050 goto out;
7051
7052 error = hfs_rename_volume(root_vp, fsap->f_vol_name, vfs_context_proc(context));
7053 (void) vnode_put(root_vp);
7054 if (error)
7055 goto out;
7056
7057 VFSATTR_SET_SUPPORTED(fsap, f_vol_name);
7058 }
7059
7060 out:
7061 return error;
7062 }
7063
7064 /* If a runtime corruption is detected, set the volume inconsistent
7065 * bit in the volume attributes. The volume inconsistent bit is a persistent
7066 * bit which represents that the volume is corrupt and needs repair.
7067 * The volume inconsistent bit can be set from the kernel when it detects
7068 * runtime corruption or from file system repair utilities like fsck_hfs when
7069 * a repair operation fails. The bit should be cleared only from file system
7070 * verify/repair utility like fsck_hfs when a verify/repair succeeds.
7071 */
7072 void hfs_mark_volume_inconsistent(struct hfsmount *hfsmp)
7073 {
7074 HFS_MOUNT_LOCK(hfsmp, TRUE);
7075 if ((hfsmp->vcbAtrb & kHFSVolumeInconsistentMask) == 0) {
7076 hfsmp->vcbAtrb |= kHFSVolumeInconsistentMask;
7077 MarkVCBDirty(hfsmp);
7078 }
7079 if ((hfsmp->hfs_flags & HFS_READ_ONLY)==0) {
7080 /* Log information to ASL log */
7081 fslog_fs_corrupt(hfsmp->hfs_mp);
7082 printf("hfs: Runtime corruption detected on %s, fsck will be forced on next mount.\n", hfsmp->vcbVN);
7083 }
7084 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
7085 }
7086
7087 /* Replay the journal on the device node provided. Returns zero if
7088 * journal replay succeeded or no journal was supposed to be replayed.
7089 */
7090 static int hfs_journal_replay(vnode_t devvp, vfs_context_t context)
7091 {
7092 int retval = 0;
7093 struct mount *mp = NULL;
7094 struct hfs_mount_args *args = NULL;
7095
7096 /* Replay allowed only on raw devices */
7097 if (!vnode_ischr(devvp) && !vnode_isblk(devvp)) {
7098 retval = EINVAL;
7099 goto out;
7100 }
7101
7102 /* Create dummy mount structures */
7103 MALLOC(mp, struct mount *, sizeof(struct mount), M_TEMP, M_WAITOK);
7104 if (mp == NULL) {
7105 retval = ENOMEM;
7106 goto out;
7107 }
7108 bzero(mp, sizeof(struct mount));
7109 mount_lock_init(mp);
7110
7111 MALLOC(args, struct hfs_mount_args *, sizeof(struct hfs_mount_args), M_TEMP, M_WAITOK);
7112 if (args == NULL) {
7113 retval = ENOMEM;
7114 goto out;
7115 }
7116 bzero(args, sizeof(struct hfs_mount_args));
7117
7118 retval = hfs_mountfs(devvp, mp, args, 1, context);
7119 buf_flushdirtyblks(devvp, TRUE, 0, "hfs_journal_replay");
7120
7121 /* FSYNC the devnode to be sure all data has been flushed */
7122 retval = VNOP_FSYNC(devvp, MNT_WAIT, context);
7123
7124 out:
7125 if (mp) {
7126 mount_lock_destroy(mp);
7127 FREE(mp, M_TEMP);
7128 }
7129 if (args) {
7130 FREE(args, M_TEMP);
7131 }
7132 return retval;
7133 }
7134
7135 /*
7136 * hfs vfs operations.
7137 */
struct vfsops hfs_vfsops = {
	hfs_mount,		/* mount */
	hfs_start,		/* start */
	hfs_unmount,		/* unmount */
	hfs_vfs_root,		/* root */
	hfs_quotactl,		/* quotactl */
	hfs_vfs_getattr,	/* getattr -- was hfs_statfs */
	hfs_sync,		/* sync */
	hfs_vfs_vget,		/* vget */
	hfs_fhtovp,		/* fhtovp (NFS file handle -> vnode) */
	hfs_vptofh,		/* vptofh (vnode -> NFS file handle) */
	hfs_init,		/* init */
	hfs_sysctl,		/* sysctl */
	hfs_vfs_setattr,	/* setattr */
	{NULL}			/* remaining entries unused */
};