/*
 * Source: bsd/hfs/hfs_vfsops.c from Apple XNU release xnu-1504.9.17,
 * obtained via the git.saurik.com apple/xnu.git mirror.
 */
1 /*
2 * Copyright (c) 1999-2010 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1991, 1993, 1994
30 * The Regents of the University of California. All rights reserved.
31 * (c) UNIX System Laboratories, Inc.
32 * All or some portions of this file are derived from material licensed
33 * to the University of California by American Telephone and Telegraph
34 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35 * the permission of UNIX System Laboratories, Inc.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * hfs_vfsops.c
66 * derived from @(#)ufs_vfsops.c 8.8 (Berkeley) 5/20/95
67 *
68 * (c) Copyright 1997-2002 Apple Computer, Inc. All rights reserved.
69 *
70 * hfs_vfsops.c -- VFS layer for loadable HFS file system.
71 *
72 */
73 #include <sys/param.h>
74 #include <sys/systm.h>
75 #include <sys/kauth.h>
76
77 #include <sys/ubc.h>
78 #include <sys/ubc_internal.h>
79 #include <sys/vnode_internal.h>
80 #include <sys/mount_internal.h>
81 #include <sys/sysctl.h>
82 #include <sys/malloc.h>
83 #include <sys/stat.h>
84 #include <sys/quota.h>
85 #include <sys/disk.h>
86 #include <sys/paths.h>
87 #include <sys/utfconv.h>
88 #include <sys/kdebug.h>
89 #include <sys/fslog.h>
90
91 #include <kern/locks.h>
92
93 #include <vfs/vfs_journal.h>
94
95 #include <miscfs/specfs/specdev.h>
96 #include <hfs/hfs_mount.h>
97
98 #include <libkern/crypto/md5.h>
99 #include <uuid/uuid.h>
100
101 #include "hfs.h"
102 #include "hfs_catalog.h"
103 #include "hfs_cnode.h"
104 #include "hfs_dbg.h"
105 #include "hfs_endian.h"
106 #include "hfs_hotfiles.h"
107 #include "hfs_quota.h"
108
109 #include "hfscommon/headers/FileMgrInternal.h"
110 #include "hfscommon/headers/BTreesInternal.h"
111
112 #if HFS_DIAGNOSTIC
113 int hfs_dbg_all = 0;
114 int hfs_dbg_err = 0;
115 #endif
116
117 /* Enable/disable debugging code for live volume resizing */
118 int hfs_resize_debug = 0;
119
120 lck_grp_attr_t * hfs_group_attr;
121 lck_attr_t * hfs_lock_attr;
122 lck_grp_t * hfs_mutex_group;
123 lck_grp_t * hfs_rwlock_group;
124
125 extern struct vnodeopv_desc hfs_vnodeop_opv_desc;
126 extern struct vnodeopv_desc hfs_std_vnodeop_opv_desc;
127
128 /* not static so we can re-use in hfs_readwrite.c for build_path calls */
129 int hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context);
130
131 static int hfs_changefs(struct mount *mp, struct hfs_mount_args *args);
132 static int hfs_fhtovp(struct mount *mp, int fhlen, unsigned char *fhp, struct vnode **vpp, vfs_context_t context);
133 static int hfs_flushfiles(struct mount *, int, struct proc *);
134 static int hfs_flushMDB(struct hfsmount *hfsmp, int waitfor, int altflush);
135 static int hfs_getmountpoint(struct vnode *vp, struct hfsmount **hfsmpp);
136 static int hfs_init(struct vfsconf *vfsp);
137 static int hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t context);
138 static int hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, int journal_replay_only, vfs_context_t context);
139 static int hfs_reload(struct mount *mp);
140 static int hfs_vfs_root(struct mount *mp, struct vnode **vpp, vfs_context_t context);
141 static int hfs_quotactl(struct mount *, int, uid_t, caddr_t, vfs_context_t context);
142 static int hfs_start(struct mount *mp, int flags, vfs_context_t context);
143 static int hfs_statfs(struct mount *mp, register struct vfsstatfs *sbp, vfs_context_t context);
144 static int hfs_sync(struct mount *mp, int waitfor, vfs_context_t context);
145 static int hfs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp,
146 user_addr_t newp, size_t newlen, vfs_context_t context);
147 static int hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context);
148 static int hfs_vptofh(struct vnode *vp, int *fhlenp, unsigned char *fhp, vfs_context_t context);
149
150 static int hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t startblk, u_int32_t reclaimblks, vfs_context_t context);
151 static int hfs_overlapped_overflow_extents(struct hfsmount *hfsmp, u_int32_t startblk, u_int32_t fileID);
152 static int hfs_journal_replay(vnode_t devvp, vfs_context_t context);
153
154
/*
 * hfs_mountroot - mount an HFS Plus volume as the root file system.
 *
 * Called by vfs_mountroot when mounting HFS Plus as root.  Performs the
 * common mount work via hfs_mountfs() (a NULL 'args' pointer marks the
 * root mount), then applies root-specific defaults and establishes the
 * free-block reserve before priming the cached statfs information.
 *
 * Returns 0 on success or the error from hfs_mountfs().
 */

__private_extern__
int
hfs_mountroot(mount_t mp, vnode_t rvp, vfs_context_t context)
{
    struct hfsmount *hfsmp;
    ExtendedVCB *vcb;
    struct vfsstatfs *vfsp;
    int error;

    /* Common mount path; NULL args identifies this as the root mount. */
    if ((error = hfs_mountfs(rvp, mp, NULL, 0, context)))
        return (error);

    /* Init hfsmp */
    hfsmp = VFSTOHFS(mp);

    /* Root defaults: unknown owner/group, 0755 for both dirs and files. */
    hfsmp->hfs_uid = UNKNOWNUID;
    hfsmp->hfs_gid = UNKNOWNGID;
    hfsmp->hfs_dir_mask = (S_IRWXU | S_IRGRP|S_IXGRP | S_IROTH|S_IXOTH);  /* 0755 */
    hfsmp->hfs_file_mask = (S_IRWXU | S_IRGRP|S_IXGRP | S_IROTH|S_IXOTH); /* 0755 */

    /*
     * Establish the free block reserve: HFS_MINFREE percent of the volume,
     * capped at HFS_MAXRESERVE bytes worth of allocation blocks.
     */
    vcb = HFSTOVCB(hfsmp);
    vcb->reserveBlocks = ((u_int64_t)vcb->totalBlocks * HFS_MINFREE) / 100;
    vcb->reserveBlocks = MIN(vcb->reserveBlocks, HFS_MAXRESERVE / vcb->blockSize);

    /*
     * Prime the mount's cached vfsstatfs.
     * NOTE(review): hfs_statfs is invoked with a NULL vfs_context here —
     * presumably safe on this path; confirm against hfs_statfs's use of it.
     */
    vfsp = vfs_statfs(mp);
    (void)hfs_statfs(mp, vfsp, NULL);

    return (0);
}
189
190
/*
 * VFS Operations.
 *
 * mount system call
 */

/*
 * hfs_mount - handle mount(2) for an HFS/HFS+ volume.
 *
 * Supports three cases, distinguished by the command flags on 'mp':
 *   1. MNT_UPDATE | MNT_RELOAD : re-read incore data after fsck
 *      (only permitted on a read-only mount).
 *   2. MNT_UPDATE              : transition between read-write and
 *      read-only, and/or apply new mount arguments via hfs_changefs().
 *   3. fresh mount             : copy in the user's hfs_mount_args and
 *      call hfs_mountfs().
 *
 * Returns 0 on success, otherwise an errno value.
 */
static int
hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t context)
{
    struct proc *p = vfs_context_proc(context);
    struct hfsmount *hfsmp = NULL;
    struct hfs_mount_args args;
    int retval = E_NONE;
    u_int32_t cmdflags;

    /* Copy the user-supplied mount arguments into the kernel. */
    if ((retval = copyin(data, (caddr_t)&args, sizeof(args)))) {
        return (retval);
    }
    cmdflags = (u_int32_t)vfs_flags(mp) & MNT_CMDFLAGS;
    if (cmdflags & MNT_UPDATE) {
        hfsmp = VFSTOHFS(mp);

        /* Reload incore data after an fsck. */
        if (cmdflags & MNT_RELOAD) {
            if (vfs_isrdonly(mp))
                return hfs_reload(mp);
            else
                return (EINVAL);
        }

        /* Change to a read-only file system. */
        if (((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) &&
            vfs_isrdonly(mp)) {
            int flags;

            /* Set flag to indicate that a downgrade to read-only
             * is in progress and therefore block any further
             * modifications to the file system.
             */
            hfs_global_exclusive_lock_acquire(hfsmp);
            hfsmp->hfs_flags |= HFS_RDONLY_DOWNGRADE;
            hfsmp->hfs_downgrading_proc = current_thread();
            hfs_global_exclusive_lock_release(hfsmp);

            /* use VFS_SYNC to push out System (btree) files */
            retval = VFS_SYNC(mp, MNT_WAIT, context);
            if (retval && ((cmdflags & MNT_FORCE) == 0)) {
                /* Sync failed and not forced: abort the downgrade. */
                hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE;
                hfsmp->hfs_downgrading_proc = NULL;
                goto out;
            }

            flags = WRITECLOSE;
            if (cmdflags & MNT_FORCE)
                flags |= FORCECLOSE;

            if ((retval = hfs_flushfiles(mp, flags, p))) {
                hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE;
                hfsmp->hfs_downgrading_proc = NULL;
                goto out;
            }

            /* mark the volume cleanly unmounted */
            hfsmp->vcbAtrb |= kHFSVolumeUnmountedMask;
            retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
            hfsmp->hfs_flags |= HFS_READ_ONLY;

            /* also get the volume bitmap blocks */
            if (!retval) {
                if (vnode_mount(hfsmp->hfs_devvp) == mp) {
                    retval = hfs_fsync(hfsmp->hfs_devvp, MNT_WAIT, 0, p);
                } else {
                    /* Device vnode belongs to another mount; take a
                     * reference across the fsync. */
                    vnode_get(hfsmp->hfs_devvp);
                    retval = VNOP_FSYNC(hfsmp->hfs_devvp, MNT_WAIT, context);
                    vnode_put(hfsmp->hfs_devvp);
                }
            }
            if (retval) {
                /* Flush failed: back out the read-only downgrade. */
                hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE;
                hfsmp->hfs_downgrading_proc = NULL;
                hfsmp->hfs_flags &= ~HFS_READ_ONLY;
                goto out;
            }
            if (hfsmp->jnl) {
                hfs_global_exclusive_lock_acquire(hfsmp);

                journal_close(hfsmp->jnl);
                hfsmp->jnl = NULL;

                // Note: we explicitly don't want to shutdown
                // access to the jvp because we may need
                // it later if we go back to being read-write.

                hfs_global_exclusive_lock_release(hfsmp);
            }

            hfsmp->hfs_downgrading_proc = NULL;
        }

        /* Change to a writable file system. */
        if (vfs_iswriteupgrade(mp)) {

            /*
             * On inconsistent disks, do not allow read-write mount
             * unless it is the boot volume being mounted.
             */
            if (!(vfs_flags(mp) & MNT_ROOTFS) &&
                (hfsmp->vcbAtrb & kHFSVolumeInconsistentMask)) {
                retval = EINVAL;
                goto out;
            }

            // If the journal was shut-down previously because we were
            // asked to be read-only, let's start it back up again now

            if (   (HFSTOVCB(hfsmp)->vcbAtrb & kHFSVolumeJournaledMask)
                && hfsmp->jnl == NULL
                && hfsmp->jvp != NULL) {
                int jflags;

                if (hfsmp->hfs_flags & HFS_NEED_JNL_RESET) {
                    jflags = JOURNAL_RESET;
                } else {
                    jflags = 0;
                }

                hfs_global_exclusive_lock_acquire(hfsmp);

                /* Journal offset is relative to the start of the
                 * embedded HFS+ volume (hfsPlusIOPosOffset). */
                hfsmp->jnl = journal_open(hfsmp->jvp,
                                          (hfsmp->jnl_start * HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset,
                                          hfsmp->jnl_size,
                                          hfsmp->hfs_devvp,
                                          hfsmp->hfs_logical_block_size,
                                          jflags,
                                          0,
                                          hfs_sync_metadata, hfsmp->hfs_mp);

                hfs_global_exclusive_lock_release(hfsmp);

                if (hfsmp->jnl == NULL) {
                    retval = EINVAL;
                    goto out;
                } else {
                    hfsmp->hfs_flags &= ~HFS_NEED_JNL_RESET;
                }

            }

            /* See if we need to erase unused Catalog nodes due to <rdar://problem/6947811>. */
            retval = hfs_erase_unused_nodes(hfsmp);
            if (retval != E_NONE)
                goto out;

            /* Only clear HFS_READ_ONLY after a successful write */
            hfsmp->hfs_flags &= ~HFS_READ_ONLY;

            /* If this mount point was downgraded from read-write
             * to read-only, clear that information as we are now
             * moving back to read-write.
             */
            hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE;
            hfsmp->hfs_downgrading_proc = NULL;

            /* mark the volume dirty (clear clean unmount bit) */
            hfsmp->vcbAtrb &= ~kHFSVolumeUnmountedMask;

            retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
            if (retval != E_NONE)
                goto out;

            if (!(hfsmp->hfs_flags & (HFS_READ_ONLY | HFS_STANDARD))) {
                /* Setup private/hidden directories for hardlinks. */
                hfs_privatedir_init(hfsmp, FILE_HARDLINKS);
                hfs_privatedir_init(hfsmp, DIR_HARDLINKS);

                hfs_remove_orphans(hfsmp);

                /*
                 * Allow hot file clustering if conditions allow.
                 */
                if ((hfsmp->hfs_flags & HFS_METADATA_ZONE)  &&
                    ((hfsmp->hfs_mp->mnt_kern_flag & MNTK_SSD) == 0)) {
                    (void) hfs_recording_init(hfsmp);
                }
                /* Force ACLs on HFS+ file systems. */
                if (vfs_extendedsecurity(HFSTOVFS(hfsmp)) == 0) {
                    vfs_setextendedsecurity(HFSTOVFS(hfsmp));
                }
            }
        }

        /* Update file system parameters. */
        retval = hfs_changefs(mp, &args);

    } else /* not an update request */ {

        /* Set the mount flag to indicate that we support volfs  */
        vfs_setflags(mp, (u_int64_t)((unsigned int)MNT_DOVOLFS));

        retval = hfs_mountfs(devvp, mp, &args, 0, context);
    }
out:
    if (retval == 0) {
        /* Refresh cached statfs data on success. */
        (void)hfs_statfs(mp, vfs_statfs(mp), context);
    }
    return (retval);
}
398
399
/*
 * Argument bundle passed through vnode_iterate() to
 * hfs_changefs_callback() by hfs_changefs().
 */
struct hfs_changefs_cargs {
    struct hfsmount *hfsmp;      /* mount whose vnodes are being updated */
    int              namefix;    /* non-zero: text encoding changed; fix names */
    int              permfix;    /* non-zero: default uid/gid/mask changed */
    int              permswitch; /* non-zero: MNT_UNKNOWNPERMISSIONS toggled */
};
406
/*
 * hfs_changefs_callback - per-vnode worker for hfs_changefs().
 *
 * Invoked by vnode_iterate() for each vnode on the mount; re-reads the
 * cnode's catalog record and applies any permission and/or name-encoding
 * changes requested in 'cargs' (a struct hfs_changefs_cargs).
 *
 * Always returns VNODE_RETURNED so iteration continues even when an
 * individual catalog lookup fails.
 */
static int
hfs_changefs_callback(struct vnode *vp, void *cargs)
{
    ExtendedVCB *vcb;
    struct cnode *cp;
    struct cat_desc cndesc;
    struct cat_attr cnattr;
    struct hfs_changefs_cargs *args;
    int lockflags;
    int error;

    args = (struct hfs_changefs_cargs *)cargs;

    cp = VTOC(vp);
    vcb = HFSTOVCB(args->hfsmp);

    /* Re-read this cnode's catalog record under a shared catalog lock. */
    lockflags = hfs_systemfile_lock(args->hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
    error = cat_lookup(args->hfsmp, &cp->c_desc, 0, &cndesc, &cnattr, NULL, NULL);
    hfs_systemfile_unlock(args->hfsmp, lockflags);
    if (error) {
        /*
         * If we couldn't find this guy skip to the next one
         */
        if (args->namefix)
            cache_purge(vp);

        return (VNODE_RETURNED);
    }
    /*
     * Get the real uid/gid and perm mask from disk.
     */
    if (args->permswitch || args->permfix) {
        cp->c_uid = cnattr.ca_uid;
        cp->c_gid = cnattr.ca_gid;
        cp->c_mode = cnattr.ca_mode;
    }
    /*
     * If we're switching name converters then...
     *   Remove the existing entry from the namei cache.
     *   Update name to one based on new encoder.
     */
    if (args->namefix) {
        cache_purge(vp);
        /* replace_desc takes ownership of cndesc's name buffer. */
        replace_desc(cp, &cndesc);

        if (cndesc.cd_cnid == kHFSRootFolderID) {
            /* Keep the VCB's cached volume name in sync with the root. */
            strlcpy((char *)vcb->vcbVN, (const char *)cp->c_desc.cd_nameptr, NAME_MAX+1);
            cp->c_desc.cd_encoding = args->hfsmp->hfs_encoding;
        }
    } else {
        /* Not renaming: release the descriptor we looked up. */
        cat_releasedesc(&cndesc);
    }
    return (VNODE_RETURNED);
}
461
/* Change fs mount parameters */
/*
 * hfs_changefs - apply updated mount arguments to a live HFS volume.
 *
 * Handles MNT_UPDATE requests: permission-mode switches (unknown vs.
 * real permissions), default uid/gid/mask changes, timezone changes,
 * and (HFS standard only) text-encoding converter changes.  When any
 * of these require per-vnode fixes, iterates all active vnodes via
 * hfs_changefs_callback().
 *
 * Sets HFS_IN_CHANGEFS for the duration of the operation.
 * Returns 0 on success or an errno value.
 */
static int
hfs_changefs(struct mount *mp, struct hfs_mount_args *args)
{
    int retval = 0;
    int namefix, permfix, permswitch;
    struct hfsmount *hfsmp;
    ExtendedVCB *vcb;
    hfs_to_unicode_func_t    get_unicode_func;
    unicode_to_hfs_func_t    get_hfsname_func;
    u_int32_t old_encoding = 0;
    struct hfs_changefs_cargs cargs;
    u_int32_t mount_flags;

    hfsmp = VFSTOHFS(mp);
    vcb = HFSTOVCB(hfsmp);
    mount_flags = (unsigned int)vfs_flags(mp);

    hfsmp->hfs_flags |= HFS_IN_CHANGEFS;

    /* permswitch is set when the unknown-permissions setting flips
     * in either direction relative to the current mount state. */
    permswitch = (((hfsmp->hfs_flags & HFS_UNKNOWN_PERMS) &&
                   ((mount_flags & MNT_UNKNOWNPERMISSIONS) == 0)) ||
                  (((hfsmp->hfs_flags & HFS_UNKNOWN_PERMS) == 0) &&
                   (mount_flags & MNT_UNKNOWNPERMISSIONS)));

    /* The root filesystem must operate with actual permissions: */
    if (permswitch && (mount_flags & MNT_ROOTFS) && (mount_flags & MNT_UNKNOWNPERMISSIONS)) {
        vfs_clearflags(mp, (u_int64_t)((unsigned int)MNT_UNKNOWNPERMISSIONS));    /* Just say "No". */
        retval = EINVAL;
        goto exit;
    }
    if (mount_flags & MNT_UNKNOWNPERMISSIONS)
        hfsmp->hfs_flags |= HFS_UNKNOWN_PERMS;
    else
        hfsmp->hfs_flags &= ~HFS_UNKNOWN_PERMS;

    namefix = permfix = 0;

    /*
     * Tracking of hot files requires up-to-date access times.  So if
     * access time updates are disabled, we must also disable hot files.
     */
    if (mount_flags & MNT_NOATIME) {
        (void) hfs_recording_suspend(hfsmp);
    }

    /* Change the timezone (Note: this affects all hfs volumes and hfs+ volume create dates) */
    if (args->hfs_timezone.tz_minuteswest != VNOVAL) {
        gTimeZone = args->hfs_timezone;
    }

    /* Change the default uid, gid and/or mask */
    if ((args->hfs_uid != (uid_t)VNOVAL) && (hfsmp->hfs_uid != args->hfs_uid)) {
        hfsmp->hfs_uid = args->hfs_uid;
        if (vcb->vcbSigWord == kHFSPlusSigWord)
            ++permfix;
    }
    if ((args->hfs_gid != (gid_t)VNOVAL) && (hfsmp->hfs_gid != args->hfs_gid)) {
        hfsmp->hfs_gid = args->hfs_gid;
        if (vcb->vcbSigWord == kHFSPlusSigWord)
            ++permfix;
    }
    if (args->hfs_mask != (mode_t)VNOVAL) {
        if (hfsmp->hfs_dir_mask != (args->hfs_mask & ALLPERMS)) {
            hfsmp->hfs_dir_mask = args->hfs_mask & ALLPERMS;
            hfsmp->hfs_file_mask = args->hfs_mask & ALLPERMS;
            if ((args->flags != VNOVAL) && (args->flags & HFSFSMNT_NOXONFILES))
                hfsmp->hfs_file_mask = (args->hfs_mask & DEFFILEMODE);
            if (vcb->vcbSigWord == kHFSPlusSigWord)
                ++permfix;
        }
    }

    /* Change the hfs encoding value (hfs only) */
    if ((vcb->vcbSigWord == kHFSSigWord)    &&
        (args->hfs_encoding != (u_int32_t)VNOVAL)              &&
        (hfsmp->hfs_encoding != args->hfs_encoding)) {

        retval = hfs_getconverter(args->hfs_encoding, &get_unicode_func, &get_hfsname_func);
        if (retval)
            goto exit;

        /*
         * Connect the new hfs_get_unicode converter but leave
         * the old hfs_get_hfsname converter in place so that
         * we can lookup existing vnodes to get their correctly
         * encoded names.
         *
         * When we're all finished, we can then connect the new
         * hfs_get_hfsname converter and release our interest
         * in the old converters.
         */
        hfsmp->hfs_get_unicode = get_unicode_func;
        old_encoding = hfsmp->hfs_encoding;
        hfsmp->hfs_encoding = args->hfs_encoding;
        ++namefix;
    }

    if (!(namefix || permfix || permswitch))
        goto exit;

    /* XXX 3762912 hack to support HFS filesystem 'owner' */
    if (permfix)
        vfs_setowner(mp,
            hfsmp->hfs_uid == UNKNOWNUID ? KAUTH_UID_NONE : hfsmp->hfs_uid,
            hfsmp->hfs_gid == UNKNOWNGID ? KAUTH_GID_NONE : hfsmp->hfs_gid);

    /*
     * For each active vnode fix things that changed
     *
     * Note that we can visit a vnode more than once
     * and we can race with fsync.
     *
     * hfs_changefs_callback will be called for each vnode
     * hung off of this mount point
     *
     * The vnode will be properly referenced and unreferenced
     * around the callback
     */
    cargs.hfsmp = hfsmp;
    cargs.namefix = namefix;
    cargs.permfix = permfix;
    cargs.permswitch = permswitch;

    vnode_iterate(mp, 0, hfs_changefs_callback, (void *)&cargs);

    /*
     * If we're switching name converters we can now
     * connect the new hfs_get_hfsname converter and
     * release our interest in the old converters.
     */
    if (namefix) {
        /* get_hfsname_func was set by hfs_getconverter() above
         * whenever namefix is non-zero. */
        hfsmp->hfs_get_hfsname = get_hfsname_func;
        vcb->volumeNameEncodingHint = args->hfs_encoding;
        (void) hfs_relconverter(old_encoding);
    }
exit:
    hfsmp->hfs_flags &= ~HFS_IN_CHANGEFS;
    return (retval);
}
602
603
/*
 * Argument bundle passed through vnode_iterate() to
 * hfs_reload_callback() by hfs_reload().
 */
struct hfs_reload_cargs {
    struct hfsmount *hfsmp; /* mount being reloaded */
    int              error; /* first error from a cnode re-read, or 0 */
};
608
/*
 * hfs_reload_callback - per-vnode worker for hfs_reload().
 *
 * Invalidates the vnode's buffers, drops directory hints, and for
 * regular (non-system, non-resource-fork) vnodes re-reads the cnode's
 * catalog record by file ID and installs the fresh descriptor.
 *
 * Returns VNODE_RETURNED to continue iteration, or
 * VNODE_RETURNED_DONE (with args->error set) to stop on lookup failure.
 */
static int
hfs_reload_callback(struct vnode *vp, void *cargs)
{
    struct cnode *cp;
    struct hfs_reload_cargs *args;
    int lockflags;

    args = (struct hfs_reload_cargs *)cargs;
    /*
     * flush all the buffers associated with this node
     */
    (void) buf_invalidateblks(vp, 0, 0, 0);

    cp = VTOC(vp);
    /*
     * Remove any directory hints
     */
    if (vnode_isdir(vp))
        hfs_reldirhints(cp, 0);

    /*
     * Re-read cnode data for all active vnodes (non-metadata files).
     */
    if (!vnode_issystem(vp) && !VNODE_IS_RSRC(vp)) {
        struct cat_fork *datafork;
        struct cat_desc desc;

        datafork = cp->c_datafork ? &cp->c_datafork->ff_data : NULL;

        /* lookup by fileID since name could have changed */
        lockflags = hfs_systemfile_lock(args->hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
        args->error = cat_idlookup(args->hfsmp, cp->c_fileid, 0, &desc, &cp->c_attr, datafork);
        hfs_systemfile_unlock(args->hfsmp, lockflags);
        if (args->error) {
            return (VNODE_RETURNED_DONE);
        }

        /* update cnode's catalog descriptor */
        (void) replace_desc(cp, &desc);
    }
    return (VNODE_RETURNED);
}
651
/*
 * Reload all incore data for a filesystem (used after running fsck on
 * the root filesystem and finding things to fix). The filesystem must
 * be mounted read-only.
 *
 * Things to do to update the mount:
 *    invalidate all cached meta-data.
 *    invalidate all inactive vnodes.
 *    invalidate all cached file data.
 *    re-read volume header from disk.
 *    re-load meta-file info (extents, file size).
 *    re-load B-tree header data.
 *    re-read cnode data for all active vnodes.
 *
 * Returns 0 on success; EINVAL for HFS standard volumes; EIO when the
 * re-read volume header fails the sanity check; otherwise an errno
 * from I/O, B-tree reload, or catalog lookup.
 */
static int
hfs_reload(struct mount *mountp)
{
    register struct vnode *devvp;
    struct buf *bp;
    int error, i;
    struct hfsmount *hfsmp;
    struct HFSPlusVolumeHeader *vhp;
    ExtendedVCB *vcb;
    struct filefork *forkp;
    struct cat_desc cndesc;
    struct hfs_reload_cargs args;
    daddr64_t priIDSector;

    hfsmp = VFSTOHFS(mountp);
    vcb = HFSTOVCB(hfsmp);

    if (vcb->vcbSigWord == kHFSSigWord)
        return (EINVAL);    /* rooting from HFS is not supported! */

    /*
     * Invalidate all cached meta-data.
     */
    devvp = hfsmp->hfs_devvp;
    if (buf_invalidateblks(devvp, 0, 0, 0))
        panic("hfs_reload: dirty1");

    args.hfsmp = hfsmp;
    args.error = 0;
    /*
     * hfs_reload_callback will be called for each vnode
     * hung off of this mount point that can't be recycled...
     * vnode_iterate will recycle those that it can (the VNODE_RELOAD option)
     * the vnode will be in an 'unbusy' state (VNODE_WAIT) and
     * properly referenced and unreferenced around the callback
     */
    vnode_iterate(mountp, VNODE_RELOAD | VNODE_WAIT, hfs_reload_callback, (void *)&args);

    if (args.error)
        return (args.error);

    /*
     * Re-read VolumeHeader from disk.
     */
    priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
                              HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size));

    error = (int)buf_meta_bread(hfsmp->hfs_devvp,
                                HFS_PHYSBLK_ROUNDDOWN(priIDSector, hfsmp->hfs_log_per_phys),
                                hfsmp->hfs_physical_block_size, NOCRED, &bp);
    if (error) {
        if (bp != NULL)
            buf_brelse(bp);
        return (error);
    }

    vhp = (HFSPlusVolumeHeader *) (buf_dataptr(bp) + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));

    /* Do a quick sanity check: signature, version, and block size must
     * still match the in-core VCB before we trust the on-disk header. */
    if ((SWAP_BE16(vhp->signature) != kHFSPlusSigWord &&
         SWAP_BE16(vhp->signature) != kHFSXSigWord) ||
        (SWAP_BE16(vhp->version) != kHFSPlusVersion &&
         SWAP_BE16(vhp->version) != kHFSXVersion) ||
        SWAP_BE32(vhp->blockSize) != vcb->blockSize) {
        buf_brelse(bp);
        return (EIO);
    }

    /* All on-disk fields are big-endian; swap into the in-core VCB. */
    vcb->vcbLsMod       = to_bsd_time(SWAP_BE32(vhp->modifyDate));
    vcb->vcbAtrb        = SWAP_BE32 (vhp->attributes);
    vcb->vcbJinfoBlock  = SWAP_BE32(vhp->journalInfoBlock);
    vcb->vcbClpSiz      = SWAP_BE32 (vhp->rsrcClumpSize);
    vcb->vcbNxtCNID     = SWAP_BE32 (vhp->nextCatalogID);
    vcb->vcbVolBkUp     = to_bsd_time(SWAP_BE32(vhp->backupDate));
    vcb->vcbWrCnt       = SWAP_BE32 (vhp->writeCount);
    vcb->vcbFilCnt      = SWAP_BE32 (vhp->fileCount);
    vcb->vcbDirCnt      = SWAP_BE32 (vhp->folderCount);
    HFS_UPDATE_NEXT_ALLOCATION(vcb, SWAP_BE32 (vhp->nextAllocation));
    vcb->totalBlocks    = SWAP_BE32 (vhp->totalBlocks);
    vcb->freeBlocks     = SWAP_BE32 (vhp->freeBlocks);
    vcb->encodingsBitmap    = SWAP_BE64 (vhp->encodingsBitmap);
    bcopy(vhp->finderInfo, vcb->vcbFndrInfo, sizeof(vhp->finderInfo));
    vcb->localCreateDate    = SWAP_BE32 (vhp->createDate); /* hfs+ create date is in local time */

    /*
     * Re-load meta-file vnode data (extent info, file size, etc).
     */
    forkp = VTOF((struct vnode *)vcb->extentsRefNum);
    for (i = 0; i < kHFSPlusExtentDensity; i++) {
        forkp->ff_extents[i].startBlock =
            SWAP_BE32 (vhp->extentsFile.extents[i].startBlock);
        forkp->ff_extents[i].blockCount =
            SWAP_BE32 (vhp->extentsFile.extents[i].blockCount);
    }
    forkp->ff_size      = SWAP_BE64 (vhp->extentsFile.logicalSize);
    forkp->ff_blocks    = SWAP_BE32 (vhp->extentsFile.totalBlocks);
    forkp->ff_clumpsize = SWAP_BE32 (vhp->extentsFile.clumpSize);


    forkp = VTOF((struct vnode *)vcb->catalogRefNum);
    for (i = 0; i < kHFSPlusExtentDensity; i++) {
        forkp->ff_extents[i].startBlock =
            SWAP_BE32 (vhp->catalogFile.extents[i].startBlock);
        forkp->ff_extents[i].blockCount =
            SWAP_BE32 (vhp->catalogFile.extents[i].blockCount);
    }
    forkp->ff_size      = SWAP_BE64 (vhp->catalogFile.logicalSize);
    forkp->ff_blocks    = SWAP_BE32 (vhp->catalogFile.totalBlocks);
    forkp->ff_clumpsize = SWAP_BE32 (vhp->catalogFile.clumpSize);

    /* Attributes B-tree is optional; only reload it if present. */
    if (hfsmp->hfs_attribute_vp) {
        forkp = VTOF(hfsmp->hfs_attribute_vp);
        for (i = 0; i < kHFSPlusExtentDensity; i++) {
            forkp->ff_extents[i].startBlock =
                SWAP_BE32 (vhp->attributesFile.extents[i].startBlock);
            forkp->ff_extents[i].blockCount =
                SWAP_BE32 (vhp->attributesFile.extents[i].blockCount);
        }
        forkp->ff_size      = SWAP_BE64 (vhp->attributesFile.logicalSize);
        forkp->ff_blocks    = SWAP_BE32 (vhp->attributesFile.totalBlocks);
        forkp->ff_clumpsize = SWAP_BE32 (vhp->attributesFile.clumpSize);
    }

    forkp = VTOF((struct vnode *)vcb->allocationsRefNum);
    for (i = 0; i < kHFSPlusExtentDensity; i++) {
        forkp->ff_extents[i].startBlock =
            SWAP_BE32 (vhp->allocationFile.extents[i].startBlock);
        forkp->ff_extents[i].blockCount =
            SWAP_BE32 (vhp->allocationFile.extents[i].blockCount);
    }
    forkp->ff_size      = SWAP_BE64 (vhp->allocationFile.logicalSize);
    forkp->ff_blocks    = SWAP_BE32 (vhp->allocationFile.totalBlocks);
    forkp->ff_clumpsize = SWAP_BE32 (vhp->allocationFile.clumpSize);

    buf_brelse(bp);
    vhp = NULL;

    /*
     * Re-load B-tree header data
     */
    forkp = VTOF((struct vnode *)vcb->extentsRefNum);
    if ( (error = MacToVFSError( BTReloadData((FCB*)forkp) )) )
        return (error);

    forkp = VTOF((struct vnode *)vcb->catalogRefNum);
    if ( (error = MacToVFSError( BTReloadData((FCB*)forkp) )) )
        return (error);

    if (hfsmp->hfs_attribute_vp) {
        forkp = VTOF(hfsmp->hfs_attribute_vp);
        if ( (error = MacToVFSError( BTReloadData((FCB*)forkp) )) )
            return (error);
    }

    /* Reload the volume name */
    if ((error = cat_idlookup(hfsmp, kHFSRootFolderID, 0, &cndesc, NULL, NULL)))
        return (error);
    vcb->volumeNameEncodingHint = cndesc.cd_encoding;
    /* NOTE(review): bcopy copies at most 255 bytes and does not append a
     * NUL terminator itself — presumably vcbVN retains a terminator from
     * the prior contents or cd_nameptr is NUL-terminated within the copied
     * length; confirm against cat_idlookup's descriptor format. */
    bcopy(cndesc.cd_nameptr, vcb->vcbVN, min(255, cndesc.cd_namelen));
    cat_releasedesc(&cndesc);

    /* Re-establish private/hidden directories. */
    hfs_privatedir_init(hfsmp, FILE_HARDLINKS);
    hfs_privatedir_init(hfsmp, DIR_HARDLINKS);

    /* In case any volume information changed to trigger a notification */
    hfs_generate_volume_notifications(hfsmp);

    return (0);
}
836
837
838
/*
 * hfs_syncer - deferred metadata sync thread-call for an HFS mount.
 *
 * Scheduled via thread_call_enter_delayed() (see the reschedule branch
 * below).  Three behaviors, chosen by current I/O load and elapsed time:
 *   1. Pending writes exceed hfs_max_pending_io: hold a transaction open
 *      to block new I/O, wait for 2/3 of the backlog to drain, flush the
 *      journal, and adaptively retune hfs_max_pending_io from the
 *      observed drain rate.
 *   2. Enough time has passed since the last sync and no transaction is
 *      active: flush the journal (or hfs_sync() for non-journaled
 *      volumes).
 *   3. Otherwise, if no threads are in a transaction: reschedule itself
 *      and return early WITHOUT decrementing the scheduling counters.
 *
 * On completion (cases 1 and 2) decrements hfs_sync_scheduled and
 * hfs_sync_incomplete and wakes waiters on hfs_sync_incomplete.
 */
static void
hfs_syncer(void *arg0, void *unused)
{
#pragma unused(unused)

    struct hfsmount *hfsmp = arg0;
    clock_sec_t secs;
    clock_usec_t usecs;
    uint32_t delay = HFS_META_DELAY;
    uint64_t now;
    /* NOTE(review): no_max is written below but never read in this
     * function; it appears vestigial within the visible code. */
    static int no_max=1;

    clock_get_calendar_microtime(&secs, &usecs);
    now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs;

    //
    // If the amount of pending writes is more than our limit, wait
    // for 2/3 of it to drain and then flush the journal.
    //
    if (hfsmp->hfs_mp->mnt_pending_write_size > hfsmp->hfs_max_pending_io) {
        int counter=0;
        uint64_t pending_io, start, rate;

        no_max = 0;

        hfs_start_transaction(hfsmp);   // so we hold off any new i/o's

        pending_io = hfsmp->hfs_mp->mnt_pending_write_size;

        clock_get_calendar_microtime(&secs, &usecs);
        start = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs;

        /* Poll until 2/3 of the backlog drains, bounded at 500 sleeps. */
        while(hfsmp->hfs_mp->mnt_pending_write_size > (pending_io/3) && counter++ < 500) {
            tsleep((caddr_t)hfsmp, PRIBIO, "hfs-wait-for-io-to-drain", 10);
        }

        if (counter >= 500) {
            printf("hfs: timed out waiting for io to drain (%lld)\n", (int64_t)hfsmp->hfs_mp->mnt_pending_write_size);
        }

        if (hfsmp->jnl) {
            journal_flush(hfsmp->jnl);
        } else {
            hfs_sync(hfsmp->hfs_mp, MNT_WAIT, vfs_context_kernel());
        }

        clock_get_calendar_microtime(&secs, &usecs);
        now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs;
        hfsmp->hfs_last_sync_time = now;
        rate = ((pending_io * 1000000ULL) / (now - start));     // yields bytes per second

        hfs_end_transaction(hfsmp);

        //
        // If a reasonable amount of time elapsed then check the
        // i/o rate.  If it's taking less than 1 second or more
        // than 2 seconds, adjust hfs_max_pending_io so that we
        // will allow about 1.5 seconds of i/o to queue up.
        //
        if ((now - start) >= 300000) {
            uint64_t scale = (pending_io * 100) / rate;

            if (scale < 100 || scale > 200) {
                // set it so that it should take about 1.5 seconds to drain
                hfsmp->hfs_max_pending_io = (rate * 150ULL) / 100ULL;
            }
        }

    } else if (   ((now - hfsmp->hfs_last_sync_time) >= 5000000ULL)
               || (((now - hfsmp->hfs_last_sync_time) >= 100000LL)
                   && ((now - hfsmp->hfs_last_sync_request_time) >= 100000LL)
                   && (hfsmp->hfs_active_threads == 0)
                   && (hfsmp->hfs_global_lock_nesting == 0))) {

        //
        // Flush the journal if more than 5 seconds elapsed since
        // the last sync OR we have not sync'ed recently and the
        // last sync request time was more than 100 milliseconds
        // ago and no one is in the middle of a transaction right
        // now.  Else we defer the sync and reschedule it.
        //
        if (hfsmp->jnl) {
            lck_rw_lock_shared(&hfsmp->hfs_global_lock);

            journal_flush(hfsmp->jnl);

            lck_rw_unlock_shared(&hfsmp->hfs_global_lock);
        } else {
            hfs_sync(hfsmp->hfs_mp, MNT_WAIT, vfs_context_kernel());
        }

        clock_get_calendar_microtime(&secs, &usecs);
        now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs;
        hfsmp->hfs_last_sync_time = now;

    } else if (hfsmp->hfs_active_threads == 0) {
        uint64_t deadline;

        /* Transactions in flight; try again after HFS_META_DELAY. */
        clock_interval_to_deadline(delay, HFS_MILLISEC_SCALE, &deadline);
        thread_call_enter_delayed(hfsmp->hfs_syncer, deadline);

        // note: we intentionally return early here and do not
        // decrement the sync_scheduled and sync_incomplete
        // variables because we rescheduled the timer.

        return;
    }

    //
    // NOTE: we decrement these *after* we're done the journal_flush() since
    // it can take a significant amount of time and so we don't want more
    // callbacks scheduled until we're done this one.
    //
    OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_scheduled);
    OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_incomplete);
    wakeup((caddr_t)&hfsmp->hfs_sync_incomplete);
}
956
957
958 extern int IOBSDIsMediaEjectable( const char *cdev_name );
959
960 /*
961 * Common code for mount and mountroot
962 */
/*
 * hfs_mountfs - do the real work of mounting an HFS/HFS+ volume.
 *
 * Shared by hfs_mount and hfs_mountroot (the latter passes args == NULL).
 * Probes the device's logical/physical block sizes, reads the Master
 * Directory Block, allocates and initializes the hfsmount structure, and
 * then mounts either an HFS standard volume (read-only on 10.6+) or an
 * HFS Plus / embedded HFS Plus volume, replaying the journal if needed.
 *
 * If journal_replay_only is nonzero, only the journal replay is performed
 * and the function returns via the error_exit path with retval == 0.
 *
 * Returns 0 on success or an errno value; on failure all partially
 * acquired resources (buffer, MDB copy, hfsmount, journal vnode, device
 * reference) are released in the error_exit block.
 */
static int
hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
            int journal_replay_only, vfs_context_t context)
{
	struct proc *p = vfs_context_proc(context);
	int retval = E_NONE;
	struct hfsmount *hfsmp = NULL;
	struct buf *bp;
	dev_t dev;
	HFSMasterDirectoryBlock *mdbp = NULL;
	int ronly;
#if QUOTA
	int i;
#endif
	int mntwrapper;
	kauth_cred_t cred;
	u_int64_t disksize;
	daddr64_t log_blkcnt;
	u_int32_t log_blksize;
	u_int32_t phys_blksize;
	u_int32_t minblksize;
	u_int32_t iswritable;
	daddr64_t mdb_offset;
	int isvirtual = 0;
	int isroot = 0;

	if (args == NULL) {
		/* only hfs_mountroot passes us NULL as the 'args' argument */
		isroot = 1;
	}

	ronly = vfs_isrdonly(mp);
	dev = vnode_specrdev(devvp);
	cred = p ? vfs_context_ucred(context) : NOCRED;
	mntwrapper = 0;

	bp = NULL;
	hfsmp = NULL;
	mdbp = NULL;
	minblksize = kHFSBlockSize;

	/* Advisory locking should be handled at the VFS layer */
	vfs_setlocklocal(mp);

	/* Get the logical block size (treated as physical block size everywhere) */
	if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&log_blksize, 0, context)) {
		retval = ENXIO;
		goto error_exit;
	}
	/* Sanity-check: reject zero or absurdly large (>1GB) block sizes. */
	if (log_blksize == 0 || log_blksize > 1024*1024*1024) {
		printf("hfs: logical block size 0x%x looks bad.  Not mounting.\n", log_blksize);
		retval = ENXIO;
		goto error_exit;
	}

	/* Get the physical block size. */
	retval = VNOP_IOCTL(devvp, DKIOCGETPHYSICALBLOCKSIZE, (caddr_t)&phys_blksize, 0, context);
	if (retval) {
		if ((retval != ENOTSUP) && (retval != ENOTTY)) {
			retval = ENXIO;
			goto error_exit;
		}
		/* If device does not support this ioctl, assume that physical
		 * block size is same as logical block size
		 */
		phys_blksize = log_blksize;
	}
	if (phys_blksize == 0 || phys_blksize > 1024*1024*1024) {
		printf("hfs: physical block size 0x%x looks bad.  Not mounting.\n", phys_blksize);
		retval = ENXIO;
		goto error_exit;
	}

	/* Switch to 512 byte sectors (temporarily) */
	if (log_blksize > 512) {
		u_int32_t size512 = 512;

		if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&size512, FWRITE, context)) {
			retval = ENXIO;
			goto error_exit;
		}
	}
	/* Get the number of 512 byte physical blocks. */
	if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) {
		/* resetting block size may fail if getting block count did */
		(void)VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context);

		retval = ENXIO;
		goto error_exit;
	}
	/* Compute an accurate disk size (i.e. within 512 bytes) */
	disksize = (u_int64_t)log_blkcnt * (u_int64_t)512;

	/*
	 * On Tiger it is not necessary to switch the device
	 * block size to be 4k if there are more than 31-bits
	 * worth of blocks but to insure compatibility with
	 * pre-Tiger systems we have to do it.
	 *
	 * If the device size is not a multiple of 4K (8 * 512), then
	 * switching the logical block size isn't going to help because
	 * we will be unable to write the alternate volume header.
	 * In this case, just leave the logical block size unchanged.
	 */
	if (log_blkcnt > 0x000000007fffffff && (log_blkcnt & 7) == 0) {
		minblksize = log_blksize = 4096;
		if (phys_blksize < log_blksize)
			phys_blksize = log_blksize;
	}

	/*
	 * The cluster layer is not currently prepared to deal with a logical
	 * block size larger than the system's page size.  (It can handle
	 * blocks per page, but not multiple pages per block.)  So limit the
	 * logical block size to the page size.
	 */
	if (log_blksize > PAGE_SIZE)
		log_blksize = PAGE_SIZE;

	/* Now switch to our preferred physical block size. */
	if (log_blksize > 512) {
		if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context)) {
			retval = ENXIO;
			goto error_exit;
		}
		/* Get the count of physical blocks. */
		if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) {
			retval = ENXIO;
			goto error_exit;
		}
	}
	/*
	 * At this point:
	 *   minblksize is the minimum physical block size
	 *   log_blksize has our preferred physical block size
	 *   log_blkcnt has the total number of physical blocks
	 */

	/* Read the Master Directory Block (primary volume header sector). */
	mdb_offset = (daddr64_t)HFS_PRI_SECTOR(log_blksize);
	if ((retval = (int)buf_meta_bread(devvp,
				HFS_PHYSBLK_ROUNDDOWN(mdb_offset, (phys_blksize/log_blksize)),
				phys_blksize, cred, &bp))) {
		goto error_exit;
	}
	MALLOC(mdbp, HFSMasterDirectoryBlock *, kMDBSize, M_TEMP, M_WAITOK);
	if (mdbp == NULL) {
		retval = ENOMEM;
		goto error_exit;
	}
	/* Copy the MDB out of the buffer so the buffer can be released now. */
	bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize), mdbp, kMDBSize);
	buf_brelse(bp);
	bp = NULL;

	MALLOC(hfsmp, struct hfsmount *, sizeof(struct hfsmount), M_HFSMNT, M_WAITOK);
	if (hfsmp == NULL) {
		retval = ENOMEM;
		goto error_exit;
	}
	bzero(hfsmp, sizeof(struct hfsmount));

	hfs_chashinit_finish(hfsmp);

	/*
	 * Init the volume information structure
	 */

	lck_mtx_init(&hfsmp->hfs_mutex, hfs_mutex_group, hfs_lock_attr);
	lck_mtx_init(&hfsmp->hfc_mutex, hfs_mutex_group, hfs_lock_attr);
	lck_rw_init(&hfsmp->hfs_global_lock, hfs_rwlock_group, hfs_lock_attr);
	lck_rw_init(&hfsmp->hfs_insync, hfs_rwlock_group, hfs_lock_attr);

	vfs_setfsprivate(mp, hfsmp);
	hfsmp->hfs_mp = mp;			/* Make VFSTOHFS work */
	hfsmp->hfs_raw_dev = vnode_specrdev(devvp);
	hfsmp->hfs_devvp = devvp;
	vnode_ref(devvp);  /* Hold a ref on the device, dropped when hfsmp is freed. */
	hfsmp->hfs_logical_block_size = log_blksize;
	hfsmp->hfs_logical_block_count = log_blkcnt;
	hfsmp->hfs_physical_block_size = phys_blksize;
	hfsmp->hfs_log_per_phys = (phys_blksize / log_blksize);
	/* Assume writable media; corrected below via DKIOCISWRITABLE. */
	hfsmp->hfs_flags |= HFS_WRITEABLE_MEDIA;
	if (ronly)
		hfsmp->hfs_flags |= HFS_READ_ONLY;
	if (((unsigned int)vfs_flags(mp)) & MNT_UNKNOWNPERMISSIONS)
		hfsmp->hfs_flags |= HFS_UNKNOWN_PERMS;

#if QUOTA
	for (i = 0; i < MAXQUOTAS; i++)
		dqfileinit(&hfsmp->hfs_qfiles[i]);
#endif

	if (args) {
		hfsmp->hfs_uid = (args->hfs_uid == (uid_t)VNOVAL) ? UNKNOWNUID : args->hfs_uid;
		if (hfsmp->hfs_uid == 0xfffffffd) hfsmp->hfs_uid = UNKNOWNUID;
		hfsmp->hfs_gid = (args->hfs_gid == (gid_t)VNOVAL) ? UNKNOWNGID : args->hfs_gid;
		if (hfsmp->hfs_gid == 0xfffffffd) hfsmp->hfs_gid = UNKNOWNGID;
		vfs_setowner(mp, hfsmp->hfs_uid, hfsmp->hfs_gid);				/* tell the VFS */
		if (args->hfs_mask != (mode_t)VNOVAL) {
			hfsmp->hfs_dir_mask = args->hfs_mask & ALLPERMS;
			if (args->flags & HFSFSMNT_NOXONFILES) {
				hfsmp->hfs_file_mask = (args->hfs_mask & DEFFILEMODE);
			} else {
				hfsmp->hfs_file_mask = args->hfs_mask & ALLPERMS;
			}
		} else {
			hfsmp->hfs_dir_mask = UNKNOWNPERMISSIONS & ALLPERMS;		/* 0777: rwx---rwx */
			hfsmp->hfs_file_mask = UNKNOWNPERMISSIONS & DEFFILEMODE;	/* 0666: no --x by default? */
		}
		if ((args->flags != (int)VNOVAL) && (args->flags & HFSFSMNT_WRAPPER))
			mntwrapper = 1;
	} else {
		/* Even w/o explicit mount arguments, MNT_UNKNOWNPERMISSIONS requires setting up uid, gid, and mask: */
		if (((unsigned int)vfs_flags(mp)) & MNT_UNKNOWNPERMISSIONS) {
			hfsmp->hfs_uid = UNKNOWNUID;
			hfsmp->hfs_gid = UNKNOWNGID;
			vfs_setowner(mp, hfsmp->hfs_uid, hfsmp->hfs_gid);			/* tell the VFS */
			hfsmp->hfs_dir_mask = UNKNOWNPERMISSIONS & ALLPERMS;		/* 0777: rwx---rwx */
			hfsmp->hfs_file_mask = UNKNOWNPERMISSIONS & DEFFILEMODE;	/* 0666: no --x by default? */
		}
	}

	/* Find out if disk media is writable. */
	if (VNOP_IOCTL(devvp, DKIOCISWRITABLE, (caddr_t)&iswritable, 0, context) == 0) {
		if (iswritable)
			hfsmp->hfs_flags |= HFS_WRITEABLE_MEDIA;
		else
			hfsmp->hfs_flags &= ~HFS_WRITEABLE_MEDIA;
	}

	// record the current time at which we're mounting this volume
	struct timeval tv;
	microtime(&tv);
	hfsmp->hfs_mount_time = tv.tv_sec;

	/* Mount a standard HFS disk */
	if ((SWAP_BE16(mdbp->drSigWord) == kHFSSigWord) &&
	    (mntwrapper || (SWAP_BE16(mdbp->drEmbedSigWord) != kHFSPlusSigWord))) {

		/* On 10.6 and beyond, non read-only mounts for HFS standard vols get rejected */
		if (vfs_isrdwr(mp)) {
			retval = EROFS;
			goto error_exit;
		}
		/* Treat it as if it's read-only and not writeable */
		hfsmp->hfs_flags |= HFS_READ_ONLY;
		hfsmp->hfs_flags &= ~HFS_WRITEABLE_MEDIA;

		/* If only journal replay is requested, exit immediately */
		if (journal_replay_only) {
			retval = 0;
			goto error_exit;
		}

		if ((vfs_flags(mp) & MNT_ROOTFS)) {
			retval = EINVAL;  /* Cannot root from HFS standard disks */
			goto error_exit;
		}
		/* HFS disks can only use 512 byte physical blocks */
		if (log_blksize > kHFSBlockSize) {
			log_blksize = kHFSBlockSize;
			if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context)) {
				retval = ENXIO;
				goto error_exit;
			}
			if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) {
				retval = ENXIO;
				goto error_exit;
			}
			hfsmp->hfs_logical_block_size = log_blksize;
			hfsmp->hfs_logical_block_count = log_blkcnt;
			hfsmp->hfs_physical_block_size = log_blksize;
			hfsmp->hfs_log_per_phys = 1;
		}
		if (args) {
			hfsmp->hfs_encoding = args->hfs_encoding;
			HFSTOVCB(hfsmp)->volumeNameEncodingHint = args->hfs_encoding;

			/* establish the timezone */
			gTimeZone = args->hfs_timezone;
		}

		retval = hfs_getconverter(hfsmp->hfs_encoding, &hfsmp->hfs_get_unicode,
					&hfsmp->hfs_get_hfsname);
		if (retval)
			goto error_exit;

		retval = hfs_MountHFSVolume(hfsmp, mdbp, p);
		if (retval)
			(void) hfs_relconverter(hfsmp->hfs_encoding);

	} else /* Mount an HFS Plus disk */ {
		HFSPlusVolumeHeader *vhp;
		off_t embeddedOffset;
		int   jnl_disable = 0;

		/* Get the embedded Volume Header */
		if (SWAP_BE16(mdbp->drEmbedSigWord) == kHFSPlusSigWord) {
			embeddedOffset = SWAP_BE16(mdbp->drAlBlSt) * kHFSBlockSize;
			embeddedOffset += (u_int64_t)SWAP_BE16(mdbp->drEmbedExtent.startBlock) *
			                  (u_int64_t)SWAP_BE32(mdbp->drAlBlkSiz);

			/*
			 * If the embedded volume doesn't start on a block
			 * boundary, then switch the device to a 512-byte
			 * block size so everything will line up on a block
			 * boundary.
			 */
			if ((embeddedOffset % log_blksize) != 0) {
				printf("hfs_mountfs: embedded volume offset not"
				    " a multiple of physical block size (%d);"
				    " switching to 512\n", log_blksize);
				log_blksize = 512;
				if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE,
				    (caddr_t)&log_blksize, FWRITE, context)) {
					retval = ENXIO;
					goto error_exit;
				}
				if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT,
				    (caddr_t)&log_blkcnt, 0, context)) {
					retval = ENXIO;
					goto error_exit;
				}
				/* Note: relative block count adjustment */
				hfsmp->hfs_logical_block_count *=
				    hfsmp->hfs_logical_block_size / log_blksize;

				/* Update logical /physical block size */
				hfsmp->hfs_logical_block_size = log_blksize;
				hfsmp->hfs_physical_block_size = log_blksize;
				phys_blksize = log_blksize;
				hfsmp->hfs_log_per_phys = 1;
			}

			disksize = (u_int64_t)SWAP_BE16(mdbp->drEmbedExtent.blockCount) *
			           (u_int64_t)SWAP_BE32(mdbp->drAlBlkSiz);

			hfsmp->hfs_logical_block_count = disksize / log_blksize;

			/* Re-read the volume header from within the embedded volume. */
			mdb_offset = (daddr64_t)((embeddedOffset / log_blksize) + HFS_PRI_SECTOR(log_blksize));
			retval = (int)buf_meta_bread(devvp, HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
					phys_blksize, cred, &bp);
			if (retval)
				goto error_exit;
			bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize), mdbp, 512);
			buf_brelse(bp);
			bp = NULL;
			vhp = (HFSPlusVolumeHeader*) mdbp;

		} else /* pure HFS+ */ {
			embeddedOffset = 0;
			vhp = (HFSPlusVolumeHeader*) mdbp;
		}

		/*
		 * On inconsistent disks, do not allow read-write mount
		 * unless it is the boot volume being mounted.  We also
		 * always want to replay the journal if the journal_replay_only
		 * flag is set because that will (most likely) get the
		 * disk into a consistent state before fsck_hfs starts
		 * looking at it.
		 */
		if (  !(vfs_flags(mp) & MNT_ROOTFS)
		   && (SWAP_BE32(vhp->attributes) & kHFSVolumeInconsistentMask)
		   && !journal_replay_only
		   && !(hfsmp->hfs_flags & HFS_READ_ONLY)) {
			retval = EINVAL;
			goto error_exit;
		}


		// XXXdbg
		//
		hfsmp->jnl = NULL;
		hfsmp->jvp = NULL;
		if (args != NULL && (args->flags & HFSFSMNT_EXTENDED_ARGS) &&
		    args->journal_disable) {
			jnl_disable = 1;
		}

		//
		// We only initialize the journal here if the last person
		// to mount this volume was journaling aware.  Otherwise
		// we delay journal initialization until later at the end
		// of hfs_MountHFSPlusVolume() because the last person who
		// mounted it could have messed things up behind our back
		// (so we need to go find the .journal file, make sure it's
		// the right size, re-sync up if it was moved, etc).
		//
		if (   (SWAP_BE32(vhp->lastMountedVersion) == kHFSJMountVersion)
			&& (SWAP_BE32(vhp->attributes) & kHFSVolumeJournaledMask)
			&& !jnl_disable) {

			// if we're able to init the journal, mark the mount
			// point as journaled.
			//
			if ((retval = hfs_early_journal_init(hfsmp, vhp, args, embeddedOffset, mdb_offset, mdbp, cred)) == 0) {
				vfs_setflags(mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
			} else {
				if (retval == EROFS) {
					// EROFS is a special error code that means the volume has an external
					// journal which we couldn't find.  in that case we do not want to
					// rewrite the volume header - we'll just refuse to mount the volume.
					retval = EINVAL;
					goto error_exit;
				}

				// if the journal failed to open, then set the lastMountedVersion
				// to be "FSK!" which fsck_hfs will see and force the fsck instead
				// of just bailing out because the volume is journaled.
				if (!ronly) {
					HFSPlusVolumeHeader *jvhp;

					hfsmp->hfs_flags |= HFS_NEED_JNL_RESET;

					if (mdb_offset == 0) {
						mdb_offset = (daddr64_t)((embeddedOffset / log_blksize) + HFS_PRI_SECTOR(log_blksize));
					}

					bp = NULL;
					retval = (int)buf_meta_bread(devvp,
							HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
							phys_blksize, cred, &bp);
					if (retval == 0) {
						jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize));

						if (SWAP_BE16(jvhp->signature) == kHFSPlusSigWord || SWAP_BE16(jvhp->signature) == kHFSXSigWord) {
							printf ("hfs(1): Journal replay fail.  Writing lastMountVersion as FSK!\n");
							jvhp->lastMountedVersion = SWAP_BE32(kFSKMountVersion);
							buf_bwrite(bp);
						} else {
							buf_brelse(bp);
						}
						bp = NULL;
					} else if (bp) {
						buf_brelse(bp);
						// clear this so the error exit path won't try to use it
						bp = NULL;
					}
				}

				// if this isn't the root device just bail out.
				// If it is the root device we just continue on
				// in the hopes that fsck_hfs will be able to
				// fix any damage that exists on the volume.
				if ( !(vfs_flags(mp) & MNT_ROOTFS)) {
					retval = EINVAL;
					goto error_exit;
				}
			}
		}
		// XXXdbg

		/* Either the journal is replayed successfully, or there
		 * was nothing to replay, or no journal exists.  In any case,
		 * return success.
		 */
		if (journal_replay_only) {
			retval = 0;
			goto error_exit;
		}

		(void) hfs_getconverter(0, &hfsmp->hfs_get_unicode, &hfsmp->hfs_get_hfsname);

		retval = hfs_MountHFSPlusVolume(hfsmp, vhp, embeddedOffset, disksize, p, args, cred);
		/*
		 * If the backend didn't like our physical blocksize
		 * then retry with physical blocksize of 512.
		 */
		if ((retval == ENXIO) && (log_blksize > 512) && (log_blksize != minblksize)) {
			printf("hfs_mountfs: could not use physical block size "
					"(%d) switching to 512\n", log_blksize);
			log_blksize = 512;
			if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context)) {
				retval = ENXIO;
				goto error_exit;
			}
			if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) {
				retval = ENXIO;
				goto error_exit;
			}
			devvp->v_specsize = log_blksize;
			/* Note: relative block count adjustment (in case this is an embedded volume). */
			hfsmp->hfs_logical_block_count *= hfsmp->hfs_logical_block_size / log_blksize;
			hfsmp->hfs_logical_block_size = log_blksize;
			hfsmp->hfs_log_per_phys = hfsmp->hfs_physical_block_size / log_blksize;

			if (hfsmp->jnl && hfsmp->jvp == devvp) {
				// close and re-open this with the new block size
				journal_close(hfsmp->jnl);
				hfsmp->jnl = NULL;
				if (hfs_early_journal_init(hfsmp, vhp, args, embeddedOffset, mdb_offset, mdbp, cred) == 0) {
					vfs_setflags(mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
				} else {
					// if the journal failed to open, then set the lastMountedVersion
					// to be "FSK!" which fsck_hfs will see and force the fsck instead
					// of just bailing out because the volume is journaled.
					if (!ronly) {
						HFSPlusVolumeHeader *jvhp;

						hfsmp->hfs_flags |= HFS_NEED_JNL_RESET;

						if (mdb_offset == 0) {
							mdb_offset = (daddr64_t)((embeddedOffset / log_blksize) + HFS_PRI_SECTOR(log_blksize));
						}

						bp = NULL;
						retval = (int)buf_meta_bread(devvp, HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
								phys_blksize, cred, &bp);
						if (retval == 0) {
							jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize));

							if (SWAP_BE16(jvhp->signature) == kHFSPlusSigWord || SWAP_BE16(jvhp->signature) == kHFSXSigWord) {
								printf ("hfs(2): Journal replay fail.  Writing lastMountVersion as FSK!\n");
								jvhp->lastMountedVersion = SWAP_BE32(kFSKMountVersion);
								buf_bwrite(bp);
							} else {
								buf_brelse(bp);
							}
							bp = NULL;
						} else if (bp) {
							buf_brelse(bp);
							// clear this so the error exit path won't try to use it
							bp = NULL;
						}
					}

					// if this isn't the root device just bail out.
					// If it is the root device we just continue on
					// in the hopes that fsck_hfs will be able to
					// fix any damage that exists on the volume.
					if ( !(vfs_flags(mp) & MNT_ROOTFS)) {
						retval = EINVAL;
						goto error_exit;
					}
				}
			}

			/* Try again with a smaller block size... */
			retval = hfs_MountHFSPlusVolume(hfsmp, vhp, embeddedOffset, disksize, p, args, cred);
		}
		if (retval)
			(void) hfs_relconverter(0);
	}

	// save off a snapshot of the mtime from the previous mount
	// (for matador).
	hfsmp->hfs_last_mounted_mtime = hfsmp->hfs_mtime;

	if ( retval ) {
		goto error_exit;
	}

	mp->mnt_vfsstat.f_fsid.val[0] = (long)dev;
	mp->mnt_vfsstat.f_fsid.val[1] = vfs_typenum(mp);
	vfs_setmaxsymlen(mp, 0);

	mp->mnt_vtable->vfc_vfsflags |= VFC_VFSNATIVEXATTR;
#if NAMEDSTREAMS
	mp->mnt_kern_flag |= MNTK_NAMED_STREAMS;
#endif
	if (!(hfsmp->hfs_flags & HFS_STANDARD)) {
		/* Tell VFS that we support directory hard links. */
		mp->mnt_vtable->vfc_vfsflags |= VFC_VFSDIRLINKS;
	} else {
		/* HFS standard doesn't support extended readdir! */
		mp->mnt_vtable->vfc_vfsflags &= ~VFC_VFSREADDIR_EXTENDED;
	}

	if (args) {
		/*
		 * Set the free space warning levels for a non-root volume:
		 *
		 * Set the "danger" limit to 1% of the volume size or 100MB, whichever
		 * is less.  Set the "warning" limit to 2% of the volume size or 150MB,
		 * whichever is less.  And last, set the "desired" freespace level to
		 * to 3% of the volume size or 200MB, whichever is less.
		 */
		hfsmp->hfs_freespace_notify_dangerlimit =
			MIN(HFS_VERYLOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize,
				(HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_VERYLOWDISKTRIGGERFRACTION);
		hfsmp->hfs_freespace_notify_warninglimit =
			MIN(HFS_LOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize,
				(HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_LOWDISKTRIGGERFRACTION);
		hfsmp->hfs_freespace_notify_desiredlevel =
			MIN(HFS_LOWDISKSHUTOFFLEVEL / HFSTOVCB(hfsmp)->blockSize,
				(HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_LOWDISKSHUTOFFFRACTION);
	} else {
		/*
		 * Set the free space warning levels for the root volume:
		 *
		 * Set the "danger" limit to 5% of the volume size or 125MB, whichever
		 * is less.  Set the "warning" limit to 10% of the volume size or 250MB,
		 * whichever is less.  And last, set the "desired" freespace level to
		 * to 11% of the volume size or 375MB, whichever is less.
		 */
		hfsmp->hfs_freespace_notify_dangerlimit =
			MIN(HFS_ROOTVERYLOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize,
				(HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_ROOTVERYLOWDISKTRIGGERFRACTION);
		hfsmp->hfs_freespace_notify_warninglimit =
			MIN(HFS_ROOTLOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize,
				(HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_ROOTLOWDISKTRIGGERFRACTION);
		hfsmp->hfs_freespace_notify_desiredlevel =
			MIN(HFS_ROOTLOWDISKSHUTOFFLEVEL / HFSTOVCB(hfsmp)->blockSize,
				(HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_ROOTLOWDISKSHUTOFFFRACTION);
	};

	/* Check if the file system exists on virtual device, like disk image */
	if (VNOP_IOCTL(devvp, DKIOCISVIRTUAL, (caddr_t)&isvirtual, 0, context) == 0) {
		if (isvirtual) {
			hfsmp->hfs_flags |= HFS_VIRTUAL_DEVICE;
		}
	}

	/* do not allow ejectability checks on the root device */
	if (isroot == 0) {
		/* Ejectable, non-virtual media get a periodic syncer callback
		 * (see hfs_syncer) so data isn't lost on surprise removal. */
		if ((hfsmp->hfs_flags & HFS_VIRTUAL_DEVICE) == 0 &&
				IOBSDIsMediaEjectable(mp->mnt_vfsstat.f_mntfromname)) {
			hfsmp->hfs_max_pending_io = 4096*1024;   // a reasonable value to start with.
			hfsmp->hfs_syncer = thread_call_allocate(hfs_syncer, hfsmp);
			if (hfsmp->hfs_syncer == NULL) {
				printf("hfs: failed to allocate syncer thread callback for %s (%s)\n",
						mp->mnt_vfsstat.f_mntfromname, mp->mnt_vfsstat.f_mntonname);
			}
		}
	}

	/*
	 * Start looking for free space to drop below this level and generate a
	 * warning immediately if needed:
	 */
	hfsmp->hfs_notification_conditions = 0;
	hfs_generate_volume_notifications(hfsmp);

	if (ronly == 0) {
		(void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
	}
	FREE(mdbp, M_TEMP);
	return (0);

error_exit:
	/* Unified unwind: release whatever was acquired before the failure. */
	if (bp)
		buf_brelse(bp);
	if (mdbp)
		FREE(mdbp, M_TEMP);

	if (hfsmp && hfsmp->jvp && hfsmp->jvp != hfsmp->hfs_devvp) {
		vnode_clearmountedon(hfsmp->jvp);
		(void)VNOP_CLOSE(hfsmp->jvp, ronly ? FREAD : FREAD|FWRITE, vfs_context_kernel());
		hfsmp->jvp = NULL;
	}
	if (hfsmp) {
		if (hfsmp->hfs_devvp) {
			vnode_rele(hfsmp->hfs_devvp);
		}
		hfs_delete_chash(hfsmp);

		FREE(hfsmp, M_HFSMNT);
		vfs_setfsprivate(mp, NULL);
	}
	return (retval);
}
1624
1625
1626 /*
1627 * Make a filesystem operational.
1628 * Nothing to do at the moment.
1629 */
1630 /* ARGSUSED */
1631 static int
1632 hfs_start(__unused struct mount *mp, __unused int flags, __unused vfs_context_t context)
1633 {
1634 return (0);
1635 }
1636
1637
1638 /*
1639 * unmount system call
1640 */
static int
hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context)
{
	struct proc *p = vfs_context_proc(context);
	struct hfsmount *hfsmp = VFSTOHFS(mp);
	int retval = E_NONE;
	int flags;
	int force;
	int started_tr = 0;

	flags = 0;
	force = 0;
	/* MNT_FORCE: push ahead past flush failures and force-close vnodes. */
	if (mntflags & MNT_FORCE) {
		flags |= FORCECLOSE;
		force = 1;
	}

	if ((retval = hfs_flushfiles(mp, flags, p)) && !force)
 		return (retval);

	if (hfsmp->hfs_flags & HFS_METADATA_ZONE)
		(void) hfs_recording_suspend(hfsmp);

	/*
	 * Cancel any pending timers for this volume.  Then wait for any timers
	 * which have fired, but whose callbacks have not yet completed.
	 */
	if (hfsmp->hfs_syncer)
	{
		struct timespec ts = {0, 100000000};	/* 0.1 seconds */

		/*
		 * Cancel any timers that have been scheduled, but have not
		 * fired yet.  NOTE: The kernel considers a timer complete as
		 * soon as it starts your callback, so the kernel does not
		 * keep track of the number of callbacks in progress.
		 */
		if (thread_call_cancel(hfsmp->hfs_syncer))
			OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_incomplete);
		thread_call_free(hfsmp->hfs_syncer);
		hfsmp->hfs_syncer = NULL;

		/*
		 * This waits for all of the callbacks that were entered before
		 * we did thread_call_cancel above, but have not completed yet.
		 */
		while(hfsmp->hfs_sync_incomplete > 0)
		{
			msleep((caddr_t)&hfsmp->hfs_sync_incomplete, NULL, PWAIT, "hfs_unmount", &ts);
		}

		if (hfsmp->hfs_sync_incomplete < 0)
			panic("hfs_unmount: pm_sync_incomplete underflow!\n");
	}

	/*
	 * Flush out the b-trees, volume bitmap and Volume Header
	 */
	if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) {
		retval = hfs_start_transaction(hfsmp);
		if (retval == 0) {
		    started_tr = 1;
		} else if (!force) {
		    goto err_exit;
		}

		if (hfsmp->hfs_startup_vp) {
			(void) hfs_lock(VTOC(hfsmp->hfs_startup_vp), HFS_EXCLUSIVE_LOCK);
			retval = hfs_fsync(hfsmp->hfs_startup_vp, MNT_WAIT, 0, p);
			hfs_unlock(VTOC(hfsmp->hfs_startup_vp));
			if (retval && !force)
				goto err_exit;
		}

		if (hfsmp->hfs_attribute_vp) {
			(void) hfs_lock(VTOC(hfsmp->hfs_attribute_vp), HFS_EXCLUSIVE_LOCK);
			retval = hfs_fsync(hfsmp->hfs_attribute_vp, MNT_WAIT, 0, p);
			hfs_unlock(VTOC(hfsmp->hfs_attribute_vp));
			if (retval && !force)
				goto err_exit;
		}

		/* Catalog and extents overflow files always exist; fsync both. */
		(void) hfs_lock(VTOC(hfsmp->hfs_catalog_vp), HFS_EXCLUSIVE_LOCK);
		retval = hfs_fsync(hfsmp->hfs_catalog_vp, MNT_WAIT, 0, p);
		hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
		if (retval && !force)
			goto err_exit;

		(void) hfs_lock(VTOC(hfsmp->hfs_extents_vp), HFS_EXCLUSIVE_LOCK);
		retval = hfs_fsync(hfsmp->hfs_extents_vp, MNT_WAIT, 0, p);
		hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
		if (retval && !force)
			goto err_exit;

		if (hfsmp->hfs_allocation_vp) {
			(void) hfs_lock(VTOC(hfsmp->hfs_allocation_vp), HFS_EXCLUSIVE_LOCK);
			retval = hfs_fsync(hfsmp->hfs_allocation_vp, MNT_WAIT, 0, p);
			hfs_unlock(VTOC(hfsmp->hfs_allocation_vp));
			if (retval && !force)
				goto err_exit;
		}

		if (hfsmp->hfc_filevp && vnode_issystem(hfsmp->hfc_filevp)) {
			retval = hfs_fsync(hfsmp->hfc_filevp, MNT_WAIT, 0, p);
			if (retval && !force)
				goto err_exit;
		}

		/* If runtime corruption was detected, indicate that the volume
		 * was not unmounted cleanly.
		 */
		if (hfsmp->vcbAtrb & kHFSVolumeInconsistentMask) {
			HFSTOVCB(hfsmp)->vcbAtrb &= ~kHFSVolumeUnmountedMask;
		} else {
			HFSTOVCB(hfsmp)->vcbAtrb |= kHFSVolumeUnmountedMask;
		}

		if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
			int i;
			u_int32_t min_start = hfsmp->totalBlocks;

			// set the nextAllocation pointer to the smallest free block number
			// we've seen so on the next mount we won't rescan unnecessarily
			for(i=0; i < (int)hfsmp->vcbFreeExtCnt; i++) {
				if (hfsmp->vcbFreeExt[i].startBlock < min_start) {
					min_start = hfsmp->vcbFreeExt[i].startBlock;
				}
			}
			if (min_start < hfsmp->nextAllocation) {
				hfsmp->nextAllocation = min_start;
			}
		}


		retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
		if (retval) {
			/* Header didn't make it out; don't claim a clean unmount. */
			HFSTOVCB(hfsmp)->vcbAtrb &= ~kHFSVolumeUnmountedMask;
			if (!force)
				goto err_exit;	/* could not flush everything */
		}

		if (started_tr) {
			hfs_end_transaction(hfsmp);
			started_tr = 0;
		}
	}

	if (hfsmp->jnl) {
		hfs_journal_flush(hfsmp);
	}

	/*
	 *	Invalidate our caches and release metadata vnodes
	 */
	(void) hfsUnmount(hfsmp, p);

	/*
	 * Last chance to dump unreferenced system files.
	 */
	(void) vflush(mp, NULLVP, FORCECLOSE);

	if (HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord)
		(void) hfs_relconverter(hfsmp->hfs_encoding);

	// XXXdbg
	if (hfsmp->jnl) {
		journal_close(hfsmp->jnl);
		hfsmp->jnl = NULL;
	}

	VNOP_FSYNC(hfsmp->hfs_devvp, MNT_WAIT, context);

	/* Close and drop the external journal device, if one was in use. */
	if (hfsmp->jvp && hfsmp->jvp != hfsmp->hfs_devvp) {
		vnode_clearmountedon(hfsmp->jvp);
		retval = VNOP_CLOSE(hfsmp->jvp,
		                    hfsmp->hfs_flags & HFS_READ_ONLY ? FREAD : FREAD|FWRITE,
		                    vfs_context_kernel());
		vnode_put(hfsmp->jvp);
		hfsmp->jvp = NULL;
	}
	// XXXdbg

#ifdef HFS_SPARSE_DEV
	/* Drop our reference on the backing fs (if any). */
	if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) && hfsmp->hfs_backingfs_rootvp) {
		struct vnode * tmpvp;

		hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
		tmpvp = hfsmp->hfs_backingfs_rootvp;
		hfsmp->hfs_backingfs_rootvp = NULLVP;
		vnode_rele(tmpvp);
	}
#endif /* HFS_SPARSE_DEV */
	lck_mtx_destroy(&hfsmp->hfc_mutex, hfs_mutex_group);
	/* Drop the reference taken on the device vnode at mount time. */
	vnode_rele(hfsmp->hfs_devvp);

	hfs_delete_chash(hfsmp);
	FREE(hfsmp, M_HFSMNT);

	return (0);

err_exit:
	/* Error unwind: close any transaction we opened, propagate retval. */
	if (started_tr) {
		hfs_end_transaction(hfsmp);
	}
	return retval;
}
1848
1849
1850 /*
1851 * Return the root of a filesystem.
1852 */
1853 static int
1854 hfs_vfs_root(struct mount *mp, struct vnode **vpp, __unused vfs_context_t context)
1855 {
1856 return hfs_vget(VFSTOHFS(mp), (cnid_t)kHFSRootFolderID, vpp, 1);
1857 }
1858
1859
1860 /*
1861 * Do operations associated with quotas
1862 */
1863 #if !QUOTA
1864 static int
1865 hfs_quotactl(__unused struct mount *mp, __unused int cmds, __unused uid_t uid, __unused caddr_t datap, __unused vfs_context_t context)
1866 {
1867 return (ENOTSUP);
1868 }
1869 #else
/*
 * Dispatch a quota control request (QUOTA build).
 *
 * cmds packs the command in the high bits (>> SUBCMDSHIFT) and the quota
 * type in the low bits (& SUBCMDMASK).  uid == ~0U means "the caller's
 * own real uid".  Privileged (suser) access is required for everything
 * except Q_SYNC, Q_QUOTASTAT, and Q_GETQUOTA on one's own uid.
 */
static int
hfs_quotactl(struct mount *mp, int cmds, uid_t uid, caddr_t datap, vfs_context_t context)
{
	struct proc *p = vfs_context_proc(context);
	int cmd, type, error;

	if (uid == ~0U)
		uid = vfs_context_ucred(context)->cr_ruid;
	cmd = cmds >> SUBCMDSHIFT;

	/* Permission check: self-queries and status/sync need no privilege. */
	switch (cmd) {
	case Q_SYNC:
	case Q_QUOTASTAT:
		break;
	case Q_GETQUOTA:
		if (uid == vfs_context_ucred(context)->cr_ruid)
			break;
		/* fall through */
	default:
		/* Every other command (or GETQUOTA on another uid) needs suser. */
		if ( (error = vfs_context_suser(context)) )
			return (error);
	}

	type = cmds & SUBCMDMASK;
	if ((u_int)type >= MAXQUOTAS)
		return (EINVAL);
	/* NOTE: a busy mount returns 0 (silent no-op), not an error. */
	if (vfs_busy(mp, LK_NOWAIT))
		return (0);

	switch (cmd) {

	case Q_QUOTAON:
		error = hfs_quotaon(p, mp, type, datap);
		break;

	case Q_QUOTAOFF:
		error = hfs_quotaoff(p, mp, type);
		break;

	case Q_SETQUOTA:
		error = hfs_setquota(mp, uid, type, datap);
		break;

	case Q_SETUSE:
		error = hfs_setuse(mp, uid, type, datap);
		break;

	case Q_GETQUOTA:
		error = hfs_getquota(mp, uid, type, datap);
		break;

	case Q_SYNC:
		error = hfs_qsync(mp);
		break;

	case Q_QUOTASTAT:
		error = hfs_quotastat(mp, type, datap);
		break;

	default:
		error = EINVAL;
		break;
	}
	vfs_unbusy(mp);

	return (error);
}
1937 #endif /* QUOTA */
1938
1939 /* Subtype is composite of bits */
1940 #define HFS_SUBTYPE_JOURNALED 0x01
1941 #define HFS_SUBTYPE_CASESENSITIVE 0x02
1942 /* bits 2 - 6 reserved */
1943 #define HFS_SUBTYPE_STANDARDHFS 0x80
1944
1945 /*
1946 * Get file system statistics.
1947 */
1948 static int
1949 hfs_statfs(struct mount *mp, register struct vfsstatfs *sbp, __unused vfs_context_t context)
1950 {
1951 ExtendedVCB *vcb = VFSTOVCB(mp);
1952 struct hfsmount *hfsmp = VFSTOHFS(mp);
1953 u_int32_t freeCNIDs;
1954 u_int16_t subtype = 0;
1955
1956 freeCNIDs = (u_int32_t)0xFFFFFFFF - (u_int32_t)vcb->vcbNxtCNID;
1957
1958 sbp->f_bsize = (u_int32_t)vcb->blockSize;
1959 sbp->f_iosize = (size_t)cluster_max_io_size(mp, 0);
1960 sbp->f_blocks = (u_int64_t)((u_int32_t)vcb->totalBlocks);
1961 sbp->f_bfree = (u_int64_t)((u_int32_t )hfs_freeblks(hfsmp, 0));
1962 sbp->f_bavail = (u_int64_t)((u_int32_t )hfs_freeblks(hfsmp, 1));
1963 sbp->f_files = (u_int64_t)((u_int32_t )(vcb->totalBlocks - 2)); /* max files is constrained by total blocks */
1964 sbp->f_ffree = (u_int64_t)((u_int32_t )(MIN(freeCNIDs, sbp->f_bavail)));
1965
1966 /*
1967 * Subtypes (flavors) for HFS
1968 * 0: Mac OS Extended
1969 * 1: Mac OS Extended (Journaled)
1970 * 2: Mac OS Extended (Case Sensitive)
1971 * 3: Mac OS Extended (Case Sensitive, Journaled)
1972 * 4 - 127: Reserved
1973 * 128: Mac OS Standard
1974 *
1975 */
1976 if (hfsmp->hfs_flags & HFS_STANDARD) {
1977 subtype = HFS_SUBTYPE_STANDARDHFS;
1978 } else /* HFS Plus */ {
1979 if (hfsmp->jnl)
1980 subtype |= HFS_SUBTYPE_JOURNALED;
1981 if (hfsmp->hfs_flags & HFS_CASE_SENSITIVE)
1982 subtype |= HFS_SUBTYPE_CASESENSITIVE;
1983 }
1984 sbp->f_fssubtype = subtype;
1985
1986 return (0);
1987 }
1988
1989
1990 //
1991 // XXXdbg -- this is a callback to be used by the journal to
1992 // get meta data blocks flushed out to disk.
1993 //
1994 // XXXdbg -- be smarter and don't flush *every* block on each
1995 // call. try to only flush some so we don't wind up
1996 // being too synchronous.
1997 //
__private_extern__
void
hfs_sync_metadata(void *arg)
{
	/*
	 * Journal flush callback (registered via journal_create/journal_open):
	 * push the primary and alternate volume headers out to disk so the
	 * on-disk superblock catches up with journaled metadata.
	 */
	struct mount *mp = (struct mount *)arg;
	struct hfsmount *hfsmp;
	ExtendedVCB *vcb;
	buf_t bp;
	int retval;
	daddr64_t priIDSector;
	hfsmp = VFSTOHFS(mp);
	vcb = HFSTOVCB(hfsmp);

	// now make sure the super block is flushed
	priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
				  HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size));

	retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
			HFS_PHYSBLK_ROUNDDOWN(priIDSector, hfsmp->hfs_log_per_phys),
			hfsmp->hfs_physical_block_size, NOCRED, &bp);
	/* ENXIO is tolerated silently (e.g. device gone); other errors get logged. */
	if ((retval != 0 ) && (retval != ENXIO)) {
		printf("hfs_sync_metadata: can't read volume header at %d! (retval 0x%x)\n",
		       (int)priIDSector, retval);
	}

	/* Write the buffer only if it is delayed-write and not locked in the cache. */
	if (retval == 0 && ((buf_flags(bp) & (B_DELWRI | B_LOCKED)) == B_DELWRI)) {
		buf_bwrite(bp);
	} else if (bp) {
		buf_brelse(bp);
	}

	// the alternate super block...
	// XXXdbg - we probably don't need to do this each and every time.
	//          hfs_btreeio.c:FlushAlternate() should flag when it was
	//          written...
	if (hfsmp->hfs_alt_id_sector) {
		retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
				HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys),
				hfsmp->hfs_physical_block_size, NOCRED, &bp);
		if (retval == 0 && ((buf_flags(bp) & (B_DELWRI | B_LOCKED)) == B_DELWRI)) {
			buf_bwrite(bp);
		} else if (bp) {
			buf_brelse(bp);
		}
	}
}
2044
2045
/* Arguments threaded through vnode_iterate() to hfs_sync_callback(). */
struct hfs_sync_cargs {
	kauth_cred_t cred;	/* caller's credential; set by hfs_sync(), not read by the callback */
	struct proc *p;		/* requesting process, passed to hfs_fsync() */
	int waitfor;		/* sync mode forwarded to hfs_fsync() */
	int error;		/* last error seen by the callback, reported back to hfs_sync() */
};
2052
2053
2054 static int
2055 hfs_sync_callback(struct vnode *vp, void *cargs)
2056 {
2057 struct cnode *cp;
2058 struct hfs_sync_cargs *args;
2059 int error;
2060
2061 args = (struct hfs_sync_cargs *)cargs;
2062
2063 if (hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK) != 0) {
2064 return (VNODE_RETURNED);
2065 }
2066 cp = VTOC(vp);
2067
2068 if ((cp->c_flag & C_MODIFIED) ||
2069 (cp->c_touch_acctime | cp->c_touch_chgtime | cp->c_touch_modtime) ||
2070 vnode_hasdirtyblks(vp)) {
2071 error = hfs_fsync(vp, args->waitfor, 0, args->p);
2072
2073 if (error)
2074 args->error = error;
2075 }
2076 hfs_unlock(cp);
2077 return (VNODE_RETURNED);
2078 }
2079
2080
2081
2082 /*
2083 * Go through the disk queues to initiate sandbagged IO;
2084 * go through the inodes to write those that have been modified;
2085 * initiate the writing of the super block if it has been modified.
2086 *
2087 * Note: we are always called with the filesystem marked `MPBUSY'.
2088 */
2089 static int
2090 hfs_sync(struct mount *mp, int waitfor, vfs_context_t context)
2091 {
2092 struct proc *p = vfs_context_proc(context);
2093 struct cnode *cp;
2094 struct hfsmount *hfsmp;
2095 ExtendedVCB *vcb;
2096 struct vnode *meta_vp[4];
2097 int i;
2098 int error, allerror = 0;
2099 struct hfs_sync_cargs args;
2100
2101 hfsmp = VFSTOHFS(mp);
2102
2103 /*
2104 * hfs_changefs might be manipulating vnodes so back off
2105 */
2106 if (hfsmp->hfs_flags & HFS_IN_CHANGEFS)
2107 return (0);
2108
2109 if (hfsmp->hfs_flags & HFS_READ_ONLY)
2110 return (EROFS);
2111
2112 /* skip over frozen volumes */
2113 if (!lck_rw_try_lock_shared(&hfsmp->hfs_insync))
2114 return 0;
2115
2116 args.cred = kauth_cred_get();
2117 args.waitfor = waitfor;
2118 args.p = p;
2119 args.error = 0;
2120 /*
2121 * hfs_sync_callback will be called for each vnode
2122 * hung off of this mount point... the vnode will be
2123 * properly referenced and unreferenced around the callback
2124 */
2125 vnode_iterate(mp, 0, hfs_sync_callback, (void *)&args);
2126
2127 if (args.error)
2128 allerror = args.error;
2129
2130 vcb = HFSTOVCB(hfsmp);
2131
2132 meta_vp[0] = vcb->extentsRefNum;
2133 meta_vp[1] = vcb->catalogRefNum;
2134 meta_vp[2] = vcb->allocationsRefNum; /* This is NULL for standard HFS */
2135 meta_vp[3] = hfsmp->hfs_attribute_vp; /* Optional file */
2136
2137 /* Now sync our three metadata files */
2138 for (i = 0; i < 4; ++i) {
2139 struct vnode *btvp;
2140
2141 btvp = meta_vp[i];;
2142 if ((btvp==0) || (vnode_mount(btvp) != mp))
2143 continue;
2144
2145 /* XXX use hfs_systemfile_lock instead ? */
2146 (void) hfs_lock(VTOC(btvp), HFS_EXCLUSIVE_LOCK);
2147 cp = VTOC(btvp);
2148
2149 if (((cp->c_flag & C_MODIFIED) == 0) &&
2150 (cp->c_touch_acctime == 0) &&
2151 (cp->c_touch_chgtime == 0) &&
2152 (cp->c_touch_modtime == 0) &&
2153 vnode_hasdirtyblks(btvp) == 0) {
2154 hfs_unlock(VTOC(btvp));
2155 continue;
2156 }
2157 error = vnode_get(btvp);
2158 if (error) {
2159 hfs_unlock(VTOC(btvp));
2160 continue;
2161 }
2162 if ((error = hfs_fsync(btvp, waitfor, 0, p)))
2163 allerror = error;
2164
2165 hfs_unlock(cp);
2166 vnode_put(btvp);
2167 };
2168
2169 /*
2170 * Force stale file system control information to be flushed.
2171 */
2172 if (vcb->vcbSigWord == kHFSSigWord) {
2173 if ((error = VNOP_FSYNC(hfsmp->hfs_devvp, waitfor, context))) {
2174 allerror = error;
2175 }
2176 }
2177 #if QUOTA
2178 hfs_qsync(mp);
2179 #endif /* QUOTA */
2180
2181 hfs_hotfilesync(hfsmp, vfs_context_kernel());
2182
2183 /*
2184 * Write back modified superblock.
2185 */
2186 if (IsVCBDirty(vcb)) {
2187 error = hfs_flushvolumeheader(hfsmp, waitfor, 0);
2188 if (error)
2189 allerror = error;
2190 }
2191
2192 if (hfsmp->jnl) {
2193 hfs_journal_flush(hfsmp);
2194 }
2195
2196 {
2197 clock_sec_t secs;
2198 clock_usec_t usecs;
2199 uint64_t now;
2200
2201 clock_get_calendar_microtime(&secs, &usecs);
2202 now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs;
2203 hfsmp->hfs_last_sync_time = now;
2204 }
2205
2206 lck_rw_unlock_shared(&hfsmp->hfs_insync);
2207 return (allerror);
2208 }
2209
2210
2211 /*
2212 * File handle to vnode
2213 *
2214 * Have to be really careful about stale file handles:
2215 * - check that the cnode id is valid
2216 * - call hfs_vget() to get the locked cnode
2217 * - check for an unallocated cnode (i_mode == 0)
2218 * - check that the given client host has export rights and return
2219 * those rights via. exflagsp and credanonp
2220 */
static int
hfs_fhtovp(struct mount *mp, int fhlen, unsigned char *fhp, struct vnode **vpp, __unused vfs_context_t context)
{
	/*
	 * Convert an exported file handle back into a vnode.  The handle
	 * is an hfsfid whose CNID is stored in network byte order.  On
	 * success the vnode is returned with an iocount and its cnode
	 * unlocked; a deleted CNID maps to ESTALE.
	 */
	struct hfsfid *hfsfhp;
	struct vnode *nvp;
	int result;

	*vpp = NULL;
	hfsfhp = (struct hfsfid *)fhp;

	/* Reject handles too small to contain an hfsfid. */
	if (fhlen < (int)sizeof(struct hfsfid))
		return (EINVAL);

	result = hfs_vget(VFSTOHFS(mp), ntohl(hfsfhp->hfsfid_cnid), &nvp, 0);
	if (result) {
		/* The file behind the handle no longer exists: report it stale. */
		if (result == ENOENT)
			result = ESTALE;
		return result;
	}

	/*
	 * We used to use the create time as the gen id of the file handle,
	 * but it is not static enough because it can change at any point
	 * via system calls.  We still don't have another volume ID or other
	 * unique identifier to use for a generation ID across reboots that
	 * persists until the file is removed.  Using only the CNID exposes
	 * us to the potential wrap-around case, but as of 2/2008, it would take
	 * over 2 months to wrap around if the machine did nothing but allocate
	 * CNIDs.  Using some kind of wrap counter would only be effective if
	 * each file had the wrap counter associated with it.  For now,
	 * we use only the CNID to identify the file as it's good enough.
	 */

	*vpp = nvp;

	/* hfs_vget(..., skiplock=0) returned the cnode locked; unlock for the caller. */
	hfs_unlock(VTOC(nvp));
	return (0);
}
2259
2260
2261 /*
2262 * Vnode pointer to File handle
2263 */
2264 /* ARGSUSED */
2265 static int
2266 hfs_vptofh(struct vnode *vp, int *fhlenp, unsigned char *fhp, __unused vfs_context_t context)
2267 {
2268 struct cnode *cp;
2269 struct hfsfid *hfsfhp;
2270
2271 if (ISHFS(VTOVCB(vp)))
2272 return (ENOTSUP); /* hfs standard is not exportable */
2273
2274 if (*fhlenp < (int)sizeof(struct hfsfid))
2275 return (EOVERFLOW);
2276
2277 cp = VTOC(vp);
2278 hfsfhp = (struct hfsfid *)fhp;
2279 /* only the CNID is used to identify the file now */
2280 hfsfhp->hfsfid_cnid = htonl(cp->c_fileid);
2281 hfsfhp->hfsfid_gen = htonl(cp->c_fileid);
2282 *fhlenp = sizeof(struct hfsfid);
2283
2284 return (0);
2285 }
2286
2287
2288 /*
2289 * Initial HFS filesystems, done only once.
2290 */
static int
hfs_init(__unused struct vfsconf *vfsp)
{
	/*
	 * One-time global initialization for HFS: hash tables, encoding
	 * converters, B-tree reserve list, and the lock groups shared by
	 * all HFS mounts.
	 */
	static int done = 0;

	/* Run at most once; later calls are no-ops. */
	if (done)
		return (0);
	done = 1;
	hfs_chashinit();	/* cnode hash (used by hfs_chash_getvnode et al.) */
	hfs_converterinit();

	BTReserveSetup();


	hfs_lock_attr = lck_attr_alloc_init();
	hfs_group_attr = lck_grp_attr_alloc_init();
	hfs_mutex_group = lck_grp_alloc_init("hfs-mutex", hfs_group_attr);
	hfs_rwlock_group = lck_grp_alloc_init("hfs-rwlock", hfs_group_attr);

#if HFS_COMPRESSION
	decmpfs_init();
#endif

	return (0);
}
2316
2317 static int
2318 hfs_getmountpoint(struct vnode *vp, struct hfsmount **hfsmpp)
2319 {
2320 struct hfsmount * hfsmp;
2321 char fstypename[MFSNAMELEN];
2322
2323 if (vp == NULL)
2324 return (EINVAL);
2325
2326 if (!vnode_isvroot(vp))
2327 return (EINVAL);
2328
2329 vnode_vfsname(vp, fstypename);
2330 if (strncmp(fstypename, "hfs", sizeof(fstypename)) != 0)
2331 return (EINVAL);
2332
2333 hfsmp = VTOHFS(vp);
2334
2335 if (HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord)
2336 return (EINVAL);
2337
2338 *hfsmpp = hfsmp;
2339
2340 return (0);
2341 }
2342
2343 // XXXdbg
2344 #include <sys/filedesc.h>
2345
2346 /*
2347 * HFS filesystem related variables.
2348 */
2349 static int
2350 hfs_sysctl(int *name, __unused u_int namelen, user_addr_t oldp, size_t *oldlenp,
2351 user_addr_t newp, size_t newlen, vfs_context_t context)
2352 {
2353 struct proc *p = vfs_context_proc(context);
2354 int error;
2355 struct hfsmount *hfsmp;
2356
2357 /* all sysctl names at this level are terminal */
2358
2359 if (name[0] == HFS_ENCODINGBIAS) {
2360 int bias;
2361
2362 bias = hfs_getencodingbias();
2363 error = sysctl_int(oldp, oldlenp, newp, newlen, &bias);
2364 if (error == 0 && newp)
2365 hfs_setencodingbias(bias);
2366 return (error);
2367
2368 } else if (name[0] == HFS_EXTEND_FS) {
2369 u_int64_t newsize;
2370 vnode_t vp = vfs_context_cwd(context);
2371
2372 if (newp == USER_ADDR_NULL || vp == NULLVP)
2373 return (EINVAL);
2374 if ((error = hfs_getmountpoint(vp, &hfsmp)))
2375 return (error);
2376 error = sysctl_quad(oldp, oldlenp, newp, newlen, (quad_t *)&newsize);
2377 if (error)
2378 return (error);
2379
2380 error = hfs_extendfs(hfsmp, newsize, context);
2381 return (error);
2382
2383 } else if (name[0] == HFS_ENCODINGHINT) {
2384 size_t bufsize;
2385 size_t bytes;
2386 u_int32_t hint;
2387 u_int16_t *unicode_name = NULL;
2388 char *filename = NULL;
2389
2390 if ((newlen <= 0) || (newlen > MAXPATHLEN))
2391 return (EINVAL);
2392
2393 bufsize = MAX(newlen * 3, MAXPATHLEN);
2394 MALLOC(filename, char *, newlen, M_TEMP, M_WAITOK);
2395 if (filename == NULL) {
2396 error = ENOMEM;
2397 goto encodinghint_exit;
2398 }
2399 MALLOC(unicode_name, u_int16_t *, bufsize, M_TEMP, M_WAITOK);
2400 if (filename == NULL) {
2401 error = ENOMEM;
2402 goto encodinghint_exit;
2403 }
2404
2405 error = copyin(newp, (caddr_t)filename, newlen);
2406 if (error == 0) {
2407 error = utf8_decodestr((u_int8_t *)filename, newlen - 1, unicode_name,
2408 &bytes, bufsize, 0, UTF_DECOMPOSED);
2409 if (error == 0) {
2410 hint = hfs_pickencoding(unicode_name, bytes / 2);
2411 error = sysctl_int(oldp, oldlenp, USER_ADDR_NULL, 0, (int32_t *)&hint);
2412 }
2413 }
2414
2415 encodinghint_exit:
2416 if (unicode_name)
2417 FREE(unicode_name, M_TEMP);
2418 if (filename)
2419 FREE(filename, M_TEMP);
2420 return (error);
2421
2422 } else if (name[0] == HFS_ENABLE_JOURNALING) {
2423 // make the file system journaled...
2424 vnode_t vp = vfs_context_cwd(context);
2425 vnode_t jvp;
2426 ExtendedVCB *vcb;
2427 struct cat_attr jnl_attr, jinfo_attr;
2428 struct cat_fork jnl_fork, jinfo_fork;
2429 void *jnl = NULL;
2430 int lockflags;
2431
2432 /* Only root can enable journaling */
2433 if (!is_suser()) {
2434 return (EPERM);
2435 }
2436 if (vp == NULLVP)
2437 return EINVAL;
2438
2439 hfsmp = VTOHFS(vp);
2440 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2441 return EROFS;
2442 }
2443 if (HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord) {
2444 printf("hfs: can't make a plain hfs volume journaled.\n");
2445 return EINVAL;
2446 }
2447
2448 if (hfsmp->jnl) {
2449 printf("hfs: volume @ mp %p is already journaled!\n", vnode_mount(vp));
2450 return EAGAIN;
2451 }
2452
2453 vcb = HFSTOVCB(hfsmp);
2454 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_EXTENTS, HFS_EXCLUSIVE_LOCK);
2455 if (BTHasContiguousNodes(VTOF(vcb->catalogRefNum)) == 0 ||
2456 BTHasContiguousNodes(VTOF(vcb->extentsRefNum)) == 0) {
2457
2458 printf("hfs: volume has a btree w/non-contiguous nodes. can not enable journaling.\n");
2459 hfs_systemfile_unlock(hfsmp, lockflags);
2460 return EINVAL;
2461 }
2462 hfs_systemfile_unlock(hfsmp, lockflags);
2463
2464 // make sure these both exist!
2465 if ( GetFileInfo(vcb, kHFSRootFolderID, ".journal_info_block", &jinfo_attr, &jinfo_fork) == 0
2466 || GetFileInfo(vcb, kHFSRootFolderID, ".journal", &jnl_attr, &jnl_fork) == 0) {
2467
2468 return EINVAL;
2469 }
2470
2471 hfs_sync(hfsmp->hfs_mp, MNT_WAIT, context);
2472
2473 printf("hfs: Initializing the journal (joffset 0x%llx sz 0x%llx)...\n",
2474 (off_t)name[2], (off_t)name[3]);
2475
2476 //
2477 // XXXdbg - note that currently (Sept, 08) hfs_util does not support
2478 // enabling the journal on a separate device so it is safe
2479 // to just copy hfs_devvp here. If hfs_util gets the ability
2480 // to dynamically enable the journal on a separate device then
2481 // we will have to do the same thing as hfs_early_journal_init()
2482 // to locate and open the journal device.
2483 //
2484 jvp = hfsmp->hfs_devvp;
2485 jnl = journal_create(jvp,
2486 (off_t)name[2] * (off_t)HFSTOVCB(hfsmp)->blockSize
2487 + HFSTOVCB(hfsmp)->hfsPlusIOPosOffset,
2488 (off_t)((unsigned)name[3]),
2489 hfsmp->hfs_devvp,
2490 hfsmp->hfs_logical_block_size,
2491 0,
2492 0,
2493 hfs_sync_metadata, hfsmp->hfs_mp);
2494
2495 if (jnl == NULL) {
2496 printf("hfs: FAILED to create the journal!\n");
2497 if (jvp && jvp != hfsmp->hfs_devvp) {
2498 vnode_clearmountedon(jvp);
2499 VNOP_CLOSE(jvp, hfsmp->hfs_flags & HFS_READ_ONLY ? FREAD : FREAD|FWRITE, vfs_context_kernel());
2500 }
2501 jvp = NULL;
2502
2503 return EINVAL;
2504 }
2505
2506 hfs_global_exclusive_lock_acquire(hfsmp);
2507
2508 /*
2509 * Flush all dirty metadata buffers.
2510 */
2511 buf_flushdirtyblks(hfsmp->hfs_devvp, MNT_WAIT, 0, "hfs_sysctl");
2512 buf_flushdirtyblks(hfsmp->hfs_extents_vp, MNT_WAIT, 0, "hfs_sysctl");
2513 buf_flushdirtyblks(hfsmp->hfs_catalog_vp, MNT_WAIT, 0, "hfs_sysctl");
2514 buf_flushdirtyblks(hfsmp->hfs_allocation_vp, MNT_WAIT, 0, "hfs_sysctl");
2515 if (hfsmp->hfs_attribute_vp)
2516 buf_flushdirtyblks(hfsmp->hfs_attribute_vp, MNT_WAIT, 0, "hfs_sysctl");
2517
2518 HFSTOVCB(hfsmp)->vcbJinfoBlock = name[1];
2519 HFSTOVCB(hfsmp)->vcbAtrb |= kHFSVolumeJournaledMask;
2520 hfsmp->jvp = jvp;
2521 hfsmp->jnl = jnl;
2522
2523 // save this off for the hack-y check in hfs_remove()
2524 hfsmp->jnl_start = (u_int32_t)name[2];
2525 hfsmp->jnl_size = (off_t)((unsigned)name[3]);
2526 hfsmp->hfs_jnlinfoblkid = jinfo_attr.ca_fileid;
2527 hfsmp->hfs_jnlfileid = jnl_attr.ca_fileid;
2528
2529 vfs_setflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
2530
2531 hfs_global_exclusive_lock_release(hfsmp);
2532 hfs_flushvolumeheader(hfsmp, MNT_WAIT, 1);
2533
2534 {
2535 fsid_t fsid;
2536
2537 fsid.val[0] = (int32_t)hfsmp->hfs_raw_dev;
2538 fsid.val[1] = (int32_t)vfs_typenum(HFSTOVFS(hfsmp));
2539 vfs_event_signal(&fsid, VQ_UPDATE, (intptr_t)NULL);
2540 }
2541 return 0;
2542 } else if (name[0] == HFS_DISABLE_JOURNALING) {
2543 // clear the journaling bit
2544 vnode_t vp = vfs_context_cwd(context);
2545
2546 /* Only root can disable journaling */
2547 if (!is_suser()) {
2548 return (EPERM);
2549 }
2550 if (vp == NULLVP)
2551 return EINVAL;
2552
2553 hfsmp = VTOHFS(vp);
2554
2555 /*
2556 * Disabling journaling is disallowed on volumes with directory hard links
2557 * because we have not tested the relevant code path.
2558 */
2559 if (hfsmp->hfs_private_attr[DIR_HARDLINKS].ca_entries != 0){
2560 printf("hfs: cannot disable journaling on volumes with directory hardlinks\n");
2561 return EPERM;
2562 }
2563
2564 printf("hfs: disabling journaling for mount @ %p\n", vnode_mount(vp));
2565
2566 hfs_global_exclusive_lock_acquire(hfsmp);
2567
2568 // Lights out for you buddy!
2569 journal_close(hfsmp->jnl);
2570 hfsmp->jnl = NULL;
2571
2572 if (hfsmp->jvp && hfsmp->jvp != hfsmp->hfs_devvp) {
2573 vnode_clearmountedon(hfsmp->jvp);
2574 VNOP_CLOSE(hfsmp->jvp, hfsmp->hfs_flags & HFS_READ_ONLY ? FREAD : FREAD|FWRITE, vfs_context_kernel());
2575 vnode_put(hfsmp->jvp);
2576 }
2577 hfsmp->jvp = NULL;
2578 vfs_clearflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
2579 hfsmp->jnl_start = 0;
2580 hfsmp->hfs_jnlinfoblkid = 0;
2581 hfsmp->hfs_jnlfileid = 0;
2582
2583 HFSTOVCB(hfsmp)->vcbAtrb &= ~kHFSVolumeJournaledMask;
2584
2585 hfs_global_exclusive_lock_release(hfsmp);
2586 hfs_flushvolumeheader(hfsmp, MNT_WAIT, 1);
2587
2588 {
2589 fsid_t fsid;
2590
2591 fsid.val[0] = (int32_t)hfsmp->hfs_raw_dev;
2592 fsid.val[1] = (int32_t)vfs_typenum(HFSTOVFS(hfsmp));
2593 vfs_event_signal(&fsid, VQ_UPDATE, (intptr_t)NULL);
2594 }
2595 return 0;
2596 } else if (name[0] == HFS_GET_JOURNAL_INFO) {
2597 vnode_t vp = vfs_context_cwd(context);
2598 off_t jnl_start, jnl_size;
2599
2600 if (vp == NULLVP)
2601 return EINVAL;
2602
2603 /* 64-bit processes won't work with this sysctl -- can't fit a pointer into an int! */
2604 if (proc_is64bit(current_proc()))
2605 return EINVAL;
2606
2607 hfsmp = VTOHFS(vp);
2608 if (hfsmp->jnl == NULL) {
2609 jnl_start = 0;
2610 jnl_size = 0;
2611 } else {
2612 jnl_start = (off_t)(hfsmp->jnl_start * HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset;
2613 jnl_size = (off_t)hfsmp->jnl_size;
2614 }
2615
2616 if ((error = copyout((caddr_t)&jnl_start, CAST_USER_ADDR_T(name[1]), sizeof(off_t))) != 0) {
2617 return error;
2618 }
2619 if ((error = copyout((caddr_t)&jnl_size, CAST_USER_ADDR_T(name[2]), sizeof(off_t))) != 0) {
2620 return error;
2621 }
2622
2623 return 0;
2624 } else if (name[0] == HFS_SET_PKG_EXTENSIONS) {
2625
2626 return set_package_extensions_table((user_addr_t)((unsigned)name[1]), name[2], name[3]);
2627
2628 } else if (name[0] == VFS_CTL_QUERY) {
2629 struct sysctl_req *req;
2630 union union_vfsidctl vc;
2631 struct mount *mp;
2632 struct vfsquery vq;
2633
2634 req = CAST_DOWN(struct sysctl_req *, oldp); /* we're new style vfs sysctl. */
2635
2636 error = SYSCTL_IN(req, &vc, proc_is64bit(p)? sizeof(vc.vc64):sizeof(vc.vc32));
2637 if (error) return (error);
2638
2639 mp = vfs_getvfs(&vc.vc32.vc_fsid); /* works for 32 and 64 */
2640 if (mp == NULL) return (ENOENT);
2641
2642 hfsmp = VFSTOHFS(mp);
2643 bzero(&vq, sizeof(vq));
2644 vq.vq_flags = hfsmp->hfs_notification_conditions;
2645 return SYSCTL_OUT(req, &vq, sizeof(vq));;
2646 } else if (name[0] == HFS_REPLAY_JOURNAL) {
2647 vnode_t devvp = NULL;
2648 int device_fd;
2649 if (namelen != 2) {
2650 return (EINVAL);
2651 }
2652 device_fd = name[1];
2653 error = file_vnode(device_fd, &devvp);
2654 if (error) {
2655 return error;
2656 }
2657 error = vnode_getwithref(devvp);
2658 if (error) {
2659 file_drop(device_fd);
2660 return error;
2661 }
2662 error = hfs_journal_replay(devvp, context);
2663 file_drop(device_fd);
2664 vnode_put(devvp);
2665 return error;
2666 }
2667
2668 return (ENOTSUP);
2669 }
2670
2671 /*
2672 * hfs_vfs_vget is not static since it is used in hfs_readwrite.c to support
2673 * the build_path ioctl. We use it to leverage the code below that updates
2674 * the origin list cache if necessary
2675 */
2676
int
hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, __unused vfs_context_t context)
{
	int error;
	int lockflags;
	struct hfsmount *hfsmp;

	hfsmp = VFSTOHFS(mp);

	/* Look up by CNID; skiplock=1 hands back the cnode unlocked. */
	error = hfs_vget(hfsmp, (cnid_t)ino, vpp, 1);
	if (error)
		return (error);

	/*
	 * ADLs may need to have their origin state updated
	 * since build_path needs a valid parent.  The same is true
	 * for hardlinked files as well.  There isn't a race window here
	 * in re-acquiring the cnode lock since we aren't pulling any data
	 * out of the cnode; instead, we're going to the catalog.
	 */
	if ((VTOC(*vpp)->c_flag & C_HARDLINK) &&
	    (hfs_lock(VTOC(*vpp), HFS_EXCLUSIVE_LOCK) == 0)) {
		cnode_t *cp = VTOC(*vpp);
		struct cat_desc cdesc;

		if (!hfs_haslinkorigin(cp)) {
			lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
			error = cat_findname(hfsmp, (cnid_t)ino, &cdesc);
			hfs_systemfile_unlock(hfsmp, lockflags);
			/* Best effort: a cat_findname failure is ignored and we still return 0. */
			if (error == 0) {
				/* Only remember a parent that is not one of the private hardlink dirs. */
				if ((cdesc.cd_parentcnid != hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) &&
				    (cdesc.cd_parentcnid != hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid)) {
					hfs_savelinkorigin(cp, cdesc.cd_parentcnid);
				}
				cat_releasedesc(&cdesc);
			}
		}
		hfs_unlock(cp);
	}
	return (0);
}
2718
2719
2720 /*
2721 * Look up an HFS object by ID.
2722 *
2723 * The object is returned with an iocount reference and the cnode locked.
2724 *
2725 * If the object is a file then it will represent the data fork.
2726 */
__private_extern__
int
hfs_vget(struct hfsmount *hfsmp, cnid_t cnid, struct vnode **vpp, int skiplock)
{
	struct vnode *vp = NULLVP;
	struct cat_desc cndesc;
	struct cat_attr cnattr;
	struct cat_fork cnfork;
	u_int32_t linkref = 0;
	int error;

	/* Check for cnids that shouldn't be exported. */
	if ((cnid < kHFSFirstUserCatalogNodeID) &&
	    (cnid != kHFSRootFolderID && cnid != kHFSRootParentID)) {
		return (ENOENT);
	}
	/* Don't export our private directories. */
	if (cnid == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid ||
	    cnid == hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) {
		return (ENOENT);
	}
	/*
	 * Check the hash first
	 */
	vp = hfs_chash_getvnode(hfsmp, cnid, 0, skiplock);
	if (vp) {
		*vpp = vp;
		return(0);
	}

	bzero(&cndesc, sizeof(cndesc));
	bzero(&cnattr, sizeof(cnattr));
	bzero(&cnfork, sizeof(cnfork));

	/*
	 * Not in hash, lookup in catalog
	 */
	if (cnid == kHFSRootParentID) {
		static char hfs_rootname[] = "/";

		/* Synthesize a descriptor/attrs for the root's parent (not in the catalog). */
		cndesc.cd_nameptr = (const u_int8_t *)&hfs_rootname[0];
		cndesc.cd_namelen = 1;
		cndesc.cd_parentcnid = kHFSRootParentID;
		cndesc.cd_cnid = kHFSRootFolderID;
		cndesc.cd_flags = CD_ISDIR;

		cnattr.ca_fileid = kHFSRootFolderID;
		cnattr.ca_linkcount = 1;
		cnattr.ca_entries = 1;
		cnattr.ca_dircount = 1;
		cnattr.ca_mode = (S_IFDIR | S_IRWXU | S_IRWXG | S_IRWXO);
	} else {
		int lockflags;
		cnid_t pid;
		const char *nameptr;

		lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
		error = cat_idlookup(hfsmp, cnid, 0, &cndesc, &cnattr, &cnfork);
		hfs_systemfile_unlock(hfsmp, lockflags);

		if (error) {
			*vpp = NULL;
			return (error);
		}

		/*
		 * Check for a raw hardlink inode and save its linkref.
		 * (Inode files live in the private directories with an
		 * "iNode"/"dir_"/"temp" style name prefix.)
		 */
		pid = cndesc.cd_parentcnid;
		nameptr = (const char *)cndesc.cd_nameptr;

		if ((pid == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid) &&
		    (bcmp(nameptr, HFS_INODE_PREFIX, HFS_INODE_PREFIX_LEN) == 0)) {
			linkref = strtoul(&nameptr[HFS_INODE_PREFIX_LEN], NULL, 10);

		} else if ((pid == hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) &&
		           (bcmp(nameptr, HFS_DIRINODE_PREFIX, HFS_DIRINODE_PREFIX_LEN) == 0)) {
			linkref = strtoul(&nameptr[HFS_DIRINODE_PREFIX_LEN], NULL, 10);

		} else if ((pid == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid) &&
		           (bcmp(nameptr, HFS_DELETE_PREFIX, HFS_DELETE_PREFIX_LEN) == 0)) {
			*vpp = NULL;
			cat_releasedesc(&cndesc);
			return (ENOENT);	/* open unlinked file */
		}
	}

	/*
	 * Finish initializing cnode descriptor for hardlinks.
	 *
	 * We need a valid name and parent for reverse lookups.
	 */
	if (linkref) {
		cnid_t nextlinkid;
		cnid_t prevlinkid;
		struct cat_desc linkdesc;
		int lockflags;

		cnattr.ca_linkref = linkref;

		/*
		 * Pick up the first link in the chain and get a descriptor for it.
		 * This allows blind volfs paths to work for hardlinks.
		 */
		if ((hfs_lookuplink(hfsmp, linkref, &prevlinkid, &nextlinkid) == 0) &&
		    (nextlinkid != 0)) {
			lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
			error = cat_findname(hfsmp, nextlinkid, &linkdesc);
			hfs_systemfile_unlock(hfsmp, lockflags);
			if (error == 0) {
				/* Replace the inode-file descriptor with the link's. */
				cat_releasedesc(&cndesc);
				bcopy(&linkdesc, &cndesc, sizeof(linkdesc));
			}
		}
	}

	if (linkref) {
		error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cnfork, &vp);
		if (error == 0) {
			VTOC(vp)->c_flag |= C_HARDLINK;
			vnode_setmultipath(vp);
		}
	} else {
		struct componentname cn;

		/* Supply hfs_getnewvnode with a component name. */
		MALLOC_ZONE(cn.cn_pnbuf, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK);
		cn.cn_nameiop = LOOKUP;
		cn.cn_flags = ISLASTCN | HASBUF;
		cn.cn_context = NULL;
		cn.cn_pnlen = MAXPATHLEN;
		cn.cn_nameptr = cn.cn_pnbuf;
		cn.cn_namelen = cndesc.cd_namelen;
		cn.cn_hash = 0;
		cn.cn_consume = 0;
		bcopy(cndesc.cd_nameptr, cn.cn_nameptr, cndesc.cd_namelen + 1);

		error = hfs_getnewvnode(hfsmp, NULLVP, &cn, &cndesc, 0, &cnattr, &cnfork, &vp);

		if (error == 0 && (VTOC(vp)->c_flag & C_HARDLINK)) {
			hfs_savelinkorigin(VTOC(vp), cndesc.cd_parentcnid);
		}
		FREE_ZONE(cn.cn_pnbuf, cn.cn_pnlen, M_NAMEI);
	}
	cat_releasedesc(&cndesc);

	*vpp = vp;
	/* Caller asked for the cnode unlocked. */
	if (vp && skiplock) {
		hfs_unlock(VTOC(vp));
	}
	return (error);
}
2879
2880
2881 /*
2882 * Flush out all the files in a filesystem.
2883 */
static int
#if QUOTA
hfs_flushfiles(struct mount *mp, int flags, struct proc *p)
#else
hfs_flushfiles(struct mount *mp, int flags, __unused struct proc *p)
#endif /* QUOTA */
{
	struct hfsmount *hfsmp;
	struct vnode *skipvp = NULLVP;
	int error;
#if QUOTA
	int quotafilecnt;
	int i;
#endif

	hfsmp = VFSTOHFS(mp);

#if QUOTA
	/*
	 * The open quota files have an indirect reference on
	 * the root directory vnode.  We must account for this
	 * extra reference when doing the initial vflush.
	 */
	quotafilecnt = 0;
	if (((unsigned int)vfs_flags(mp)) & MNT_QUOTA) {

		/* Find out how many quota files we have open. */
		for (i = 0; i < MAXQUOTAS; i++) {
			if (hfsmp->hfs_qfiles[i].qf_vp != NULLVP)
				++quotafilecnt;
		}

		/* Obtain the root vnode so we can skip over it. */
		skipvp = hfs_chash_getvnode(hfsmp, kHFSRootFolderID, 0, 0);
	}
#endif /* QUOTA */

	/* First pass skips swap files in addition to system files. */
	error = vflush(mp, skipvp, SKIPSYSTEM | SKIPSWAP | flags);
	if (error != 0)
		return(error);

	/* Second pass flushes everything but system files (and skipvp). */
	error = vflush(mp, skipvp, SKIPSYSTEM | flags);

#if QUOTA
	if (((unsigned int)vfs_flags(mp)) & MNT_QUOTA) {
		if (skipvp) {
			/*
			 * See if there are additional references on the
			 * root vp besides the ones obtained from the open
			 * quota files and the hfs_chash_getvnode call above.
			 */
			if ((error == 0) &&
			    (vnode_isinuse(skipvp, quotafilecnt))) {
				error = EBUSY;  /* root directory is still open */
			}
			hfs_unlock(VTOC(skipvp));
			vnode_put(skipvp);
		}
		if (error && (flags & FORCECLOSE) == 0)
			return (error);

		/* Close the open quota files, then flush whatever remains. */
		for (i = 0; i < MAXQUOTAS; i++) {
			if (hfsmp->hfs_qfiles[i].qf_vp == NULLVP)
				continue;
			hfs_quotaoff(p, mp, i);
		}
		error = vflush(mp, NULLVP, SKIPSYSTEM | flags);
	}
#endif /* QUOTA */

	return (error);
}
2956
2957 /*
2958 * Update volume encoding bitmap (HFS Plus only)
2959 */
__private_extern__
void
hfs_setencodingbits(struct hfsmount *hfsmp, u_int32_t encoding)
{
#define kIndexMacUkrainian	48	/* MacUkrainian encoding is 152 */
#define kIndexMacFarsi		49	/* MacFarsi encoding is 140 */

	u_int32_t	index;

	/* Map the two encodings whose values exceed 63 into reserved bitmap slots. */
	switch (encoding) {
	case kTextEncodingMacUkrainian:
		index = kIndexMacUkrainian;
		break;
	case kTextEncodingMacFarsi:
		index = kIndexMacFarsi;
		break;
	default:
		index = encoding;
		break;
	}

	/* Set the bit (and dirty the VCB) only if it isn't already set. */
	if (index < 64 && (hfsmp->encodingsBitmap & (u_int64_t)(1ULL << index)) == 0) {
		/* NOTE(review): no trailing ';' here -- HFS_MOUNT_LOCK presumably
		 * expands to a braced statement; confirm against hfs.h. */
		HFS_MOUNT_LOCK(hfsmp, TRUE)
		hfsmp->encodingsBitmap |= (u_int64_t)(1ULL << index);
		MarkVCBDirty(hfsmp);
		HFS_MOUNT_UNLOCK(hfsmp, TRUE);
	}
}
2988
2989 /*
2990 * Update volume stats
2991 *
2992 * On journal volumes this will cause a volume header flush
2993 */
2994 __private_extern__
2995 int
2996 hfs_volupdate(struct hfsmount *hfsmp, enum volop op, int inroot)
2997 {
2998 struct timeval tv;
2999
3000 microtime(&tv);
3001
3002 lck_mtx_lock(&hfsmp->hfs_mutex);
3003
3004 MarkVCBDirty(hfsmp);
3005 hfsmp->hfs_mtime = tv.tv_sec;
3006
3007 switch (op) {
3008 case VOL_UPDATE:
3009 break;
3010 case VOL_MKDIR:
3011 if (hfsmp->hfs_dircount != 0xFFFFFFFF)
3012 ++hfsmp->hfs_dircount;
3013 if (inroot && hfsmp->vcbNmRtDirs != 0xFFFF)
3014 ++hfsmp->vcbNmRtDirs;
3015 break;
3016 case VOL_RMDIR:
3017 if (hfsmp->hfs_dircount != 0)
3018 --hfsmp->hfs_dircount;
3019 if (inroot && hfsmp->vcbNmRtDirs != 0xFFFF)
3020 --hfsmp->vcbNmRtDirs;
3021 break;
3022 case VOL_MKFILE:
3023 if (hfsmp->hfs_filecount != 0xFFFFFFFF)
3024 ++hfsmp->hfs_filecount;
3025 if (inroot && hfsmp->vcbNmFls != 0xFFFF)
3026 ++hfsmp->vcbNmFls;
3027 break;
3028 case VOL_RMFILE:
3029 if (hfsmp->hfs_filecount != 0)
3030 --hfsmp->hfs_filecount;
3031 if (inroot && hfsmp->vcbNmFls != 0xFFFF)
3032 --hfsmp->vcbNmFls;
3033 break;
3034 }
3035
3036 lck_mtx_unlock(&hfsmp->hfs_mutex);
3037
3038 if (hfsmp->jnl) {
3039 hfs_flushvolumeheader(hfsmp, 0, 0);
3040 }
3041
3042 return (0);
3043 }
3044
3045
/*
 * Flush the in-memory VCB state back to the on-disk HFS (standard)
 * Master Directory Block.
 *
 * Reads the primary MDB sector, rewrites its fields from the VCB (all
 * on-disk fields are big-endian; dates are stored as local-time HFS
 * dates), optionally copies the result to the alternate MDB, and writes
 * the primary back (async unless waitfor == MNT_WAIT).
 *
 * Returns 0 on success or an errno from the buffer-cache read/write.
 */
static int
hfs_flushMDB(struct hfsmount *hfsmp, int waitfor, int altflush)
{
	ExtendedVCB *vcb = HFSTOVCB(hfsmp);
	struct filefork *fp;
	HFSMasterDirectoryBlock *mdb;
	struct buf *bp = NULL;
	int retval;
	int sectorsize;
	ByteCount namelen;

	sectorsize = hfsmp->hfs_logical_block_size;
	/* Read the sector containing the primary MDB. */
	retval = (int)buf_bread(hfsmp->hfs_devvp, (daddr64_t)HFS_PRI_SECTOR(sectorsize), sectorsize, NOCRED, &bp);
	if (retval) {
		if (bp)
			buf_brelse(bp);
		return retval;
	}

	/* Hold the mount mutex while copying VCB fields so we snapshot a
	 * consistent view of the volume state. */
	lck_mtx_lock(&hfsmp->hfs_mutex);

	/* The MDB lives at a fixed offset within the sector. */
	mdb = (HFSMasterDirectoryBlock *)(buf_dataptr(bp) + HFS_PRI_OFFSET(sectorsize));

	mdb->drCrDate = SWAP_BE32 (UTCToLocal(to_hfs_time(vcb->vcbCrDate)));
	mdb->drLsMod = SWAP_BE32 (UTCToLocal(to_hfs_time(vcb->vcbLsMod)));
	mdb->drAtrb = SWAP_BE16 (vcb->vcbAtrb);
	mdb->drNmFls = SWAP_BE16 (vcb->vcbNmFls);
	mdb->drAllocPtr = SWAP_BE16 (vcb->nextAllocation);
	mdb->drClpSiz = SWAP_BE32 (vcb->vcbClpSiz);
	mdb->drNxtCNID = SWAP_BE32 (vcb->vcbNxtCNID);
	mdb->drFreeBks = SWAP_BE16 (vcb->freeBlocks);

	/* Convert the in-memory UTF-8 volume name to the on-disk HFS encoding. */
	namelen = strlen((char *)vcb->vcbVN);
	retval = utf8_to_hfs(vcb, namelen, vcb->vcbVN, mdb->drVN);
	/* Retry with MacRoman in case that's how it was exported. */
	if (retval)
		retval = utf8_to_mac_roman(namelen, vcb->vcbVN, mdb->drVN);

	mdb->drVolBkUp = SWAP_BE32 (UTCToLocal(to_hfs_time(vcb->vcbVolBkUp)));
	mdb->drWrCnt = SWAP_BE32 (vcb->vcbWrCnt);
	mdb->drNmRtDirs = SWAP_BE16 (vcb->vcbNmRtDirs);
	mdb->drFilCnt = SWAP_BE32 (vcb->vcbFilCnt);
	mdb->drDirCnt = SWAP_BE32 (vcb->vcbDirCnt);

	bcopy(vcb->vcbFndrInfo, mdb->drFndrInfo, sizeof(mdb->drFndrInfo));

	/* Sync the extents overflow file's first three extents and sizes,
	 * then clear its cnode's modified flag. */
	fp = VTOF(vcb->extentsRefNum);
	mdb->drXTExtRec[0].startBlock = SWAP_BE16 (fp->ff_extents[0].startBlock);
	mdb->drXTExtRec[0].blockCount = SWAP_BE16 (fp->ff_extents[0].blockCount);
	mdb->drXTExtRec[1].startBlock = SWAP_BE16 (fp->ff_extents[1].startBlock);
	mdb->drXTExtRec[1].blockCount = SWAP_BE16 (fp->ff_extents[1].blockCount);
	mdb->drXTExtRec[2].startBlock = SWAP_BE16 (fp->ff_extents[2].startBlock);
	mdb->drXTExtRec[2].blockCount = SWAP_BE16 (fp->ff_extents[2].blockCount);
	mdb->drXTFlSize = SWAP_BE32 (fp->ff_blocks * vcb->blockSize);
	mdb->drXTClpSiz = SWAP_BE32 (fp->ff_clumpsize);
	FTOC(fp)->c_flag &= ~C_MODIFIED;

	/* Same for the catalog file. */
	fp = VTOF(vcb->catalogRefNum);
	mdb->drCTExtRec[0].startBlock = SWAP_BE16 (fp->ff_extents[0].startBlock);
	mdb->drCTExtRec[0].blockCount = SWAP_BE16 (fp->ff_extents[0].blockCount);
	mdb->drCTExtRec[1].startBlock = SWAP_BE16 (fp->ff_extents[1].startBlock);
	mdb->drCTExtRec[1].blockCount = SWAP_BE16 (fp->ff_extents[1].blockCount);
	mdb->drCTExtRec[2].startBlock = SWAP_BE16 (fp->ff_extents[2].startBlock);
	mdb->drCTExtRec[2].blockCount = SWAP_BE16 (fp->ff_extents[2].blockCount);
	mdb->drCTFlSize = SWAP_BE32 (fp->ff_blocks * vcb->blockSize);
	mdb->drCTClpSiz = SWAP_BE32 (fp->ff_clumpsize);
	FTOC(fp)->c_flag &= ~C_MODIFIED;

	MarkVCBClean( vcb );

	lck_mtx_unlock(&hfsmp->hfs_mutex);

	/* If requested, flush out the alternate MDB */
	if (altflush) {
		struct buf *alt_bp = NULL;

		if (buf_meta_bread(hfsmp->hfs_devvp, hfsmp->hfs_alt_id_sector, sectorsize, NOCRED, &alt_bp) == 0) {
			bcopy(mdb, (char *)buf_dataptr(alt_bp) + HFS_ALT_OFFSET(sectorsize), kMDBSize);

			(void) VNOP_BWRITE(alt_bp);
		} else if (alt_bp)
			buf_brelse(alt_bp);
	}

	/* Write the primary MDB: async unless the caller wants it synchronous. */
	if (waitfor != MNT_WAIT)
		buf_bawrite(bp);
	else
		retval = VNOP_BWRITE(bp);

	return (retval);
}
3137
3138 /*
3139 * Flush any dirty in-memory mount data to the on-disk
3140 * volume header.
3141 *
3142 * Note: the on-disk volume signature is intentionally
3143 * not flushed since the on-disk "H+" and "HX" signatures
3144 * are always stored in-memory as "H+".
3145 */
3146 __private_extern__
3147 int
3148 hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush)
3149 {
3150 ExtendedVCB *vcb = HFSTOVCB(hfsmp);
3151 struct filefork *fp;
3152 HFSPlusVolumeHeader *volumeHeader, *altVH;
3153 int retval;
3154 struct buf *bp, *alt_bp;
3155 int i;
3156 daddr64_t priIDSector;
3157 int critical;
3158 u_int16_t signature;
3159 u_int16_t hfsversion;
3160
3161 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
3162 return(0);
3163 }
3164 if (hfsmp->hfs_flags & HFS_STANDARD) {
3165 return hfs_flushMDB(hfsmp, waitfor, altflush);
3166 }
3167 critical = altflush;
3168 priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
3169 HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size));
3170
3171 if (hfs_start_transaction(hfsmp) != 0) {
3172 return EINVAL;
3173 }
3174
3175 bp = NULL;
3176 alt_bp = NULL;
3177
3178 retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
3179 HFS_PHYSBLK_ROUNDDOWN(priIDSector, hfsmp->hfs_log_per_phys),
3180 hfsmp->hfs_physical_block_size, NOCRED, &bp);
3181 if (retval) {
3182 printf("hfs: err %d reading VH blk (%s)\n", retval, vcb->vcbVN);
3183 goto err_exit;
3184 }
3185
3186 volumeHeader = (HFSPlusVolumeHeader *)((char *)buf_dataptr(bp) +
3187 HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));
3188
3189 /*
3190 * Sanity check what we just read. If it's bad, try the alternate
3191 * instead.
3192 */
3193 signature = SWAP_BE16 (volumeHeader->signature);
3194 hfsversion = SWAP_BE16 (volumeHeader->version);
3195 if ((signature != kHFSPlusSigWord && signature != kHFSXSigWord) ||
3196 (hfsversion < kHFSPlusVersion) || (hfsversion > 100) ||
3197 (SWAP_BE32 (volumeHeader->blockSize) != vcb->blockSize)) {
3198 printf("hfs: corrupt VH on %s, sig 0x%04x, ver %d, blksize %d%s\n",
3199 vcb->vcbVN, signature, hfsversion,
3200 SWAP_BE32 (volumeHeader->blockSize),
3201 hfsmp->hfs_alt_id_sector ? "; trying alternate" : "");
3202 hfs_mark_volume_inconsistent(hfsmp);
3203
3204 if (hfsmp->hfs_alt_id_sector) {
3205 retval = buf_meta_bread(hfsmp->hfs_devvp,
3206 HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys),
3207 hfsmp->hfs_physical_block_size, NOCRED, &alt_bp);
3208 if (retval) {
3209 printf("hfs: err %d reading alternate VH (%s)\n", retval, vcb->vcbVN);
3210 goto err_exit;
3211 }
3212
3213 altVH = (HFSPlusVolumeHeader *)((char *)buf_dataptr(alt_bp) +
3214 HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size));
3215 signature = SWAP_BE16(altVH->signature);
3216 hfsversion = SWAP_BE16(altVH->version);
3217
3218 if ((signature != kHFSPlusSigWord && signature != kHFSXSigWord) ||
3219 (hfsversion < kHFSPlusVersion) || (kHFSPlusVersion > 100) ||
3220 (SWAP_BE32(altVH->blockSize) != vcb->blockSize)) {
3221 printf("hfs: corrupt alternate VH on %s, sig 0x%04x, ver %d, blksize %d\n",
3222 vcb->vcbVN, signature, hfsversion,
3223 SWAP_BE32(altVH->blockSize));
3224 retval = EIO;
3225 goto err_exit;
3226 }
3227
3228 /* The alternate is plausible, so use it. */
3229 bcopy(altVH, volumeHeader, kMDBSize);
3230 buf_brelse(alt_bp);
3231 alt_bp = NULL;
3232 } else {
3233 /* No alternate VH, nothing more we can do. */
3234 retval = EIO;
3235 goto err_exit;
3236 }
3237 }
3238
3239 if (hfsmp->jnl) {
3240 journal_modify_block_start(hfsmp->jnl, bp);
3241 }
3242
3243 /*
3244 * For embedded HFS+ volumes, update create date if it changed
3245 * (ie from a setattrlist call)
3246 */
3247 if ((vcb->hfsPlusIOPosOffset != 0) &&
3248 (SWAP_BE32 (volumeHeader->createDate) != vcb->localCreateDate)) {
3249 struct buf *bp2;
3250 HFSMasterDirectoryBlock *mdb;
3251
3252 retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
3253 HFS_PHYSBLK_ROUNDDOWN(HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size), hfsmp->hfs_log_per_phys),
3254 hfsmp->hfs_physical_block_size, NOCRED, &bp2);
3255 if (retval) {
3256 if (bp2)
3257 buf_brelse(bp2);
3258 retval = 0;
3259 } else {
3260 mdb = (HFSMasterDirectoryBlock *)(buf_dataptr(bp2) +
3261 HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));
3262
3263 if ( SWAP_BE32 (mdb->drCrDate) != vcb->localCreateDate )
3264 {
3265 if (hfsmp->jnl) {
3266 journal_modify_block_start(hfsmp->jnl, bp2);
3267 }
3268
3269 mdb->drCrDate = SWAP_BE32 (vcb->localCreateDate); /* pick up the new create date */
3270
3271 if (hfsmp->jnl) {
3272 journal_modify_block_end(hfsmp->jnl, bp2, NULL, NULL);
3273 } else {
3274 (void) VNOP_BWRITE(bp2); /* write out the changes */
3275 }
3276 }
3277 else
3278 {
3279 buf_brelse(bp2); /* just release it */
3280 }
3281 }
3282 }
3283
3284 lck_mtx_lock(&hfsmp->hfs_mutex);
3285
3286 /* Note: only update the lower 16 bits worth of attributes */
3287 volumeHeader->attributes = SWAP_BE32 (vcb->vcbAtrb);
3288 volumeHeader->journalInfoBlock = SWAP_BE32 (vcb->vcbJinfoBlock);
3289 if (hfsmp->jnl) {
3290 volumeHeader->lastMountedVersion = SWAP_BE32 (kHFSJMountVersion);
3291 } else {
3292 volumeHeader->lastMountedVersion = SWAP_BE32 (kHFSPlusMountVersion);
3293 }
3294 volumeHeader->createDate = SWAP_BE32 (vcb->localCreateDate); /* volume create date is in local time */
3295 volumeHeader->modifyDate = SWAP_BE32 (to_hfs_time(vcb->vcbLsMod));
3296 volumeHeader->backupDate = SWAP_BE32 (to_hfs_time(vcb->vcbVolBkUp));
3297 volumeHeader->fileCount = SWAP_BE32 (vcb->vcbFilCnt);
3298 volumeHeader->folderCount = SWAP_BE32 (vcb->vcbDirCnt);
3299 volumeHeader->totalBlocks = SWAP_BE32 (vcb->totalBlocks);
3300 volumeHeader->freeBlocks = SWAP_BE32 (vcb->freeBlocks);
3301 volumeHeader->nextAllocation = SWAP_BE32 (vcb->nextAllocation);
3302 volumeHeader->rsrcClumpSize = SWAP_BE32 (vcb->vcbClpSiz);
3303 volumeHeader->dataClumpSize = SWAP_BE32 (vcb->vcbClpSiz);
3304 volumeHeader->nextCatalogID = SWAP_BE32 (vcb->vcbNxtCNID);
3305 volumeHeader->writeCount = SWAP_BE32 (vcb->vcbWrCnt);
3306 volumeHeader->encodingsBitmap = SWAP_BE64 (vcb->encodingsBitmap);
3307
3308 if (bcmp(vcb->vcbFndrInfo, volumeHeader->finderInfo, sizeof(volumeHeader->finderInfo)) != 0) {
3309 bcopy(vcb->vcbFndrInfo, volumeHeader->finderInfo, sizeof(volumeHeader->finderInfo));
3310 critical = 1;
3311 }
3312
3313 /*
3314 * System files are only dirty when altflush is set.
3315 */
3316 if (altflush == 0) {
3317 goto done;
3318 }
3319
3320 /* Sync Extents over-flow file meta data */
3321 fp = VTOF(vcb->extentsRefNum);
3322 if (FTOC(fp)->c_flag & C_MODIFIED) {
3323 for (i = 0; i < kHFSPlusExtentDensity; i++) {
3324 volumeHeader->extentsFile.extents[i].startBlock =
3325 SWAP_BE32 (fp->ff_extents[i].startBlock);
3326 volumeHeader->extentsFile.extents[i].blockCount =
3327 SWAP_BE32 (fp->ff_extents[i].blockCount);
3328 }
3329 volumeHeader->extentsFile.logicalSize = SWAP_BE64 (fp->ff_size);
3330 volumeHeader->extentsFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
3331 volumeHeader->extentsFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
3332 FTOC(fp)->c_flag &= ~C_MODIFIED;
3333 }
3334
3335 /* Sync Catalog file meta data */
3336 fp = VTOF(vcb->catalogRefNum);
3337 if (FTOC(fp)->c_flag & C_MODIFIED) {
3338 for (i = 0; i < kHFSPlusExtentDensity; i++) {
3339 volumeHeader->catalogFile.extents[i].startBlock =
3340 SWAP_BE32 (fp->ff_extents[i].startBlock);
3341 volumeHeader->catalogFile.extents[i].blockCount =
3342 SWAP_BE32 (fp->ff_extents[i].blockCount);
3343 }
3344 volumeHeader->catalogFile.logicalSize = SWAP_BE64 (fp->ff_size);
3345 volumeHeader->catalogFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
3346 volumeHeader->catalogFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
3347 FTOC(fp)->c_flag &= ~C_MODIFIED;
3348 }
3349
3350 /* Sync Allocation file meta data */
3351 fp = VTOF(vcb->allocationsRefNum);
3352 if (FTOC(fp)->c_flag & C_MODIFIED) {
3353 for (i = 0; i < kHFSPlusExtentDensity; i++) {
3354 volumeHeader->allocationFile.extents[i].startBlock =
3355 SWAP_BE32 (fp->ff_extents[i].startBlock);
3356 volumeHeader->allocationFile.extents[i].blockCount =
3357 SWAP_BE32 (fp->ff_extents[i].blockCount);
3358 }
3359 volumeHeader->allocationFile.logicalSize = SWAP_BE64 (fp->ff_size);
3360 volumeHeader->allocationFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
3361 volumeHeader->allocationFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
3362 FTOC(fp)->c_flag &= ~C_MODIFIED;
3363 }
3364
3365 /* Sync Attribute file meta data */
3366 if (hfsmp->hfs_attribute_vp) {
3367 fp = VTOF(hfsmp->hfs_attribute_vp);
3368 for (i = 0; i < kHFSPlusExtentDensity; i++) {
3369 volumeHeader->attributesFile.extents[i].startBlock =
3370 SWAP_BE32 (fp->ff_extents[i].startBlock);
3371 volumeHeader->attributesFile.extents[i].blockCount =
3372 SWAP_BE32 (fp->ff_extents[i].blockCount);
3373 }
3374 FTOC(fp)->c_flag &= ~C_MODIFIED;
3375 volumeHeader->attributesFile.logicalSize = SWAP_BE64 (fp->ff_size);
3376 volumeHeader->attributesFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
3377 volumeHeader->attributesFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
3378 }
3379
3380 /* Sync Startup file meta data */
3381 if (hfsmp->hfs_startup_vp) {
3382 fp = VTOF(hfsmp->hfs_startup_vp);
3383 if (FTOC(fp)->c_flag & C_MODIFIED) {
3384 for (i = 0; i < kHFSPlusExtentDensity; i++) {
3385 volumeHeader->startupFile.extents[i].startBlock =
3386 SWAP_BE32 (fp->ff_extents[i].startBlock);
3387 volumeHeader->startupFile.extents[i].blockCount =
3388 SWAP_BE32 (fp->ff_extents[i].blockCount);
3389 }
3390 volumeHeader->startupFile.logicalSize = SWAP_BE64 (fp->ff_size);
3391 volumeHeader->startupFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
3392 volumeHeader->startupFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
3393 FTOC(fp)->c_flag &= ~C_MODIFIED;
3394 }
3395 }
3396
3397 done:
3398 MarkVCBClean(hfsmp);
3399 lck_mtx_unlock(&hfsmp->hfs_mutex);
3400
3401 /* If requested, flush out the alternate volume header */
3402 if (altflush && hfsmp->hfs_alt_id_sector) {
3403 if (buf_meta_bread(hfsmp->hfs_devvp,
3404 HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys),
3405 hfsmp->hfs_physical_block_size, NOCRED, &alt_bp) == 0) {
3406 if (hfsmp->jnl) {
3407 journal_modify_block_start(hfsmp->jnl, alt_bp);
3408 }
3409
3410 bcopy(volumeHeader, (char *)buf_dataptr(alt_bp) +
3411 HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size),
3412 kMDBSize);
3413
3414 if (hfsmp->jnl) {
3415 journal_modify_block_end(hfsmp->jnl, alt_bp, NULL, NULL);
3416 } else {
3417 (void) VNOP_BWRITE(alt_bp);
3418 }
3419 } else if (alt_bp)
3420 buf_brelse(alt_bp);
3421 }
3422
3423 if (hfsmp->jnl) {
3424 journal_modify_block_end(hfsmp->jnl, bp, NULL, NULL);
3425 } else {
3426 if (waitfor != MNT_WAIT)
3427 buf_bawrite(bp);
3428 else {
3429 retval = VNOP_BWRITE(bp);
3430 /* When critical data changes, flush the device cache */
3431 if (critical && (retval == 0)) {
3432 (void) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE,
3433 NULL, FWRITE, NULL);
3434 }
3435 }
3436 }
3437 hfs_end_transaction(hfsmp);
3438
3439 return (retval);
3440
3441 err_exit:
3442 if (alt_bp)
3443 buf_brelse(alt_bp);
3444 if (bp)
3445 buf_brelse(bp);
3446 hfs_end_transaction(hfsmp);
3447 return retval;
3448 }
3449
3450
3451 /*
3452 * Extend a file system.
3453 */
3454 __private_extern__
3455 int
3456 hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
3457 {
3458 struct proc *p = vfs_context_proc(context);
3459 kauth_cred_t cred = vfs_context_ucred(context);
3460 struct vnode *vp;
3461 struct vnode *devvp;
3462 struct buf *bp;
3463 struct filefork *fp = NULL;
3464 ExtendedVCB *vcb;
3465 struct cat_fork forkdata;
3466 u_int64_t oldsize;
3467 u_int64_t newblkcnt;
3468 u_int64_t prev_phys_block_count;
3469 u_int32_t addblks;
3470 u_int64_t sectorcnt;
3471 u_int32_t sectorsize;
3472 u_int32_t phys_sectorsize;
3473 daddr64_t prev_alt_sector;
3474 daddr_t bitmapblks;
3475 int lockflags = 0;
3476 int error;
3477 int64_t oldBitmapSize;
3478 Boolean usedExtendFileC = false;
3479 int transaction_begun = 0;
3480
3481 devvp = hfsmp->hfs_devvp;
3482 vcb = HFSTOVCB(hfsmp);
3483
3484 /*
3485 * - HFS Plus file systems only.
3486 * - Journaling must be enabled.
3487 * - No embedded volumes.
3488 */
3489 if ((vcb->vcbSigWord == kHFSSigWord) ||
3490 (hfsmp->jnl == NULL) ||
3491 (vcb->hfsPlusIOPosOffset != 0)) {
3492 return (EPERM);
3493 }
3494 /*
3495 * If extending file system by non-root, then verify
3496 * ownership and check permissions.
3497 */
3498 if (suser(cred, NULL)) {
3499 error = hfs_vget(hfsmp, kHFSRootFolderID, &vp, 0);
3500
3501 if (error)
3502 return (error);
3503 error = hfs_owner_rights(hfsmp, VTOC(vp)->c_uid, cred, p, 0);
3504 if (error == 0) {
3505 error = hfs_write_access(vp, cred, p, false);
3506 }
3507 hfs_unlock(VTOC(vp));
3508 vnode_put(vp);
3509 if (error)
3510 return (error);
3511
3512 error = vnode_authorize(devvp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, context);
3513 if (error)
3514 return (error);
3515 }
3516 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&sectorsize, 0, context)) {
3517 return (ENXIO);
3518 }
3519 if (sectorsize != hfsmp->hfs_logical_block_size) {
3520 return (ENXIO);
3521 }
3522 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&sectorcnt, 0, context)) {
3523 return (ENXIO);
3524 }
3525 if ((sectorsize * sectorcnt) < newsize) {
3526 printf("hfs_extendfs: not enough space on device\n");
3527 return (ENOSPC);
3528 }
3529 error = VNOP_IOCTL(devvp, DKIOCGETPHYSICALBLOCKSIZE, (caddr_t)&phys_sectorsize, 0, context);
3530 if (error) {
3531 if ((error != ENOTSUP) && (error != ENOTTY)) {
3532 return (ENXIO);
3533 }
3534 /* If ioctl is not supported, force physical and logical sector size to be same */
3535 phys_sectorsize = sectorsize;
3536 }
3537 oldsize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
3538
3539 /*
3540 * Validate new size.
3541 */
3542 if ((newsize <= oldsize) || (newsize % sectorsize) || (newsize % phys_sectorsize)) {
3543 printf("hfs_extendfs: invalid size\n");
3544 return (EINVAL);
3545 }
3546 newblkcnt = newsize / vcb->blockSize;
3547 if (newblkcnt > (u_int64_t)0xFFFFFFFF)
3548 return (EOVERFLOW);
3549
3550 addblks = newblkcnt - vcb->totalBlocks;
3551
3552 printf("hfs_extendfs: growing %s by %d blocks\n", vcb->vcbVN, addblks);
3553
3554 HFS_MOUNT_LOCK(hfsmp, TRUE);
3555 if (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) {
3556 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
3557 error = EALREADY;
3558 goto out;
3559 }
3560 hfsmp->hfs_flags |= HFS_RESIZE_IN_PROGRESS;
3561 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
3562
3563 /* Invalidate the current free extent cache */
3564 invalidate_free_extent_cache(hfsmp);
3565
3566 /*
3567 * Enclose changes inside a transaction.
3568 */
3569 if (hfs_start_transaction(hfsmp) != 0) {
3570 error = EINVAL;
3571 goto out;
3572 }
3573 transaction_begun = 1;
3574
3575 /*
3576 * Note: we take the attributes lock in case we have an attribute data vnode
3577 * which needs to change size.
3578 */
3579 lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
3580 vp = vcb->allocationsRefNum;
3581 fp = VTOF(vp);
3582 bcopy(&fp->ff_data, &forkdata, sizeof(forkdata));
3583
3584 /*
3585 * Calculate additional space required (if any) by allocation bitmap.
3586 */
3587 oldBitmapSize = fp->ff_size;
3588 bitmapblks = roundup((newblkcnt+7) / 8, vcb->vcbVBMIOSize) / vcb->blockSize;
3589 if (bitmapblks > (daddr_t)fp->ff_blocks)
3590 bitmapblks -= fp->ff_blocks;
3591 else
3592 bitmapblks = 0;
3593
3594 if (bitmapblks > 0) {
3595 daddr64_t blkno;
3596 daddr_t blkcnt;
3597 off_t bytesAdded;
3598
3599 /*
3600 * Get the bitmap's current size (in allocation blocks) so we know
3601 * where to start zero filling once the new space is added. We've
3602 * got to do this before the bitmap is grown.
3603 */
3604 blkno = (daddr64_t)fp->ff_blocks;
3605
3606 /*
3607 * Try to grow the allocation file in the normal way, using allocation
3608 * blocks already existing in the file system. This way, we might be
3609 * able to grow the bitmap contiguously, or at least in the metadata
3610 * zone.
3611 */
3612 error = ExtendFileC(vcb, fp, bitmapblks * vcb->blockSize, 0,
3613 kEFAllMask | kEFNoClumpMask | kEFReserveMask | kEFMetadataMask,
3614 &bytesAdded);
3615
3616 if (error == 0) {
3617 usedExtendFileC = true;
3618 } else {
3619 /*
3620 * If the above allocation failed, fall back to allocating the new
3621 * extent of the bitmap from the space we're going to add. Since those
3622 * blocks don't yet belong to the file system, we have to update the
3623 * extent list directly, and manually adjust the file size.
3624 */
3625 bytesAdded = 0;
3626 error = AddFileExtent(vcb, fp, vcb->totalBlocks, bitmapblks);
3627 if (error) {
3628 printf("hfs_extendfs: error %d adding extents\n", error);
3629 goto out;
3630 }
3631 fp->ff_blocks += bitmapblks;
3632 VTOC(vp)->c_blocks = fp->ff_blocks;
3633 VTOC(vp)->c_flag |= C_MODIFIED;
3634 }
3635
3636 /*
3637 * Update the allocation file's size to include the newly allocated
3638 * blocks. Note that ExtendFileC doesn't do this, which is why this
3639 * statement is outside the above "if" statement.
3640 */
3641 fp->ff_size += (u_int64_t)bitmapblks * (u_int64_t)vcb->blockSize;
3642
3643 /*
3644 * Zero out the new bitmap blocks.
3645 */
3646 {
3647
3648 bp = NULL;
3649 blkcnt = bitmapblks;
3650 while (blkcnt > 0) {
3651 error = (int)buf_meta_bread(vp, blkno, vcb->blockSize, NOCRED, &bp);
3652 if (error) {
3653 if (bp) {
3654 buf_brelse(bp);
3655 }
3656 break;
3657 }
3658 bzero((char *)buf_dataptr(bp), vcb->blockSize);
3659 buf_markaged(bp);
3660 error = (int)buf_bwrite(bp);
3661 if (error)
3662 break;
3663 --blkcnt;
3664 ++blkno;
3665 }
3666 }
3667 if (error) {
3668 printf("hfs_extendfs: error %d clearing blocks\n", error);
3669 goto out;
3670 }
3671 /*
3672 * Mark the new bitmap space as allocated.
3673 *
3674 * Note that ExtendFileC will have marked any blocks it allocated, so
3675 * this is only needed if we used AddFileExtent. Also note that this
3676 * has to come *after* the zero filling of new blocks in the case where
3677 * we used AddFileExtent (since the part of the bitmap we're touching
3678 * is in those newly allocated blocks).
3679 */
3680 if (!usedExtendFileC) {
3681 error = BlockMarkAllocated(vcb, vcb->totalBlocks, bitmapblks);
3682 if (error) {
3683 printf("hfs_extendfs: error %d setting bitmap\n", error);
3684 goto out;
3685 }
3686 vcb->freeBlocks -= bitmapblks;
3687 }
3688 }
3689 /*
3690 * Mark the new alternate VH as allocated.
3691 */
3692 if (vcb->blockSize == 512)
3693 error = BlockMarkAllocated(vcb, vcb->totalBlocks + addblks - 2, 2);
3694 else
3695 error = BlockMarkAllocated(vcb, vcb->totalBlocks + addblks - 1, 1);
3696 if (error) {
3697 printf("hfs_extendfs: error %d setting bitmap (VH)\n", error);
3698 goto out;
3699 }
3700 /*
3701 * Mark the old alternate VH as free.
3702 */
3703 if (vcb->blockSize == 512)
3704 (void) BlockMarkFree(vcb, vcb->totalBlocks - 2, 2);
3705 else
3706 (void) BlockMarkFree(vcb, vcb->totalBlocks - 1, 1);
3707 /*
3708 * Adjust file system variables for new space.
3709 */
3710 prev_phys_block_count = hfsmp->hfs_logical_block_count;
3711 prev_alt_sector = hfsmp->hfs_alt_id_sector;
3712
3713 vcb->totalBlocks += addblks;
3714 vcb->freeBlocks += addblks;
3715 hfsmp->hfs_logical_block_count = newsize / sectorsize;
3716 hfsmp->hfs_alt_id_sector = (hfsmp->hfsPlusIOPosOffset / sectorsize) +
3717 HFS_ALT_SECTOR(sectorsize, hfsmp->hfs_logical_block_count);
3718 MarkVCBDirty(vcb);
3719 error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
3720 if (error) {
3721 printf("hfs_extendfs: couldn't flush volume headers (%d)", error);
3722 /*
3723 * Restore to old state.
3724 */
3725 if (usedExtendFileC) {
3726 (void) TruncateFileC(vcb, fp, oldBitmapSize, false);
3727 } else {
3728 fp->ff_blocks -= bitmapblks;
3729 fp->ff_size -= (u_int64_t)bitmapblks * (u_int64_t)vcb->blockSize;
3730 /*
3731 * No need to mark the excess blocks free since those bitmap blocks
3732 * are no longer part of the bitmap. But we do need to undo the
3733 * effect of the "vcb->freeBlocks -= bitmapblks" above.
3734 */
3735 vcb->freeBlocks += bitmapblks;
3736 }
3737 vcb->totalBlocks -= addblks;
3738 vcb->freeBlocks -= addblks;
3739 hfsmp->hfs_logical_block_count = prev_phys_block_count;
3740 hfsmp->hfs_alt_id_sector = prev_alt_sector;
3741 MarkVCBDirty(vcb);
3742 if (vcb->blockSize == 512)
3743 (void) BlockMarkAllocated(vcb, vcb->totalBlocks - 2, 2);
3744 else
3745 (void) BlockMarkAllocated(vcb, vcb->totalBlocks - 1, 1);
3746 goto out;
3747 }
3748 /*
3749 * Invalidate the old alternate volume header.
3750 */
3751 bp = NULL;
3752 if (prev_alt_sector) {
3753 if (buf_meta_bread(hfsmp->hfs_devvp,
3754 HFS_PHYSBLK_ROUNDDOWN(prev_alt_sector, hfsmp->hfs_log_per_phys),
3755 hfsmp->hfs_physical_block_size, NOCRED, &bp) == 0) {
3756 journal_modify_block_start(hfsmp->jnl, bp);
3757
3758 bzero((char *)buf_dataptr(bp) + HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size), kMDBSize);
3759
3760 journal_modify_block_end(hfsmp->jnl, bp, NULL, NULL);
3761 } else if (bp) {
3762 buf_brelse(bp);
3763 }
3764 }
3765
3766 /*
3767 * Update the metadata zone size based on current volume size
3768 */
3769 hfs_metadatazone_init(hfsmp);
3770
3771 /*
3772 * Adjust the size of hfsmp->hfs_attrdata_vp
3773 */
3774 if (hfsmp->hfs_attrdata_vp) {
3775 struct cnode *attr_cp;
3776 struct filefork *attr_fp;
3777
3778 if (vnode_get(hfsmp->hfs_attrdata_vp) == 0) {
3779 attr_cp = VTOC(hfsmp->hfs_attrdata_vp);
3780 attr_fp = VTOF(hfsmp->hfs_attrdata_vp);
3781
3782 attr_cp->c_blocks = newblkcnt;
3783 attr_fp->ff_blocks = newblkcnt;
3784 attr_fp->ff_extents[0].blockCount = newblkcnt;
3785 attr_fp->ff_size = (off_t) newblkcnt * hfsmp->blockSize;
3786 ubc_setsize(hfsmp->hfs_attrdata_vp, attr_fp->ff_size);
3787 vnode_put(hfsmp->hfs_attrdata_vp);
3788 }
3789 }
3790
3791 out:
3792 if (error && fp) {
3793 /* Restore allocation fork. */
3794 bcopy(&forkdata, &fp->ff_data, sizeof(forkdata));
3795 VTOC(vp)->c_blocks = fp->ff_blocks;
3796
3797 }
3798 /*
3799 Regardless of whether or not the totalblocks actually increased,
3800 we should reset the allocLimit field. If it changed, it will
3801 get updated; if not, it will remain the same.
3802 */
3803 HFS_MOUNT_LOCK(hfsmp, TRUE);
3804 hfsmp->hfs_flags &= ~HFS_RESIZE_IN_PROGRESS;
3805 hfsmp->allocLimit = vcb->totalBlocks;
3806 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
3807 if (lockflags) {
3808 hfs_systemfile_unlock(hfsmp, lockflags);
3809 }
3810 if (transaction_begun) {
3811 hfs_end_transaction(hfsmp);
3812 }
3813
3814 return (error);
3815 }
3816
/* Smallest size (in bytes) a volume may be shrunk to: 32 MiB. */
#define HFS_MIN_SIZE (32LL * 1024LL * 1024LL)
3818
3819 /*
3820 * Truncate a file system (while still mounted).
3821 */
3822 __private_extern__
3823 int
3824 hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
3825 {
3826 struct buf *bp = NULL;
3827 u_int64_t oldsize;
3828 u_int32_t newblkcnt;
3829 u_int32_t reclaimblks = 0;
3830 int lockflags = 0;
3831 int transaction_begun = 0;
3832 Boolean updateFreeBlocks = false;
3833 int error;
3834
3835 HFS_MOUNT_LOCK(hfsmp, TRUE);
3836 if (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) {
3837 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
3838 return (EALREADY);
3839 }
3840 hfsmp->hfs_flags |= HFS_RESIZE_IN_PROGRESS;
3841 hfsmp->hfs_resize_filesmoved = 0;
3842 hfsmp->hfs_resize_totalfiles = 0;
3843 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
3844
3845 /*
3846 * - Journaled HFS Plus volumes only.
3847 * - No embedded volumes.
3848 */
3849 if ((hfsmp->jnl == NULL) ||
3850 (hfsmp->hfsPlusIOPosOffset != 0)) {
3851 error = EPERM;
3852 goto out;
3853 }
3854 oldsize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
3855 newblkcnt = newsize / hfsmp->blockSize;
3856 reclaimblks = hfsmp->totalBlocks - newblkcnt;
3857
3858 if (hfs_resize_debug) {
3859 printf ("hfs_truncatefs: old: size=%qu, blkcnt=%u, freeblks=%u\n", oldsize, hfsmp->totalBlocks, hfs_freeblks(hfsmp, 1));
3860 printf ("hfs_truncatefs: new: size=%qu, blkcnt=%u, reclaimblks=%u\n", newsize, newblkcnt, reclaimblks);
3861 }
3862
3863 /* Make sure new size is valid. */
3864 if ((newsize < HFS_MIN_SIZE) ||
3865 (newsize >= oldsize) ||
3866 (newsize % hfsmp->hfs_logical_block_size) ||
3867 (newsize % hfsmp->hfs_physical_block_size)) {
3868 printf ("hfs_truncatefs: invalid size (newsize=%qu, oldsize=%qu)\n", newsize, oldsize);
3869 error = EINVAL;
3870 goto out;
3871 }
3872 /* Make sure that the file system has enough free blocks reclaim */
3873 if (reclaimblks >= hfs_freeblks(hfsmp, 1)) {
3874 printf("hfs_truncatefs: insufficient space (need %u blocks; have %u free blocks)\n", reclaimblks, hfs_freeblks(hfsmp, 1));
3875 error = ENOSPC;
3876 goto out;
3877 }
3878
3879 /* Invalidate the current free extent cache */
3880 invalidate_free_extent_cache(hfsmp);
3881
3882 /* Start with a clean journal. */
3883 hfs_journal_flush(hfsmp);
3884
3885 if (hfs_start_transaction(hfsmp) != 0) {
3886 error = EINVAL;
3887 goto out;
3888 }
3889 transaction_begun = 1;
3890
3891 /*
3892 * Prevent new allocations from using the part we're trying to truncate.
3893 *
3894 * NOTE: allocLimit is set to the allocation block number where the new
3895 * alternate volume header will be. That way there will be no files to
3896 * interfere with allocating the new alternate volume header, and no files
3897 * in the allocation blocks beyond (i.e. the blocks we're trying to
3898 * truncate away.
3899 */
3900 HFS_MOUNT_LOCK(hfsmp, TRUE);
3901 if (hfsmp->blockSize == 512)
3902 hfsmp->allocLimit = newblkcnt - 2;
3903 else
3904 hfsmp->allocLimit = newblkcnt - 1;
3905 /*
3906 * Update the volume free block count to reflect the total number
3907 * of free blocks that will exist after a successful resize.
3908 * Relocation of extents will result in no net change in the total
3909 * free space on the disk. Therefore the code that allocates
3910 * space for new extent and deallocates the old extent explicitly
3911 * prevents updating the volume free block count. It will also
3912 * prevent false disk full error when the number of blocks in
3913 * an extent being relocated is more than the free blocks that
3914 * will exist after the volume is resized.
3915 */
3916 hfsmp->freeBlocks -= reclaimblks;
3917 updateFreeBlocks = true;
3918 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
3919
3920 /*
3921 * Update the metadata zone size, and, if required, disable it
3922 */
3923 hfs_metadatazone_init(hfsmp);
3924
3925 /*
3926 * Look for files that have blocks at or beyond the location of the
3927 * new alternate volume header
3928 */
3929 if (hfs_isallocated(hfsmp, hfsmp->allocLimit, reclaimblks)) {
3930 /*
3931 * hfs_reclaimspace will use separate transactions when
3932 * relocating files (so we don't overwhelm the journal).
3933 */
3934 hfs_end_transaction(hfsmp);
3935 transaction_begun = 0;
3936
3937 /* Attempt to reclaim some space. */
3938 error = hfs_reclaimspace(hfsmp, hfsmp->allocLimit, reclaimblks, context);
3939 if (error != 0) {
3940 printf("hfs_truncatefs: couldn't reclaim space on %s (error=%d)\n", hfsmp->vcbVN, error);
3941 error = ENOSPC;
3942 goto out;
3943 }
3944 if (hfs_start_transaction(hfsmp) != 0) {
3945 error = EINVAL;
3946 goto out;
3947 }
3948 transaction_begun = 1;
3949
3950 /* Check if we're clear now. */
3951 error = hfs_isallocated(hfsmp, hfsmp->allocLimit, reclaimblks);
3952 if (error != 0) {
3953 printf("hfs_truncatefs: didn't reclaim enough space on %s (error=%d)\n", hfsmp->vcbVN, error);
3954 error = EAGAIN; /* tell client to try again */
3955 goto out;
3956 }
3957 }
3958
3959 /*
3960 * Note: we take the attributes lock in case we have an attribute data vnode
3961 * which needs to change size.
3962 */
3963 lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
3964
3965 /*
3966 * Mark the old alternate volume header as free.
3967 * We don't bother shrinking allocation bitmap file.
3968 */
3969 if (hfsmp->blockSize == 512)
3970 (void) BlockMarkFree(hfsmp, hfsmp->totalBlocks - 2, 2);
3971 else
3972 (void) BlockMarkFree(hfsmp, hfsmp->totalBlocks - 1, 1);
3973
3974 /*
3975 * Allocate last 1KB for alternate volume header.
3976 */
3977 error = BlockMarkAllocated(hfsmp, hfsmp->allocLimit, (hfsmp->blockSize == 512) ? 2 : 1);
3978 if (error) {
3979 printf("hfs_truncatefs: Error %d allocating new alternate volume header\n", error);
3980 goto out;
3981 }
3982
3983 /*
3984 * Invalidate the existing alternate volume header.
3985 *
3986 * Don't include this in a transaction (don't call journal_modify_block)
3987 * since this block will be outside of the truncated file system!
3988 */
3989 if (hfsmp->hfs_alt_id_sector) {
3990 error = buf_meta_bread(hfsmp->hfs_devvp,
3991 HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys),
3992 hfsmp->hfs_physical_block_size, NOCRED, &bp);
3993 if (error == 0) {
3994 bzero((void*)((char *)buf_dataptr(bp) + HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size)), kMDBSize);
3995 (void) VNOP_BWRITE(bp);
3996 } else {
3997 if (bp) {
3998 buf_brelse(bp);
3999 }
4000 }
4001 bp = NULL;
4002 }
4003
4004 /* Log successful shrinking. */
4005 printf("hfs_truncatefs: shrank \"%s\" to %d blocks (was %d blocks)\n",
4006 hfsmp->vcbVN, newblkcnt, hfsmp->totalBlocks);
4007
4008 /*
4009 * Adjust file system variables and flush them to disk.
4010 */
4011 hfsmp->totalBlocks = newblkcnt;
4012 hfsmp->hfs_logical_block_count = newsize / hfsmp->hfs_logical_block_size;
4013 hfsmp->hfs_alt_id_sector = HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, hfsmp->hfs_logical_block_count);
4014 MarkVCBDirty(hfsmp);
4015 error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
4016 if (error)
4017 panic("hfs_truncatefs: unexpected error flushing volume header (%d)\n", error);
4018
4019 /*
4020 * Adjust the size of hfsmp->hfs_attrdata_vp
4021 */
4022 if (hfsmp->hfs_attrdata_vp) {
4023 struct cnode *cp;
4024 struct filefork *fp;
4025
4026 if (vnode_get(hfsmp->hfs_attrdata_vp) == 0) {
4027 cp = VTOC(hfsmp->hfs_attrdata_vp);
4028 fp = VTOF(hfsmp->hfs_attrdata_vp);
4029
4030 cp->c_blocks = newblkcnt;
4031 fp->ff_blocks = newblkcnt;
4032 fp->ff_extents[0].blockCount = newblkcnt;
4033 fp->ff_size = (off_t) newblkcnt * hfsmp->blockSize;
4034 ubc_setsize(hfsmp->hfs_attrdata_vp, fp->ff_size);
4035 vnode_put(hfsmp->hfs_attrdata_vp);
4036 }
4037 }
4038
4039 out:
4040 lck_mtx_lock(&hfsmp->hfs_mutex);
4041 if (error && (updateFreeBlocks == true))
4042 hfsmp->freeBlocks += reclaimblks;
4043 hfsmp->allocLimit = hfsmp->totalBlocks;
4044 if (hfsmp->nextAllocation >= hfsmp->allocLimit)
4045 hfsmp->nextAllocation = hfsmp->hfs_metazone_end + 1;
4046 hfsmp->hfs_flags &= ~HFS_RESIZE_IN_PROGRESS;
4047 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
4048 /* On error, reset the metadata zone for original volume size */
4049 if (error && (updateFreeBlocks == true)) {
4050 hfs_metadatazone_init(hfsmp);
4051 }
4052
4053 if (lockflags) {
4054 hfs_systemfile_unlock(hfsmp, lockflags);
4055 }
4056 if (transaction_begun) {
4057 hfs_end_transaction(hfsmp);
4058 hfs_journal_flush(hfsmp);
4059 /* Just to be sure, sync all data to the disk */
4060 (void) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
4061 }
4062
4063 return (error);
4064 }
4065
4066
4067 /*
4068 * Invalidate the physical block numbers associated with buffer cache blocks
4069 * in the given extent of the given vnode.
4070 */
/*
 * Argument block for hfs_invalidate_block_numbers_callback: describes the
 * range of device sectors (logical blocks on hfs_devvp) whose cached
 * physical block numbers should be invalidated after an extent move.
 */
struct hfs_inval_blk_no {
	daddr64_t sectorStart;	/* First device sector of the moved extent */
	daddr64_t sectorCount;	/* Number of device sectors in the extent */
};
4075 static int
4076 hfs_invalidate_block_numbers_callback(buf_t bp, void *args_in)
4077 {
4078 daddr64_t blkno;
4079 struct hfs_inval_blk_no *args;
4080
4081 blkno = buf_blkno(bp);
4082 args = args_in;
4083
4084 if (blkno >= args->sectorStart && blkno < args->sectorStart+args->sectorCount)
4085 buf_setblkno(bp, buf_lblkno(bp));
4086
4087 return BUF_RETURNED;
4088 }
4089 static void
4090 hfs_invalidate_sectors(struct vnode *vp, daddr64_t sectorStart, daddr64_t sectorCount)
4091 {
4092 struct hfs_inval_blk_no args;
4093 args.sectorStart = sectorStart;
4094 args.sectorCount = sectorCount;
4095
4096 buf_iterate(vp, hfs_invalidate_block_numbers_callback, BUF_SCAN_DIRTY|BUF_SCAN_CLEAN, &args);
4097 }
4098
4099
4100 /*
4101 * Copy the contents of an extent to a new location. Also invalidates the
4102 * physical block number of any buffer cache block in the copied extent
4103 * (so that if the block is written, it will go through VNOP_BLOCKMAP to
4104 * determine the new physical block number).
4105 */
static int
hfs_copy_extent(
	struct hfsmount *hfsmp,
	struct vnode *vp,		/* The file whose extent is being copied. */
	u_int32_t oldStart,		/* The start of the source extent. */
	u_int32_t newStart,		/* The start of the destination extent. */
	u_int32_t blockCount,		/* The number of allocation blocks to copy. */
	vfs_context_t context)
{
	int err = 0;
	size_t bufferSize;		/* Bytes moved per read/write pass */
	void *buffer = NULL;		/* Bounce buffer for the raw-device I/O */
	struct vfsioattr ioattr;
	buf_t bp = NULL;
	off_t resid;			/* Bytes of the extent still to copy */
	size_t ioSize;
	u_int32_t ioSizeSectors;	/* Device sectors in this I/O */
	daddr64_t srcSector, destSector;
	u_int32_t sectorsPerBlock = hfsmp->blockSize / hfsmp->hfs_logical_block_size;

	/*
	 * Sanity check that we have locked the vnode of the file we're copying.
	 *
	 * But since hfs_systemfile_lock() doesn't actually take the lock on
	 * the allocation file if a journal is active, ignore the check if the
	 * file being copied is the allocation file.
	 */
	struct cnode *cp = VTOC(vp);
	if (cp != hfsmp->hfs_allocation_cp && cp->c_lockowner != current_thread())
		panic("hfs_copy_extent: vp=%p (cp=%p) not owned?\n", vp, cp);

	/*
	 * Determine the I/O size to use
	 *
	 * NOTE: Many external drives will result in an ioSize of 128KB.
	 * TODO: Should we use a larger buffer, doing several consecutive
	 * reads, then several consecutive writes?
	 */
	vfs_ioattr(hfsmp->hfs_mp, &ioattr);
	bufferSize = MIN(ioattr.io_maxreadcnt, ioattr.io_maxwritecnt);
	if (kmem_alloc(kernel_map, (vm_offset_t*) &buffer, bufferSize))
		return ENOMEM;

	/* Get a buffer for doing the I/O */
	bp = buf_alloc(hfsmp->hfs_devvp);
	buf_setdataptr(bp, (uintptr_t)buffer);

	/* Convert allocation-block quantities to bytes and device sectors. */
	resid = (off_t) blockCount * (off_t) hfsmp->blockSize;
	srcSector = (daddr64_t) oldStart * hfsmp->blockSize / hfsmp->hfs_logical_block_size;
	destSector = (daddr64_t) newStart * hfsmp->blockSize / hfsmp->hfs_logical_block_size;
	/* Copy the extent one bufferSize-sized chunk at a time: read from the
	 * old location, then write the same bytes to the new location. */
	while (resid > 0) {
		ioSize = MIN(bufferSize, (size_t) resid);
		ioSizeSectors = ioSize / hfsmp->hfs_logical_block_size;

		/* Prepare the buffer for reading */
		buf_reset(bp, B_READ);
		buf_setsize(bp, ioSize);
		buf_setcount(bp, ioSize);
		buf_setblkno(bp, srcSector);
		buf_setlblkno(bp, srcSector);

		/* Do the read */
		err = VNOP_STRATEGY(bp);
		if (!err)
			err = buf_biowait(bp);
		if (err) {
			printf("hfs_copy_extent: Error %d from VNOP_STRATEGY (read)\n", err);
			break;
		}

		/* Prepare the buffer for writing */
		buf_reset(bp, B_WRITE);
		buf_setsize(bp, ioSize);
		buf_setcount(bp, ioSize);
		buf_setblkno(bp, destSector);
		buf_setlblkno(bp, destSector);
		/* System-file data must be stable before the journal considers it
		 * committed; use force-unit-access when the journal does. */
		if (vnode_issystem(vp) && journal_uses_fua(hfsmp->jnl))
			buf_markfua(bp);

		/* Do the write */
		vnode_startwrite(hfsmp->hfs_devvp);
		err = VNOP_STRATEGY(bp);
		if (!err)
			err = buf_biowait(bp);
		if (err) {
			printf("hfs_copy_extent: Error %d from VNOP_STRATEGY (write)\n", err);
			break;
		}

		resid -= ioSize;
		srcSector += ioSizeSectors;
		destSector += ioSizeSectors;
	}
	if (bp)
		buf_free(bp);
	if (buffer)
		kmem_free(kernel_map, (vm_offset_t)buffer, bufferSize);

	/* Make sure all writes have been flushed to disk. */
	if (vnode_issystem(vp) && !journal_uses_fua(hfsmp->jnl)) {
		err = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
		if (err) {
			printf("hfs_copy_extent: DKIOCSYNCHRONIZECACHE failed (%d)\n", err);
			err = 0;	/* Don't fail the copy. */
		}
	}

	/* On success, invalidate cached physical block numbers over the OLD
	 * extent so subsequent writes re-map to the new location. */
	if (!err)
		hfs_invalidate_sectors(vp, (daddr64_t)oldStart*sectorsPerBlock, (daddr64_t)blockCount*sectorsPerBlock);

	return err;
}
4218
4219
4220 static int
4221 hfs_relocate_callback(__unused HFSPlusExtentKey *key, HFSPlusExtentRecord *record, HFSPlusExtentRecord *state)
4222 {
4223 bcopy(state, record, sizeof(HFSPlusExtentRecord));
4224 return 0;
4225 }
4226
4227 /*
4228 * Reclaim space at the end of a volume, used by a given file.
4229 *
4230 * This routine attempts to move any extent which contains allocation blocks
4231 * at or after "startblk." A separate transaction is used to do the move.
4232 * The contents of any moved extents are read and written via the volume's
4233 * device vnode -- NOT via "vp." During the move, moved blocks which are part
4234 * of a transaction have their physical block numbers invalidated so they will
4235 * eventually be written to their new locations.
4236 *
4237 * Inputs:
4238 * hfsmp The volume being resized.
4239 * startblk Blocks >= this allocation block need to be moved.
4240 * locks Which locks need to be taken for the given system file.
4241 * vp The vnode for the system file.
4242 *
4243 * The caller of this function, hfs_reclaimspace(), grabs cnode lock
4244 * for non-system files before calling this function.
4245 *
4246 * Outputs:
4247 * blks_moved Total number of allocation blocks moved by this routine.
4248 */
4249 static int
4250 hfs_reclaim_file(struct hfsmount *hfsmp, struct vnode *vp, u_long startblk, int locks, u_int32_t *blks_moved, vfs_context_t context)
4251 {
4252 int error;
4253 int lockflags;
4254 int i;
4255 u_long datablks;
4256 u_long end_block;
4257 u_int32_t oldStartBlock;
4258 u_int32_t newStartBlock;
4259 u_int32_t oldBlockCount;
4260 u_int32_t newBlockCount;
4261 struct filefork *fp;
4262 struct cnode *cp;
4263 int is_sysfile;
4264 int took_truncate_lock = 0;
4265 struct BTreeIterator *iterator = NULL;
4266 u_int8_t forktype;
4267 u_int32_t fileID;
4268 u_int32_t alloc_flags;
4269
4270 /* If there is no vnode for this file, then there's nothing to do. */
4271 if (vp == NULL)
4272 return 0;
4273
4274 cp = VTOC(vp);
4275 fileID = cp->c_cnid;
4276 is_sysfile = vnode_issystem(vp);
4277 forktype = VNODE_IS_RSRC(vp) ? 0xFF : 0;
4278
4279 /* Flush all the buffer cache blocks and cluster pages associated with
4280 * this vnode.
4281 *
4282 * If the current vnode is a system vnode, all the buffer cache blocks
4283 * associated with it should already be sync'ed to the disk as part of
4284 * journal flush in hfs_truncatefs(). Normally there should not be
4285 * buffer cache blocks for regular files, but for objects like symlinks,
4286 * we can have buffer cache blocks associated with the vnode. Therefore
4287 * we call buf_flushdirtyblks() always. Resource fork data for directory
4288 * hard links are directly written using buffer cache for device vnode,
4289 * which should also be sync'ed as part of journal flush in hfs_truncatefs().
4290 *
4291 * Flushing cluster pages should be the normal case for regular files,
4292 * and really should not do anything for system files. But just to be
4293 * sure that all blocks associated with this vnode is sync'ed to the
4294 * disk, we call both buffer cache and cluster layer functions.
4295 */
4296 buf_flushdirtyblks(vp, MNT_NOWAIT, 0, "hfs_reclaim_file");
4297
4298 if (!is_sysfile) {
4299 /* The caller grabs cnode lock for non-system files only, therefore
4300 * we unlock only non-system files before calling cluster layer.
4301 */
4302 hfs_unlock(cp);
4303 hfs_lock_truncate(cp, TRUE);
4304 took_truncate_lock = 1;
4305 }
4306 (void) cluster_push(vp, 0);
4307 if (!is_sysfile) {
4308 error = hfs_lock(cp, HFS_FORCE_LOCK);
4309 if (error) {
4310 hfs_unlock_truncate(cp, TRUE);
4311 return error;
4312 }
4313
4314 /* If the file no longer exists, nothing left to do */
4315 if (cp->c_flag & C_NOEXISTS) {
4316 hfs_unlock_truncate(cp, TRUE);
4317 return 0;
4318 }
4319 }
4320
4321 /* Wait for any in-progress writes to this vnode to complete, so that we'll
4322 * be copying consistent bits. (Otherwise, it's possible that an async
4323 * write will complete to the old extent after we read from it. That
4324 * could lead to corruption.)
4325 */
4326 error = vnode_waitforwrites(vp, 0, 0, 0, "hfs_reclaim_file");
4327 if (error) {
4328 printf("hfs_reclaim_file: Error %d from vnode_waitforwrites\n", error);
4329 return error;
4330 }
4331
4332 if (hfs_resize_debug) {
4333 printf("hfs_reclaim_file: Start relocating %sfork for fileid=%u name=%.*s\n", (forktype ? "rsrc" : "data"), fileID, cp->c_desc.cd_namelen, cp->c_desc.cd_nameptr);
4334 }
4335
4336 /* We always need the allocation bitmap and extents B-tree */
4337 locks |= SFL_BITMAP | SFL_EXTENTS;
4338
4339 error = hfs_start_transaction(hfsmp);
4340 if (error) {
4341 printf("hfs_reclaim_file: hfs_start_transaction returned %d\n", error);
4342 if (took_truncate_lock) {
4343 hfs_unlock_truncate(cp, TRUE);
4344 }
4345 return error;
4346 }
4347 lockflags = hfs_systemfile_lock(hfsmp, locks, HFS_EXCLUSIVE_LOCK);
4348 fp = VTOF(vp);
4349 datablks = 0;
4350 *blks_moved = 0;
4351
4352 /* Relocate non-overflow extents */
4353 for (i = 0; i < kHFSPlusExtentDensity; ++i) {
4354 if (fp->ff_extents[i].blockCount == 0)
4355 break;
4356 oldStartBlock = fp->ff_extents[i].startBlock;
4357 oldBlockCount = fp->ff_extents[i].blockCount;
4358 datablks += oldBlockCount;
4359 end_block = oldStartBlock + oldBlockCount;
4360 /* Check if the file overlaps the target space */
4361 if (end_block > startblk) {
4362 alloc_flags = HFS_ALLOC_FORCECONTIG | HFS_ALLOC_SKIPFREEBLKS;
4363 if (is_sysfile) {
4364 alloc_flags |= HFS_ALLOC_METAZONE;
4365 }
4366 error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount, alloc_flags, &newStartBlock, &newBlockCount);
4367 if (error) {
4368 if (!is_sysfile && ((error == dskFulErr) || (error == ENOSPC))) {
4369 /* Try allocating again using the metadata zone */
4370 alloc_flags |= HFS_ALLOC_METAZONE;
4371 error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount, alloc_flags, &newStartBlock, &newBlockCount);
4372 }
4373 if (error) {
4374 printf("hfs_reclaim_file: BlockAllocate(metazone) (error=%d) for fileID=%u %u:(%u,%u)\n", error, fileID, i, oldStartBlock, oldBlockCount);
4375 goto fail;
4376 } else {
4377 if (hfs_resize_debug) {
4378 printf("hfs_reclaim_file: BlockAllocate(metazone) success for fileID=%u %u:(%u,%u)\n", fileID, i, newStartBlock, newBlockCount);
4379 }
4380 }
4381 }
4382
4383 /* Copy data from old location to new location */
4384 error = hfs_copy_extent(hfsmp, vp, oldStartBlock, newStartBlock, newBlockCount, context);
4385 if (error) {
4386 printf("hfs_reclaim_file: hfs_copy_extent error=%d for fileID=%u %u:(%u,%u) to %u:(%u,%u)\n", error, fileID, i, oldStartBlock, oldBlockCount, i, newStartBlock, newBlockCount);
4387 if (BlockDeallocate(hfsmp, newStartBlock, newBlockCount, HFS_ALLOC_SKIPFREEBLKS)) {
4388 hfs_mark_volume_inconsistent(hfsmp);
4389 }
4390 goto fail;
4391 }
4392 fp->ff_extents[i].startBlock = newStartBlock;
4393 cp->c_flag |= C_MODIFIED;
4394 *blks_moved += newBlockCount;
4395
4396 /* Deallocate the old extent */
4397 error = BlockDeallocate(hfsmp, oldStartBlock, oldBlockCount, HFS_ALLOC_SKIPFREEBLKS);
4398 if (error) {
4399 printf("hfs_reclaim_file: BlockDeallocate returned %d\n", error);
4400 hfs_mark_volume_inconsistent(hfsmp);
4401 goto fail;
4402 }
4403
4404 /* If this is a system file, sync the volume header on disk */
4405 if (is_sysfile) {
4406 error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
4407 if (error) {
4408 printf("hfs_reclaim_file: hfs_flushvolumeheader returned %d\n", error);
4409 hfs_mark_volume_inconsistent(hfsmp);
4410 goto fail;
4411 }
4412 }
4413
4414 if (hfs_resize_debug) {
4415 printf ("hfs_reclaim_file: Relocated %u:(%u,%u) to %u:(%u,%u)\n", i, oldStartBlock, oldBlockCount, i, newStartBlock, newBlockCount);
4416 }
4417 }
4418 }
4419
4420 /* Relocate overflow extents (if any) */
4421 if (i == kHFSPlusExtentDensity && fp->ff_blocks > datablks) {
4422 struct FSBufferDescriptor btdata;
4423 HFSPlusExtentRecord record;
4424 HFSPlusExtentKey *key;
4425 FCB *fcb;
4426 int overflow_count = 0;
4427
4428 if (kmem_alloc(kernel_map, (vm_offset_t*) &iterator, sizeof(*iterator))) {
4429 printf("hfs_reclaim_file: kmem_alloc failed!\n");
4430 error = ENOMEM;
4431 goto fail;
4432 }
4433
4434 bzero(iterator, sizeof(*iterator));
4435 key = (HFSPlusExtentKey *) &iterator->key;
4436 key->keyLength = kHFSPlusExtentKeyMaximumLength;
4437 key->forkType = forktype;
4438 key->fileID = fileID;
4439 key->startBlock = datablks;
4440
4441 btdata.bufferAddress = &record;
4442 btdata.itemSize = sizeof(record);
4443 btdata.itemCount = 1;
4444
4445 fcb = VTOF(hfsmp->hfs_extents_vp);
4446
4447 error = BTSearchRecord(fcb, iterator, &btdata, NULL, iterator);
4448 while (error == 0) {
4449 /* Stop when we encounter a different file or fork. */
4450 if ((key->fileID != fileID) ||
4451 (key->forkType != forktype)) {
4452 break;
4453 }
4454
4455 /* Just track the overflow extent record number for debugging... */
4456 if (hfs_resize_debug) {
4457 overflow_count++;
4458 }
4459
4460 /*
4461 * Check if the file overlaps target space.
4462 */
4463 for (i = 0; i < kHFSPlusExtentDensity; ++i) {
4464 if (record[i].blockCount == 0) {
4465 goto fail;
4466 }
4467 oldStartBlock = record[i].startBlock;
4468 oldBlockCount = record[i].blockCount;
4469 end_block = oldStartBlock + oldBlockCount;
4470 if (end_block > startblk) {
4471 alloc_flags = HFS_ALLOC_FORCECONTIG | HFS_ALLOC_SKIPFREEBLKS;
4472 if (is_sysfile) {
4473 alloc_flags |= HFS_ALLOC_METAZONE;
4474 }
4475 error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount, alloc_flags, &newStartBlock, &newBlockCount);
4476 if (error) {
4477 if (!is_sysfile && ((error == dskFulErr) || (error == ENOSPC))) {
4478 /* Try allocating again using the metadata zone */
4479 alloc_flags |= HFS_ALLOC_METAZONE;
4480 error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount, alloc_flags, &newStartBlock, &newBlockCount);
4481 }
4482 if (error) {
4483 printf("hfs_reclaim_file: BlockAllocate(metazone) (error=%d) for fileID=%u %u:(%u,%u)\n", error, fileID, i, oldStartBlock, oldBlockCount);
4484 goto fail;
4485 } else {
4486 if (hfs_resize_debug) {
4487 printf("hfs_reclaim_file: BlockAllocate(metazone) success for fileID=%u %u:(%u,%u)\n", fileID, i, newStartBlock, newBlockCount);
4488 }
4489 }
4490 }
4491 error = hfs_copy_extent(hfsmp, vp, oldStartBlock, newStartBlock, newBlockCount, context);
4492 if (error) {
4493 printf("hfs_reclaim_file: hfs_copy_extent error=%d for fileID=%u (%u,%u) to (%u,%u)\n", error, fileID, oldStartBlock, oldBlockCount, newStartBlock, newBlockCount);
4494 if (BlockDeallocate(hfsmp, newStartBlock, newBlockCount, HFS_ALLOC_SKIPFREEBLKS)) {
4495 hfs_mark_volume_inconsistent(hfsmp);
4496 }
4497 goto fail;
4498 }
4499 record[i].startBlock = newStartBlock;
4500 cp->c_flag |= C_MODIFIED;
4501 *blks_moved += newBlockCount;
4502
4503 /*
4504 * NOTE: To support relocating overflow extents of the
4505 * allocation file, we must update the BTree record BEFORE
4506 * deallocating the old extent so that BlockDeallocate will
4507 * use the extent's new location to calculate physical block
4508 * numbers. (This is for the case where the old extent's
4509 * bitmap bits actually reside in the extent being moved.)
4510 */
4511 error = BTUpdateRecord(fcb, iterator, (IterateCallBackProcPtr) hfs_relocate_callback, &record);
4512 if (error) {
4513 printf("hfs_reclaim_file: BTUpdateRecord returned %d\n", error);
4514 hfs_mark_volume_inconsistent(hfsmp);
4515 goto fail;
4516 }
4517 error = BlockDeallocate(hfsmp, oldStartBlock, oldBlockCount, HFS_ALLOC_SKIPFREEBLKS);
4518 if (error) {
4519 printf("hfs_reclaim_file: BlockDeallocate returned %d\n", error);
4520 hfs_mark_volume_inconsistent(hfsmp);
4521 goto fail;
4522 }
4523 if (hfs_resize_debug) {
4524 printf ("hfs_reclaim_file: Relocated overflow#%d %u:(%u,%u) to %u:(%u,%u)\n", overflow_count, i, oldStartBlock, oldBlockCount, i, newStartBlock, newBlockCount);
4525 }
4526 }
4527 }
4528 /* Look for more records. */
4529 error = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL);
4530 if (error == btNotFound) {
4531 error = 0;
4532 break;
4533 }
4534 }
4535 }
4536
4537 fail:
4538 if (iterator) {
4539 kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator));
4540 }
4541
4542 (void) hfs_systemfile_unlock(hfsmp, lockflags);
4543
4544 if ((*blks_moved != 0) && (is_sysfile == false)) {
4545 (void) hfs_update(vp, MNT_WAIT);
4546 }
4547
4548 (void) hfs_end_transaction(hfsmp);
4549
4550 if (took_truncate_lock) {
4551 hfs_unlock_truncate(cp, TRUE);
4552 }
4553
4554 if (hfs_resize_debug) {
4555 printf("hfs_reclaim_file: Finished relocating %sfork for fileid=%u (error=%d)\n", (forktype ? "rsrc" : "data"), fileID, error);
4556 }
4557
4558 return error;
4559 }
4560
4561
4562 /*
4563 * This journal_relocate callback updates the journal info block to point
4564 * at the new journal location. This write must NOT be done using the
4565 * transaction. We must write the block immediately. We must also force
4566 * it to get to the media so that the new journal location will be seen by
4567 * the replay code before we can safely let journaled blocks be written
4568 * to their normal locations.
4569 *
4570 * The tests for journal_uses_fua below are mildly hacky. Since the journal
4571 * and the file system are both on the same device, I'm leveraging what
4572 * the journal has decided about FUA.
4573 */
/* Arguments handed through journal_relocate() to
 * hfs_journal_relocate_callback(). */
struct hfs_journal_relocate_args {
	struct hfsmount *hfsmp;		/* Volume whose journal is being moved */
	vfs_context_t context;		/* Caller's VFS context (cred for I/O) */
	u_int32_t newStartBlock;	/* New journal start, in allocation blocks */
};
4579
/*
 * Rewrite the journal info block (JIB) so it points at the journal's new
 * location, and force it to the media before returning.  Runs as the
 * journal_relocate() callback; see the comment block above for why this
 * write must bypass the transaction.  Log messages are prefixed with the
 * caller's name, hfs_reclaim_journal_file.
 */
static errno_t
hfs_journal_relocate_callback(void *_args)
{
	int error;
	struct hfs_journal_relocate_args *args = _args;
	struct hfsmount *hfsmp = args->hfsmp;
	buf_t bp;
	JournalInfoBlock *jibp;

	/* Read the current JIB from its (unchanged) location. */
	error = buf_meta_bread(hfsmp->hfs_devvp,
		hfsmp->vcbJinfoBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size),
		hfsmp->blockSize, vfs_context_ucred(args->context), &bp);
	if (error) {
		printf("hfs_reclaim_journal_file: failed to read JIB (%d)\n", error);
		return error;
	}
	/* Point it at the new journal extent; on-disk fields are big-endian. */
	jibp = (JournalInfoBlock*) buf_dataptr(bp);
	jibp->offset = SWAP_BE64((u_int64_t)args->newStartBlock * hfsmp->blockSize);
	jibp->size = SWAP_BE64(hfsmp->jnl_size);
	/* Write synchronously with FUA if the journal uses it; otherwise a
	 * cache flush below makes the update stable before replay can run. */
	if (journal_uses_fua(hfsmp->jnl))
		buf_markfua(bp);
	error = buf_bwrite(bp);
	if (error) {
		printf("hfs_reclaim_journal_file: failed to write JIB (%d)\n", error);
		return error;
	}
	if (!journal_uses_fua(hfsmp->jnl)) {
		error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, args->context);
		if (error) {
			printf("hfs_reclaim_journal_file: DKIOCSYNCHRONIZECACHE failed (%d)\n", error);
			error = 0;		/* Don't fail the operation. */
		}
	}

	return error;
}
4616
4617
4618 static int
4619 hfs_reclaim_journal_file(struct hfsmount *hfsmp, vfs_context_t context)
4620 {
4621 int error;
4622 int lockflags;
4623 u_int32_t oldStartBlock;
4624 u_int32_t newStartBlock;
4625 u_int32_t oldBlockCount;
4626 u_int32_t newBlockCount;
4627 struct cat_desc journal_desc;
4628 struct cat_attr journal_attr;
4629 struct cat_fork journal_fork;
4630 struct hfs_journal_relocate_args callback_args;
4631
4632 error = hfs_start_transaction(hfsmp);
4633 if (error) {
4634 printf("hfs_reclaim_journal_file: hfs_start_transaction returned %d\n", error);
4635 return error;
4636 }
4637 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
4638
4639 oldBlockCount = hfsmp->jnl_size / hfsmp->blockSize;
4640
4641 /* TODO: Allow the journal to change size based on the new volume size. */
4642 error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount,
4643 HFS_ALLOC_METAZONE | HFS_ALLOC_FORCECONTIG | HFS_ALLOC_SKIPFREEBLKS,
4644 &newStartBlock, &newBlockCount);
4645 if (error) {
4646 printf("hfs_reclaim_journal_file: BlockAllocate returned %d\n", error);
4647 goto fail;
4648 }
4649 if (newBlockCount != oldBlockCount) {
4650 printf("hfs_reclaim_journal_file: newBlockCount != oldBlockCount (%u, %u)\n", newBlockCount, oldBlockCount);
4651 goto free_fail;
4652 }
4653
4654 error = BlockDeallocate(hfsmp, hfsmp->jnl_start, oldBlockCount, HFS_ALLOC_SKIPFREEBLKS);
4655 if (error) {
4656 printf("hfs_reclaim_journal_file: BlockDeallocate returned %d\n", error);
4657 goto free_fail;
4658 }
4659
4660 /* Update the catalog record for .journal */
4661 error = cat_idlookup(hfsmp, hfsmp->hfs_jnlfileid, 1, &journal_desc, &journal_attr, &journal_fork);
4662 if (error) {
4663 printf("hfs_reclaim_journal_file: cat_idlookup returned %d\n", error);
4664 goto free_fail;
4665 }
4666 oldStartBlock = journal_fork.cf_extents[0].startBlock;
4667 journal_fork.cf_size = newBlockCount * hfsmp->blockSize;
4668 journal_fork.cf_extents[0].startBlock = newStartBlock;
4669 journal_fork.cf_extents[0].blockCount = newBlockCount;
4670 journal_fork.cf_blocks = newBlockCount;
4671 error = cat_update(hfsmp, &journal_desc, &journal_attr, &journal_fork, NULL);
4672 cat_releasedesc(&journal_desc); /* all done with cat descriptor */
4673 if (error) {
4674 printf("hfs_reclaim_journal_file: cat_update returned %d\n", error);
4675 goto free_fail;
4676 }
4677 callback_args.hfsmp = hfsmp;
4678 callback_args.context = context;
4679 callback_args.newStartBlock = newStartBlock;
4680
4681 error = journal_relocate(hfsmp->jnl, (off_t)newStartBlock*hfsmp->blockSize,
4682 (off_t)newBlockCount*hfsmp->blockSize, 0,
4683 hfs_journal_relocate_callback, &callback_args);
4684 if (error) {
4685 /* NOTE: journal_relocate will mark the journal invalid. */
4686 printf("hfs_reclaim_journal_file: journal_relocate returned %d\n", error);
4687 goto fail;
4688 }
4689 hfsmp->jnl_start = newStartBlock;
4690 hfsmp->jnl_size = (off_t)newBlockCount * hfsmp->blockSize;
4691
4692 hfs_systemfile_unlock(hfsmp, lockflags);
4693 error = hfs_end_transaction(hfsmp);
4694 if (error) {
4695 printf("hfs_reclaim_journal_file: hfs_end_transaction returned %d\n", error);
4696 }
4697
4698 if (!error && hfs_resize_debug) {
4699 printf ("hfs_reclaim_journal_file: Successfully relocated journal from (%u,%u) to (%u,%u)\n", oldStartBlock, oldBlockCount, newStartBlock, newBlockCount);
4700 }
4701 return error;
4702
4703 free_fail:
4704 (void) BlockDeallocate(hfsmp, newStartBlock, newBlockCount, HFS_ALLOC_SKIPFREEBLKS);
4705 fail:
4706 hfs_systemfile_unlock(hfsmp, lockflags);
4707 (void) hfs_end_transaction(hfsmp);
4708 if (hfs_resize_debug) {
4709 printf ("hfs_reclaim_journal_file: Error relocating journal file (error=%d)\n", error);
4710 }
4711 return error;
4712 }
4713
4714
4715 /*
4716 * Move the journal info block to a new location. We have to make sure the
4717 * new copy of the journal info block gets to the media first, then change
4718 * the field in the volume header and the catalog record.
4719 */
4720 static int
4721 hfs_reclaim_journal_info_block(struct hfsmount *hfsmp, vfs_context_t context)
4722 {
4723 int error;
4724 int lockflags;
4725 u_int32_t oldBlock;
4726 u_int32_t newBlock;
4727 u_int32_t blockCount;
4728 struct cat_desc jib_desc;
4729 struct cat_attr jib_attr;
4730 struct cat_fork jib_fork;
4731 buf_t old_bp, new_bp;
4732
4733 error = hfs_start_transaction(hfsmp);
4734 if (error) {
4735 printf("hfs_reclaim_journal_info_block: hfs_start_transaction returned %d\n", error);
4736 return error;
4737 }
4738 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
4739
4740 error = BlockAllocate(hfsmp, 1, 1, 1,
4741 HFS_ALLOC_METAZONE | HFS_ALLOC_FORCECONTIG | HFS_ALLOC_SKIPFREEBLKS,
4742 &newBlock, &blockCount);
4743 if (error) {
4744 printf("hfs_reclaim_journal_info_block: BlockAllocate returned %d\n", error);
4745 goto fail;
4746 }
4747 if (blockCount != 1) {
4748 printf("hfs_reclaim_journal_info_block: blockCount != 1 (%u)\n", blockCount);
4749 goto free_fail;
4750 }
4751 error = BlockDeallocate(hfsmp, hfsmp->vcbJinfoBlock, 1, HFS_ALLOC_SKIPFREEBLKS);
4752 if (error) {
4753 printf("hfs_reclaim_journal_info_block: BlockDeallocate returned %d\n", error);
4754 goto free_fail;
4755 }
4756
4757 /* Copy the old journal info block content to the new location */
4758 error = buf_meta_bread(hfsmp->hfs_devvp,
4759 hfsmp->vcbJinfoBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size),
4760 hfsmp->blockSize, vfs_context_ucred(context), &old_bp);
4761 if (error) {
4762 printf("hfs_reclaim_journal_info_block: failed to read JIB (%d)\n", error);
4763 goto free_fail;
4764 }
4765 new_bp = buf_getblk(hfsmp->hfs_devvp,
4766 newBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size),
4767 hfsmp->blockSize, 0, 0, BLK_META);
4768 bcopy((char*)buf_dataptr(old_bp), (char*)buf_dataptr(new_bp), hfsmp->blockSize);
4769 buf_brelse(old_bp);
4770 if (journal_uses_fua(hfsmp->jnl))
4771 buf_markfua(new_bp);
4772 error = buf_bwrite(new_bp);
4773 if (error) {
4774 printf("hfs_reclaim_journal_info_block: failed to write new JIB (%d)\n", error);
4775 goto free_fail;
4776 }
4777 if (!journal_uses_fua(hfsmp->jnl)) {
4778 error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
4779 if (error) {
4780 printf("hfs_reclaim_journal_info_block: DKIOCSYNCHRONIZECACHE failed (%d)\n", error);
4781 /* Don't fail the operation. */
4782 }
4783 }
4784
4785 /* Update the catalog record for .journal_info_block */
4786 error = cat_idlookup(hfsmp, hfsmp->hfs_jnlinfoblkid, 1, &jib_desc, &jib_attr, &jib_fork);
4787 if (error) {
4788 printf("hfs_reclaim_journal_file: cat_idlookup returned %d\n", error);
4789 goto fail;
4790 }
4791 oldBlock = jib_fork.cf_extents[0].startBlock;
4792 jib_fork.cf_size = hfsmp->blockSize;
4793 jib_fork.cf_extents[0].startBlock = newBlock;
4794 jib_fork.cf_extents[0].blockCount = 1;
4795 jib_fork.cf_blocks = 1;
4796 error = cat_update(hfsmp, &jib_desc, &jib_attr, &jib_fork, NULL);
4797 cat_releasedesc(&jib_desc); /* all done with cat descriptor */
4798 if (error) {
4799 printf("hfs_reclaim_journal_info_block: cat_update returned %d\n", error);
4800 goto fail;
4801 }
4802
4803 /* Update the pointer to the journal info block in the volume header. */
4804 hfsmp->vcbJinfoBlock = newBlock;
4805 error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
4806 if (error) {
4807 printf("hfs_reclaim_journal_info_block: hfs_flushvolumeheader returned %d\n", error);
4808 goto fail;
4809 }
4810 hfs_systemfile_unlock(hfsmp, lockflags);
4811 error = hfs_end_transaction(hfsmp);
4812 if (error) {
4813 printf("hfs_reclaim_journal_info_block: hfs_end_transaction returned %d\n", error);
4814 }
4815 error = hfs_journal_flush(hfsmp);
4816 if (error) {
4817 printf("hfs_reclaim_journal_info_block: journal_flush returned %d\n", error);
4818 }
4819
4820 if (!error && hfs_resize_debug) {
4821 printf ("hfs_reclaim_journal_info_block: Successfully relocated journal info block from (%u,%u) to (%u,%u)\n", oldBlock, blockCount, newBlock, blockCount);
4822 }
4823 return error;
4824
4825 free_fail:
4826 (void) BlockDeallocate(hfsmp, newBlock, blockCount, HFS_ALLOC_SKIPFREEBLKS);
4827 fail:
4828 hfs_systemfile_unlock(hfsmp, lockflags);
4829 (void) hfs_end_transaction(hfsmp);
4830 if (hfs_resize_debug) {
4831 printf ("hfs_reclaim_journal_info_block: Error relocating journal info block (error=%d)\n", error);
4832 }
4833 return error;
4834 }
4835
4836
/*
 * Reclaim space at the end of a file system.
 *
 * Relocates every allocation that lies at or beyond startblk so the volume
 * can later be truncated to startblk blocks. Order matters: system files
 * (bitmap, extents, catalog, attributes, startup) are moved first, then the
 * journal and journal info block, and finally any user files found by a full
 * catalog scan. Caller is responsible for overall resize serialization
 * (HFS_RESIZE_IN_PROGRESS); progress is exported via
 * hfs_resize_filesmoved / hfs_resize_totalfiles for hfs_resize_progress().
 *
 * Inputs -
 *	startblk	- start block of the space being reclaimed
 *	reclaimblks	- number of allocation blocks to reclaim
 *
 * Returns 0 on success; ENOSPC if nothing could be moved, ENOMEM on
 * allocation failure, EPERM if the active journal overlaps the region,
 * EINVAL for a directory hard link, or an error from the helpers.
 */
static int
hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t startblk, u_int32_t reclaimblks, vfs_context_t context)
{
	struct vnode *vp = NULL;
	FCB *fcb;
	struct BTreeIterator * iterator = NULL;
	struct FSBufferDescriptor btdata;
	struct HFSPlusCatalogFile filerec;
	u_int32_t saved_next_allocation;
	cnid_t * cnidbufp;		/* array of file IDs that need relocation */
	size_t cnidbufsize;
	int filecnt = 0;		/* number of entries in cnidbufp */
	int maxfilecnt;
	u_int32_t block;
	int lockflags;
	int i, j;
	int error;
	int lastprogress = 0;
	u_int32_t blks_moved = 0;	/* per-call result from hfs_reclaim_file */
	u_int32_t total_blks_moved = 0;
	Boolean need_relocate;

	/* Relocate extents of the Allocation file if they're in the way. */
	error = hfs_reclaim_file(hfsmp, hfsmp->hfs_allocation_vp, startblk, SFL_BITMAP, &blks_moved, context);
	if (error) {
		printf("hfs_reclaimspace: reclaim allocation file returned %d\n", error);
		return error;
	}
	total_blks_moved += blks_moved;

	/* Relocate extents of the Extents B-tree if they're in the way. */
	error = hfs_reclaim_file(hfsmp, hfsmp->hfs_extents_vp, startblk, SFL_EXTENTS, &blks_moved, context);
	if (error) {
		printf("hfs_reclaimspace: reclaim extents b-tree returned %d\n", error);
		return error;
	}
	total_blks_moved += blks_moved;

	/* Relocate extents of the Catalog B-tree if they're in the way. */
	error = hfs_reclaim_file(hfsmp, hfsmp->hfs_catalog_vp, startblk, SFL_CATALOG, &blks_moved, context);
	if (error) {
		printf("hfs_reclaimspace: reclaim catalog b-tree returned %d\n", error);
		return error;
	}
	total_blks_moved += blks_moved;

	/* Relocate extents of the Attributes B-tree if they're in the way. */
	error = hfs_reclaim_file(hfsmp, hfsmp->hfs_attribute_vp, startblk, SFL_ATTRIBUTE, &blks_moved, context);
	if (error) {
		printf("hfs_reclaimspace: reclaim attribute b-tree returned %d\n", error);
		return error;
	}
	total_blks_moved += blks_moved;

	/* Relocate extents of the Startup File if there is one and they're in the way. */
	error = hfs_reclaim_file(hfsmp, hfsmp->hfs_startup_vp, startblk, SFL_STARTUP, &blks_moved, context);
	if (error) {
		printf("hfs_reclaimspace: reclaim startup file returned %d\n", error);
		return error;
	}
	total_blks_moved += blks_moved;

	/*
	 * We need to make sure the alternate volume header gets flushed if we moved
	 * any extents in the volume header.  But we need to do that before
	 * shrinking the size of the volume, or else the journal code will panic
	 * with an invalid (too large) block number.
	 *
	 * Note that total_blks_moved will be set if ANY extent was moved, even
	 * if it was just an overflow extent.  In this case, the journal_flush isn't
	 * strictly required, but shouldn't hurt.
	 */
	if (total_blks_moved) {
		hfs_journal_flush(hfsmp);
	}

	/* Relocate the journal itself if any part of it lies in the reclaimed area. */
	if (hfsmp->jnl_start + (hfsmp->jnl_size / hfsmp->blockSize) > startblk) {
		error = hfs_reclaim_journal_file(hfsmp, context);
		if (error) {
			printf("hfs_reclaimspace: hfs_reclaim_journal_file failed (%d)\n", error);
			return error;
		}
	}

	/* Likewise for the journal info block (a single allocation block). */
	if (hfsmp->vcbJinfoBlock >= startblk) {
		error = hfs_reclaim_journal_info_block(hfsmp, context);
		if (error) {
			printf("hfs_reclaimspace: hfs_reclaim_journal_info_block failed (%d)\n", error);
			return error;
		}
	}

	/* For now move a maximum of 250,000 files. */
	maxfilecnt = MIN(hfsmp->hfs_filecount, 250000);
	maxfilecnt = MIN((u_int32_t)maxfilecnt, reclaimblks);
	cnidbufsize = maxfilecnt * sizeof(cnid_t);
	if (kmem_alloc(kernel_map, (vm_offset_t *)&cnidbufp, cnidbufsize)) {
		return (ENOMEM);
	}
	if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) {
		kmem_free(kernel_map, (vm_offset_t)cnidbufp, cnidbufsize);
		return (ENOMEM);
	}

	saved_next_allocation = hfsmp->nextAllocation;
	/* Always try allocating new blocks after the metadata zone */
	HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_start);

	fcb = VTOF(hfsmp->hfs_catalog_vp);
	bzero(iterator, sizeof(*iterator));

	btdata.bufferAddress = &filerec;
	btdata.itemSize = sizeof(filerec);
	btdata.itemCount = 1;

	/* Keep the Catalog and extents files locked during iteration. */
	lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_EXTENTS, HFS_SHARED_LOCK);

	error = BTIterateRecord(fcb, kBTreeFirstRecord, iterator, NULL, NULL);
	if (error) {
		goto end_iteration;
	}
	/*
	 * Iterate over all the catalog records looking for files
	 * that overlap into the space we're trying to free up and
	 * the total number of blocks that will require relocation.
	 */
	for (filecnt = 0; filecnt < maxfilecnt; ) {
		error = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL);
		if (error) {
			/* Normal loop exit: ran off the end of the catalog. */
			if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) {
				error = 0;
			}
			break;
		}
		if (filerec.recordType != kHFSPlusFileRecord) {
			continue;
		}

		need_relocate = false;
		/* Check if data fork overlaps the target space */
		for (i = 0; i < kHFSPlusExtentDensity; ++i) {
			if (filerec.dataFork.extents[i].blockCount == 0) {
				break;
			}
			/*
			 * block is one past the extent's last allocated block.
			 * NOTE(review): ">= startblk" also matches a file ending
			 * exactly at startblk-1 -- looks intentionally conservative
			 * (an extra relocation is harmless); confirm before changing.
			 */
			block = filerec.dataFork.extents[i].startBlock +
				filerec.dataFork.extents[i].blockCount;
			if (block >= startblk) {
				/* The journal must be moved by its own helpers, never here. */
				if ((filerec.fileID == hfsmp->hfs_jnlfileid) ||
				    (filerec.fileID == hfsmp->hfs_jnlinfoblkid)) {
					printf("hfs_reclaimspace: cannot move active journal\n");
					error = EPERM;
					goto end_iteration;
				}
				need_relocate = true;
				goto save_fileid;
			}
		}

		/* Check if resource fork overlaps the target space */
		for (j = 0; j < kHFSPlusExtentDensity; ++j) {
			if (filerec.resourceFork.extents[j].blockCount == 0) {
				break;
			}
			block = filerec.resourceFork.extents[j].startBlock +
				filerec.resourceFork.extents[j].blockCount;
			if (block >= startblk) {
				need_relocate = true;
				goto save_fileid;
			}
		}

		/* Check if any forks' overflow extents overlap the target space.
		 * Only possible when a fork used all eight in-record extents. */
		if ((i == kHFSPlusExtentDensity) || (j == kHFSPlusExtentDensity)) {
			if (hfs_overlapped_overflow_extents(hfsmp, startblk, filerec.fileID)) {
				need_relocate = true;
				goto save_fileid;
			}
		}

save_fileid:
		if (need_relocate == true) {
			cnidbufp[filecnt++] = filerec.fileID;
			if (hfs_resize_debug) {
				printf ("hfs_reclaimspace: Will relocate extents for fileID=%u\n", filerec.fileID);
			}
		}
	}

end_iteration:
	/* If no regular file was found to be relocated and
	 * no system file was moved, we probably do not have
	 * enough space to relocate the system files, or
	 * something else went wrong.
	 */
	if ((filecnt == 0) && (total_blks_moved == 0)) {
		printf("hfs_reclaimspace: no files moved\n");
		error = ENOSPC;
	}
	/* All done with catalog. */
	hfs_systemfile_unlock(hfsmp, lockflags);
	if (error || filecnt == 0)
		goto out;

	hfsmp->hfs_resize_filesmoved = 0;
	hfsmp->hfs_resize_totalfiles = filecnt;

	/* Now move any files that are in the way. */
	for (i = 0; i < filecnt; ++i) {
		struct vnode *rvp;
		struct cnode *cp;
		struct filefork *datafork;

		/* File may have been deleted since the scan; just skip it. */
		if (hfs_vget(hfsmp, cnidbufp[i], &vp, 0) != 0)
			continue;

		cp = VTOC(vp);
		datafork = VTOF(vp);

		/* Relocating directory hard links is not supported, so we punt (see radar 6217026). */
		if ((cp->c_flag & C_HARDLINK) && vnode_isdir(vp)) {
			printf("hfs_reclaimspace: Unable to relocate directory hard link id=%d\n", cp->c_cnid);
			error = EINVAL;
			goto out;
		}

		/* Relocate any overlapping data fork blocks. */
		if (datafork && datafork->ff_blocks > 0) {
			error = hfs_reclaim_file(hfsmp, vp, startblk, 0, &blks_moved, context);
			if (error) {
				printf ("hfs_reclaimspace: Error reclaiming datafork blocks of fileid=%u (error=%d)\n", cnidbufp[i], error);
				break;
			}
			total_blks_moved += blks_moved;
		}

		/* Relocate any overlapping resource fork blocks.
		 * (c_blocks minus data-fork blocks == resource-fork blocks) */
		if ((cp->c_blocks - (datafork ? datafork->ff_blocks : 0)) > 0) {
			error = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE, TRUE);
			if (error) {
				printf ("hfs_reclaimspace: Error looking up rvp for fileid=%u (error=%d)\n", cnidbufp[i], error);
				break;
			}
			error = hfs_reclaim_file(hfsmp, rvp, startblk, 0, &blks_moved, context);
			/* Defer the rsrc vnode_put to whoever drops the cnode. */
			VTOC(rvp)->c_flag |= C_NEED_RVNODE_PUT;
			if (error) {
				printf ("hfs_reclaimspace: Error reclaiming rsrcfork blocks of fileid=%u (error=%d)\n", cnidbufp[i], error);
				break;
			}
			total_blks_moved += blks_moved;
		}
		hfs_unlock(cp);
		vnode_put(vp);
		vp = NULL;

		++hfsmp->hfs_resize_filesmoved;

		/* Report intermediate progress. */
		if (filecnt > 100) {
			int progress;

			progress = (i * 100) / filecnt;
			if (progress > (lastprogress + 9)) {
				printf("hfs_reclaimspace: %d%% done...\n", progress);
				lastprogress = progress;
			}
		}
	}
	/* On a break out of the loop above, vp is still locked and referenced. */
	if (vp) {
		hfs_unlock(VTOC(vp));
		vnode_put(vp);
		vp = NULL;
	}
	if (hfsmp->hfs_resize_filesmoved != 0) {
		printf("hfs_reclaimspace: relocated %u blocks from %d files on \"%s\"\n",
			total_blks_moved, (int)hfsmp->hfs_resize_filesmoved, hfsmp->vcbVN);
	}
out:
	kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator));
	kmem_free(kernel_map, (vm_offset_t)cnidbufp, cnidbufsize);

	/*
	 * Restore the roving allocation pointer on errors.
	 * (but only if we didn't move any files)
	 */
	if (error && hfsmp->hfs_resize_filesmoved == 0) {
		HFS_UPDATE_NEXT_ALLOCATION(hfsmp, saved_next_allocation);
	}
	return (error);
}
5134
5135
5136 /*
5137 * Check if there are any overflow data or resource fork extents that overlap
5138 * into the disk space that is being reclaimed.
5139 *
5140 * Output -
5141 * 1 - One of the overflow extents need to be relocated
5142 * 0 - No overflow extents need to be relocated, or there was an error
5143 */
5144 static int
5145 hfs_overlapped_overflow_extents(struct hfsmount *hfsmp, u_int32_t startblk, u_int32_t fileID)
5146 {
5147 struct BTreeIterator * iterator = NULL;
5148 struct FSBufferDescriptor btdata;
5149 HFSPlusExtentRecord extrec;
5150 HFSPlusExtentKey *extkeyptr;
5151 FCB *fcb;
5152 int overlapped = 0;
5153 int i;
5154 int error;
5155
5156 if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) {
5157 return 0;
5158 }
5159 bzero(iterator, sizeof(*iterator));
5160 extkeyptr = (HFSPlusExtentKey *)&iterator->key;
5161 extkeyptr->keyLength = kHFSPlusExtentKeyMaximumLength;
5162 extkeyptr->forkType = 0;
5163 extkeyptr->fileID = fileID;
5164 extkeyptr->startBlock = 0;
5165
5166 btdata.bufferAddress = &extrec;
5167 btdata.itemSize = sizeof(extrec);
5168 btdata.itemCount = 1;
5169
5170 fcb = VTOF(hfsmp->hfs_extents_vp);
5171
5172 /* This will position the iterator just before the first overflow
5173 * extent record for given fileID. It will always return btNotFound,
5174 * so we special case the error code.
5175 */
5176 error = BTSearchRecord(fcb, iterator, &btdata, NULL, iterator);
5177 if (error && (error != btNotFound)) {
5178 goto out;
5179 }
5180
5181 /* BTIterateRecord() might return error if the btree is empty, and
5182 * therefore we return that the extent does not overflow to the caller
5183 */
5184 error = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL);
5185 while (error == 0) {
5186 /* Stop when we encounter a different file. */
5187 if (extkeyptr->fileID != fileID) {
5188 break;
5189 }
5190 /* Check if any of the forks exist in the target space. */
5191 for (i = 0; i < kHFSPlusExtentDensity; ++i) {
5192 if (extrec[i].blockCount == 0) {
5193 break;
5194 }
5195 if ((extrec[i].startBlock + extrec[i].blockCount) >= startblk) {
5196 overlapped = 1;
5197 goto out;
5198 }
5199 }
5200 /* Look for more records. */
5201 error = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL);
5202 }
5203
5204 out:
5205 kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator));
5206 return overlapped;
5207 }
5208
5209
5210 /*
5211 * Calculate the progress of a file system resize operation.
5212 */
5213 __private_extern__
5214 int
5215 hfs_resize_progress(struct hfsmount *hfsmp, u_int32_t *progress)
5216 {
5217 if ((hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) == 0) {
5218 return (ENXIO);
5219 }
5220
5221 if (hfsmp->hfs_resize_totalfiles > 0)
5222 *progress = (hfsmp->hfs_resize_filesmoved * 100) / hfsmp->hfs_resize_totalfiles;
5223 else
5224 *progress = 0;
5225
5226 return (0);
5227 }
5228
5229
5230 /*
5231 * Creates a UUID from a unique "name" in the HFS UUID Name space.
5232 * See version 3 UUID.
5233 */
5234 static void
5235 hfs_getvoluuid(struct hfsmount *hfsmp, uuid_t result)
5236 {
5237 MD5_CTX md5c;
5238 uint8_t rawUUID[8];
5239
5240 ((uint32_t *)rawUUID)[0] = hfsmp->vcbFndrInfo[6];
5241 ((uint32_t *)rawUUID)[1] = hfsmp->vcbFndrInfo[7];
5242
5243 MD5Init( &md5c );
5244 MD5Update( &md5c, HFS_UUID_NAMESPACE_ID, sizeof( uuid_t ) );
5245 MD5Update( &md5c, rawUUID, sizeof (rawUUID) );
5246 MD5Final( result, &md5c );
5247
5248 result[6] = 0x30 | ( result[6] & 0x0F );
5249 result[8] = 0x80 | ( result[8] & 0x3F );
5250 }
5251
5252 /*
5253 * Get file system attributes.
5254 */
/*
 * Get file system attributes (VFS_GETATTR entry point).
 *
 * Fills in whichever fields of *fsap the caller marked active; each field
 * that is answered is flagged via VFSATTR_RETURN/VFSATTR_SET_SUPPORTED.
 * Counts and sizes come straight from the in-core VCB/hfsmount; the
 * capability and attribute masks distinguish plain HFS (HFS_STANDARD)
 * from HFS Plus volumes.  Always returns 0.
 */
static int
hfs_vfs_getattr(struct mount *mp, struct vfs_attr *fsap, __unused vfs_context_t context)
{
/* HFS supports every common/file attribute except named-attribute counts/lists. */
#define HFS_ATTR_CMN_VALIDMASK (ATTR_CMN_VALIDMASK & ~(ATTR_CMN_NAMEDATTRCOUNT | ATTR_CMN_NAMEDATTRLIST))
#define HFS_ATTR_FILE_VALIDMASK (ATTR_FILE_VALIDMASK & ~(ATTR_FILE_FILETYPE | ATTR_FILE_FORKCOUNT | ATTR_FILE_FORKLIST))

	ExtendedVCB *vcb = VFSTOVCB(mp);
	struct hfsmount *hfsmp = VFSTOHFS(mp);
	u_int32_t freeCNIDs;

	/* CNIDs remaining before the 32-bit catalog node ID space is exhausted. */
	freeCNIDs = (u_int32_t)0xFFFFFFFF - (u_int32_t)hfsmp->vcbNxtCNID;

	VFSATTR_RETURN(fsap, f_objcount, (u_int64_t)hfsmp->vcbFilCnt + (u_int64_t)hfsmp->vcbDirCnt);
	VFSATTR_RETURN(fsap, f_filecount, (u_int64_t)hfsmp->vcbFilCnt);
	VFSATTR_RETURN(fsap, f_dircount, (u_int64_t)hfsmp->vcbDirCnt);
	VFSATTR_RETURN(fsap, f_maxobjcount, (u_int64_t)0xFFFFFFFF);
	VFSATTR_RETURN(fsap, f_iosize, (size_t)cluster_max_io_size(mp, 0));
	VFSATTR_RETURN(fsap, f_blocks, (u_int64_t)hfsmp->totalBlocks);
	VFSATTR_RETURN(fsap, f_bfree, (u_int64_t)hfs_freeblks(hfsmp, 0));
	VFSATTR_RETURN(fsap, f_bavail, (u_int64_t)hfs_freeblks(hfsmp, 1));
	VFSATTR_RETURN(fsap, f_bsize, (u_int32_t)vcb->blockSize);
	/* XXX needs clarification */
	VFSATTR_RETURN(fsap, f_bused, hfsmp->totalBlocks - hfs_freeblks(hfsmp, 1));
	/* Maximum files is constrained by total blocks. */
	VFSATTR_RETURN(fsap, f_files, (u_int64_t)(hfsmp->totalBlocks - 2));
	/* Free files is the smaller of free CNIDs and free blocks. */
	VFSATTR_RETURN(fsap, f_ffree, MIN((u_int64_t)freeCNIDs, (u_int64_t)hfs_freeblks(hfsmp, 1)));

	fsap->f_fsid.val[0] = hfsmp->hfs_raw_dev;
	fsap->f_fsid.val[1] = vfs_typenum(mp);
	VFSATTR_SET_SUPPORTED(fsap, f_fsid);

	VFSATTR_RETURN(fsap, f_signature, vcb->vcbSigWord);
	VFSATTR_RETURN(fsap, f_carbon_fsid, 0);

	if (VFSATTR_IS_ACTIVE(fsap, f_capabilities)) {
		vol_capabilities_attr_t *cap;

		cap = &fsap->f_capabilities;

		/*
		 * capabilities[] = what this volume actually supports;
		 * valid[] = which capability bits this filesystem knows about.
		 */
		if (hfsmp->hfs_flags & HFS_STANDARD) {
			/* Plain HFS: no symlinks, hard links, or journaling. */
			cap->capabilities[VOL_CAPABILITIES_FORMAT] =
				VOL_CAP_FMT_PERSISTENTOBJECTIDS |
				VOL_CAP_FMT_CASE_PRESERVING |
				VOL_CAP_FMT_FAST_STATFS |
				VOL_CAP_FMT_HIDDEN_FILES |
				VOL_CAP_FMT_PATH_FROM_ID;
		} else {
			/* HFS Plus; journaling/case-sensitivity reported per-volume. */
			cap->capabilities[VOL_CAPABILITIES_FORMAT] =
				VOL_CAP_FMT_PERSISTENTOBJECTIDS |
				VOL_CAP_FMT_SYMBOLICLINKS |
				VOL_CAP_FMT_HARDLINKS |
				VOL_CAP_FMT_JOURNAL |
				VOL_CAP_FMT_ZERO_RUNS |
				(hfsmp->jnl ? VOL_CAP_FMT_JOURNAL_ACTIVE : 0) |
				(hfsmp->hfs_flags & HFS_CASE_SENSITIVE ? VOL_CAP_FMT_CASE_SENSITIVE : 0) |
				VOL_CAP_FMT_CASE_PRESERVING |
				VOL_CAP_FMT_FAST_STATFS |
				VOL_CAP_FMT_2TB_FILESIZE |
				VOL_CAP_FMT_HIDDEN_FILES |
#if HFS_COMPRESSION
				VOL_CAP_FMT_PATH_FROM_ID |
				VOL_CAP_FMT_DECMPFS_COMPRESSION;
#else
				VOL_CAP_FMT_PATH_FROM_ID;
#endif
		}
		cap->capabilities[VOL_CAPABILITIES_INTERFACES] =
			VOL_CAP_INT_SEARCHFS |
			VOL_CAP_INT_ATTRLIST |
			VOL_CAP_INT_NFSEXPORT |
			VOL_CAP_INT_READDIRATTR |
			VOL_CAP_INT_EXCHANGEDATA |
			VOL_CAP_INT_ALLOCATE |
			VOL_CAP_INT_VOL_RENAME |
			VOL_CAP_INT_ADVLOCK |
			VOL_CAP_INT_FLOCK |
#if NAMEDSTREAMS
			VOL_CAP_INT_EXTENDED_ATTR |
			VOL_CAP_INT_NAMEDSTREAMS;
#else
			VOL_CAP_INT_EXTENDED_ATTR;
#endif
		cap->capabilities[VOL_CAPABILITIES_RESERVED1] = 0;
		cap->capabilities[VOL_CAPABILITIES_RESERVED2] = 0;

		cap->valid[VOL_CAPABILITIES_FORMAT] =
			VOL_CAP_FMT_PERSISTENTOBJECTIDS |
			VOL_CAP_FMT_SYMBOLICLINKS |
			VOL_CAP_FMT_HARDLINKS |
			VOL_CAP_FMT_JOURNAL |
			VOL_CAP_FMT_JOURNAL_ACTIVE |
			VOL_CAP_FMT_NO_ROOT_TIMES |
			VOL_CAP_FMT_SPARSE_FILES |
			VOL_CAP_FMT_ZERO_RUNS |
			VOL_CAP_FMT_CASE_SENSITIVE |
			VOL_CAP_FMT_CASE_PRESERVING |
			VOL_CAP_FMT_FAST_STATFS |
			VOL_CAP_FMT_2TB_FILESIZE |
			VOL_CAP_FMT_OPENDENYMODES |
			VOL_CAP_FMT_HIDDEN_FILES |
#if HFS_COMPRESSION
			VOL_CAP_FMT_PATH_FROM_ID |
			VOL_CAP_FMT_DECMPFS_COMPRESSION;
#else
			VOL_CAP_FMT_PATH_FROM_ID;
#endif
		cap->valid[VOL_CAPABILITIES_INTERFACES] =
			VOL_CAP_INT_SEARCHFS |
			VOL_CAP_INT_ATTRLIST |
			VOL_CAP_INT_NFSEXPORT |
			VOL_CAP_INT_READDIRATTR |
			VOL_CAP_INT_EXCHANGEDATA |
			VOL_CAP_INT_COPYFILE |
			VOL_CAP_INT_ALLOCATE |
			VOL_CAP_INT_VOL_RENAME |
			VOL_CAP_INT_ADVLOCK |
			VOL_CAP_INT_FLOCK |
			VOL_CAP_INT_MANLOCK |
#if NAMEDSTREAMS
			VOL_CAP_INT_EXTENDED_ATTR |
			VOL_CAP_INT_NAMEDSTREAMS;
#else
			VOL_CAP_INT_EXTENDED_ATTR;
#endif
		cap->valid[VOL_CAPABILITIES_RESERVED1] = 0;
		cap->valid[VOL_CAPABILITIES_RESERVED2] = 0;
		VFSATTR_SET_SUPPORTED(fsap, f_capabilities);
	}
	if (VFSATTR_IS_ACTIVE(fsap, f_attributes)) {
		vol_attributes_attr_t *attrp = &fsap->f_attributes;

		attrp->validattr.commonattr = HFS_ATTR_CMN_VALIDMASK;
		attrp->validattr.volattr = ATTR_VOL_VALIDMASK & ~ATTR_VOL_INFO;
		attrp->validattr.dirattr = ATTR_DIR_VALIDMASK;
		attrp->validattr.fileattr = HFS_ATTR_FILE_VALIDMASK;
		attrp->validattr.forkattr = 0;

		attrp->nativeattr.commonattr = HFS_ATTR_CMN_VALIDMASK;
		attrp->nativeattr.volattr = ATTR_VOL_VALIDMASK & ~ATTR_VOL_INFO;
		attrp->nativeattr.dirattr = ATTR_DIR_VALIDMASK;
		attrp->nativeattr.fileattr = HFS_ATTR_FILE_VALIDMASK;
		attrp->nativeattr.forkattr = 0;
		VFSATTR_SET_SUPPORTED(fsap, f_attributes);
	}
	fsap->f_create_time.tv_sec = hfsmp->vcbCrDate;
	fsap->f_create_time.tv_nsec = 0;
	VFSATTR_SET_SUPPORTED(fsap, f_create_time);
	fsap->f_modify_time.tv_sec = hfsmp->vcbLsMod;
	fsap->f_modify_time.tv_nsec = 0;
	VFSATTR_SET_SUPPORTED(fsap, f_modify_time);

	fsap->f_backup_time.tv_sec = hfsmp->vcbVolBkUp;
	fsap->f_backup_time.tv_nsec = 0;
	VFSATTR_SET_SUPPORTED(fsap, f_backup_time);
	if (VFSATTR_IS_ACTIVE(fsap, f_fssubtype)) {
		u_int16_t subtype = 0;

		/*
		 * Subtypes (flavors) for HFS
		 *	0:	Mac OS Extended
		 *	1:	Mac OS Extended (Journaled)
		 *	2:	Mac OS Extended (Case Sensitive)
		 *	3:	Mac OS Extended (Case Sensitive, Journaled)
		 *	4 - 127:	Reserved
		 *	128:	Mac OS Standard
		 *
		 */
		if (hfsmp->hfs_flags & HFS_STANDARD) {
			subtype = HFS_SUBTYPE_STANDARDHFS;
		} else /* HFS Plus */ {
			if (hfsmp->jnl)
				subtype |= HFS_SUBTYPE_JOURNALED;
			if (hfsmp->hfs_flags & HFS_CASE_SENSITIVE)
				subtype |= HFS_SUBTYPE_CASESENSITIVE;
		}
		fsap->f_fssubtype = subtype;
		VFSATTR_SET_SUPPORTED(fsap, f_fssubtype);
	}

	if (VFSATTR_IS_ACTIVE(fsap, f_vol_name)) {
		strlcpy(fsap->f_vol_name, (char *) hfsmp->vcbVN, MAXPATHLEN);
		VFSATTR_SET_SUPPORTED(fsap, f_vol_name);
	}
	if (VFSATTR_IS_ACTIVE(fsap, f_uuid)) {
		hfs_getvoluuid(hfsmp, fsap->f_uuid);
		VFSATTR_SET_SUPPORTED(fsap, f_uuid);
	}
	return (0);
}
5444
5445 /*
5446 * Perform a volume rename. Requires the FS' root vp.
5447 */
/*
 * Rename the volume: move the root-directory catalog record from the old
 * name to `name`, update the in-core VCB name, and flush the volume header.
 * Requires the FS' root vp.  A zero-length name is silently ignored.
 *
 * Returns 0 on success or an errno from locking, transaction start,
 * catalog preflight, or cat_rename.
 */
static int
hfs_rename_volume(struct vnode *vp, const char *name, proc_t p)
{
	ExtendedVCB *vcb = VTOVCB(vp);
	struct cnode *cp = VTOC(vp);
	struct hfsmount *hfsmp = VTOHFS(vp);
	struct cat_desc to_desc;
	struct cat_desc todir_desc;
	struct cat_desc new_desc;
	cat_cookie_t cookie;
	int lockflags;
	int error = 0;

	/*
	 * Ignore attempts to rename a volume to a zero-length name.
	 */
	if (name[0] == 0)
		return(0);

	bzero(&to_desc, sizeof(to_desc));
	bzero(&todir_desc, sizeof(todir_desc));
	bzero(&new_desc, sizeof(new_desc));
	bzero(&cookie, sizeof(cookie));

	/* Destination "directory" is the root parent (the volume itself). */
	todir_desc.cd_parentcnid = kHFSRootParentID;
	todir_desc.cd_cnid = kHFSRootFolderID;
	todir_desc.cd_flags = CD_ISDIR;

	/* Destination descriptor: same cnid, new name. */
	to_desc.cd_nameptr = (const u_int8_t *)name;
	to_desc.cd_namelen = strlen(name);
	to_desc.cd_parentcnid = kHFSRootParentID;
	to_desc.cd_cnid = cp->c_cnid;
	to_desc.cd_flags = CD_ISDIR;

	/* cnode lock -> transaction -> catalog preflight -> catalog lock. */
	if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK)) == 0) {
		if ((error = hfs_start_transaction(hfsmp)) == 0) {
			if ((error = cat_preflight(hfsmp, CAT_RENAME, &cookie, p)) == 0) {
				lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);

				error = cat_rename(hfsmp, &cp->c_desc, &todir_desc, &to_desc, &new_desc);

				/*
				 * If successful, update the name in the VCB, ensure it's terminated.
				 */
				if (!error) {
					strlcpy((char *)vcb->vcbVN, name, sizeof(vcb->vcbVN));
				}

				hfs_systemfile_unlock(hfsmp, lockflags);
				cat_postflight(hfsmp, &cookie, p);

				/*
				 * NOTE(review): marking the VCB dirty only on *failure*
				 * looks inverted -- the success path relies on the
				 * unconditional flush below.  Confirm intent before
				 * changing; preserved as-is.
				 */
				if (error)
					MarkVCBDirty(vcb);
				(void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
			}
			hfs_end_transaction(hfsmp);
		}
		if (!error) {
			/* Release old allocated name buffer */
			if (cp->c_desc.cd_flags & CD_HASBUF) {
				const char *tmp_name = (const char *)cp->c_desc.cd_nameptr;

				cp->c_desc.cd_nameptr = 0;
				cp->c_desc.cd_namelen = 0;
				cp->c_desc.cd_flags &= ~CD_HASBUF;
				vfs_removename(tmp_name);
			}
			/* Update cnode's catalog descriptor */
			replace_desc(cp, &new_desc);
			vcb->volumeNameEncodingHint = new_desc.cd_encoding;
			cp->c_touch_chgtime = TRUE;
		}

		hfs_unlock(cp);
	}

	return(error);
}
5526
/*
 * Set file system attributes (currently only the volume name).
 */
5530 static int
5531 hfs_vfs_setattr(struct mount *mp, struct vfs_attr *fsap, __unused vfs_context_t context)
5532 {
5533 kauth_cred_t cred = vfs_context_ucred(context);
5534 int error = 0;
5535
5536 /*
5537 * Must be superuser or owner of filesystem to change volume attributes
5538 */
5539 if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(mp)->f_owner))
5540 return(EACCES);
5541
5542 if (VFSATTR_IS_ACTIVE(fsap, f_vol_name)) {
5543 vnode_t root_vp;
5544
5545 error = hfs_vfs_root(mp, &root_vp, context);
5546 if (error)
5547 goto out;
5548
5549 error = hfs_rename_volume(root_vp, fsap->f_vol_name, vfs_context_proc(context));
5550 (void) vnode_put(root_vp);
5551 if (error)
5552 goto out;
5553
5554 VFSATTR_SET_SUPPORTED(fsap, f_vol_name);
5555 }
5556
5557 out:
5558 return error;
5559 }
5560
5561 /* If a runtime corruption is detected, set the volume inconsistent
5562 * bit in the volume attributes. The volume inconsistent bit is a persistent
5563 * bit which represents that the volume is corrupt and needs repair.
5564 * The volume inconsistent bit can be set from the kernel when it detects
5565 * runtime corruption or from file system repair utilities like fsck_hfs when
5566 * a repair operation fails. The bit should be cleared only from file system
5567 * verify/repair utility like fsck_hfs when a verify/repair succeeds.
5568 */
5569 void hfs_mark_volume_inconsistent(struct hfsmount *hfsmp)
5570 {
5571 HFS_MOUNT_LOCK(hfsmp, TRUE);
5572 if ((hfsmp->vcbAtrb & kHFSVolumeInconsistentMask) == 0) {
5573 hfsmp->vcbAtrb |= kHFSVolumeInconsistentMask;
5574 MarkVCBDirty(hfsmp);
5575 }
5576 if ((hfsmp->hfs_flags & HFS_READ_ONLY)==0) {
5577 /* Log information to ASL log */
5578 fslog_fs_corrupt(hfsmp->hfs_mp);
5579 printf("hfs: Runtime corruption detected on %s, fsck will be forced on next mount.\n", hfsmp->vcbVN);
5580 }
5581 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
5582 }
5583
5584 /* Replay the journal on the device node provided. Returns zero if
5585 * journal replay succeeded or no journal was supposed to be replayed.
5586 */
5587 static int hfs_journal_replay(vnode_t devvp, vfs_context_t context)
5588 {
5589 int retval = 0;
5590 struct mount *mp = NULL;
5591 struct hfs_mount_args *args = NULL;
5592
5593 /* Replay allowed only on raw devices */
5594 if (!vnode_ischr(devvp)) {
5595 retval = EINVAL;
5596 goto out;
5597 }
5598
5599 /* Create dummy mount structures */
5600 MALLOC(mp, struct mount *, sizeof(struct mount), M_TEMP, M_WAITOK);
5601 if (mp == NULL) {
5602 retval = ENOMEM;
5603 goto out;
5604 }
5605 bzero(mp, sizeof(struct mount));
5606 mount_lock_init(mp);
5607
5608 MALLOC(args, struct hfs_mount_args *, sizeof(struct hfs_mount_args), M_TEMP, M_WAITOK);
5609 if (args == NULL) {
5610 retval = ENOMEM;
5611 goto out;
5612 }
5613 bzero(args, sizeof(struct hfs_mount_args));
5614
5615 retval = hfs_mountfs(devvp, mp, args, 1, context);
5616 buf_flushdirtyblks(devvp, MNT_WAIT, 0, "hfs_journal_replay");
5617
5618 out:
5619 if (mp) {
5620 mount_lock_destroy(mp);
5621 FREE(mp, M_TEMP);
5622 }
5623 if (args) {
5624 FREE(args, M_TEMP);
5625 }
5626 return retval;
5627 }
5628
/*
 * hfs vfs operations.
 *
 * Positional initializer: each entry must line up with the corresponding
 * slot of struct vfsops (declared in <sys/mount.h>); do not reorder.
 */
struct vfsops hfs_vfsops = {
	hfs_mount,		/* mount */
	hfs_start,		/* start */
	hfs_unmount,		/* unmount */
	hfs_vfs_root,		/* root */
	hfs_quotactl,		/* quotactl */
	hfs_vfs_getattr,	/* getattr -- was hfs_statfs */
	hfs_sync,		/* sync */
	hfs_vfs_vget,		/* vget */
	hfs_fhtovp,		/* fhtovp */
	hfs_vptofh,		/* vptofh */
	hfs_init,		/* init */
	hfs_sysctl,		/* sysctl */
	hfs_vfs_setattr,	/* setattr */
	{NULL}			/* remaining slots unused */
};