]> git.saurik.com Git - apple/xnu.git/blame - bsd/hfs/hfs_vfsops.c
xnu-2050.7.9.tar.gz
[apple/xnu.git] / bsd / hfs / hfs_vfsops.c
CommitLineData
1c79356b 1/*
13f56ec4 2 * Copyright (c) 1999-2012 Apple Inc. All rights reserved.
1c79356b 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * Copyright (c) 1991, 1993, 1994
30 * The Regents of the University of California. All rights reserved.
31 * (c) UNIX System Laboratories, Inc.
32 * All or some portions of this file are derived from material licensed
33 * to the University of California by American Telephone and Telegraph
34 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35 * the permission of UNIX System Laboratories, Inc.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * hfs_vfsops.c
66 * derived from @(#)ufs_vfsops.c 8.8 (Berkeley) 5/20/95
67 *
9bccf70c 68 * (c) Copyright 1997-2002 Apple Computer, Inc. All rights reserved.
1c79356b
A
69 *
70 * hfs_vfsops.c -- VFS layer for loadable HFS file system.
71 *
1c79356b
A
72 */
73#include <sys/param.h>
74#include <sys/systm.h>
91447636 75#include <sys/kauth.h>
1c79356b
A
76
77#include <sys/ubc.h>
cf7d32b8 78#include <sys/ubc_internal.h>
91447636
A
79#include <sys/vnode_internal.h>
80#include <sys/mount_internal.h>
55e303ae 81#include <sys/sysctl.h>
1c79356b
A
82#include <sys/malloc.h>
83#include <sys/stat.h>
9bccf70c
A
84#include <sys/quota.h>
85#include <sys/disk.h>
55e303ae
A
86#include <sys/paths.h>
87#include <sys/utfconv.h>
91447636 88#include <sys/kdebug.h>
2d21ac55 89#include <sys/fslog.h>
6d2010ae 90#include <sys/ubc.h>
316670eb 91#include <sys/buf_internal.h>
91447636
A
92
93#include <kern/locks.h>
9bccf70c 94
b4c24cb9
A
95#include <vfs/vfs_journal.h>
96
1c79356b
A
97#include <miscfs/specfs/specdev.h>
98#include <hfs/hfs_mount.h>
99
b0d623f7
A
100#include <libkern/crypto/md5.h>
101#include <uuid/uuid.h>
102
1c79356b 103#include "hfs.h"
9bccf70c
A
104#include "hfs_catalog.h"
105#include "hfs_cnode.h"
1c79356b
A
106#include "hfs_dbg.h"
107#include "hfs_endian.h"
91447636 108#include "hfs_hotfiles.h"
9bccf70c 109#include "hfs_quota.h"
13f56ec4 110#include "hfs_btreeio.h"
1c79356b
A
111
112#include "hfscommon/headers/FileMgrInternal.h"
113#include "hfscommon/headers/BTreesInternal.h"
114
6d2010ae
A
115#if CONFIG_PROTECT
116#include <sys/cprotect.h>
117#endif
118
119#if CONFIG_HFS_ALLOC_RBTREE
120#include "hfscommon/headers/HybridAllocator.h"
121#endif
122
123#define HFS_MOUNT_DEBUG 1
124
1c79356b
A
125#if HFS_DIAGNOSTIC
126int hfs_dbg_all = 0;
1c79356b 127int hfs_dbg_err = 0;
1c79356b
A
128#endif
129
b7266188
A
130/* Enable/disable debugging code for live volume resizing */
131int hfs_resize_debug = 0;
d52fe63f 132
91447636
A
/*
 * Lock group / lock attribute handles for HFS.  They are only declared
 * (uninitialized) here; presumably they are set up by hfs_init -- TODO
 * confirm, the assignment is not visible in this part of the file.
 */
133lck_grp_attr_t * hfs_group_attr;
134lck_attr_t * hfs_lock_attr;
135lck_grp_t * hfs_mutex_group;
136lck_grp_t * hfs_rwlock_group;
6d2010ae 137lck_grp_t * hfs_spinlock_group;
91447636 138
/* Vnode operation descriptor tables, declared extern here. */
1c79356b 139extern struct vnodeopv_desc hfs_vnodeop_opv_desc;
b0d623f7 140extern struct vnodeopv_desc hfs_std_vnodeop_opv_desc;
935ed37a 141
b0d623f7
A
142/* not static so we can re-use in hfs_readwrite.c for build_path calls */
143int hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context);
1c79356b 144
91447636
A
/* Forward declarations of file-local (static) helpers. */
145static int hfs_changefs(struct mount *mp, struct hfs_mount_args *args);
146static int hfs_fhtovp(struct mount *mp, int fhlen, unsigned char *fhp, struct vnode **vpp, vfs_context_t context);
147static int hfs_flushfiles(struct mount *, int, struct proc *);
148static int hfs_flushMDB(struct hfsmount *hfsmp, int waitfor, int altflush);
149static int hfs_getmountpoint(struct vnode *vp, struct hfsmount **hfsmpp);
150static int hfs_init(struct vfsconf *vfsp);
91447636
A
151static int hfs_vfs_root(struct mount *mp, struct vnode **vpp, vfs_context_t context);
152static int hfs_quotactl(struct mount *, int, uid_t, caddr_t, vfs_context_t context);
153static int hfs_start(struct mount *mp, int flags, vfs_context_t context);
91447636 154static int hfs_vptofh(struct vnode *vp, int *fhlenp, unsigned char *fhp, vfs_context_t context);
6d2010ae 155static int hfs_file_extent_overlaps(struct hfsmount *hfsmp, u_int32_t allocLimit, struct HFSPlusCatalogFile *filerec);
b0d623f7 156static int hfs_journal_replay(vnode_t devvp, vfs_context_t context);
6d2010ae 157static int hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t allocLimit, u_int32_t reclaimblks, vfs_context_t context);
316670eb 158static int hfs_extend_journal(struct hfsmount *hfsmp, u_int32_t sector_size, u_int64_t sector_count, vfs_context_t context);
1c79356b 159
6d2010ae
A
/* Allocator helpers with external linkage (used under CONFIG_HFS_ALLOC_RBTREE below). */
160void hfs_initialize_allocator (struct hfsmount *hfsmp);
161int hfs_teardown_allocator (struct hfsmount *hfsmp);
316670eb 162void hfs_unmap_blocks (struct hfsmount *hfsmp);
6d2010ae
A
163
/* Prototypes for the non-static VFS entry points in this file. */
164int hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t context);
165int hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, int journal_replay_only, vfs_context_t context);
166int hfs_reload(struct mount *mp);
167int hfs_statfs(struct mount *mp, register struct vfsstatfs *sbp, vfs_context_t context);
168int hfs_sync(struct mount *mp, int waitfor, vfs_context_t context);
169int hfs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp,
170 user_addr_t newp, size_t newlen, vfs_context_t context);
171int hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context);
1c79356b
A
172
173/*
174 * Called by vfs_mountroot when mounting HFS Plus as root.
175 */
91447636 176
1c79356b 177int
91447636 178hfs_mountroot(mount_t mp, vnode_t rvp, vfs_context_t context)
1c79356b 179{
1c79356b 180 struct hfsmount *hfsmp;
9bccf70c 181 ExtendedVCB *vcb;
91447636 182 struct vfsstatfs *vfsp;
1c79356b 183 int error;
2d21ac55 184
6d2010ae
A
185 if ((error = hfs_mountfs(rvp, mp, NULL, 0, context))) {
186 if (HFS_MOUNT_DEBUG) {
187 printf("hfs_mountroot: hfs_mountfs returned %d, rvp (%p) name (%s) \n",
188 error, rvp, (rvp->v_name ? rvp->v_name : "unknown device"));
189 }
1c79356b 190 return (error);
6d2010ae 191 }
55e303ae 192
1c79356b
A
193 /* Init hfsmp */
194 hfsmp = VFSTOHFS(mp);
195
0b4e3aa0
A
196 hfsmp->hfs_uid = UNKNOWNUID;
197 hfsmp->hfs_gid = UNKNOWNGID;
198 hfsmp->hfs_dir_mask = (S_IRWXU | S_IRGRP|S_IXGRP | S_IROTH|S_IXOTH); /* 0755 */
199 hfsmp->hfs_file_mask = (S_IRWXU | S_IRGRP|S_IXGRP | S_IROTH|S_IXOTH); /* 0755 */
1c79356b 200
9bccf70c
A
201 /* Establish the free block reserve. */
202 vcb = HFSTOVCB(hfsmp);
203 vcb->reserveBlocks = ((u_int64_t)vcb->totalBlocks * HFS_MINFREE) / 100;
204 vcb->reserveBlocks = MIN(vcb->reserveBlocks, HFS_MAXRESERVE / vcb->blockSize);
205
91447636
A
206 vfsp = vfs_statfs(mp);
207 (void)hfs_statfs(mp, vfsp, NULL);
208
1c79356b
A
209 return (0);
210}
211
212
213/*
214 * VFS Operations.
215 *
216 * mount system call
217 */
218
/*
 * hfs_mount
 *
 * Handles both flavours of the mount request:
 *
 *   - MNT_UPDATE on an existing mount: optionally reload in-core state
 *     (MNT_RELOAD, read-only mounts only), downgrade read-write to
 *     read-only, upgrade read-only to read-write, and finally apply any
 *     changed parameters through hfs_changefs().
 *   - A new mount: sets MNT_DOVOLFS and hands off to hfs_mountfs(); with
 *     CONFIG_PROTECT it then checks the content-protection root EA.
 *
 * mp      - mount structure being created or updated
 * devvp   - device vnode, passed to hfs_mountfs() for a new mount
 * data    - user-space address of a struct hfs_mount_args
 * context - VFS context of the caller
 *
 * Returns 0 on success or an errno value.  On success the cached statfs
 * data is refreshed via hfs_statfs() at the "out" label.
 */
6d2010ae 219int
91447636 220hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t context)
1c79356b 221{
91447636 222 struct proc *p = vfs_context_proc(context);
1c79356b 223 struct hfsmount *hfsmp = NULL;
1c79356b 224 struct hfs_mount_args args;
1c79356b 225 int retval = E_NONE;
2d21ac55 226 u_int32_t cmdflags;
1c79356b 227
/* Bring the caller's hfs_mount_args in from user space. */
91447636 228 if ((retval = copyin(data, (caddr_t)&args, sizeof(args)))) {
6d2010ae
A
229 if (HFS_MOUNT_DEBUG) {
230 printf("hfs_mount: copyin returned %d for fs\n", retval);
231 }
91447636
A
232 return (retval);
233 }
2d21ac55 234 cmdflags = (u_int32_t)vfs_flags(mp) & MNT_CMDFLAGS;
91447636 235 if (cmdflags & MNT_UPDATE) {
1c79356b 236 hfsmp = VFSTOHFS(mp);
91447636
A
237
238 /* Reload incore data after an fsck. */
239 if (cmdflags & MNT_RELOAD) {
6d2010ae
A
240 if (vfs_isrdonly(mp)) {
241 int error = hfs_reload(mp);
242 if (error && HFS_MOUNT_DEBUG) {
243 printf("hfs_mount: hfs_reload returned %d on %s \n", error, hfsmp->vcbVN);
244 }
245 return error;
246 }
247 else {
248 if (HFS_MOUNT_DEBUG) {
249 printf("hfs_mount: MNT_RELOAD not supported on rdwr filesystem %s\n", hfsmp->vcbVN);
250 }
91447636 251 return (EINVAL);
6d2010ae 252 }
91447636
A
253 }
254
255 /* Change to a read-only file system. */
55e303ae 256 if (((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) &&
91447636
A
257 vfs_isrdonly(mp)) {
258 int flags;
259
c910b4d9
A
260 /* Set flag to indicate that a downgrade to read-only
261 * is in progress and therefore block any further
262 * modifications to the file system.
263 */
6d2010ae 264 hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);
c910b4d9
A
265 hfsmp->hfs_flags |= HFS_RDONLY_DOWNGRADE;
266 hfsmp->hfs_downgrading_proc = current_thread();
6d2010ae 267 hfs_unlock_global (hfsmp);
c910b4d9 268
1c79356b 269 /* use VFS_SYNC to push out System (btree) files */
91447636 270 retval = VFS_SYNC(mp, MNT_WAIT, context);
/* A sync failure aborts the downgrade unless MNT_FORCE was requested. */
c910b4d9
A
271 if (retval && ((cmdflags & MNT_FORCE) == 0)) {
272 hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE;
273 hfsmp->hfs_downgrading_proc = NULL;
6d2010ae
A
274 if (HFS_MOUNT_DEBUG) {
275 printf("hfs_mount: VFS_SYNC returned %d during b-tree sync of %s \n", retval, hfsmp->vcbVN);
276 }
91447636 277 goto out;
c910b4d9 278 }
1c79356b
A
279
280 flags = WRITECLOSE;
91447636 281 if (cmdflags & MNT_FORCE)
1c79356b
A
282 flags |= FORCECLOSE;
283
c910b4d9
A
284 if ((retval = hfs_flushfiles(mp, flags, p))) {
285 hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE;
286 hfsmp->hfs_downgrading_proc = NULL;
6d2010ae
A
287 if (HFS_MOUNT_DEBUG) {
288 printf("hfs_mount: hfs_flushfiles returned %d on %s \n", retval, hfsmp->vcbVN);
289 }
91447636 290 goto out;
c910b4d9 291 }
593a1d5f
A
292
293 /* mark the volume cleanly unmounted */
294 hfsmp->vcbAtrb |= kHFSVolumeUnmountedMask;
9bccf70c 295 retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
593a1d5f 296 hfsmp->hfs_flags |= HFS_READ_ONLY;
1c79356b 297
316670eb
A
298 /*
299 * Close down the journal.
300 *
301 * NOTE: It is critically important to close down the journal
302 * and have it issue all pending I/O prior to calling VNOP_FSYNC below.
303 * In a journaled environment it is expected that the journal be
304 * the only actor permitted to issue I/O for metadata blocks in HFS.
305 * If we were to call VNOP_FSYNC prior to closing down the journal,
306 * we would inadvertently issue (and wait for) the I/O we just
307 * initiated above as part of the flushvolumeheader call.
308 *
309 * To avoid this, we follow the same order of operations as in
310 * unmount and issue the journal_close prior to calling VNOP_FSYNC.
311 */
312
313 if (hfsmp->jnl) {
314 hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);
315
316 journal_close(hfsmp->jnl);
317 hfsmp->jnl = NULL;
318
319 // Note: we explicitly don't want to shutdown
320 // access to the jvp because we may need
321 // it later if we go back to being read-write.
322
323 hfs_unlock_global (hfsmp);
324 }
325
326
327 /*
328 * Write out any pending I/O still outstanding against the device node
329 * now that the journal has been closed.
330 */
91447636
A
331 if (!retval) {
332 if (vnode_mount(hfsmp->hfs_devvp) == mp) {
333 retval = hfs_fsync(hfsmp->hfs_devvp, MNT_WAIT, 0, p);
334 } else {
335 vnode_get(hfsmp->hfs_devvp);
336 retval = VNOP_FSYNC(hfsmp->hfs_devvp, MNT_WAIT, context);
337 vnode_put(hfsmp->hfs_devvp);
338 }
339 }
316670eb 340
/*
 * NOTE(review): retval may also hold a failure from the
 * hfs_flushvolumeheader call above (the fsync is skipped in that case),
 * so the "FSYNC" wording of the message below can be misleading.  The
 * journal has already been closed by this point, and this path clears
 * HFS_READ_ONLY again without reopening it.
 */
1c79356b 341 if (retval) {
6d2010ae
A
342 if (HFS_MOUNT_DEBUG) {
343 printf("hfs_mount: FSYNC on devvp returned %d for fs %s\n", retval, hfsmp->vcbVN);
344 }
c910b4d9
A
345 hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE;
346 hfsmp->hfs_downgrading_proc = NULL;
55e303ae 347 hfsmp->hfs_flags &= ~HFS_READ_ONLY;
91447636 348 goto out;
1c79356b 349 }
316670eb 350
6d2010ae
A
351#if CONFIG_HFS_ALLOC_RBTREE
352 (void) hfs_teardown_allocator(hfsmp);
353#endif
c910b4d9 354 hfsmp->hfs_downgrading_proc = NULL;
1c79356b
A
355 }
356
91447636
A
357 /* Change to a writable file system. */
358 if (vfs_iswriteupgrade(mp)) {
6d2010ae
A
359#if CONFIG_HFS_ALLOC_RBTREE
360 thread_t allocator_thread;
361#endif
2d21ac55
A
362
363 /*
364 * On inconsistent disks, do not allow read-write mount
365 * unless it is the boot volume being mounted.
366 */
367 if (!(vfs_flags(mp) & MNT_ROOTFS) &&
368 (hfsmp->vcbAtrb & kHFSVolumeInconsistentMask)) {
6d2010ae
A
369 if (HFS_MOUNT_DEBUG) {
370 printf("hfs_mount: attempting to mount inconsistent non-root volume %s\n", (hfsmp->vcbVN));
371 }
2d21ac55
A
372 retval = EINVAL;
373 goto out;
374 }
375
55e303ae
A
376 // If the journal was shut-down previously because we were
377 // asked to be read-only, let's start it back up again now
378
379 if ( (HFSTOVCB(hfsmp)->vcbAtrb & kHFSVolumeJournaledMask)
380 && hfsmp->jnl == NULL
381 && hfsmp->jvp != NULL) {
91447636 382 int jflags;
55e303ae
A
383
384 if (hfsmp->hfs_flags & HFS_NEED_JNL_RESET) {
91447636 385 jflags = JOURNAL_RESET;
6d2010ae 386 } else {
91447636 387 jflags = 0;
6d2010ae
A
388 }
389
390 hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);
391
392 hfsmp->jnl = journal_open(hfsmp->jvp,
393 (hfsmp->jnl_start * HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset,
394 hfsmp->jnl_size,
395 hfsmp->hfs_devvp,
396 hfsmp->hfs_logical_block_size,
397 jflags,
398 0,
399 hfs_sync_metadata, hfsmp->hfs_mp);
400
401 /*
402 * Set up the trim callback function so that we can add
403 * recently freed extents to the free extent cache once
404 * the transaction that freed them is written to the
405 * journal on disk.
406 */
407 if (hfsmp->jnl)
408 journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);
409
410 hfs_unlock_global (hfsmp);
411
412 if (hfsmp->jnl == NULL) {
413 if (HFS_MOUNT_DEBUG) {
414 printf("hfs_mount: journal_open == NULL; couldn't be opened on %s \n", (hfsmp->vcbVN));
415 }
416 retval = EINVAL;
417 goto out;
418 } else {
419 hfsmp->hfs_flags &= ~HFS_NEED_JNL_RESET;
420 }
55e303ae
A
421
422 }
423
b0d623f7
A
424 /* See if we need to erase unused Catalog nodes due to <rdar://problem/6947811>. */
425 retval = hfs_erase_unused_nodes(hfsmp);
6d2010ae
A
426 if (retval != E_NONE) {
427 if (HFS_MOUNT_DEBUG) {
428 printf("hfs_mount: hfs_erase_unused_nodes returned %d for fs %s\n", retval, hfsmp->vcbVN);
429 }
b0d623f7 430 goto out;
6d2010ae 431 }
1c79356b 432
c910b4d9
A
433 /* If this mount point was downgraded from read-write
434 * to read-only, clear that information as we are now
435 * moving back to read-write.
436 */
437 hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE;
438 hfsmp->hfs_downgrading_proc = NULL;
439
593a1d5f
A
440 /* mark the volume dirty (clear clean unmount bit) */
441 hfsmp->vcbAtrb &= ~kHFSVolumeUnmountedMask;
442
443 retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
6d2010ae
A
444 if (retval != E_NONE) {
445 if (HFS_MOUNT_DEBUG) {
446 printf("hfs_mount: hfs_flushvolumeheader returned %d for fs %s\n", retval, hfsmp->vcbVN);
447 }
593a1d5f 448 goto out;
6d2010ae
A
449 }
450
451 /* Only clear HFS_READ_ONLY after a successful write */
452 hfsmp->hfs_flags &= ~HFS_READ_ONLY;
453
593a1d5f
A
454
/*
 * HFS_READ_ONLY was cleared just above, so this test effectively
 * selects volumes that do not have HFS_STANDARD set.
 */
455 if (!(hfsmp->hfs_flags & (HFS_READ_ONLY | HFS_STANDARD))) {
2d21ac55
A
456 /* Setup private/hidden directories for hardlinks. */
457 hfs_privatedir_init(hfsmp, FILE_HARDLINKS);
458 hfs_privatedir_init(hfsmp, DIR_HARDLINKS);
459
b4c24cb9 460 hfs_remove_orphans(hfsmp);
91447636
A
461
462 /*
463 * Allow hot file clustering if conditions allow.
464 */
6d2010ae 465 if ((hfsmp->hfs_flags & HFS_METADATA_ZONE) &&
316670eb 466 ((hfsmp->hfs_mp->mnt_kern_flag & MNTK_SSD) == 0)) {
91447636
A
467 (void) hfs_recording_init(hfsmp);
468 }
2d21ac55
A
469 /* Force ACLs on HFS+ file systems. */
470 if (vfs_extendedsecurity(HFSTOVFS(hfsmp)) == 0) {
471 vfs_setextendedsecurity(HFSTOVFS(hfsmp));
472 }
55e303ae 473 }
6d2010ae
A
474
475#if CONFIG_HFS_ALLOC_RBTREE
476 /*
477 * Like the normal mount case, we need to handle creation of the allocation red-black tree
478 * if we're upgrading from read-only to read-write.
479 *
480 * We spawn a thread to create the pair of red-black trees for this volume.
481 * However, in so doing, we must be careful to ensure that if this thread is still
482 * running after mount has finished, it doesn't interfere with an unmount. Specifically,
483 * we'll need to set a bit that indicates we're in progress building the trees here.
484 * Unmount will check for this bit, and then if it's set, mark a corresponding bit that
485 * notifies the tree generation code that an unmount is waiting. Also, mark the extent
486 * tree flags that the allocator is enabled for use before we spawn the thread that will start
487 * scanning the RB tree.
488 *
489 * Only do this if we're operating on a read-write mount (we wouldn't care for read-only),
490 * which has not previously encountered a bad error on the red-black tree code. Also, don't
491 * try to re-build a tree that already exists.
316670eb
A
492 *
493 * When this is enabled, we must re-integrate the above function into our bitmap iteration
494 * so that we accurately send TRIMs down to the underlying disk device as needed.
6d2010ae
A
495 */
496
497 if (hfsmp->extent_tree_flags == 0) {
498 hfsmp->extent_tree_flags |= (HFS_ALLOC_TREEBUILD_INFLIGHT | HFS_ALLOC_RB_ENABLED);
499 /* Initialize EOF counter so that the thread can assume it started at initial values */
500 hfsmp->offset_block_end = 0;
501
502 InitTree(hfsmp);
503
504 kernel_thread_start ((thread_continue_t) hfs_initialize_allocator , hfsmp, &allocator_thread);
505 thread_deallocate(allocator_thread);
506 }
507
508#endif
1c79356b
A
509 }
510
91447636
A
511 /* Update file system parameters. */
512 retval = hfs_changefs(mp, &args);
6d2010ae
A
513 if (retval && HFS_MOUNT_DEBUG) {
514 printf("hfs_mount: hfs_changefs returned %d for %s\n", retval, hfsmp->vcbVN);
515 }
1c79356b 516
91447636 517 } else /* not an update request */ {
1c79356b 518
91447636 519 /* Set the mount flag to indicate that we support volfs */
2d21ac55 520 vfs_setflags(mp, (u_int64_t)((unsigned int)MNT_DOVOLFS));
1c79356b 521
2d21ac55 522 retval = hfs_mountfs(devvp, mp, &args, 0, context);
6d2010ae
A
523 if (retval && HFS_MOUNT_DEBUG) {
524 printf("hfs_mount: hfs_mountfs returned %d\n", retval);
525 }
526#if CONFIG_PROTECT
527 /*
528 * If above mount call was successful, and this mount is content protection
529 * enabled, then verify the on-disk EA on the root to ensure that the filesystem
530 * is of a suitable vintage to allow the mount to proceed.
531 */
532 if ((retval == 0) && (cp_fs_protected (mp))) {
533 int err = 0;
316670eb
A
534
535 struct cp_root_xattr *xattr = NULL;
536 MALLOC (xattr, struct cp_root_xattr*, sizeof(struct cp_root_xattr), M_TEMP, M_WAITOK);
537 if (xattr == NULL) {
538 err = ENOMEM;
539 goto badalloc;
540 }
541 bzero (xattr, sizeof(struct cp_root_xattr));
6d2010ae
A
542 hfsmp = vfs_fsprivate(mp);
543
544 /* go get the EA to get the version information */
316670eb
A
545 err = cp_getrootxattr (hfsmp, xattr);
546 /*
547 * If there was no EA there, then write one out.
548 * Assuming EA is not present on the root means
549 * this is an erase install or a very old FS
550 */
6d2010ae 551 if (err == ENOATTR) {
316670eb
A
552 printf("No root EA set, creating new EA with new version: %d\n", CP_NEW_MAJOR_VERS);
553 bzero(xattr, sizeof(struct cp_root_xattr));
554 xattr->major_version = CP_NEW_MAJOR_VERS;
555 xattr->minor_version = CP_MINOR_VERS;
556 xattr->flags = 0;
557
558 err = cp_setrootxattr (hfsmp, xattr);
559 }
6d2010ae 560
6d2010ae
A
561 /*
562 * For any other error, including having an out of date CP version in the
563 * EA, or for an error out of cp_setrootxattr, deny the mount
564 * and do not proceed further.
565 */
316670eb 566 if (err || (xattr->major_version != CP_NEW_MAJOR_VERS && xattr->major_version != CP_PREV_MAJOR_VERS)) {
6d2010ae
A
567 /* Deny the mount and tear down. */
568 retval = EPERM;
569 (void) hfs_unmount (mp, MNT_FORCE, context);
316670eb
A
570 }
/* Note: this message is printed on the denied path as well as on success. */
571 printf("Running with CP root xattr: %d.%d\n", xattr->major_version, xattr->minor_version);
572badalloc:
573 if(xattr) {
574 FREE(xattr, M_TEMP);
575 }
576 }
6d2010ae 577#endif
1c79356b 578 }
91447636
A
/* Common exit: refresh the cached statfs data only when everything succeeded. */
579out:
580 if (retval == 0) {
581 (void)hfs_statfs(mp, vfs_statfs(mp), context);
1c79356b 582 }
91447636
A
583 return (retval);
584}
1c79356b 585
1c79356b 586
91447636
A
/*
 * Argument bundle handed from hfs_changefs to hfs_changefs_callback
 * through vnode_iterate.
 */
587struct hfs_changefs_cargs {
588 struct hfsmount *hfsmp; /* mount whose vnodes are being fixed up */
589 int namefix; /* non-zero: the name encoding converter is being switched */
590 int permfix; /* non-zero: default uid, gid and/or mask changed */
591 int permswitch; /* non-zero: the "unknown permissions" setting toggled */
592};
1c79356b 593
91447636
A
594static int
595hfs_changefs_callback(struct vnode *vp, void *cargs)
596{
597 ExtendedVCB *vcb;
598 struct cnode *cp;
599 struct cat_desc cndesc;
600 struct cat_attr cnattr;
601 struct hfs_changefs_cargs *args;
935ed37a
A
602 int lockflags;
603 int error;
1c79356b 604
91447636 605 args = (struct hfs_changefs_cargs *)cargs;
b4c24cb9 606
91447636
A
607 cp = VTOC(vp);
608 vcb = HFSTOVCB(args->hfsmp);
1c79356b 609
935ed37a
A
610 lockflags = hfs_systemfile_lock(args->hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
611 error = cat_lookup(args->hfsmp, &cp->c_desc, 0, &cndesc, &cnattr, NULL, NULL);
612 hfs_systemfile_unlock(args->hfsmp, lockflags);
613 if (error) {
91447636
A
614 /*
615 * If we couldn't find this guy skip to the next one
616 */
617 if (args->namefix)
618 cache_purge(vp);
1c79356b 619
91447636
A
620 return (VNODE_RETURNED);
621 }
622 /*
623 * Get the real uid/gid and perm mask from disk.
624 */
625 if (args->permswitch || args->permfix) {
626 cp->c_uid = cnattr.ca_uid;
627 cp->c_gid = cnattr.ca_gid;
628 cp->c_mode = cnattr.ca_mode;
629 }
630 /*
631 * If we're switching name converters then...
632 * Remove the existing entry from the namei cache.
633 * Update name to one based on new encoder.
634 */
635 if (args->namefix) {
636 cache_purge(vp);
637 replace_desc(cp, &cndesc);
1c79356b 638
91447636 639 if (cndesc.cd_cnid == kHFSRootFolderID) {
2d21ac55 640 strlcpy((char *)vcb->vcbVN, (const char *)cp->c_desc.cd_nameptr, NAME_MAX+1);
91447636
A
641 cp->c_desc.cd_encoding = args->hfsmp->hfs_encoding;
642 }
643 } else {
644 cat_releasedesc(&cndesc);
645 }
646 return (VNODE_RETURNED);
647}
1c79356b 648
9bccf70c
A
649/* Change fs mount parameters */
650static int
91447636 651hfs_changefs(struct mount *mp, struct hfs_mount_args *args)
1c79356b 652{
9bccf70c 653 int retval = 0;
1c79356b
A
654 int namefix, permfix, permswitch;
655 struct hfsmount *hfsmp;
1c79356b 656 ExtendedVCB *vcb;
1c79356b
A
657 hfs_to_unicode_func_t get_unicode_func;
658 unicode_to_hfs_func_t get_hfsname_func;
b0d623f7 659 u_int32_t old_encoding = 0;
91447636 660 struct hfs_changefs_cargs cargs;
2d21ac55 661 u_int32_t mount_flags;
1c79356b
A
662
663 hfsmp = VFSTOHFS(mp);
664 vcb = HFSTOVCB(hfsmp);
91447636
A
665 mount_flags = (unsigned int)vfs_flags(mp);
666
593a1d5f
A
667 hfsmp->hfs_flags |= HFS_IN_CHANGEFS;
668
55e303ae 669 permswitch = (((hfsmp->hfs_flags & HFS_UNKNOWN_PERMS) &&
91447636 670 ((mount_flags & MNT_UNKNOWNPERMISSIONS) == 0)) ||
55e303ae 671 (((hfsmp->hfs_flags & HFS_UNKNOWN_PERMS) == 0) &&
91447636 672 (mount_flags & MNT_UNKNOWNPERMISSIONS)));
55e303ae 673
0b4e3aa0 674 /* The root filesystem must operate with actual permissions: */
91447636 675 if (permswitch && (mount_flags & MNT_ROOTFS) && (mount_flags & MNT_UNKNOWNPERMISSIONS)) {
2d21ac55 676 vfs_clearflags(mp, (u_int64_t)((unsigned int)MNT_UNKNOWNPERMISSIONS)); /* Just say "No". */
593a1d5f
A
677 retval = EINVAL;
678 goto exit;
55e303ae 679 }
91447636 680 if (mount_flags & MNT_UNKNOWNPERMISSIONS)
55e303ae
A
681 hfsmp->hfs_flags |= HFS_UNKNOWN_PERMS;
682 else
683 hfsmp->hfs_flags &= ~HFS_UNKNOWN_PERMS;
684
685 namefix = permfix = 0;
1c79356b 686
2d21ac55
A
687 /*
688 * Tracking of hot files requires up-to-date access times. So if
689 * access time updates are disabled, we must also disable hot files.
690 */
691 if (mount_flags & MNT_NOATIME) {
692 (void) hfs_recording_suspend(hfsmp);
693 }
694
9bccf70c 695 /* Change the timezone (Note: this affects all hfs volumes and hfs+ volume create dates) */
1c79356b
A
696 if (args->hfs_timezone.tz_minuteswest != VNOVAL) {
697 gTimeZone = args->hfs_timezone;
698 }
699
9bccf70c 700 /* Change the default uid, gid and/or mask */
1c79356b
A
701 if ((args->hfs_uid != (uid_t)VNOVAL) && (hfsmp->hfs_uid != args->hfs_uid)) {
702 hfsmp->hfs_uid = args->hfs_uid;
91447636 703 if (vcb->vcbSigWord == kHFSPlusSigWord)
9bccf70c 704 ++permfix;
1c79356b
A
705 }
706 if ((args->hfs_gid != (gid_t)VNOVAL) && (hfsmp->hfs_gid != args->hfs_gid)) {
707 hfsmp->hfs_gid = args->hfs_gid;
91447636 708 if (vcb->vcbSigWord == kHFSPlusSigWord)
9bccf70c 709 ++permfix;
1c79356b
A
710 }
711 if (args->hfs_mask != (mode_t)VNOVAL) {
712 if (hfsmp->hfs_dir_mask != (args->hfs_mask & ALLPERMS)) {
713 hfsmp->hfs_dir_mask = args->hfs_mask & ALLPERMS;
714 hfsmp->hfs_file_mask = args->hfs_mask & ALLPERMS;
715 if ((args->flags != VNOVAL) && (args->flags & HFSFSMNT_NOXONFILES))
716 hfsmp->hfs_file_mask = (args->hfs_mask & DEFFILEMODE);
91447636 717 if (vcb->vcbSigWord == kHFSPlusSigWord)
9bccf70c 718 ++permfix;
1c79356b
A
719 }
720 }
721
9bccf70c 722 /* Change the hfs encoding value (hfs only) */
91447636 723 if ((vcb->vcbSigWord == kHFSSigWord) &&
b0d623f7 724 (args->hfs_encoding != (u_int32_t)VNOVAL) &&
1c79356b
A
725 (hfsmp->hfs_encoding != args->hfs_encoding)) {
726
727 retval = hfs_getconverter(args->hfs_encoding, &get_unicode_func, &get_hfsname_func);
9bccf70c
A
728 if (retval)
729 goto exit;
1c79356b
A
730
731 /*
732 * Connect the new hfs_get_unicode converter but leave
733 * the old hfs_get_hfsname converter in place so that
734 * we can lookup existing vnodes to get their correctly
735 * encoded names.
736 *
737 * When we're all finished, we can then connect the new
738 * hfs_get_hfsname converter and release our interest
739 * in the old converters.
740 */
741 hfsmp->hfs_get_unicode = get_unicode_func;
9bccf70c
A
742 old_encoding = hfsmp->hfs_encoding;
743 hfsmp->hfs_encoding = args->hfs_encoding;
1c79356b
A
744 ++namefix;
745 }
746
9bccf70c
A
747 if (!(namefix || permfix || permswitch))
748 goto exit;
1c79356b 749
91447636
A
750 /* XXX 3762912 hack to support HFS filesystem 'owner' */
751 if (permfix)
752 vfs_setowner(mp,
753 hfsmp->hfs_uid == UNKNOWNUID ? KAUTH_UID_NONE : hfsmp->hfs_uid,
754 hfsmp->hfs_gid == UNKNOWNGID ? KAUTH_GID_NONE : hfsmp->hfs_gid);
755
1c79356b
A
756 /*
757 * For each active vnode fix things that changed
758 *
759 * Note that we can visit a vnode more than once
760 * and we can race with fsync.
91447636
A
761 *
762 * hfs_changefs_callback will be called for each vnode
763 * hung off of this mount point
935ed37a
A
764 *
765 * The vnode will be properly referenced and unreferenced
766 * around the callback
1c79356b 767 */
91447636
A
768 cargs.hfsmp = hfsmp;
769 cargs.namefix = namefix;
770 cargs.permfix = permfix;
771 cargs.permswitch = permswitch;
1c79356b 772
91447636 773 vnode_iterate(mp, 0, hfs_changefs_callback, (void *)&cargs);
1c79356b 774
1c79356b
A
775 /*
776 * If we're switching name converters we can now
777 * connect the new hfs_get_hfsname converter and
778 * release our interest in the old converters.
779 */
780 if (namefix) {
1c79356b 781 hfsmp->hfs_get_hfsname = get_hfsname_func;
1c79356b 782 vcb->volumeNameEncodingHint = args->hfs_encoding;
1c79356b
A
783 (void) hfs_relconverter(old_encoding);
784 }
9bccf70c 785exit:
593a1d5f 786 hfsmp->hfs_flags &= ~HFS_IN_CHANGEFS;
1c79356b
A
787 return (retval);
788}
789
790
91447636
A
/*
 * Argument bundle handed from hfs_reload to hfs_reload_callback through
 * vnode_iterate.
 */
791struct hfs_reload_cargs {
792 struct hfsmount *hfsmp; /* mount being reloaded */
91447636
A
793 int error; /* result of the last cat_idlookup in the callback; checked by hfs_reload after the iteration */
794};
795
796static int
797hfs_reload_callback(struct vnode *vp, void *cargs)
798{
799 struct cnode *cp;
800 struct hfs_reload_cargs *args;
935ed37a 801 int lockflags;
91447636
A
802
803 args = (struct hfs_reload_cargs *)cargs;
804 /*
805 * flush all the buffers associated with this node
806 */
807 (void) buf_invalidateblks(vp, 0, 0, 0);
808
809 cp = VTOC(vp);
810 /*
811 * Remove any directory hints
812 */
813 if (vnode_isdir(vp))
814 hfs_reldirhints(cp, 0);
815
816 /*
817 * Re-read cnode data for all active vnodes (non-metadata files).
818 */
6d2010ae 819 if (!vnode_issystem(vp) && !VNODE_IS_RSRC(vp) && (cp->c_fileid >= kHFSFirstUserCatalogNodeID)) {
91447636
A
820 struct cat_fork *datafork;
821 struct cat_desc desc;
822
823 datafork = cp->c_datafork ? &cp->c_datafork->ff_data : NULL;
824
825 /* lookup by fileID since name could have changed */
935ed37a
A
826 lockflags = hfs_systemfile_lock(args->hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
827 args->error = cat_idlookup(args->hfsmp, cp->c_fileid, 0, &desc, &cp->c_attr, datafork);
828 hfs_systemfile_unlock(args->hfsmp, lockflags);
829 if (args->error) {
91447636 830 return (VNODE_RETURNED_DONE);
935ed37a 831 }
91447636
A
832
833 /* update cnode's catalog descriptor */
834 (void) replace_desc(cp, &desc);
835 }
836 return (VNODE_RETURNED);
837}
838
1c79356b
A
839/*
840 * Reload all incore data for a filesystem (used after running fsck on
841 * the root filesystem and finding things to fix). The filesystem must
842 * be mounted read-only.
843 *
844 * Things to do to update the mount:
9bccf70c
A
845 * invalidate all cached meta-data.
846 * invalidate all inactive vnodes.
847 * invalidate all cached file data.
848 * re-read volume header from disk.
849 * re-load meta-file info (extents, file size).
850 * re-load B-tree header data.
851 * re-read cnode data for all active vnodes.
1c79356b 852 */
6d2010ae 853int
2d21ac55 854hfs_reload(struct mount *mountp)
1c79356b 855{
91447636 856 register struct vnode *devvp;
1c79356b 857 struct buf *bp;
d52fe63f 858 int error, i;
1c79356b
A
859 struct hfsmount *hfsmp;
860 struct HFSPlusVolumeHeader *vhp;
861 ExtendedVCB *vcb;
9bccf70c
A
862 struct filefork *forkp;
863 struct cat_desc cndesc;
91447636 864 struct hfs_reload_cargs args;
593a1d5f 865 daddr64_t priIDSector;
1c79356b
A
866
867 hfsmp = VFSTOHFS(mountp);
868 vcb = HFSTOVCB(hfsmp);
869
870 if (vcb->vcbSigWord == kHFSSigWord)
871 return (EINVAL); /* rooting from HFS is not supported! */
872
873 /*
874 * Invalidate all cached meta-data.
875 */
876 devvp = hfsmp->hfs_devvp;
91447636 877 if (buf_invalidateblks(devvp, 0, 0, 0))
1c79356b 878 panic("hfs_reload: dirty1");
9bccf70c 879
91447636 880 args.hfsmp = hfsmp;
91447636
A
881 args.error = 0;
882 /*
883 * hfs_reload_callback will be called for each vnode
884 * hung off of this mount point that can't be recycled...
885 * vnode_iterate will recycle those that it can (the VNODE_RELOAD option)
886 * the vnode will be in an 'unbusy' state (VNODE_WAIT) and
887 * properly referenced and unreferenced around the callback
888 */
91447636 889 vnode_iterate(mountp, VNODE_RELOAD | VNODE_WAIT, hfs_reload_callback, (void *)&args);
9bccf70c 890
91447636
A
891 if (args.error)
892 return (args.error);
9bccf70c 893
1c79356b
A
894 /*
895 * Re-read VolumeHeader from disk.
896 */
593a1d5f
A
897 priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
898 HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size));
d52fe63f 899
91447636 900 error = (int)buf_meta_bread(hfsmp->hfs_devvp,
593a1d5f
A
901 HFS_PHYSBLK_ROUNDDOWN(priIDSector, hfsmp->hfs_log_per_phys),
902 hfsmp->hfs_physical_block_size, NOCRED, &bp);
1c79356b
A
903 if (error) {
904 if (bp != NULL)
91447636 905 buf_brelse(bp);
1c79356b
A
906 return (error);
907 }
908
593a1d5f 909 vhp = (HFSPlusVolumeHeader *) (buf_dataptr(bp) + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));
1c79356b 910
9bccf70c 911 /* Do a quick sanity check */
55e303ae
A
912 if ((SWAP_BE16(vhp->signature) != kHFSPlusSigWord &&
913 SWAP_BE16(vhp->signature) != kHFSXSigWord) ||
914 (SWAP_BE16(vhp->version) != kHFSPlusVersion &&
915 SWAP_BE16(vhp->version) != kHFSXVersion) ||
9bccf70c 916 SWAP_BE32(vhp->blockSize) != vcb->blockSize) {
91447636 917 buf_brelse(bp);
9bccf70c 918 return (EIO);
1c79356b
A
919 }
920
9bccf70c 921 vcb->vcbLsMod = to_bsd_time(SWAP_BE32(vhp->modifyDate));
91447636 922 vcb->vcbAtrb = SWAP_BE32 (vhp->attributes);
b4c24cb9 923 vcb->vcbJinfoBlock = SWAP_BE32(vhp->journalInfoBlock);
9bccf70c
A
924 vcb->vcbClpSiz = SWAP_BE32 (vhp->rsrcClumpSize);
925 vcb->vcbNxtCNID = SWAP_BE32 (vhp->nextCatalogID);
926 vcb->vcbVolBkUp = to_bsd_time(SWAP_BE32(vhp->backupDate));
927 vcb->vcbWrCnt = SWAP_BE32 (vhp->writeCount);
928 vcb->vcbFilCnt = SWAP_BE32 (vhp->fileCount);
929 vcb->vcbDirCnt = SWAP_BE32 (vhp->folderCount);
2d21ac55 930 HFS_UPDATE_NEXT_ALLOCATION(vcb, SWAP_BE32 (vhp->nextAllocation));
9bccf70c
A
931 vcb->totalBlocks = SWAP_BE32 (vhp->totalBlocks);
932 vcb->freeBlocks = SWAP_BE32 (vhp->freeBlocks);
1c79356b
A
933 vcb->encodingsBitmap = SWAP_BE64 (vhp->encodingsBitmap);
934 bcopy(vhp->finderInfo, vcb->vcbFndrInfo, sizeof(vhp->finderInfo));
935 vcb->localCreateDate = SWAP_BE32 (vhp->createDate); /* hfs+ create date is in local time */
936
937 /*
938 * Re-load meta-file vnode data (extent info, file size, etc).
939 */
9bccf70c
A
940 forkp = VTOF((struct vnode *)vcb->extentsRefNum);
941 for (i = 0; i < kHFSPlusExtentDensity; i++) {
942 forkp->ff_extents[i].startBlock =
943 SWAP_BE32 (vhp->extentsFile.extents[i].startBlock);
944 forkp->ff_extents[i].blockCount =
945 SWAP_BE32 (vhp->extentsFile.extents[i].blockCount);
946 }
947 forkp->ff_size = SWAP_BE64 (vhp->extentsFile.logicalSize);
948 forkp->ff_blocks = SWAP_BE32 (vhp->extentsFile.totalBlocks);
949 forkp->ff_clumpsize = SWAP_BE32 (vhp->extentsFile.clumpSize);
950
951
952 forkp = VTOF((struct vnode *)vcb->catalogRefNum);
953 for (i = 0; i < kHFSPlusExtentDensity; i++) {
954 forkp->ff_extents[i].startBlock =
955 SWAP_BE32 (vhp->catalogFile.extents[i].startBlock);
956 forkp->ff_extents[i].blockCount =
957 SWAP_BE32 (vhp->catalogFile.extents[i].blockCount);
958 }
959 forkp->ff_size = SWAP_BE64 (vhp->catalogFile.logicalSize);
960 forkp->ff_blocks = SWAP_BE32 (vhp->catalogFile.totalBlocks);
961 forkp->ff_clumpsize = SWAP_BE32 (vhp->catalogFile.clumpSize);
962
91447636
A
963 if (hfsmp->hfs_attribute_vp) {
964 forkp = VTOF(hfsmp->hfs_attribute_vp);
965 for (i = 0; i < kHFSPlusExtentDensity; i++) {
966 forkp->ff_extents[i].startBlock =
967 SWAP_BE32 (vhp->attributesFile.extents[i].startBlock);
968 forkp->ff_extents[i].blockCount =
969 SWAP_BE32 (vhp->attributesFile.extents[i].blockCount);
970 }
971 forkp->ff_size = SWAP_BE64 (vhp->attributesFile.logicalSize);
972 forkp->ff_blocks = SWAP_BE32 (vhp->attributesFile.totalBlocks);
973 forkp->ff_clumpsize = SWAP_BE32 (vhp->attributesFile.clumpSize);
974 }
9bccf70c
A
975
976 forkp = VTOF((struct vnode *)vcb->allocationsRefNum);
977 for (i = 0; i < kHFSPlusExtentDensity; i++) {
978 forkp->ff_extents[i].startBlock =
979 SWAP_BE32 (vhp->allocationFile.extents[i].startBlock);
980 forkp->ff_extents[i].blockCount =
981 SWAP_BE32 (vhp->allocationFile.extents[i].blockCount);
982 }
983 forkp->ff_size = SWAP_BE64 (vhp->allocationFile.logicalSize);
984 forkp->ff_blocks = SWAP_BE32 (vhp->allocationFile.totalBlocks);
985 forkp->ff_clumpsize = SWAP_BE32 (vhp->allocationFile.clumpSize);
1c79356b 986
91447636 987 buf_brelse(bp);
1c79356b
A
988 vhp = NULL;
989
990 /*
991 * Re-load B-tree header data
992 */
9bccf70c 993 forkp = VTOF((struct vnode *)vcb->extentsRefNum);
91447636 994 if ( (error = MacToVFSError( BTReloadData((FCB*)forkp) )) )
1c79356b
A
995 return (error);
996
9bccf70c 997 forkp = VTOF((struct vnode *)vcb->catalogRefNum);
91447636 998 if ( (error = MacToVFSError( BTReloadData((FCB*)forkp) )) )
1c79356b
A
999 return (error);
1000
91447636
A
1001 if (hfsmp->hfs_attribute_vp) {
1002 forkp = VTOF(hfsmp->hfs_attribute_vp);
1003 if ( (error = MacToVFSError( BTReloadData((FCB*)forkp) )) )
1004 return (error);
1005 }
1006
9bccf70c 1007 /* Reload the volume name */
2d21ac55 1008 if ((error = cat_idlookup(hfsmp, kHFSRootFolderID, 0, &cndesc, NULL, NULL)))
1c79356b 1009 return (error);
9bccf70c
A
1010 vcb->volumeNameEncodingHint = cndesc.cd_encoding;
1011 bcopy(cndesc.cd_nameptr, vcb->vcbVN, min(255, cndesc.cd_namelen));
1012 cat_releasedesc(&cndesc);
1c79356b 1013
2d21ac55
A
1014 /* Re-establish private/hidden directories. */
1015 hfs_privatedir_init(hfsmp, FILE_HARDLINKS);
1016 hfs_privatedir_init(hfsmp, DIR_HARDLINKS);
1c79356b 1017
55e303ae
A
1018 /* In case any volume information changed to trigger a notification */
1019 hfs_generate_volume_notifications(hfsmp);
1020
1c79356b
A
1021 return (0);
1022}
1023
b0d623f7 1024
e2fac8b1
A
1025
1026static void
1027hfs_syncer(void *arg0, void *unused)
1028{
1029#pragma unused(unused)
1030
1031 struct hfsmount *hfsmp = arg0;
b0d623f7
A
1032 clock_sec_t secs;
1033 clock_usec_t usecs;
1034 uint32_t delay = HFS_META_DELAY;
e2fac8b1 1035 uint64_t now;
b0d623f7 1036 static int no_max=1;
e2fac8b1
A
1037
1038 clock_get_calendar_microtime(&secs, &usecs);
b0d623f7 1039 now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs;
e2fac8b1
A
1040
1041 //
b0d623f7
A
1042 // If the amount of pending writes is more than our limit, wait
1043 // for 2/3 of it to drain and then flush the journal.
e2fac8b1 1044 //
b0d623f7
A
1045 if (hfsmp->hfs_mp->mnt_pending_write_size > hfsmp->hfs_max_pending_io) {
1046 int counter=0;
ebb1b9f4 1047 uint64_t pending_io, start, rate = 0;
b0d623f7
A
1048
1049 no_max = 0;
e2fac8b1 1050
b0d623f7 1051 hfs_start_transaction(hfsmp); // so we hold off any new i/o's
e2fac8b1 1052
b0d623f7
A
1053 pending_io = hfsmp->hfs_mp->mnt_pending_write_size;
1054
1055 clock_get_calendar_microtime(&secs, &usecs);
1056 start = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs;
1057
1058 while(hfsmp->hfs_mp->mnt_pending_write_size > (pending_io/3) && counter++ < 500) {
1059 tsleep((caddr_t)hfsmp, PRIBIO, "hfs-wait-for-io-to-drain", 10);
1060 }
1061
1062 if (counter >= 500) {
1063 printf("hfs: timed out waiting for io to drain (%lld)\n", (int64_t)hfsmp->hfs_mp->mnt_pending_write_size);
1064 }
1065
1066 if (hfsmp->jnl) {
6d2010ae 1067 journal_flush(hfsmp->jnl, FALSE);
b0d623f7
A
1068 } else {
1069 hfs_sync(hfsmp->hfs_mp, MNT_WAIT, vfs_context_kernel());
1070 }
1071
1072 clock_get_calendar_microtime(&secs, &usecs);
1073 now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs;
1074 hfsmp->hfs_last_sync_time = now;
ebb1b9f4
A
1075 if (now != start) {
1076 rate = ((pending_io * 1000000ULL) / (now - start)); // yields bytes per second
1077 }
e2fac8b1 1078
b0d623f7
A
1079 hfs_end_transaction(hfsmp);
1080
1081 //
1082 // If a reasonable amount of time elapsed then check the
1083 // i/o rate. If it's taking less than 1 second or more
1084 // than 2 seconds, adjust hfs_max_pending_io so that we
1085 // will allow about 1.5 seconds of i/o to queue up.
1086 //
ebb1b9f4 1087 if (((now - start) >= 300000) && (rate != 0)) {
b0d623f7
A
1088 uint64_t scale = (pending_io * 100) / rate;
1089
1090 if (scale < 100 || scale > 200) {
1091 // set it so that it should take about 1.5 seconds to drain
1092 hfsmp->hfs_max_pending_io = (rate * 150ULL) / 100ULL;
1093 }
1094 }
1095
1096 } else if ( ((now - hfsmp->hfs_last_sync_time) >= 5000000ULL)
1097 || (((now - hfsmp->hfs_last_sync_time) >= 100000LL)
1098 && ((now - hfsmp->hfs_last_sync_request_time) >= 100000LL)
1099 && (hfsmp->hfs_active_threads == 0)
1100 && (hfsmp->hfs_global_lock_nesting == 0))) {
1101
1102 //
1103 // Flush the journal if more than 5 seconds elapsed since
1104 // the last sync OR we have not sync'ed recently and the
1105 // last sync request time was more than 100 milliseconds
1106 // ago and no one is in the middle of a transaction right
1107 // now. Else we defer the sync and reschedule it.
1108 //
e2fac8b1 1109 if (hfsmp->jnl) {
6d2010ae 1110 hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
b0d623f7 1111
6d2010ae 1112 journal_flush(hfsmp->jnl, FALSE);
b0d623f7 1113
6d2010ae 1114 hfs_unlock_global (hfsmp);
b0d623f7
A
1115 } else {
1116 hfs_sync(hfsmp->hfs_mp, MNT_WAIT, vfs_context_kernel());
e2fac8b1 1117 }
b0d623f7 1118
e2fac8b1 1119 clock_get_calendar_microtime(&secs, &usecs);
b0d623f7
A
1120 now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs;
1121 hfsmp->hfs_last_sync_time = now;
e2fac8b1
A
1122
1123 } else if (hfsmp->hfs_active_threads == 0) {
1124 uint64_t deadline;
1125
e2fac8b1
A
1126 clock_interval_to_deadline(delay, HFS_MILLISEC_SCALE, &deadline);
1127 thread_call_enter_delayed(hfsmp->hfs_syncer, deadline);
b0d623f7
A
1128
1129 // note: we intentionally return early here and do not
1130 // decrement the sync_scheduled and sync_incomplete
1131 // variables because we rescheduled the timer.
1132
e2fac8b1
A
1133 return;
1134 }
1135
1136 //
1137 // NOTE: we decrement these *after* we're done the journal_flush() since
1138 // it can take a significant amount of time and so we don't want more
1139 // callbacks scheduled until we're done this one.
1140 //
1141 OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_scheduled);
1142 OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_incomplete);
1143 wakeup((caddr_t)&hfsmp->hfs_sync_incomplete);
1144}
1145
b0d623f7 1146
e2fac8b1 1147extern int IOBSDIsMediaEjectable( const char *cdev_name );
1c79356b 1148
6d2010ae
A
/*
 * Initialization code for Red-Black Tree Allocator
 *
 * This function will build the two red-black trees necessary for allocating space
 * from the metadata zone as well as normal allocations.  Currently, we use
 * an advisory read to get most of the data into the buffer cache.
 * This function is intended to be run in a separate thread so as not to slow down mount.
 *
 * When CONFIG_HFS_ALLOC_RBTREE is not configured this is a no-op.
 *
 * On return the HFS_ALLOC_TREEBUILD_INFLIGHT bit is clear; HFS_ALLOC_RB_ACTIVE
 * is set only if GenerateTree() reported success.
 */

void
hfs_initialize_allocator (struct hfsmount *hfsmp) {

#if CONFIG_HFS_ALLOC_RBTREE
	u_int32_t err;

	/*
	 * Take the allocation file lock.  Journal transactions will block until
	 * we're done here.
	 */
	int flags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);

	/*
	 * GenerateTree assumes that the bitmap lock is held when you call the function.
	 * It will drop and re-acquire the lock periodically as needed to let other allocations
	 * through.  It returns with the bitmap lock held. Since we only maintain one tree,
	 * we don't need to specify a start block (always starts at 0).
	 */
	err = GenerateTree(hfsmp, hfsmp->totalBlocks, &flags, 1);
	if (err == 0) {
		/* Mark offset tree as built */
		hfsmp->extent_tree_flags |= HFS_ALLOC_RB_ACTIVE;
	}

	/*
	 * GenerateTree may drop the bitmap lock during operation in order to give other
	 * threads a chance to allocate blocks, but it will always return with the lock held, so
	 * we don't need to re-grab the lock in order to update the TREEBUILD_INFLIGHT bit.
	 */
	hfsmp->extent_tree_flags &= ~HFS_ALLOC_TREEBUILD_INFLIGHT;

	/*
	 * Wake anyone sleeping on extent_tree_flags.  hfs_teardown_allocator()
	 * sleeps on this channel until TREEBUILD_INFLIGHT clears, so the wakeup
	 * must be issued whether or not the build succeeded; waking with no
	 * sleepers is harmless.
	 */
	wakeup((caddr_t)&hfsmp->extent_tree_flags);

	hfs_systemfile_unlock(hfsmp, flags);
#else
#pragma unused (hfsmp)
#endif
}
1201
316670eb
A
1202void hfs_unmap_blocks (struct hfsmount *hfsmp) {
1203 /*
1204 * Take the allocation file lock. Journal transactions will block until
1205 * we're done here.
1206 */
1207 int flags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
1208
1209 /*
1210 * UnmapBlocks assumes that the bitmap lock is held when you call the function.
1211 * We don't care if there were any error issuing unmaps yet.
1212 */
1213 (void) UnmapBlocks(hfsmp);
1214
1215 hfs_systemfile_unlock(hfsmp, flags);
1216}
1217
6d2010ae
A
1218
/*
 * Teardown code for the Red-Black Tree allocator.
 *
 * Serializes against a thread that may still be building the tree: while
 * HFS_ALLOC_TREEBUILD_INFLIGHT is set, this routine flags
 * HFS_ALLOC_TEARDOWN_INFLIGHT and sleeps on extent_tree_flags.  Once the
 * build is no longer in flight, the trees are destroyed if they are active.
 *
 * Returns:
 *	1 -> Tree was live (and has been destroyed).
 *	0 -> Tree was not active at time of call, or the red-black tree
 *	     allocator is not configured.
 */

int
hfs_teardown_allocator (struct hfsmount *hfsmp)
{
	int was_live = 0;

#if CONFIG_HFS_ALLOC_RBTREE
	int lockflags = 0;

	lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);

	/*
	 * Check to see if the tree-generation is still on-going.
	 * If it is, then block until it's done.  The sleep is on the
	 * allocation file cnode's rwlock, re-taken exclusively on wakeup.
	 */
	for (;;) {
		if ((hfsmp->extent_tree_flags & HFS_ALLOC_TREEBUILD_INFLIGHT) == 0) {
			break;
		}

		hfsmp->extent_tree_flags |= HFS_ALLOC_TEARDOWN_INFLIGHT;

		lck_rw_sleep(&(VTOC(hfsmp->hfs_allocation_vp))->c_rwlock, LCK_SLEEP_EXCLUSIVE,
					 &hfsmp->extent_tree_flags, THREAD_UNINT);
	}

	if (hfs_isrbtree_active (hfsmp)) {
		was_live = 1;

		/* Tear down the RB Trees while we have the bitmap locked */
		DestroyTrees(hfsmp);
	}

	hfs_systemfile_unlock(hfsmp, lockflags);
#else
	#pragma unused (hfsmp)
#endif

	return was_live;
}
1267
6d2010ae
A
1268static int hfs_root_unmounted_cleanly = 0;
1269
1270SYSCTL_DECL(_vfs_generic);
1271SYSCTL_INT(_vfs_generic, OID_AUTO, root_unmounted_cleanly, CTLFLAG_RD, &hfs_root_unmounted_cleanly, 0, "Root filesystem was unmounted cleanly");
1272
1c79356b
A
1273/*
1274 * Common code for mount and mountroot
1275 */
6d2010ae 1276int
91447636 1277hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
2d21ac55 1278 int journal_replay_only, vfs_context_t context)
1c79356b 1279{
91447636 1280 struct proc *p = vfs_context_proc(context);
9bccf70c 1281 int retval = E_NONE;
b0d623f7 1282 struct hfsmount *hfsmp = NULL;
9bccf70c
A
1283 struct buf *bp;
1284 dev_t dev;
b0d623f7 1285 HFSMasterDirectoryBlock *mdbp = NULL;
9bccf70c 1286 int ronly;
2d21ac55 1287#if QUOTA
9bccf70c 1288 int i;
2d21ac55 1289#endif
9bccf70c 1290 int mntwrapper;
91447636 1291 kauth_cred_t cred;
d52fe63f 1292 u_int64_t disksize;
593a1d5f
A
1293 daddr64_t log_blkcnt;
1294 u_int32_t log_blksize;
1295 u_int32_t phys_blksize;
d52fe63f 1296 u_int32_t minblksize;
9bccf70c 1297 u_int32_t iswritable;
91447636 1298 daddr64_t mdb_offset;
2d21ac55 1299 int isvirtual = 0;
e2fac8b1 1300 int isroot = 0;
316670eb 1301 u_int32_t device_features = 0;
6d2010ae
A
1302 int isssd;
1303#if CONFIG_HFS_ALLOC_RBTREE
1304 thread_t allocator_thread;
1305#endif
060df5ea 1306
b0d623f7
A
1307 if (args == NULL) {
1308 /* only hfs_mountroot passes us NULL as the 'args' argument */
1309 isroot = 1;
1310 }
1311
91447636
A
1312 ronly = vfs_isrdonly(mp);
1313 dev = vnode_specrdev(devvp);
1314 cred = p ? vfs_context_ucred(context) : NOCRED;
9bccf70c 1315 mntwrapper = 0;
1c79356b 1316
d52fe63f
A
1317 bp = NULL;
1318 hfsmp = NULL;
9bccf70c 1319 mdbp = NULL;
d52fe63f 1320 minblksize = kHFSBlockSize;
1c79356b 1321
91447636
A
1322 /* Advisory locking should be handled at the VFS layer */
1323 vfs_setlocklocal(mp);
1324
593a1d5f
A
1325 /* Get the logical block size (treated as physical block size everywhere) */
1326 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&log_blksize, 0, context)) {
6d2010ae
A
1327 if (HFS_MOUNT_DEBUG) {
1328 printf("hfs_mountfs: DKIOCGETBLOCKSIZE failed\n");
1329 }
d52fe63f
A
1330 retval = ENXIO;
1331 goto error_exit;
1332 }
b0d623f7
A
1333 if (log_blksize == 0 || log_blksize > 1024*1024*1024) {
1334 printf("hfs: logical block size 0x%x looks bad. Not mounting.\n", log_blksize);
1335 retval = ENXIO;
1336 goto error_exit;
1337 }
1338
593a1d5f
A
1339 /* Get the physical block size. */
1340 retval = VNOP_IOCTL(devvp, DKIOCGETPHYSICALBLOCKSIZE, (caddr_t)&phys_blksize, 0, context);
1341 if (retval) {
1342 if ((retval != ENOTSUP) && (retval != ENOTTY)) {
6d2010ae
A
1343 if (HFS_MOUNT_DEBUG) {
1344 printf("hfs_mountfs: DKIOCGETPHYSICALBLOCKSIZE failed\n");
1345 }
593a1d5f
A
1346 retval = ENXIO;
1347 goto error_exit;
1348 }
1349 /* If device does not support this ioctl, assume that physical
1350 * block size is same as logical block size
1351 */
1352 phys_blksize = log_blksize;
1353 }
b0d623f7
A
1354 if (phys_blksize == 0 || phys_blksize > 1024*1024*1024) {
1355 printf("hfs: physical block size 0x%x looks bad. Not mounting.\n", phys_blksize);
1356 retval = ENXIO;
1357 goto error_exit;
1358 }
1359
d52fe63f 1360 /* Switch to 512 byte sectors (temporarily) */
593a1d5f 1361 if (log_blksize > 512) {
d52fe63f
A
1362 u_int32_t size512 = 512;
1363
91447636 1364 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&size512, FWRITE, context)) {
6d2010ae
A
1365 if (HFS_MOUNT_DEBUG) {
1366 printf("hfs_mountfs: DKIOCSETBLOCKSIZE failed \n");
1367 }
d52fe63f
A
1368 retval = ENXIO;
1369 goto error_exit;
1370 }
1371 }
1372 /* Get the number of 512 byte physical blocks. */
593a1d5f 1373 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) {
2d21ac55 1374 /* resetting block size may fail if getting block count did */
593a1d5f 1375 (void)VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context);
6d2010ae
A
1376 if (HFS_MOUNT_DEBUG) {
1377 printf("hfs_mountfs: DKIOCGETBLOCKCOUNT failed\n");
1378 }
d52fe63f
A
1379 retval = ENXIO;
1380 goto error_exit;
1381 }
1382 /* Compute an accurate disk size (i.e. within 512 bytes) */
593a1d5f 1383 disksize = (u_int64_t)log_blkcnt * (u_int64_t)512;
1c79356b 1384
d52fe63f 1385 /*
91447636
A
1386 * On Tiger it is not necessary to switch the device
1387 * block size to be 4k if there are more than 31-bits
1388 * worth of blocks but to insure compatibility with
1389 * pre-Tiger systems we have to do it.
c910b4d9
A
1390 *
1391 * If the device size is not a multiple of 4K (8 * 512), then
1392 * switching the logical block size isn't going to help because
1393 * we will be unable to write the alternate volume header.
1394 * In this case, just leave the logical block size unchanged.
1c79356b 1395 */
c910b4d9 1396 if (log_blkcnt > 0x000000007fffffff && (log_blkcnt & 7) == 0) {
593a1d5f
A
1397 minblksize = log_blksize = 4096;
1398 if (phys_blksize < log_blksize)
1399 phys_blksize = log_blksize;
d52fe63f 1400 }
91447636 1401
b0d623f7
A
1402 /*
1403 * The cluster layer is not currently prepared to deal with a logical
1404 * block size larger than the system's page size. (It can handle
1405 * blocks per page, but not multiple pages per block.) So limit the
1406 * logical block size to the page size.
1407 */
1408 if (log_blksize > PAGE_SIZE)
1409 log_blksize = PAGE_SIZE;
1410
2d21ac55 1411 /* Now switch to our preferred physical block size. */
593a1d5f
A
1412 if (log_blksize > 512) {
1413 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context)) {
6d2010ae
A
1414 if (HFS_MOUNT_DEBUG) {
1415 printf("hfs_mountfs: DKIOCSETBLOCKSIZE (2) failed\n");
1416 }
d52fe63f
A
1417 retval = ENXIO;
1418 goto error_exit;
1419 }
1420 /* Get the count of physical blocks. */
593a1d5f 1421 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) {
6d2010ae
A
1422 if (HFS_MOUNT_DEBUG) {
1423 printf("hfs_mountfs: DKIOCGETBLOCKCOUNT (2) failed\n");
1424 }
d52fe63f
A
1425 retval = ENXIO;
1426 goto error_exit;
1427 }
1428 }
d52fe63f
A
1429 /*
1430 * At this point:
1431 * minblksize is the minimum physical block size
593a1d5f
A
1432 * log_blksize has our preferred physical block size
1433 * log_blkcnt has the total number of physical blocks
1c79356b 1434 */
0b4e3aa0 1435
593a1d5f
A
1436 mdb_offset = (daddr64_t)HFS_PRI_SECTOR(log_blksize);
1437 if ((retval = (int)buf_meta_bread(devvp,
1438 HFS_PHYSBLK_ROUNDDOWN(mdb_offset, (phys_blksize/log_blksize)),
1439 phys_blksize, cred, &bp))) {
6d2010ae
A
1440 if (HFS_MOUNT_DEBUG) {
1441 printf("hfs_mountfs: buf_meta_bread failed with %d\n", retval);
1442 }
d52fe63f
A
1443 goto error_exit;
1444 }
9bccf70c 1445 MALLOC(mdbp, HFSMasterDirectoryBlock *, kMDBSize, M_TEMP, M_WAITOK);
b0d623f7
A
1446 if (mdbp == NULL) {
1447 retval = ENOMEM;
6d2010ae
A
1448 if (HFS_MOUNT_DEBUG) {
1449 printf("hfs_mountfs: MALLOC failed\n");
1450 }
b0d623f7
A
1451 goto error_exit;
1452 }
593a1d5f 1453 bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize), mdbp, kMDBSize);
91447636 1454 buf_brelse(bp);
9bccf70c 1455 bp = NULL;
1c79356b 1456
d52fe63f 1457 MALLOC(hfsmp, struct hfsmount *, sizeof(struct hfsmount), M_HFSMNT, M_WAITOK);
b0d623f7 1458 if (hfsmp == NULL) {
6d2010ae
A
1459 if (HFS_MOUNT_DEBUG) {
1460 printf("hfs_mountfs: MALLOC (2) failed\n");
1461 }
b0d623f7
A
1462 retval = ENOMEM;
1463 goto error_exit;
1464 }
d52fe63f 1465 bzero(hfsmp, sizeof(struct hfsmount));
b4c24cb9 1466
b0d623f7 1467 hfs_chashinit_finish(hfsmp);
316670eb 1468
060df5ea 1469 /*
316670eb
A
1470 * See if the disk supports unmap (trim).
1471 *
1472 * NOTE: vfs_init_io_attributes has not been called yet, so we can't use the io_flags field
1473 * returned by vfs_ioattr. We need to call VNOP_IOCTL ourselves.
1474 */
1475 if (VNOP_IOCTL(devvp, DKIOCGETFEATURES, (caddr_t)&device_features, 0, context) == 0) {
1476 if (device_features & DK_FEATURE_UNMAP) {
1477 hfsmp->hfs_flags |= HFS_UNMAP;
1478 }
1479 }
1480
1481 /*
1482 * See if the disk is a solid state device, too. We need this to decide what to do about
6d2010ae 1483 * hotfiles.
060df5ea 1484 */
6d2010ae
A
1485 if (VNOP_IOCTL(devvp, DKIOCISSOLIDSTATE, (caddr_t)&isssd, 0, context) == 0) {
1486 if (isssd) {
1487 hfsmp->hfs_flags |= HFS_SSD;
060df5ea
A
1488 }
1489 }
6d2010ae
A
1490
1491
9bccf70c 1492 /*
91447636
A
1493 * Init the volume information structure
1494 */
1495
1496 lck_mtx_init(&hfsmp->hfs_mutex, hfs_mutex_group, hfs_lock_attr);
1497 lck_mtx_init(&hfsmp->hfc_mutex, hfs_mutex_group, hfs_lock_attr);
1498 lck_rw_init(&hfsmp->hfs_global_lock, hfs_rwlock_group, hfs_lock_attr);
3a60a9f5 1499 lck_rw_init(&hfsmp->hfs_insync, hfs_rwlock_group, hfs_lock_attr);
6d2010ae
A
1500 lck_spin_init(&hfsmp->vcbFreeExtLock, hfs_spinlock_group, hfs_lock_attr);
1501
91447636 1502 vfs_setfsprivate(mp, hfsmp);
9bccf70c 1503 hfsmp->hfs_mp = mp; /* Make VFSTOHFS work */
91447636 1504 hfsmp->hfs_raw_dev = vnode_specrdev(devvp);
9bccf70c 1505 hfsmp->hfs_devvp = devvp;
2d21ac55 1506 vnode_ref(devvp); /* Hold a ref on the device, dropped when hfsmp is freed. */
593a1d5f
A
1507 hfsmp->hfs_logical_block_size = log_blksize;
1508 hfsmp->hfs_logical_block_count = log_blkcnt;
316670eb 1509 hfsmp->hfs_logical_bytes = (uint64_t) log_blksize * (uint64_t) log_blkcnt;
593a1d5f
A
1510 hfsmp->hfs_physical_block_size = phys_blksize;
1511 hfsmp->hfs_log_per_phys = (phys_blksize / log_blksize);
55e303ae
A
1512 hfsmp->hfs_flags |= HFS_WRITEABLE_MEDIA;
1513 if (ronly)
1514 hfsmp->hfs_flags |= HFS_READ_ONLY;
91447636 1515 if (((unsigned int)vfs_flags(mp)) & MNT_UNKNOWNPERMISSIONS)
55e303ae 1516 hfsmp->hfs_flags |= HFS_UNKNOWN_PERMS;
2d21ac55
A
1517
1518#if QUOTA
9bccf70c 1519 for (i = 0; i < MAXQUOTAS; i++)
91447636 1520 dqfileinit(&hfsmp->hfs_qfiles[i]);
2d21ac55 1521#endif
9bccf70c 1522
1c79356b
A
1523 if (args) {
1524 hfsmp->hfs_uid = (args->hfs_uid == (uid_t)VNOVAL) ? UNKNOWNUID : args->hfs_uid;
1525 if (hfsmp->hfs_uid == 0xfffffffd) hfsmp->hfs_uid = UNKNOWNUID;
1526 hfsmp->hfs_gid = (args->hfs_gid == (gid_t)VNOVAL) ? UNKNOWNGID : args->hfs_gid;
1527 if (hfsmp->hfs_gid == 0xfffffffd) hfsmp->hfs_gid = UNKNOWNGID;
91447636 1528 vfs_setowner(mp, hfsmp->hfs_uid, hfsmp->hfs_gid); /* tell the VFS */
1c79356b
A
1529 if (args->hfs_mask != (mode_t)VNOVAL) {
1530 hfsmp->hfs_dir_mask = args->hfs_mask & ALLPERMS;
1531 if (args->flags & HFSFSMNT_NOXONFILES) {
1532 hfsmp->hfs_file_mask = (args->hfs_mask & DEFFILEMODE);
1533 } else {
1534 hfsmp->hfs_file_mask = args->hfs_mask & ALLPERMS;
1535 }
1536 } else {
1537 hfsmp->hfs_dir_mask = UNKNOWNPERMISSIONS & ALLPERMS; /* 0777: rwx---rwx */
1538 hfsmp->hfs_file_mask = UNKNOWNPERMISSIONS & DEFFILEMODE; /* 0666: no --x by default? */
9bccf70c
A
1539 }
1540 if ((args->flags != (int)VNOVAL) && (args->flags & HFSFSMNT_WRAPPER))
1541 mntwrapper = 1;
1c79356b
A
1542 } else {
1543 /* Even w/o explicit mount arguments, MNT_UNKNOWNPERMISSIONS requires setting up uid, gid, and mask: */
91447636 1544 if (((unsigned int)vfs_flags(mp)) & MNT_UNKNOWNPERMISSIONS) {
1c79356b
A
1545 hfsmp->hfs_uid = UNKNOWNUID;
1546 hfsmp->hfs_gid = UNKNOWNGID;
91447636 1547 vfs_setowner(mp, hfsmp->hfs_uid, hfsmp->hfs_gid); /* tell the VFS */
1c79356b
A
1548 hfsmp->hfs_dir_mask = UNKNOWNPERMISSIONS & ALLPERMS; /* 0777: rwx---rwx */
1549 hfsmp->hfs_file_mask = UNKNOWNPERMISSIONS & DEFFILEMODE; /* 0666: no --x by default? */
9bccf70c
A
1550 }
1551 }
1552
1553 /* Find out if disk media is writable. */
91447636 1554 if (VNOP_IOCTL(devvp, DKIOCISWRITABLE, (caddr_t)&iswritable, 0, context) == 0) {
9bccf70c 1555 if (iswritable)
55e303ae 1556 hfsmp->hfs_flags |= HFS_WRITEABLE_MEDIA;
9bccf70c 1557 else
55e303ae 1558 hfsmp->hfs_flags &= ~HFS_WRITEABLE_MEDIA;
9bccf70c 1559 }
1c79356b 1560
91447636 1561 // record the current time at which we're mounting this volume
2d21ac55
A
1562 struct timeval tv;
1563 microtime(&tv);
1564 hfsmp->hfs_mount_time = tv.tv_sec;
91447636 1565
d52fe63f
A
1566 /* Mount a standard HFS disk */
1567 if ((SWAP_BE16(mdbp->drSigWord) == kHFSSigWord) &&
9bccf70c 1568 (mntwrapper || (SWAP_BE16(mdbp->drEmbedSigWord) != kHFSPlusSigWord))) {
316670eb 1569#if CONFIG_HFS_STD
b0d623f7
A
1570 /* On 10.6 and beyond, non read-only mounts for HFS standard vols get rejected */
1571 if (vfs_isrdwr(mp)) {
1572 retval = EROFS;
1573 goto error_exit;
1574 }
6d2010ae
A
1575
1576 printf("hfs_mountfs: Mounting HFS Standard volumes was deprecated in Mac OS 10.7 \n");
1577
b0d623f7
A
1578 /* Treat it as if it's read-only and not writeable */
1579 hfsmp->hfs_flags |= HFS_READ_ONLY;
1580 hfsmp->hfs_flags &= ~HFS_WRITEABLE_MEDIA;
1581
1582 /* If only journal replay is requested, exit immediately */
2d21ac55
A
1583 if (journal_replay_only) {
1584 retval = 0;
1585 goto error_exit;
1586 }
1587
91447636 1588 if ((vfs_flags(mp) & MNT_ROOTFS)) {
d52fe63f 1589 retval = EINVAL; /* Cannot root from HFS standard disks */
1c79356b 1590 goto error_exit;
d52fe63f
A
1591 }
1592 /* HFS disks can only use 512 byte physical blocks */
593a1d5f
A
1593 if (log_blksize > kHFSBlockSize) {
1594 log_blksize = kHFSBlockSize;
1595 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context)) {
d52fe63f
A
1596 retval = ENXIO;
1597 goto error_exit;
1598 }
593a1d5f 1599 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) {
d52fe63f
A
1600 retval = ENXIO;
1601 goto error_exit;
1602 }
593a1d5f
A
1603 hfsmp->hfs_logical_block_size = log_blksize;
1604 hfsmp->hfs_logical_block_count = log_blkcnt;
316670eb 1605 hfsmp->hfs_logical_bytes = (uint64_t) log_blksize * (uint64_t) log_blkcnt;
c910b4d9
A
1606 hfsmp->hfs_physical_block_size = log_blksize;
1607 hfsmp->hfs_log_per_phys = 1;
d52fe63f 1608 }
1c79356b
A
1609 if (args) {
1610 hfsmp->hfs_encoding = args->hfs_encoding;
1611 HFSTOVCB(hfsmp)->volumeNameEncodingHint = args->hfs_encoding;
1612
1c79356b
A
1613 /* establish the timezone */
1614 gTimeZone = args->hfs_timezone;
1615 }
1616
9bccf70c
A
1617 retval = hfs_getconverter(hfsmp->hfs_encoding, &hfsmp->hfs_get_unicode,
1618 &hfsmp->hfs_get_hfsname);
d52fe63f
A
1619 if (retval)
1620 goto error_exit;
1c79356b 1621
d52fe63f 1622 retval = hfs_MountHFSVolume(hfsmp, mdbp, p);
1c79356b
A
1623 if (retval)
1624 (void) hfs_relconverter(hfsmp->hfs_encoding);
316670eb
A
1625#else
1626 /* On platforms where HFS Standard is not supported, deny the mount altogether */
1627 retval = EINVAL;
1628 goto error_exit;
1629#endif
1c79356b 1630
d52fe63f
A
1631 } else /* Mount an HFS Plus disk */ {
1632 HFSPlusVolumeHeader *vhp;
1633 off_t embeddedOffset;
b4c24cb9 1634 int jnl_disable = 0;
d52fe63f
A
1635
1636 /* Get the embedded Volume Header */
1637 if (SWAP_BE16(mdbp->drEmbedSigWord) == kHFSPlusSigWord) {
1638 embeddedOffset = SWAP_BE16(mdbp->drAlBlSt) * kHFSBlockSize;
1639 embeddedOffset += (u_int64_t)SWAP_BE16(mdbp->drEmbedExtent.startBlock) *
1640 (u_int64_t)SWAP_BE32(mdbp->drAlBlkSiz);
1641
d52fe63f
A
1642 /*
1643 * If the embedded volume doesn't start on a block
1644 * boundary, then switch the device to a 512-byte
1645 * block size so everything will line up on a block
1646 * boundary.
1647 */
593a1d5f 1648 if ((embeddedOffset % log_blksize) != 0) {
b0d623f7 1649 printf("hfs_mountfs: embedded volume offset not"
d52fe63f 1650 " a multiple of physical block size (%d);"
593a1d5f
A
1651 " switching to 512\n", log_blksize);
1652 log_blksize = 512;
91447636 1653 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE,
593a1d5f 1654 (caddr_t)&log_blksize, FWRITE, context)) {
6d2010ae
A
1655
1656 if (HFS_MOUNT_DEBUG) {
1657 printf("hfs_mountfs: DKIOCSETBLOCKSIZE (3) failed\n");
1658 }
d52fe63f
A
1659 retval = ENXIO;
1660 goto error_exit;
1661 }
91447636 1662 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT,
593a1d5f 1663 (caddr_t)&log_blkcnt, 0, context)) {
6d2010ae
A
1664 if (HFS_MOUNT_DEBUG) {
1665 printf("hfs_mountfs: DKIOCGETBLOCKCOUNT (3) failed\n");
1666 }
d52fe63f
A
1667 retval = ENXIO;
1668 goto error_exit;
1669 }
d52fe63f 1670 /* Note: relative block count adjustment */
593a1d5f
A
1671 hfsmp->hfs_logical_block_count *=
1672 hfsmp->hfs_logical_block_size / log_blksize;
c910b4d9 1673
b0d623f7
A
1674 /* Update logical /physical block size */
1675 hfsmp->hfs_logical_block_size = log_blksize;
c910b4d9 1676 hfsmp->hfs_physical_block_size = log_blksize;
316670eb 1677
c910b4d9
A
1678 phys_blksize = log_blksize;
1679 hfsmp->hfs_log_per_phys = 1;
d52fe63f
A
1680 }
1681
9bccf70c
A
1682 disksize = (u_int64_t)SWAP_BE16(mdbp->drEmbedExtent.blockCount) *
1683 (u_int64_t)SWAP_BE32(mdbp->drAlBlkSiz);
1684
593a1d5f 1685 hfsmp->hfs_logical_block_count = disksize / log_blksize;
9bccf70c 1686
316670eb
A
1687 hfsmp->hfs_logical_bytes = (uint64_t) hfsmp->hfs_logical_block_count * (uint64_t) hfsmp->hfs_logical_block_size;
1688
593a1d5f
A
1689 mdb_offset = (daddr64_t)((embeddedOffset / log_blksize) + HFS_PRI_SECTOR(log_blksize));
1690 retval = (int)buf_meta_bread(devvp, HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
1691 phys_blksize, cred, &bp);
6d2010ae
A
1692 if (retval) {
1693 if (HFS_MOUNT_DEBUG) {
1694 printf("hfs_mountfs: buf_meta_bread (2) failed with %d\n", retval);
1695 }
d52fe63f 1696 goto error_exit;
6d2010ae 1697 }
593a1d5f 1698 bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize), mdbp, 512);
91447636 1699 buf_brelse(bp);
9bccf70c
A
1700 bp = NULL;
1701 vhp = (HFSPlusVolumeHeader*) mdbp;
d52fe63f
A
1702
1703 } else /* pure HFS+ */ {
1704 embeddedOffset = 0;
1705 vhp = (HFSPlusVolumeHeader*) mdbp;
1706 }
1707
6d2010ae 1708 if (isroot) {
316670eb 1709 hfs_root_unmounted_cleanly = ((SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) != 0);
6d2010ae
A
1710 }
1711
2d21ac55
A
1712 /*
1713 * On inconsistent disks, do not allow read-write mount
b0d623f7
A
1714 * unless it is the boot volume being mounted. We also
1715 * always want to replay the journal if the journal_replay_only
1716 * flag is set because that will (most likely) get the
1717 * disk into a consistent state before fsck_hfs starts
1718 * looking at it.
2d21ac55 1719 */
b0d623f7
A
1720 if ( !(vfs_flags(mp) & MNT_ROOTFS)
1721 && (SWAP_BE32(vhp->attributes) & kHFSVolumeInconsistentMask)
1722 && !journal_replay_only
1723 && !(hfsmp->hfs_flags & HFS_READ_ONLY)) {
6d2010ae
A
1724
1725 if (HFS_MOUNT_DEBUG) {
1726 printf("hfs_mountfs: failed to mount non-root inconsistent disk\n");
1727 }
2d21ac55
A
1728 retval = EINVAL;
1729 goto error_exit;
1730 }
1731
1732
b4c24cb9
A
1733 // XXXdbg
1734 //
1735 hfsmp->jnl = NULL;
1736 hfsmp->jvp = NULL;
2d21ac55
A
1737 if (args != NULL && (args->flags & HFSFSMNT_EXTENDED_ARGS) &&
1738 args->journal_disable) {
b4c24cb9
A
1739 jnl_disable = 1;
1740 }
1741
1742 //
1743 // We only initialize the journal here if the last person
1744 // to mount this volume was journaling aware. Otherwise
1745 // we delay journal initialization until later at the end
1746 // of hfs_MountHFSPlusVolume() because the last person who
1747 // mounted it could have messed things up behind our back
1748 // (so we need to go find the .journal file, make sure it's
1749 // the right size, re-sync up if it was moved, etc).
1750 //
1751 if ( (SWAP_BE32(vhp->lastMountedVersion) == kHFSJMountVersion)
1752 && (SWAP_BE32(vhp->attributes) & kHFSVolumeJournaledMask)
1753 && !jnl_disable) {
1754
1755 // if we're able to init the journal, mark the mount
1756 // point as journaled.
1757 //
b0d623f7 1758 if ((retval = hfs_early_journal_init(hfsmp, vhp, args, embeddedOffset, mdb_offset, mdbp, cred)) == 0) {
2d21ac55 1759 vfs_setflags(mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
b4c24cb9 1760 } else {
b0d623f7
A
1761 if (retval == EROFS) {
1762 // EROFS is a special error code that means the volume has an external
1763 // journal which we couldn't find. in that case we do not want to
1764 // rewrite the volume header - we'll just refuse to mount the volume.
6d2010ae
A
1765 if (HFS_MOUNT_DEBUG) {
1766 printf("hfs_mountfs: hfs_early_journal_init indicated external jnl \n");
1767 }
b0d623f7
A
1768 retval = EINVAL;
1769 goto error_exit;
1770 }
1771
55e303ae
A
1772 // if the journal failed to open, then set the lastMountedVersion
1773 // to be "FSK!" which fsck_hfs will see and force the fsck instead
1774 // of just bailing out because the volume is journaled.
91447636 1775 if (!ronly) {
6d2010ae
A
1776 if (HFS_MOUNT_DEBUG) {
1777 printf("hfs_mountfs: hfs_early_journal_init failed, setting to FSK \n");
1778 }
1779
1780 HFSPlusVolumeHeader *jvhp;
55e303ae
A
1781
1782 hfsmp->hfs_flags |= HFS_NEED_JNL_RESET;
1783
1784 if (mdb_offset == 0) {
593a1d5f 1785 mdb_offset = (daddr64_t)((embeddedOffset / log_blksize) + HFS_PRI_SECTOR(log_blksize));
55e303ae
A
1786 }
1787
1788 bp = NULL;
593a1d5f
A
1789 retval = (int)buf_meta_bread(devvp,
1790 HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
1791 phys_blksize, cred, &bp);
55e303ae 1792 if (retval == 0) {
593a1d5f 1793 jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize));
55e303ae 1794
91447636
A
1795 if (SWAP_BE16(jvhp->signature) == kHFSPlusSigWord || SWAP_BE16(jvhp->signature) == kHFSXSigWord) {
1796 printf ("hfs(1): Journal replay fail. Writing lastMountVersion as FSK!\n");
1797 jvhp->lastMountedVersion = SWAP_BE32(kFSKMountVersion);
1798 buf_bwrite(bp);
55e303ae 1799 } else {
91447636 1800 buf_brelse(bp);
55e303ae
A
1801 }
1802 bp = NULL;
1803 } else if (bp) {
91447636
A
1804 buf_brelse(bp);
1805 // clear this so the error exit path won't try to use it
1806 bp = NULL;
55e303ae
A
1807 }
1808 }
1809
1810 // if this isn't the root device just bail out.
91447636 1811 // If it is the root device we just continue on
55e303ae
A
1812 // in the hopes that fsck_hfs will be able to
1813 // fix any damage that exists on the volume.
91447636 1814 if ( !(vfs_flags(mp) & MNT_ROOTFS)) {
6d2010ae
A
1815 if (HFS_MOUNT_DEBUG) {
1816 printf("hfs_mountfs: hfs_early_journal_init failed, erroring out \n");
1817 }
55e303ae
A
1818 retval = EINVAL;
1819 goto error_exit;
1820 }
b4c24cb9
A
1821 }
1822 }
1823 // XXXdbg
1824
2d21ac55
A
1825 /* Either the journal is replayed successfully, or there
1826 * was nothing to replay, or no journal exists. In any case,
1827 * return success.
1828 */
1829 if (journal_replay_only) {
1830 retval = 0;
1831 goto error_exit;
1832 }
1833
d52fe63f
A
1834 (void) hfs_getconverter(0, &hfsmp->hfs_get_unicode, &hfsmp->hfs_get_hfsname);
1835
91447636 1836 retval = hfs_MountHFSPlusVolume(hfsmp, vhp, embeddedOffset, disksize, p, args, cred);
d52fe63f
A
1837 /*
1838 * If the backend didn't like our physical blocksize
1839 * then retry with physical blocksize of 512.
1840 */
593a1d5f 1841 if ((retval == ENXIO) && (log_blksize > 512) && (log_blksize != minblksize)) {
b0d623f7 1842 printf("hfs_mountfs: could not use physical block size "
316670eb 1843 "(%d) switching to 512\n", log_blksize);
593a1d5f
A
1844 log_blksize = 512;
1845 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context)) {
6d2010ae
A
1846 if (HFS_MOUNT_DEBUG) {
1847 printf("hfs_mountfs: DKIOCSETBLOCKSIZE (4) failed \n");
1848 }
d52fe63f
A
1849 retval = ENXIO;
1850 goto error_exit;
1851 }
593a1d5f 1852 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) {
6d2010ae
A
1853 if (HFS_MOUNT_DEBUG) {
1854 printf("hfs_mountfs: DKIOCGETBLOCKCOUNT (4) failed \n");
1855 }
d52fe63f
A
1856 retval = ENXIO;
1857 goto error_exit;
1858 }
593a1d5f 1859 devvp->v_specsize = log_blksize;
d52fe63f 1860 /* Note: relative block count adjustment (in case this is an embedded volume). */
316670eb
A
1861 hfsmp->hfs_logical_block_count *= hfsmp->hfs_logical_block_size / log_blksize;
1862 hfsmp->hfs_logical_block_size = log_blksize;
1863 hfsmp->hfs_log_per_phys = hfsmp->hfs_physical_block_size / log_blksize;
1864
1865 hfsmp->hfs_logical_bytes = (uint64_t) hfsmp->hfs_logical_block_count * (uint64_t) hfsmp->hfs_logical_block_size;
1866
b0d623f7 1867 if (hfsmp->jnl && hfsmp->jvp == devvp) {
55e303ae
A
1868 // close and re-open this with the new block size
1869 journal_close(hfsmp->jnl);
1870 hfsmp->jnl = NULL;
1871 if (hfs_early_journal_init(hfsmp, vhp, args, embeddedOffset, mdb_offset, mdbp, cred) == 0) {
2d21ac55 1872 vfs_setflags(mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
91447636
A
1873 } else {
1874 // if the journal failed to open, then set the lastMountedVersion
1875 // to be "FSK!" which fsck_hfs will see and force the fsck instead
1876 // of just bailing out because the volume is journaled.
1877 if (!ronly) {
6d2010ae
A
1878 if (HFS_MOUNT_DEBUG) {
1879 printf("hfs_mountfs: hfs_early_journal_init (2) resetting.. \n");
1880 }
91447636
A
1881 HFSPlusVolumeHeader *jvhp;
1882
1883 hfsmp->hfs_flags |= HFS_NEED_JNL_RESET;
1884
1885 if (mdb_offset == 0) {
593a1d5f 1886 mdb_offset = (daddr64_t)((embeddedOffset / log_blksize) + HFS_PRI_SECTOR(log_blksize));
91447636
A
1887 }
1888
1889 bp = NULL;
593a1d5f
A
1890 retval = (int)buf_meta_bread(devvp, HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
1891 phys_blksize, cred, &bp);
91447636 1892 if (retval == 0) {
593a1d5f 1893 jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize));
91447636
A
1894
1895 if (SWAP_BE16(jvhp->signature) == kHFSPlusSigWord || SWAP_BE16(jvhp->signature) == kHFSXSigWord) {
1896 printf ("hfs(2): Journal replay fail. Writing lastMountVersion as FSK!\n");
1897 jvhp->lastMountedVersion = SWAP_BE32(kFSKMountVersion);
1898 buf_bwrite(bp);
1899 } else {
1900 buf_brelse(bp);
1901 }
1902 bp = NULL;
1903 } else if (bp) {
1904 buf_brelse(bp);
1905 // clear this so the error exit path won't try to use it
1906 bp = NULL;
1907 }
1908 }
1909
1910 // if this isn't the root device just bail out.
1911 // If it is the root device we just continue on
1912 // in the hopes that fsck_hfs will be able to
1913 // fix any damage that exists on the volume.
1914 if ( !(vfs_flags(mp) & MNT_ROOTFS)) {
6d2010ae
A
1915 if (HFS_MOUNT_DEBUG) {
1916 printf("hfs_mountfs: hfs_early_journal_init (2) failed \n");
1917 }
91447636
A
1918 retval = EINVAL;
1919 goto error_exit;
1920 }
1921 }
55e303ae
A
1922 }
1923
d52fe63f 1924 /* Try again with a smaller block size... */
91447636 1925 retval = hfs_MountHFSPlusVolume(hfsmp, vhp, embeddedOffset, disksize, p, args, cred);
6d2010ae
A
1926 if (retval && HFS_MOUNT_DEBUG) {
1927 printf("hfs_MountHFSPlusVolume (late) returned %d\n",retval);
1928 }
d52fe63f
A
1929 }
1930 if (retval)
1931 (void) hfs_relconverter(0);
1932 }
1c79356b 1933
91447636
A
1934 // save off a snapshot of the mtime from the previous mount
1935 // (for matador).
1936 hfsmp->hfs_last_mounted_mtime = hfsmp->hfs_mtime;
1937
1c79356b 1938 if ( retval ) {
6d2010ae
A
1939 if (HFS_MOUNT_DEBUG) {
1940 printf("hfs_mountfs: encountered failure %d \n", retval);
1941 }
1c79356b
A
1942 goto error_exit;
1943 }
1944
91447636
A
1945 mp->mnt_vfsstat.f_fsid.val[0] = (long)dev;
1946 mp->mnt_vfsstat.f_fsid.val[1] = vfs_typenum(mp);
1947 vfs_setmaxsymlen(mp, 0);
b0d623f7 1948
91447636 1949 mp->mnt_vtable->vfc_vfsflags |= VFC_VFSNATIVEXATTR;
2d21ac55
A
1950#if NAMEDSTREAMS
1951 mp->mnt_kern_flag |= MNTK_NAMED_STREAMS;
1952#endif
1953 if (!(hfsmp->hfs_flags & HFS_STANDARD)) {
1954 /* Tell VFS that we support directory hard links. */
1955 mp->mnt_vtable->vfc_vfsflags |= VFC_VFSDIRLINKS;
1956 } else {
1957 /* HFS standard doesn't support extended readdir! */
6d2010ae 1958 mount_set_noreaddirext (mp);
2d21ac55 1959 }
1c79356b 1960
55e303ae
A
1961 if (args) {
1962 /*
1963 * Set the free space warning levels for a non-root volume:
1964 *
b0d623f7
A
1965 * Set the "danger" limit to 1% of the volume size or 100MB, whichever
1966 * is less. Set the "warning" limit to 2% of the volume size or 150MB,
1967 * whichever is less. And last, set the "desired" freespace level to
1968 * 3% of the volume size or 200MB, whichever is less.
55e303ae 1969 */
b0d623f7
A
1970 hfsmp->hfs_freespace_notify_dangerlimit =
1971 MIN(HFS_VERYLOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize,
1972 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_VERYLOWDISKTRIGGERFRACTION);
55e303ae
A
1973 hfsmp->hfs_freespace_notify_warninglimit =
1974 MIN(HFS_LOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize,
1975 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_LOWDISKTRIGGERFRACTION);
1976 hfsmp->hfs_freespace_notify_desiredlevel =
1977 MIN(HFS_LOWDISKSHUTOFFLEVEL / HFSTOVCB(hfsmp)->blockSize,
1978 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_LOWDISKSHUTOFFFRACTION);
1979 } else {
1980 /*
1981 * Set the free space warning levels for the root volume:
1982 *
6d2010ae
A
1983 * Set the "danger" limit to 5% of the volume size or 512MB, whichever
1984 * is less. Set the "warning" limit to 10% of the volume size or 1GB,
b0d623f7 1985 * whichever is less. And last, set the "desired" freespace level to
6d2010ae 1986 * 11% of the volume size or 1.25GB, whichever is less.
55e303ae 1987 */
b0d623f7
A
1988 hfsmp->hfs_freespace_notify_dangerlimit =
1989 MIN(HFS_ROOTVERYLOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize,
1990 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_ROOTVERYLOWDISKTRIGGERFRACTION);
55e303ae
A
1991 hfsmp->hfs_freespace_notify_warninglimit =
1992 MIN(HFS_ROOTLOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize,
1993 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_ROOTLOWDISKTRIGGERFRACTION);
1994 hfsmp->hfs_freespace_notify_desiredlevel =
1995 MIN(HFS_ROOTLOWDISKSHUTOFFLEVEL / HFSTOVCB(hfsmp)->blockSize,
1996 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_ROOTLOWDISKSHUTOFFFRACTION);
1997 };
1998
2d21ac55
A
1999 /* Check if the file system exists on virtual device, like disk image */
2000 if (VNOP_IOCTL(devvp, DKIOCISVIRTUAL, (caddr_t)&isvirtual, 0, context) == 0) {
2001 if (isvirtual) {
2002 hfsmp->hfs_flags |= HFS_VIRTUAL_DEVICE;
2003 }
2004 }
2005
b0d623f7 2006 /* do not allow ejectability checks on the root device */
e2fac8b1
A
2007 if (isroot == 0) {
2008 if ((hfsmp->hfs_flags & HFS_VIRTUAL_DEVICE) == 0 &&
2009 IOBSDIsMediaEjectable(mp->mnt_vfsstat.f_mntfromname)) {
b0d623f7 2010 hfsmp->hfs_max_pending_io = 4096*1024; // a reasonable value to start with.
e2fac8b1
A
2011 hfsmp->hfs_syncer = thread_call_allocate(hfs_syncer, hfsmp);
2012 if (hfsmp->hfs_syncer == NULL) {
2013 printf("hfs: failed to allocate syncer thread callback for %s (%s)\n",
2014 mp->mnt_vfsstat.f_mntfromname, mp->mnt_vfsstat.f_mntonname);
2015 }
2016 }
2017 }
316670eb
A
2018
2019#if CONFIG_HFS_MOUNT_UNMAP
2020 /* Enable UNMAPs for embedded SSDs only for now */
2021 /*
2022 * TODO: Should we enable this for CoreStorage volumes, too?
2023 */
2024 if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) {
2025 if (hfsmp->hfs_flags & HFS_UNMAP) {
2026 hfs_unmap_blocks(hfsmp);
2027 }
2028 }
2029#endif
2030
2031
6d2010ae
A
2032#if CONFIG_HFS_ALLOC_RBTREE
2033 /*
2034 * We spawn a thread to create the pair of red-black trees for this volume.
2035 * However, in so doing, we must be careful to ensure that if this thread is still
2036 * running after mount has finished, it doesn't interfere with an unmount. Specifically,
2037 * we'll need to set a bit that indicates we're in progress building the trees here.
2038 * Unmount will check for this bit, and then if it's set, mark a corresponding bit that
2039 * notifies the tree generation code that an unmount is waiting. Also mark the bit that
2040 * indicates the tree is live and operating.
2041 *
2042 * Only do this if we're operating on a read-write mount (we wouldn't care for read-only).
2043 */
2044
2045 if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) {
2046 hfsmp->extent_tree_flags |= (HFS_ALLOC_TREEBUILD_INFLIGHT | HFS_ALLOC_RB_ENABLED);
2047
2048 /* Initialize EOF counter so that the thread can assume it started at initial values */
2049 hfsmp->offset_block_end = 0;
2050 InitTree(hfsmp);
2051
2052 kernel_thread_start ((thread_continue_t) hfs_initialize_allocator , hfsmp, &allocator_thread);
2053 thread_deallocate(allocator_thread);
2054 }
2055
2056#endif
e2fac8b1 2057
55e303ae
A
2058 /*
2059 * Start looking for free space to drop below this level and generate a
2060 * warning immediately if needed:
2061 */
2062 hfsmp->hfs_notification_conditions = 0;
2063 hfs_generate_volume_notifications(hfsmp);
2d21ac55 2064
9bccf70c
A
2065 if (ronly == 0) {
2066 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
2067 }
2068 FREE(mdbp, M_TEMP);
2069 return (0);
1c79356b 2070
9bccf70c
A
2071error_exit:
2072 if (bp)
91447636 2073 buf_brelse(bp);
9bccf70c
A
2074 if (mdbp)
2075 FREE(mdbp, M_TEMP);
91447636 2076
b4c24cb9 2077 if (hfsmp && hfsmp->jvp && hfsmp->jvp != hfsmp->hfs_devvp) {
b0d623f7
A
2078 vnode_clearmountedon(hfsmp->jvp);
2079 (void)VNOP_CLOSE(hfsmp->jvp, ronly ? FREAD : FREAD|FWRITE, vfs_context_kernel());
b4c24cb9
A
2080 hfsmp->jvp = NULL;
2081 }
9bccf70c 2082 if (hfsmp) {
2d21ac55
A
2083 if (hfsmp->hfs_devvp) {
2084 vnode_rele(hfsmp->hfs_devvp);
2085 }
b0d623f7 2086 hfs_delete_chash(hfsmp);
6d2010ae 2087
9bccf70c 2088 FREE(hfsmp, M_HFSMNT);
91447636 2089 vfs_setfsprivate(mp, NULL);
9bccf70c 2090 }
1c79356b
A
2091 return (retval);
2092}
2093
2094
2095/*
2096 * Make a filesystem operational.
2097 * Nothing to do at the moment.
2098 */
2099/* ARGSUSED */
9bccf70c 2100static int
91447636 2101hfs_start(__unused struct mount *mp, __unused int flags, __unused vfs_context_t context)
1c79356b 2102{
9bccf70c 2103 return (0);
1c79356b
A
2104}
2105
2106
2107/*
2108 * unmount system call
2109 */
6d2010ae 2110int
91447636 2111hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context)
1c79356b 2112{
91447636 2113 struct proc *p = vfs_context_proc(context);
1c79356b
A
2114 struct hfsmount *hfsmp = VFSTOHFS(mp);
2115 int retval = E_NONE;
2116 int flags;
9bccf70c 2117 int force;
91447636 2118 int started_tr = 0;
6d2010ae 2119 int rb_used = 0;
1c79356b
A
2120
2121 flags = 0;
9bccf70c
A
2122 force = 0;
2123 if (mntflags & MNT_FORCE) {
1c79356b 2124 flags |= FORCECLOSE;
9bccf70c
A
2125 force = 1;
2126 }
1c79356b 2127
9bccf70c 2128 if ((retval = hfs_flushfiles(mp, flags, p)) && !force)
1c79356b
A
2129 return (retval);
2130
55e303ae 2131 if (hfsmp->hfs_flags & HFS_METADATA_ZONE)
91447636 2132 (void) hfs_recording_suspend(hfsmp);
55e303ae 2133
e2fac8b1
A
2134 /*
2135 * Cancel any pending timers for this volume. Then wait for any timers
2136 * which have fired, but whose callbacks have not yet completed.
2137 */
2138 if (hfsmp->hfs_syncer)
2139 {
2140 struct timespec ts = {0, 100000000}; /* 0.1 seconds */
2141
2142 /*
2143 * Cancel any timers that have been scheduled, but have not
2144 * fired yet. NOTE: The kernel considers a timer complete as
2145 * soon as it starts your callback, so the kernel does not
2146 * keep track of the number of callbacks in progress.
2147 */
2148 if (thread_call_cancel(hfsmp->hfs_syncer))
2149 OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_incomplete);
2150 thread_call_free(hfsmp->hfs_syncer);
2151 hfsmp->hfs_syncer = NULL;
2152
2153 /*
2154 * This waits for all of the callbacks that were entered before
2155 * we did thread_call_cancel above, but have not completed yet.
2156 */
2157 while(hfsmp->hfs_sync_incomplete > 0)
2158 {
2159 msleep((caddr_t)&hfsmp->hfs_sync_incomplete, NULL, PWAIT, "hfs_unmount", &ts);
2160 }
2161
2162 if (hfsmp->hfs_sync_incomplete < 0)
b0d623f7 2163 panic("hfs_unmount: pm_sync_incomplete underflow!\n");
e2fac8b1
A
2164 }
2165
6d2010ae
A
2166#if CONFIG_HFS_ALLOC_RBTREE
2167 rb_used = hfs_teardown_allocator(hfsmp);
2168#endif
2169
1c79356b
A
2170 /*
2171 * Flush out the b-trees, volume bitmap and Volume Header
2172 */
55e303ae 2173 if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) {
2d21ac55
A
2174 retval = hfs_start_transaction(hfsmp);
2175 if (retval == 0) {
2176 started_tr = 1;
2177 } else if (!force) {
2178 goto err_exit;
2179 }
2180
2181 if (hfsmp->hfs_startup_vp) {
2182 (void) hfs_lock(VTOC(hfsmp->hfs_startup_vp), HFS_EXCLUSIVE_LOCK);
2183 retval = hfs_fsync(hfsmp->hfs_startup_vp, MNT_WAIT, 0, p);
2184 hfs_unlock(VTOC(hfsmp->hfs_startup_vp));
2185 if (retval && !force)
2186 goto err_exit;
2187 }
91447636
A
2188
2189 if (hfsmp->hfs_attribute_vp) {
2190 (void) hfs_lock(VTOC(hfsmp->hfs_attribute_vp), HFS_EXCLUSIVE_LOCK);
2191 retval = hfs_fsync(hfsmp->hfs_attribute_vp, MNT_WAIT, 0, p);
2192 hfs_unlock(VTOC(hfsmp->hfs_attribute_vp));
2193 if (retval && !force)
2194 goto err_exit;
b4c24cb9 2195 }
91447636
A
2196
2197 (void) hfs_lock(VTOC(hfsmp->hfs_catalog_vp), HFS_EXCLUSIVE_LOCK);
2198 retval = hfs_fsync(hfsmp->hfs_catalog_vp, MNT_WAIT, 0, p);
2199 hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
9bccf70c 2200 if (retval && !force)
b4c24cb9
A
2201 goto err_exit;
2202
91447636
A
2203 (void) hfs_lock(VTOC(hfsmp->hfs_extents_vp), HFS_EXCLUSIVE_LOCK);
2204 retval = hfs_fsync(hfsmp->hfs_extents_vp, MNT_WAIT, 0, p);
2205 hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
9bccf70c 2206 if (retval && !force)
b4c24cb9
A
2207 goto err_exit;
2208
91447636
A
2209 if (hfsmp->hfs_allocation_vp) {
2210 (void) hfs_lock(VTOC(hfsmp->hfs_allocation_vp), HFS_EXCLUSIVE_LOCK);
2211 retval = hfs_fsync(hfsmp->hfs_allocation_vp, MNT_WAIT, 0, p);
2212 hfs_unlock(VTOC(hfsmp->hfs_allocation_vp));
55e303ae
A
2213 if (retval && !force)
2214 goto err_exit;
2215 }
2216
91447636
A
2217 if (hfsmp->hfc_filevp && vnode_issystem(hfsmp->hfc_filevp)) {
2218 retval = hfs_fsync(hfsmp->hfc_filevp, MNT_WAIT, 0, p);
2219 if (retval && !force)
b4c24cb9 2220 goto err_exit;
1c79356b 2221 }
2d21ac55
A
2222
2223 /* If runtime corruption was detected, indicate that the volume
2224 * was not unmounted cleanly.
2225 */
2226 if (hfsmp->vcbAtrb & kHFSVolumeInconsistentMask) {
1c79356b
A
2227 HFSTOVCB(hfsmp)->vcbAtrb &= ~kHFSVolumeUnmountedMask;
2228 } else {
9bccf70c 2229 HFSTOVCB(hfsmp)->vcbAtrb |= kHFSVolumeUnmountedMask;
1c79356b 2230 }
2d21ac55 2231
6d2010ae
A
2232
2233 if (rb_used) {
2234 /* If the rb-tree was live, just set min_start to 0 */
2235 hfsmp->nextAllocation = 0;
2236 }
2237 else {
2238 if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
2239 int i;
2240 u_int32_t min_start = hfsmp->totalBlocks;
2241
2242 // set the nextAllocation pointer to the smallest free block number
2243 // we've seen so on the next mount we won't rescan unnecessarily
2244 lck_spin_lock(&hfsmp->vcbFreeExtLock);
2245 for(i=0; i < (int)hfsmp->vcbFreeExtCnt; i++) {
2246 if (hfsmp->vcbFreeExt[i].startBlock < min_start) {
2247 min_start = hfsmp->vcbFreeExt[i].startBlock;
2248 }
2249 }
2250 lck_spin_unlock(&hfsmp->vcbFreeExtLock);
2251 if (min_start < hfsmp->nextAllocation) {
2252 hfsmp->nextAllocation = min_start;
b0d623f7 2253 }
b0d623f7
A
2254 }
2255 }
6d2010ae 2256
b0d623f7 2257
91447636 2258 retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
1c79356b 2259 if (retval) {
1c79356b 2260 HFSTOVCB(hfsmp)->vcbAtrb &= ~kHFSVolumeUnmountedMask;
9bccf70c 2261 if (!force)
b4c24cb9
A
2262 goto err_exit; /* could not flush everything */
2263 }
2264
2d21ac55
A
2265 if (started_tr) {
2266 hfs_end_transaction(hfsmp);
2267 started_tr = 0;
2268 }
1c79356b
A
2269 }
2270
b4c24cb9 2271 if (hfsmp->jnl) {
6d2010ae 2272 hfs_journal_flush(hfsmp, FALSE);
b4c24cb9
A
2273 }
2274
1c79356b
A
2275 /*
2276 * Invalidate our caches and release metadata vnodes
2277 */
2278 (void) hfsUnmount(hfsmp, p);
2279
2280 if (HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord)
2281 (void) hfs_relconverter(hfsmp->hfs_encoding);
2282
b4c24cb9
A
2283 // XXXdbg
2284 if (hfsmp->jnl) {
2285 journal_close(hfsmp->jnl);
55e303ae 2286 hfsmp->jnl = NULL;
b4c24cb9
A
2287 }
2288
91447636
A
2289 VNOP_FSYNC(hfsmp->hfs_devvp, MNT_WAIT, context);
2290
b4c24cb9 2291 if (hfsmp->jvp && hfsmp->jvp != hfsmp->hfs_devvp) {
b0d623f7 2292 vnode_clearmountedon(hfsmp->jvp);
91447636 2293 retval = VNOP_CLOSE(hfsmp->jvp,
55e303ae 2294 hfsmp->hfs_flags & HFS_READ_ONLY ? FREAD : FREAD|FWRITE,
b0d623f7 2295 vfs_context_kernel());
91447636 2296 vnode_put(hfsmp->jvp);
55e303ae 2297 hfsmp->jvp = NULL;
b4c24cb9
A
2298 }
2299 // XXXdbg
2300
6d2010ae
A
2301 /*
2302 * Last chance to dump unreferenced system files.
2303 */
2304 (void) vflush(mp, NULLVP, FORCECLOSE);
2305
2306#if HFS_SPARSE_DEV
55e303ae
A
2307 /* Drop our reference on the backing fs (if any). */
2308 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) && hfsmp->hfs_backingfs_rootvp) {
2309 struct vnode * tmpvp;
2310
2311 hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
2312 tmpvp = hfsmp->hfs_backingfs_rootvp;
2313 hfsmp->hfs_backingfs_rootvp = NULLVP;
91447636 2314 vnode_rele(tmpvp);
55e303ae
A
2315 }
2316#endif /* HFS_SPARSE_DEV */
91447636 2317 lck_mtx_destroy(&hfsmp->hfc_mutex, hfs_mutex_group);
6d2010ae 2318 lck_spin_destroy(&hfsmp->vcbFreeExtLock, hfs_spinlock_group);
2d21ac55 2319 vnode_rele(hfsmp->hfs_devvp);
b0d623f7
A
2320
2321 hfs_delete_chash(hfsmp);
1c79356b 2322 FREE(hfsmp, M_HFSMNT);
91447636 2323
9bccf70c 2324 return (0);
b4c24cb9
A
2325
2326 err_exit:
91447636
A
2327 if (started_tr) {
2328 hfs_end_transaction(hfsmp);
b4c24cb9
A
2329 }
2330 return retval;
1c79356b
A
2331}
2332
2333
2334/*
2335 * Return the root of a filesystem.
1c79356b 2336 */
9bccf70c 2337static int
91447636 2338hfs_vfs_root(struct mount *mp, struct vnode **vpp, __unused vfs_context_t context)
1c79356b 2339{
6d2010ae 2340 return hfs_vget(VFSTOHFS(mp), (cnid_t)kHFSRootFolderID, vpp, 1, 0);
1c79356b
A
2341}
2342
2343
2344/*
2345 * Do operations associated with quotas
2346 */
2d21ac55
A
2347#if !QUOTA
2348static int
2349hfs_quotactl(__unused struct mount *mp, __unused int cmds, __unused uid_t uid, __unused caddr_t datap, __unused vfs_context_t context)
2350{
2351 return (ENOTSUP);
2352}
2353#else
91447636
A
2354static int
2355hfs_quotactl(struct mount *mp, int cmds, uid_t uid, caddr_t datap, vfs_context_t context)
1c79356b 2356{
91447636 2357 struct proc *p = vfs_context_proc(context);
9bccf70c
A
2358 int cmd, type, error;
2359
2d21ac55 2360 if (uid == ~0U)
6d2010ae 2361 uid = kauth_cred_getuid(vfs_context_ucred(context));
9bccf70c
A
2362 cmd = cmds >> SUBCMDSHIFT;
2363
2364 switch (cmd) {
2365 case Q_SYNC:
2366 case Q_QUOTASTAT:
2367 break;
2368 case Q_GETQUOTA:
6d2010ae 2369 if (uid == kauth_cred_getuid(vfs_context_ucred(context)))
9bccf70c
A
2370 break;
2371 /* fall through */
2372 default:
91447636 2373 if ( (error = vfs_context_suser(context)) )
9bccf70c
A
2374 return (error);
2375 }
2376
2377 type = cmds & SUBCMDMASK;
2378 if ((u_int)type >= MAXQUOTAS)
2379 return (EINVAL);
91447636 2380 if (vfs_busy(mp, LK_NOWAIT))
9bccf70c
A
2381 return (0);
2382
2383 switch (cmd) {
1c79356b 2384
9bccf70c 2385 case Q_QUOTAON:
91447636 2386 error = hfs_quotaon(p, mp, type, datap);
9bccf70c
A
2387 break;
2388
2389 case Q_QUOTAOFF:
2390 error = hfs_quotaoff(p, mp, type);
2391 break;
2392
2393 case Q_SETQUOTA:
91447636 2394 error = hfs_setquota(mp, uid, type, datap);
9bccf70c
A
2395 break;
2396
2397 case Q_SETUSE:
91447636 2398 error = hfs_setuse(mp, uid, type, datap);
9bccf70c
A
2399 break;
2400
2401 case Q_GETQUOTA:
91447636 2402 error = hfs_getquota(mp, uid, type, datap);
9bccf70c
A
2403 break;
2404
2405 case Q_SYNC:
2406 error = hfs_qsync(mp);
2407 break;
2408
2409 case Q_QUOTASTAT:
91447636 2410 error = hfs_quotastat(mp, type, datap);
9bccf70c
A
2411 break;
2412
2413 default:
2414 error = EINVAL;
2415 break;
2416 }
91447636
A
2417 vfs_unbusy(mp);
2418
9bccf70c 2419 return (error);
1c79356b 2420}
2d21ac55 2421#endif /* QUOTA */
1c79356b 2422
91447636
A
2423/* Subtype is composite of bits */
2424#define HFS_SUBTYPE_JOURNALED 0x01
2425#define HFS_SUBTYPE_CASESENSITIVE 0x02
2426/* bits 2 - 6 reserved */
2427#define HFS_SUBTYPE_STANDARDHFS 0x80
b4c24cb9 2428
1c79356b
A
2429/*
2430 * Get file system statistics.
2431 */
6d2010ae 2432int
91447636 2433hfs_statfs(struct mount *mp, register struct vfsstatfs *sbp, __unused vfs_context_t context)
1c79356b
A
2434{
2435 ExtendedVCB *vcb = VFSTOVCB(mp);
2436 struct hfsmount *hfsmp = VFSTOHFS(mp);
b0d623f7 2437 u_int32_t freeCNIDs;
2d21ac55 2438 u_int16_t subtype = 0;
1c79356b 2439
b0d623f7 2440 freeCNIDs = (u_int32_t)0xFFFFFFFF - (u_int32_t)vcb->vcbNxtCNID;
1c79356b 2441
2d21ac55 2442 sbp->f_bsize = (u_int32_t)vcb->blockSize;
cf7d32b8 2443 sbp->f_iosize = (size_t)cluster_max_io_size(mp, 0);
b0d623f7
A
2444 sbp->f_blocks = (u_int64_t)((u_int32_t)vcb->totalBlocks);
2445 sbp->f_bfree = (u_int64_t)((u_int32_t )hfs_freeblks(hfsmp, 0));
2446 sbp->f_bavail = (u_int64_t)((u_int32_t )hfs_freeblks(hfsmp, 1));
2447 sbp->f_files = (u_int64_t)((u_int32_t )(vcb->totalBlocks - 2)); /* max files is constrained by total blocks */
2448 sbp->f_ffree = (u_int64_t)((u_int32_t )(MIN(freeCNIDs, sbp->f_bavail)));
91447636
A
2449
2450 /*
2451 * Subtypes (flavors) for HFS
2452 * 0: Mac OS Extended
2453 * 1: Mac OS Extended (Journaled)
2454 * 2: Mac OS Extended (Case Sensitive)
2455 * 3: Mac OS Extended (Case Sensitive, Journaled)
2456 * 4 - 127: Reserved
2457 * 128: Mac OS Standard
2458 *
2459 */
2460 if (hfsmp->hfs_flags & HFS_STANDARD) {
2461 subtype = HFS_SUBTYPE_STANDARDHFS;
2462 } else /* HFS Plus */ {
2463 if (hfsmp->jnl)
2464 subtype |= HFS_SUBTYPE_JOURNALED;
2465 if (hfsmp->hfs_flags & HFS_CASE_SENSITIVE)
2466 subtype |= HFS_SUBTYPE_CASESENSITIVE;
1c79356b 2467 }
91447636
A
2468 sbp->f_fssubtype = subtype;
2469
1c79356b
A
2470 return (0);
2471}
2472
2473
b4c24cb9
A
2474//
2475// XXXdbg -- this is a callback to be used by the journal to
2476// get meta data blocks flushed out to disk.
2477//
2478// XXXdbg -- be smarter and don't flush *every* block on each
2479// call. try to only flush some so we don't wind up
2480// being too synchronous.
2481//
2482__private_extern__
2483void
2484hfs_sync_metadata(void *arg)
2485{
2486 struct mount *mp = (struct mount *)arg;
b4c24cb9
A
2487 struct hfsmount *hfsmp;
2488 ExtendedVCB *vcb;
91447636 2489 buf_t bp;
593a1d5f 2490 int retval;
91447636 2491 daddr64_t priIDSector;
b4c24cb9
A
2492 hfsmp = VFSTOHFS(mp);
2493 vcb = HFSTOVCB(hfsmp);
2494
b4c24cb9 2495 // now make sure the super block is flushed
593a1d5f
A
2496 priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
2497 HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size));
2498
2499 retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
2500 HFS_PHYSBLK_ROUNDDOWN(priIDSector, hfsmp->hfs_log_per_phys),
2501 hfsmp->hfs_physical_block_size, NOCRED, &bp);
2d21ac55 2502 if ((retval != 0 ) && (retval != ENXIO)) {
0c530ab8 2503 printf("hfs_sync_metadata: can't read volume header at %d! (retval 0x%x)\n",
2d21ac55 2504 (int)priIDSector, retval);
b4c24cb9
A
2505 }
2506
91447636
A
2507 if (retval == 0 && ((buf_flags(bp) & (B_DELWRI | B_LOCKED)) == B_DELWRI)) {
2508 buf_bwrite(bp);
b4c24cb9 2509 } else if (bp) {
91447636 2510 buf_brelse(bp);
b4c24cb9
A
2511 }
2512
2513 // the alternate super block...
2514 // XXXdbg - we probably don't need to do this each and every time.
2515 // hfs_btreeio.c:FlushAlternate() should flag when it was
2516 // written...
91447636 2517 if (hfsmp->hfs_alt_id_sector) {
593a1d5f
A
2518 retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
2519 HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys),
2520 hfsmp->hfs_physical_block_size, NOCRED, &bp);
91447636
A
2521 if (retval == 0 && ((buf_flags(bp) & (B_DELWRI | B_LOCKED)) == B_DELWRI)) {
2522 buf_bwrite(bp);
2523 } else if (bp) {
2524 buf_brelse(bp);
2525 }
b4c24cb9 2526 }
b4c24cb9
A
2527}
2528
91447636
A
2529
2530struct hfs_sync_cargs {
2531 kauth_cred_t cred;
2532 struct proc *p;
2533 int waitfor;
2534 int error;
2535};
2536
2537
2538static int
2539hfs_sync_callback(struct vnode *vp, void *cargs)
2540{
2541 struct cnode *cp;
2542 struct hfs_sync_cargs *args;
2543 int error;
2544
2545 args = (struct hfs_sync_cargs *)cargs;
2546
2547 if (hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK) != 0) {
2548 return (VNODE_RETURNED);
2549 }
2550 cp = VTOC(vp);
2551
2552 if ((cp->c_flag & C_MODIFIED) ||
2553 (cp->c_touch_acctime | cp->c_touch_chgtime | cp->c_touch_modtime) ||
2554 vnode_hasdirtyblks(vp)) {
2555 error = hfs_fsync(vp, args->waitfor, 0, args->p);
2556
2557 if (error)
2558 args->error = error;
2559 }
2560 hfs_unlock(cp);
2561 return (VNODE_RETURNED);
2562}
2563
2564
2565
1c79356b
A
2566/*
2567 * Go through the disk queues to initiate sandbagged IO;
2568 * go through the inodes to write those that have been modified;
2569 * initiate the writing of the super block if it has been modified.
2570 *
2571 * Note: we are always called with the filesystem marked `MPBUSY'.
2572 */
6d2010ae 2573int
91447636 2574hfs_sync(struct mount *mp, int waitfor, vfs_context_t context)
1c79356b 2575{
91447636 2576 struct proc *p = vfs_context_proc(context);
9bccf70c
A
2577 struct cnode *cp;
2578 struct hfsmount *hfsmp;
2579 ExtendedVCB *vcb;
91447636 2580 struct vnode *meta_vp[4];
9bccf70c
A
2581 int i;
2582 int error, allerror = 0;
91447636 2583 struct hfs_sync_cargs args;
1c79356b 2584
593a1d5f
A
2585 hfsmp = VFSTOHFS(mp);
2586
1c79356b 2587 /*
593a1d5f 2588 * hfs_changefs might be manipulating vnodes so back off
1c79356b 2589 */
593a1d5f 2590 if (hfsmp->hfs_flags & HFS_IN_CHANGEFS)
1c79356b
A
2591 return (0);
2592
55e303ae
A
2593 if (hfsmp->hfs_flags & HFS_READ_ONLY)
2594 return (EROFS);
1c79356b 2595
3a60a9f5
A
2596 /* skip over frozen volumes */
2597 if (!lck_rw_try_lock_shared(&hfsmp->hfs_insync))
2598 return 0;
2599
2d21ac55 2600 args.cred = kauth_cred_get();
91447636
A
2601 args.waitfor = waitfor;
2602 args.p = p;
2603 args.error = 0;
9bccf70c 2604 /*
91447636
A
2605 * hfs_sync_callback will be called for each vnode
2606 * hung off of this mount point... the vnode will be
2607 * properly referenced and unreferenced around the callback
9bccf70c 2608 */
91447636 2609 vnode_iterate(mp, 0, hfs_sync_callback, (void *)&args);
1c79356b 2610
91447636
A
2611 if (args.error)
2612 allerror = args.error;
1c79356b 2613
9bccf70c
A
2614 vcb = HFSTOVCB(hfsmp);
2615
2616 meta_vp[0] = vcb->extentsRefNum;
2617 meta_vp[1] = vcb->catalogRefNum;
2618 meta_vp[2] = vcb->allocationsRefNum; /* This is NULL for standard HFS */
91447636 2619 meta_vp[3] = hfsmp->hfs_attribute_vp; /* Optional file */
9bccf70c
A
2620
2621 /* Now sync our three metadata files */
91447636 2622 for (i = 0; i < 4; ++i) {
9bccf70c
A
2623 struct vnode *btvp;
2624
91447636
A
2625 btvp = meta_vp[i];;
2626 if ((btvp==0) || (vnode_mount(btvp) != mp))
9bccf70c 2627 continue;
b4c24cb9 2628
91447636
A
2629 /* XXX use hfs_systemfile_lock instead ? */
2630 (void) hfs_lock(VTOC(btvp), HFS_EXCLUSIVE_LOCK);
9bccf70c 2631 cp = VTOC(btvp);
91447636
A
2632
2633 if (((cp->c_flag & C_MODIFIED) == 0) &&
2634 (cp->c_touch_acctime == 0) &&
2635 (cp->c_touch_chgtime == 0) &&
2636 (cp->c_touch_modtime == 0) &&
2637 vnode_hasdirtyblks(btvp) == 0) {
2638 hfs_unlock(VTOC(btvp));
9bccf70c
A
2639 continue;
2640 }
91447636 2641 error = vnode_get(btvp);
9bccf70c 2642 if (error) {
91447636 2643 hfs_unlock(VTOC(btvp));
9bccf70c
A
2644 continue;
2645 }
91447636 2646 if ((error = hfs_fsync(btvp, waitfor, 0, p)))
9bccf70c 2647 allerror = error;
9bccf70c 2648
91447636
A
2649 hfs_unlock(cp);
2650 vnode_put(btvp);
2651 };
9bccf70c
A
2652
2653 /*
2654 * Force stale file system control information to be flushed.
2655 */
2656 if (vcb->vcbSigWord == kHFSSigWord) {
91447636 2657 if ((error = VNOP_FSYNC(hfsmp->hfs_devvp, waitfor, context))) {
9bccf70c 2658 allerror = error;
91447636 2659 }
9bccf70c
A
2660 }
2661#if QUOTA
2662 hfs_qsync(mp);
2663#endif /* QUOTA */
55e303ae 2664
2d21ac55
A
2665 hfs_hotfilesync(hfsmp, vfs_context_kernel());
2666
9bccf70c
A
2667 /*
2668 * Write back modified superblock.
2669 */
9bccf70c
A
2670 if (IsVCBDirty(vcb)) {
2671 error = hfs_flushvolumeheader(hfsmp, waitfor, 0);
b4c24cb9
A
2672 if (error)
2673 allerror = error;
9bccf70c 2674 }
1c79356b 2675
b4c24cb9 2676 if (hfsmp->jnl) {
6d2010ae 2677 hfs_journal_flush(hfsmp, FALSE);
b4c24cb9 2678 }
3a60a9f5 2679
e2fac8b1 2680 {
b0d623f7
A
2681 clock_sec_t secs;
2682 clock_usec_t usecs;
e2fac8b1
A
2683 uint64_t now;
2684
2685 clock_get_calendar_microtime(&secs, &usecs);
b0d623f7 2686 now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs;
e2fac8b1
A
2687 hfsmp->hfs_last_sync_time = now;
2688 }
2689
3a60a9f5 2690 lck_rw_unlock_shared(&hfsmp->hfs_insync);
9bccf70c 2691 return (allerror);
1c79356b
A
2692}
2693
2694
2695/*
2696 * File handle to vnode
2697 *
2698 * Have to be really careful about stale file handles:
9bccf70c
A
2699 * - check that the cnode id is valid
2700 * - call hfs_vget() to get the locked cnode
2701 * - check for an unallocated cnode (i_mode == 0)
1c79356b
A
2702 * - check that the given client host has export rights and return
2703 * those rights via. exflagsp and credanonp
2704 */
9bccf70c 2705static int
2d21ac55 2706hfs_fhtovp(struct mount *mp, int fhlen, unsigned char *fhp, struct vnode **vpp, __unused vfs_context_t context)
1c79356b
A
2707{
2708 struct hfsfid *hfsfhp;
2709 struct vnode *nvp;
2710 int result;
1c79356b
A
2711
2712 *vpp = NULL;
2713 hfsfhp = (struct hfsfid *)fhp;
2714
2d21ac55 2715 if (fhlen < (int)sizeof(struct hfsfid))
91447636 2716 return (EINVAL);
1c79356b 2717
6d2010ae 2718 result = hfs_vget(VFSTOHFS(mp), ntohl(hfsfhp->hfsfid_cnid), &nvp, 0, 0);
91447636
A
2719 if (result) {
2720 if (result == ENOENT)
2721 result = ESTALE;
2722 return result;
2723 }
b0d623f7
A
2724
2725 /*
2726 * We used to use the create time as the gen id of the file handle,
2727 * but it is not static enough because it can change at any point
2728 * via system calls. We still don't have another volume ID or other
2729 * unique identifier to use for a generation ID across reboots that
2730 * persists until the file is removed. Using only the CNID exposes
2731 * us to the potential wrap-around case, but as of 2/2008, it would take
2732 * over 2 months to wrap around if the machine did nothing but allocate
2733 * CNIDs. Using some kind of wrap counter would only be effective if
2734 * each file had the wrap counter associated with it. For now,
2735 * we use only the CNID to identify the file as it's good enough.
2736 */
2737
1c79356b 2738 *vpp = nvp;
91447636
A
2739
2740 hfs_unlock(VTOC(nvp));
9bccf70c 2741 return (0);
1c79356b
A
2742}
2743
2744
2745/*
2746 * Vnode pointer to File handle
2747 */
2748/* ARGSUSED */
9bccf70c 2749static int
2d21ac55 2750hfs_vptofh(struct vnode *vp, int *fhlenp, unsigned char *fhp, __unused vfs_context_t context)
1c79356b 2751{
9bccf70c 2752 struct cnode *cp;
1c79356b 2753 struct hfsfid *hfsfhp;
1c79356b 2754
9bccf70c 2755 if (ISHFS(VTOVCB(vp)))
91447636
A
2756 return (ENOTSUP); /* hfs standard is not exportable */
2757
2758 if (*fhlenp < (int)sizeof(struct hfsfid))
2759 return (EOVERFLOW);
1c79356b 2760
9bccf70c
A
2761 cp = VTOC(vp);
2762 hfsfhp = (struct hfsfid *)fhp;
b0d623f7 2763 /* only the CNID is used to identify the file now */
0c530ab8 2764 hfsfhp->hfsfid_cnid = htonl(cp->c_fileid);
b0d623f7 2765 hfsfhp->hfsfid_gen = htonl(cp->c_fileid);
91447636 2766 *fhlenp = sizeof(struct hfsfid);
1c79356b 2767
9bccf70c 2768 return (0);
1c79356b
A
2769}
2770
2771
2772/*
2773 * Initial HFS filesystems, done only once.
2774 */
9bccf70c 2775static int
91447636 2776hfs_init(__unused struct vfsconf *vfsp)
1c79356b 2777{
9bccf70c 2778 static int done = 0;
1c79356b 2779
9bccf70c
A
2780 if (done)
2781 return (0);
2782 done = 1;
2783 hfs_chashinit();
2784 hfs_converterinit();
1c79356b 2785
55e303ae 2786 BTReserveSetup();
91447636
A
2787
2788
2789 hfs_lock_attr = lck_attr_alloc_init();
2790 hfs_group_attr = lck_grp_attr_alloc_init();
2791 hfs_mutex_group = lck_grp_alloc_init("hfs-mutex", hfs_group_attr);
2792 hfs_rwlock_group = lck_grp_alloc_init("hfs-rwlock", hfs_group_attr);
6d2010ae 2793 hfs_spinlock_group = lck_grp_alloc_init("hfs-spinlock", hfs_group_attr);
0c530ab8 2794
b0d623f7
A
2795#if HFS_COMPRESSION
2796 decmpfs_init();
2797#endif
1c79356b 2798
9bccf70c 2799 return (0);
1c79356b
A
2800}
2801
55e303ae 2802static int
2d21ac55 2803hfs_getmountpoint(struct vnode *vp, struct hfsmount **hfsmpp)
55e303ae
A
2804{
2805 struct hfsmount * hfsmp;
91447636 2806 char fstypename[MFSNAMELEN];
55e303ae
A
2807
2808 if (vp == NULL)
2809 return (EINVAL);
2810
91447636 2811 if (!vnode_isvroot(vp))
55e303ae
A
2812 return (EINVAL);
2813
91447636 2814 vnode_vfsname(vp, fstypename);
2d21ac55 2815 if (strncmp(fstypename, "hfs", sizeof(fstypename)) != 0)
55e303ae
A
2816 return (EINVAL);
2817
2818 hfsmp = VTOHFS(vp);
2819
2820 if (HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord)
2821 return (EINVAL);
2822
2823 *hfsmpp = hfsmp;
2824
2825 return (0);
2826}
1c79356b 2827
b4c24cb9
A
2828// XXXdbg
2829#include <sys/filedesc.h>
2830
1c79356b 2831/*
9bccf70c 2832 * HFS filesystem related variables.
1c79356b 2833 */
6d2010ae 2834int
91447636
A
2835hfs_sysctl(int *name, __unused u_int namelen, user_addr_t oldp, size_t *oldlenp,
2836 user_addr_t newp, size_t newlen, vfs_context_t context)
1c79356b 2837{
91447636 2838 struct proc *p = vfs_context_proc(context);
55e303ae 2839 int error;
55e303ae 2840 struct hfsmount *hfsmp;
9bccf70c
A
2841
2842 /* all sysctl names at this level are terminal */
1c79356b 2843
55e303ae 2844 if (name[0] == HFS_ENCODINGBIAS) {
2d21ac55 2845 int bias;
55e303ae
A
2846
2847 bias = hfs_getencodingbias();
2848 error = sysctl_int(oldp, oldlenp, newp, newlen, &bias);
2849 if (error == 0 && newp)
2850 hfs_setencodingbias(bias);
2851 return (error);
2852
2853 } else if (name[0] == HFS_EXTEND_FS) {
91447636 2854 u_int64_t newsize;
2d21ac55 2855 vnode_t vp = vfs_context_cwd(context);
91447636 2856
2d21ac55 2857 if (newp == USER_ADDR_NULL || vp == NULLVP)
55e303ae 2858 return (EINVAL);
91447636 2859 if ((error = hfs_getmountpoint(vp, &hfsmp)))
55e303ae 2860 return (error);
2d21ac55 2861 error = sysctl_quad(oldp, oldlenp, newp, newlen, (quad_t *)&newsize);
55e303ae
A
2862 if (error)
2863 return (error);
2864
91447636 2865 error = hfs_extendfs(hfsmp, newsize, context);
55e303ae
A
2866 return (error);
2867
2868 } else if (name[0] == HFS_ENCODINGHINT) {
2869 size_t bufsize;
2870 size_t bytes;
2871 u_int32_t hint;
b0d623f7
A
2872 u_int16_t *unicode_name = NULL;
2873 char *filename = NULL;
55e303ae 2874
2d21ac55
A
2875 if ((newlen <= 0) || (newlen > MAXPATHLEN))
2876 return (EINVAL);
2877
55e303ae
A
2878 bufsize = MAX(newlen * 3, MAXPATHLEN);
2879 MALLOC(filename, char *, newlen, M_TEMP, M_WAITOK);
b0d623f7
A
2880 if (filename == NULL) {
2881 error = ENOMEM;
2882 goto encodinghint_exit;
2883 }
55e303ae 2884 MALLOC(unicode_name, u_int16_t *, bufsize, M_TEMP, M_WAITOK);
b0d623f7
A
2885 if (filename == NULL) {
2886 error = ENOMEM;
2887 goto encodinghint_exit;
2888 }
55e303ae
A
2889
2890 error = copyin(newp, (caddr_t)filename, newlen);
2891 if (error == 0) {
2d21ac55 2892 error = utf8_decodestr((u_int8_t *)filename, newlen - 1, unicode_name,
55e303ae
A
2893 &bytes, bufsize, 0, UTF_DECOMPOSED);
2894 if (error == 0) {
2895 hint = hfs_pickencoding(unicode_name, bytes / 2);
2d21ac55 2896 error = sysctl_int(oldp, oldlenp, USER_ADDR_NULL, 0, (int32_t *)&hint);
55e303ae
A
2897 }
2898 }
b0d623f7
A
2899
2900encodinghint_exit:
2901 if (unicode_name)
2902 FREE(unicode_name, M_TEMP);
2903 if (filename)
2904 FREE(filename, M_TEMP);
55e303ae
A
2905 return (error);
2906
2907 } else if (name[0] == HFS_ENABLE_JOURNALING) {
b4c24cb9 2908 // make the file system journaled...
2d21ac55
A
2909 vnode_t vp = vfs_context_cwd(context);
2910 vnode_t jvp;
b4c24cb9 2911 ExtendedVCB *vcb;
b4c24cb9
A
2912 struct cat_attr jnl_attr, jinfo_attr;
2913 struct cat_fork jnl_fork, jinfo_fork;
2914 void *jnl = NULL;
91447636 2915 int lockflags;
d7e50217
A
2916
2917 /* Only root can enable journaling */
91447636 2918 if (!is_suser()) {
d7e50217
A
2919 return (EPERM);
2920 }
2d21ac55 2921 if (vp == NULLVP)
91447636 2922 return EINVAL;
55e303ae 2923
b4c24cb9 2924 hfsmp = VTOHFS(vp);
55e303ae 2925 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
b4c24cb9
A
2926 return EROFS;
2927 }
2928 if (HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord) {
2929 printf("hfs: can't make a plain hfs volume journaled.\n");
2930 return EINVAL;
2931 }
2932
2933 if (hfsmp->jnl) {
2d21ac55 2934 printf("hfs: volume @ mp %p is already journaled!\n", vnode_mount(vp));
b4c24cb9
A
2935 return EAGAIN;
2936 }
2937
2938 vcb = HFSTOVCB(hfsmp);
91447636 2939 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_EXTENTS, HFS_EXCLUSIVE_LOCK);
b4c24cb9
A
2940 if (BTHasContiguousNodes(VTOF(vcb->catalogRefNum)) == 0 ||
2941 BTHasContiguousNodes(VTOF(vcb->extentsRefNum)) == 0) {
2942
2943 printf("hfs: volume has a btree w/non-contiguous nodes. can not enable journaling.\n");
91447636 2944 hfs_systemfile_unlock(hfsmp, lockflags);
b4c24cb9
A
2945 return EINVAL;
2946 }
91447636 2947 hfs_systemfile_unlock(hfsmp, lockflags);
b4c24cb9
A
2948
2949 // make sure these both exist!
91447636
A
2950 if ( GetFileInfo(vcb, kHFSRootFolderID, ".journal_info_block", &jinfo_attr, &jinfo_fork) == 0
2951 || GetFileInfo(vcb, kHFSRootFolderID, ".journal", &jnl_attr, &jnl_fork) == 0) {
b4c24cb9
A
2952
2953 return EINVAL;
2954 }
2955
91447636 2956 hfs_sync(hfsmp->hfs_mp, MNT_WAIT, context);
b4c24cb9
A
2957
2958 printf("hfs: Initializing the journal (joffset 0x%llx sz 0x%llx)...\n",
2959 (off_t)name[2], (off_t)name[3]);
2960
b0d623f7
A
2961 //
2962 // XXXdbg - note that currently (Sept, 08) hfs_util does not support
2963 // enabling the journal on a separate device so it is safe
2964 // to just copy hfs_devvp here. If hfs_util gets the ability
2965 // to dynamically enable the journal on a separate device then
2966 // we will have to do the same thing as hfs_early_journal_init()
2967 // to locate and open the journal device.
2968 //
b4c24cb9
A
2969 jvp = hfsmp->hfs_devvp;
2970 jnl = journal_create(jvp,
2971 (off_t)name[2] * (off_t)HFSTOVCB(hfsmp)->blockSize
2972 + HFSTOVCB(hfsmp)->hfsPlusIOPosOffset,
55e303ae 2973 (off_t)((unsigned)name[3]),
b4c24cb9 2974 hfsmp->hfs_devvp,
593a1d5f 2975 hfsmp->hfs_logical_block_size,
b4c24cb9
A
2976 0,
2977 0,
2978 hfs_sync_metadata, hfsmp->hfs_mp);
2979
6d2010ae
A
2980 /*
2981 * Set up the trim callback function so that we can add
2982 * recently freed extents to the free extent cache once
2983 * the transaction that freed them is written to the
2984 * journal on disk.
2985 */
2986 if (jnl)
2987 journal_trim_set_callback(jnl, hfs_trim_callback, hfsmp);
2988
b4c24cb9
A
2989 if (jnl == NULL) {
2990 printf("hfs: FAILED to create the journal!\n");
2991 if (jvp && jvp != hfsmp->hfs_devvp) {
b0d623f7
A
2992 vnode_clearmountedon(jvp);
2993 VNOP_CLOSE(jvp, hfsmp->hfs_flags & HFS_READ_ONLY ? FREAD : FREAD|FWRITE, vfs_context_kernel());
b4c24cb9
A
2994 }
2995 jvp = NULL;
2996
2997 return EINVAL;
2998 }
2999
6d2010ae
A
3000 hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);
3001
4452a7af
A
3002 /*
3003 * Flush all dirty metadata buffers.
3004 */
6d2010ae
A
3005 buf_flushdirtyblks(hfsmp->hfs_devvp, TRUE, 0, "hfs_sysctl");
3006 buf_flushdirtyblks(hfsmp->hfs_extents_vp, TRUE, 0, "hfs_sysctl");
3007 buf_flushdirtyblks(hfsmp->hfs_catalog_vp, TRUE, 0, "hfs_sysctl");
3008 buf_flushdirtyblks(hfsmp->hfs_allocation_vp, TRUE, 0, "hfs_sysctl");
4452a7af 3009 if (hfsmp->hfs_attribute_vp)
6d2010ae 3010 buf_flushdirtyblks(hfsmp->hfs_attribute_vp, TRUE, 0, "hfs_sysctl");
4452a7af 3011
b4c24cb9
A
3012 HFSTOVCB(hfsmp)->vcbJinfoBlock = name[1];
3013 HFSTOVCB(hfsmp)->vcbAtrb |= kHFSVolumeJournaledMask;
3014 hfsmp->jvp = jvp;
3015 hfsmp->jnl = jnl;
3016
3017 // save this off for the hack-y check in hfs_remove()
3018 hfsmp->jnl_start = (u_int32_t)name[2];
55e303ae 3019 hfsmp->jnl_size = (off_t)((unsigned)name[3]);
b4c24cb9
A
3020 hfsmp->hfs_jnlinfoblkid = jinfo_attr.ca_fileid;
3021 hfsmp->hfs_jnlfileid = jnl_attr.ca_fileid;
3022
2d21ac55 3023 vfs_setflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
b4c24cb9 3024
6d2010ae 3025 hfs_unlock_global (hfsmp);
b4c24cb9
A
3026 hfs_flushvolumeheader(hfsmp, MNT_WAIT, 1);
3027
b0d623f7
A
3028 {
3029 fsid_t fsid;
3030
3031 fsid.val[0] = (int32_t)hfsmp->hfs_raw_dev;
3032 fsid.val[1] = (int32_t)vfs_typenum(HFSTOVFS(hfsmp));
3033 vfs_event_signal(&fsid, VQ_UPDATE, (intptr_t)NULL);
3034 }
b4c24cb9 3035 return 0;
55e303ae 3036 } else if (name[0] == HFS_DISABLE_JOURNALING) {
b4c24cb9 3037 // clear the journaling bit
2d21ac55 3038 vnode_t vp = vfs_context_cwd(context);
b4c24cb9 3039
d7e50217 3040 /* Only root can disable journaling */
91447636 3041 if (!is_suser()) {
d7e50217
A
3042 return (EPERM);
3043 }
2d21ac55 3044 if (vp == NULLVP)
91447636 3045 return EINVAL;
55e303ae 3046
b4c24cb9 3047 hfsmp = VTOHFS(vp);
b4c24cb9 3048
2d21ac55
A
3049 /*
3050 * Disabling journaling is disallowed on volumes with directory hard links
3051 * because we have not tested the relevant code path.
3052 */
3053 if (hfsmp->hfs_private_attr[DIR_HARDLINKS].ca_entries != 0){
3054 printf("hfs: cannot disable journaling on volumes with directory hardlinks\n");
3055 return EPERM;
3056 }
3057
3058 printf("hfs: disabling journaling for mount @ %p\n", vnode_mount(vp));
b4c24cb9 3059
6d2010ae 3060 hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);
b4c24cb9
A
3061
3062 // Lights out for you buddy!
91447636 3063 journal_close(hfsmp->jnl);
b4c24cb9 3064 hfsmp->jnl = NULL;
b4c24cb9
A
3065
3066 if (hfsmp->jvp && hfsmp->jvp != hfsmp->hfs_devvp) {
b0d623f7
A
3067 vnode_clearmountedon(hfsmp->jvp);
3068 VNOP_CLOSE(hfsmp->jvp, hfsmp->hfs_flags & HFS_READ_ONLY ? FREAD : FREAD|FWRITE, vfs_context_kernel());
3069 vnode_put(hfsmp->jvp);
b4c24cb9 3070 }
b4c24cb9 3071 hfsmp->jvp = NULL;
2d21ac55 3072 vfs_clearflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
b4c24cb9
A
3073 hfsmp->jnl_start = 0;
3074 hfsmp->hfs_jnlinfoblkid = 0;
3075 hfsmp->hfs_jnlfileid = 0;
3076
3077 HFSTOVCB(hfsmp)->vcbAtrb &= ~kHFSVolumeJournaledMask;
3078
6d2010ae
A
3079 hfs_unlock_global (hfsmp);
3080
b4c24cb9
A
3081 hfs_flushvolumeheader(hfsmp, MNT_WAIT, 1);
3082
b0d623f7
A
3083 {
3084 fsid_t fsid;
3085
3086 fsid.val[0] = (int32_t)hfsmp->hfs_raw_dev;
3087 fsid.val[1] = (int32_t)vfs_typenum(HFSTOVFS(hfsmp));
3088 vfs_event_signal(&fsid, VQ_UPDATE, (intptr_t)NULL);
3089 }
b4c24cb9 3090 return 0;
55e303ae 3091 } else if (name[0] == HFS_GET_JOURNAL_INFO) {
2d21ac55 3092 vnode_t vp = vfs_context_cwd(context);
55e303ae
A
3093 off_t jnl_start, jnl_size;
3094
2d21ac55 3095 if (vp == NULLVP)
91447636
A
3096 return EINVAL;
3097
b0d623f7
A
3098 /* 64-bit processes won't work with this sysctl -- can't fit a pointer into an int! */
3099 if (proc_is64bit(current_proc()))
3100 return EINVAL;
3101
55e303ae
A
3102 hfsmp = VTOHFS(vp);
3103 if (hfsmp->jnl == NULL) {
3104 jnl_start = 0;
3105 jnl_size = 0;
3106 } else {
3107 jnl_start = (off_t)(hfsmp->jnl_start * HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset;
3108 jnl_size = (off_t)hfsmp->jnl_size;
3109 }
3110
91447636 3111 if ((error = copyout((caddr_t)&jnl_start, CAST_USER_ADDR_T(name[1]), sizeof(off_t))) != 0) {
55e303ae
A
3112 return error;
3113 }
91447636 3114 if ((error = copyout((caddr_t)&jnl_size, CAST_USER_ADDR_T(name[2]), sizeof(off_t))) != 0) {
55e303ae
A
3115 return error;
3116 }
3117
3118 return 0;
3119 } else if (name[0] == HFS_SET_PKG_EXTENSIONS) {
3120
b0d623f7 3121 return set_package_extensions_table((user_addr_t)((unsigned)name[1]), name[2], name[3]);
55e303ae
A
3122
3123 } else if (name[0] == VFS_CTL_QUERY) {
91447636 3124 struct sysctl_req *req;
b0d623f7 3125 union union_vfsidctl vc;
91447636
A
3126 struct mount *mp;
3127 struct vfsquery vq;
91447636 3128
91447636 3129 req = CAST_DOWN(struct sysctl_req *, oldp); /* we're new style vfs sysctl. */
55e303ae 3130
b0d623f7
A
3131 error = SYSCTL_IN(req, &vc, proc_is64bit(p)? sizeof(vc.vc64):sizeof(vc.vc32));
3132 if (error) return (error);
3133
3134 mp = vfs_getvfs(&vc.vc32.vc_fsid); /* works for 32 and 64 */
91447636 3135 if (mp == NULL) return (ENOENT);
55e303ae
A
3136
3137 hfsmp = VFSTOHFS(mp);
3138 bzero(&vq, sizeof(vq));
3139 vq.vq_flags = hfsmp->hfs_notification_conditions;
3140 return SYSCTL_OUT(req, &vq, sizeof(vq));;
2d21ac55 3141 } else if (name[0] == HFS_REPLAY_JOURNAL) {
b0d623f7
A
3142 vnode_t devvp = NULL;
3143 int device_fd;
3144 if (namelen != 2) {
3145 return (EINVAL);
2d21ac55 3146 }
b0d623f7
A
3147 device_fd = name[1];
3148 error = file_vnode(device_fd, &devvp);
2d21ac55 3149 if (error) {
2d21ac55
A
3150 return error;
3151 }
b0d623f7
A
3152 error = vnode_getwithref(devvp);
3153 if (error) {
3154 file_drop(device_fd);
3155 return error;
3156 }
3157 error = hfs_journal_replay(devvp, context);
3158 file_drop(device_fd);
3159 vnode_put(devvp);
2d21ac55 3160 return error;
6d2010ae
A
3161 } else if (name[0] == HFS_ENABLE_RESIZE_DEBUG) {
3162 hfs_resize_debug = 1;
3163 printf ("hfs_sysctl: Enabled volume resize debugging.\n");
3164 return 0;
2d21ac55 3165 }
9bccf70c 3166
91447636 3167 return (ENOTSUP);
1c79356b
A
3168}
3169
b0d623f7
A
3170/*
3171 * hfs_vfs_vget is not static since it is used in hfs_readwrite.c to support
3172 * the build_path ioctl. We use it to leverage the code below that updates
3173 * the origin list cache if necessary
935ed37a 3174 */
b0d623f7 3175
935ed37a 3176int
91447636
A
3177hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, __unused vfs_context_t context)
3178{
2d21ac55 3179 int error;
935ed37a
A
3180 int lockflags;
3181 struct hfsmount *hfsmp;
3182
3183 hfsmp = VFSTOHFS(mp);
2d21ac55 3184
6d2010ae 3185 error = hfs_vget(hfsmp, (cnid_t)ino, vpp, 1, 0);
2d21ac55
A
3186 if (error)
3187 return (error);
3188
3189 /*
3190 * ADLs may need to have their origin state updated
b0d623f7
A
3191 * since build_path needs a valid parent. The same is true
3192 * for hardlinked files as well. There isn't a race window here
3193 * in re-acquiring the cnode lock since we aren't pulling any data
3194 * out of the cnode; instead, we're going to the catalog.
2d21ac55 3195 */
935ed37a 3196 if ((VTOC(*vpp)->c_flag & C_HARDLINK) &&
2d21ac55
A
3197 (hfs_lock(VTOC(*vpp), HFS_EXCLUSIVE_LOCK) == 0)) {
3198 cnode_t *cp = VTOC(*vpp);
3199 struct cat_desc cdesc;
3200
935ed37a
A
3201 if (!hfs_haslinkorigin(cp)) {
3202 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
b0d623f7 3203 error = cat_findname(hfsmp, (cnid_t)ino, &cdesc);
935ed37a
A
3204 hfs_systemfile_unlock(hfsmp, lockflags);
3205 if (error == 0) {
b0d623f7
A
3206 if ((cdesc.cd_parentcnid != hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) &&
3207 (cdesc.cd_parentcnid != hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid)) {
935ed37a
A
3208 hfs_savelinkorigin(cp, cdesc.cd_parentcnid);
3209 }
3210 cat_releasedesc(&cdesc);
2d21ac55 3211 }
2d21ac55
A
3212 }
3213 hfs_unlock(cp);
3214 }
3215 return (0);
91447636
A
3216}
3217
3218
3219/*
3220 * Look up an HFS object by ID.
3221 *
3222 * The object is returned with an iocount reference and the cnode locked.
3223 *
3224 * If the object is a file then it will represent the data fork.
3225 */
91447636 3226int
6d2010ae 3227hfs_vget(struct hfsmount *hfsmp, cnid_t cnid, struct vnode **vpp, int skiplock, int allow_deleted)
9bccf70c 3228{
2d21ac55 3229 struct vnode *vp = NULLVP;
91447636
A
3230 struct cat_desc cndesc;
3231 struct cat_attr cnattr;
3232 struct cat_fork cnfork;
743b1565 3233 u_int32_t linkref = 0;
91447636 3234 int error;
9bccf70c
A
3235
3236 /* Check for cnids that should't be exported. */
2d21ac55
A
3237 if ((cnid < kHFSFirstUserCatalogNodeID) &&
3238 (cnid != kHFSRootFolderID && cnid != kHFSRootParentID)) {
9bccf70c 3239 return (ENOENT);
2d21ac55
A
3240 }
3241 /* Don't export our private directories. */
3242 if (cnid == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid ||
3243 cnid == hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) {
9bccf70c 3244 return (ENOENT);
2d21ac55 3245 }
91447636
A
3246 /*
3247 * Check the hash first
3248 */
6d2010ae 3249 vp = hfs_chash_getvnode(hfsmp, cnid, 0, skiplock, allow_deleted);
91447636
A
3250 if (vp) {
3251 *vpp = vp;
3252 return(0);
3253 }
9bccf70c 3254
91447636
A
3255 bzero(&cndesc, sizeof(cndesc));
3256 bzero(&cnattr, sizeof(cnattr));
3257 bzero(&cnfork, sizeof(cnfork));
55e303ae 3258
91447636
A
3259 /*
3260 * Not in hash, lookup in catalog
3261 */
3262 if (cnid == kHFSRootParentID) {
3263 static char hfs_rootname[] = "/";
3264
2d21ac55 3265 cndesc.cd_nameptr = (const u_int8_t *)&hfs_rootname[0];
91447636
A
3266 cndesc.cd_namelen = 1;
3267 cndesc.cd_parentcnid = kHFSRootParentID;
3268 cndesc.cd_cnid = kHFSRootFolderID;
3269 cndesc.cd_flags = CD_ISDIR;
3270
3271 cnattr.ca_fileid = kHFSRootFolderID;
2d21ac55 3272 cnattr.ca_linkcount = 1;
91447636 3273 cnattr.ca_entries = 1;
2d21ac55 3274 cnattr.ca_dircount = 1;
91447636
A
3275 cnattr.ca_mode = (S_IFDIR | S_IRWXU | S_IRWXG | S_IRWXO);
3276 } else {
3277 int lockflags;
2d21ac55
A
3278 cnid_t pid;
3279 const char *nameptr;
55e303ae 3280
91447636 3281 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
2d21ac55 3282 error = cat_idlookup(hfsmp, cnid, 0, &cndesc, &cnattr, &cnfork);
91447636
A
3283 hfs_systemfile_unlock(hfsmp, lockflags);
3284
3285 if (error) {
3286 *vpp = NULL;
3287 return (error);
55e303ae 3288 }
91447636 3289
743b1565 3290 /*
2d21ac55 3291 * Check for a raw hardlink inode and save its linkref.
743b1565 3292 */
2d21ac55
A
3293 pid = cndesc.cd_parentcnid;
3294 nameptr = (const char *)cndesc.cd_nameptr;
3295
3296 if ((pid == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid) &&
3297 (bcmp(nameptr, HFS_INODE_PREFIX, HFS_INODE_PREFIX_LEN) == 0)) {
3298 linkref = strtoul(&nameptr[HFS_INODE_PREFIX_LEN], NULL, 10);
3299
3300 } else if ((pid == hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) &&
3301 (bcmp(nameptr, HFS_DIRINODE_PREFIX, HFS_DIRINODE_PREFIX_LEN) == 0)) {
3302 linkref = strtoul(&nameptr[HFS_DIRINODE_PREFIX_LEN], NULL, 10);
3303
3304 } else if ((pid == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid) &&
3305 (bcmp(nameptr, HFS_DELETE_PREFIX, HFS_DELETE_PREFIX_LEN) == 0)) {
3306 *vpp = NULL;
4a3eedf9 3307 cat_releasedesc(&cndesc);
2d21ac55 3308 return (ENOENT); /* open unlinked file */
91447636
A
3309 }
3310 }
3311
3312 /*
2d21ac55
A
3313 * Finish initializing cnode descriptor for hardlinks.
3314 *
3315 * We need a valid name and parent for reverse lookups.
91447636 3316 */
2d21ac55
A
3317 if (linkref) {
3318 cnid_t nextlinkid;
3319 cnid_t prevlinkid;
3320 struct cat_desc linkdesc;
935ed37a 3321 int lockflags;
2d21ac55
A
3322
3323 cnattr.ca_linkref = linkref;
91447636 3324
2d21ac55
A
3325 /*
3326 * Pick up the first link in the chain and get a descriptor for it.
3327 * This allows blind volfs paths to work for hardlinks.
3328 */
6d2010ae 3329 if ((hfs_lookup_siblinglinks(hfsmp, linkref, &prevlinkid, &nextlinkid) == 0) &&
2d21ac55 3330 (nextlinkid != 0)) {
935ed37a
A
3331 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
3332 error = cat_findname(hfsmp, nextlinkid, &linkdesc);
3333 hfs_systemfile_unlock(hfsmp, lockflags);
3334 if (error == 0) {
2d21ac55
A
3335 cat_releasedesc(&cndesc);
3336 bcopy(&linkdesc, &cndesc, sizeof(linkdesc));
3337 }
3338 }
743b1565
A
3339 }
3340
2d21ac55 3341 if (linkref) {
6d2010ae
A
3342 int newvnode_flags = 0;
3343
3344 error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr,
3345 &cnfork, &vp, &newvnode_flags);
2d21ac55
A
3346 if (error == 0) {
3347 VTOC(vp)->c_flag |= C_HARDLINK;
3348 vnode_setmultipath(vp);
3349 }
3350 } else {
3351 struct componentname cn;
6d2010ae 3352 int newvnode_flags = 0;
2d21ac55
A
3353
3354 /* Supply hfs_getnewvnode with a component name. */
3355 MALLOC_ZONE(cn.cn_pnbuf, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK);
3356 cn.cn_nameiop = LOOKUP;
3357 cn.cn_flags = ISLASTCN | HASBUF;
3358 cn.cn_context = NULL;
3359 cn.cn_pnlen = MAXPATHLEN;
3360 cn.cn_nameptr = cn.cn_pnbuf;
3361 cn.cn_namelen = cndesc.cd_namelen;
3362 cn.cn_hash = 0;
3363 cn.cn_consume = 0;
3364 bcopy(cndesc.cd_nameptr, cn.cn_nameptr, cndesc.cd_namelen + 1);
3365
6d2010ae
A
3366 error = hfs_getnewvnode(hfsmp, NULLVP, &cn, &cndesc, 0, &cnattr,
3367 &cnfork, &vp, &newvnode_flags);
91447636 3368
b0d623f7 3369 if (error == 0 && (VTOC(vp)->c_flag & C_HARDLINK)) {
2d21ac55
A
3370 hfs_savelinkorigin(VTOC(vp), cndesc.cd_parentcnid);
3371 }
3372 FREE_ZONE(cn.cn_pnbuf, cn.cn_pnlen, M_NAMEI);
3373 }
91447636 3374 cat_releasedesc(&cndesc);
2d21ac55 3375
91447636 3376 *vpp = vp;
2d21ac55 3377 if (vp && skiplock) {
91447636 3378 hfs_unlock(VTOC(vp));
2d21ac55 3379 }
91447636 3380 return (error);
55e303ae
A
3381}
3382
91447636 3383
9bccf70c
A
3384/*
3385 * Flush out all the files in a filesystem.
3386 */
55e303ae 3387static int
2d21ac55 3388#if QUOTA
9bccf70c 3389hfs_flushfiles(struct mount *mp, int flags, struct proc *p)
2d21ac55
A
3390#else
3391hfs_flushfiles(struct mount *mp, int flags, __unused struct proc *p)
3392#endif /* QUOTA */
1c79356b 3393{
55e303ae
A
3394 struct hfsmount *hfsmp;
3395 struct vnode *skipvp = NULLVP;
2d21ac55
A
3396 int error;
3397#if QUOTA
55e303ae 3398 int quotafilecnt;
9bccf70c 3399 int i;
2d21ac55 3400#endif
1c79356b 3401
9bccf70c 3402 hfsmp = VFSTOHFS(mp);
1c79356b 3403
55e303ae
A
3404#if QUOTA
3405 /*
3406 * The open quota files have an indirect reference on
3407 * the root directory vnode. We must account for this
3408 * extra reference when doing the intial vflush.
3409 */
3410 quotafilecnt = 0;
91447636 3411 if (((unsigned int)vfs_flags(mp)) & MNT_QUOTA) {
55e303ae
A
3412
3413 /* Find out how many quota files we have open. */
3414 for (i = 0; i < MAXQUOTAS; i++) {
3415 if (hfsmp->hfs_qfiles[i].qf_vp != NULLVP)
3416 ++quotafilecnt;
3417 }
3418
3419 /* Obtain the root vnode so we can skip over it. */
6d2010ae 3420 skipvp = hfs_chash_getvnode(hfsmp, kHFSRootFolderID, 0, 0, 0);
55e303ae
A
3421 }
3422#endif /* QUOTA */
3423
3424 error = vflush(mp, skipvp, SKIPSYSTEM | SKIPSWAP | flags);
91447636
A
3425 if (error != 0)
3426 return(error);
3427
55e303ae
A
3428 error = vflush(mp, skipvp, SKIPSYSTEM | flags);
3429
3430#if QUOTA
91447636 3431 if (((unsigned int)vfs_flags(mp)) & MNT_QUOTA) {
55e303ae
A
3432 if (skipvp) {
3433 /*
3434 * See if there are additional references on the
3435 * root vp besides the ones obtained from the open
91447636 3436 * quota files and the hfs_chash_getvnode call above.
55e303ae
A
3437 */
3438 if ((error == 0) &&
91447636 3439 (vnode_isinuse(skipvp, quotafilecnt))) {
55e303ae
A
3440 error = EBUSY; /* root directory is still open */
3441 }
91447636
A
3442 hfs_unlock(VTOC(skipvp));
3443 vnode_put(skipvp);
55e303ae
A
3444 }
3445 if (error && (flags & FORCECLOSE) == 0)
9bccf70c 3446 return (error);
55e303ae 3447
9bccf70c
A
3448 for (i = 0; i < MAXQUOTAS; i++) {
3449 if (hfsmp->hfs_qfiles[i].qf_vp == NULLVP)
3450 continue;
3451 hfs_quotaoff(p, mp, i);
3452 }
55e303ae 3453 error = vflush(mp, NULLVP, SKIPSYSTEM | flags);
1c79356b 3454 }
9bccf70c 3455#endif /* QUOTA */
1c79356b 3456
9bccf70c
A
3457 return (error);
3458}
1c79356b 3459
9bccf70c
A
3460/*
3461 * Update volume encoding bitmap (HFS Plus only)
3462 */
3463__private_extern__
3464void
3465hfs_setencodingbits(struct hfsmount *hfsmp, u_int32_t encoding)
3466{
3467#define kIndexMacUkrainian 48 /* MacUkrainian encoding is 152 */
3468#define kIndexMacFarsi 49 /* MacFarsi encoding is 140 */
3469
2d21ac55 3470 u_int32_t index;
9bccf70c
A
3471
3472 switch (encoding) {
3473 case kTextEncodingMacUkrainian:
3474 index = kIndexMacUkrainian;
3475 break;
3476 case kTextEncodingMacFarsi:
3477 index = kIndexMacFarsi;
3478 break;
3479 default:
3480 index = encoding;
3481 break;
3482 }
1c79356b 3483
2d21ac55 3484 if (index < 64 && (hfsmp->encodingsBitmap & (u_int64_t)(1ULL << index)) == 0) {
91447636
A
3485 HFS_MOUNT_LOCK(hfsmp, TRUE)
3486 hfsmp->encodingsBitmap |= (u_int64_t)(1ULL << index);
2d21ac55 3487 MarkVCBDirty(hfsmp);
91447636 3488 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
9bccf70c 3489 }
1c79356b
A
3490}
3491
3492/*
9bccf70c 3493 * Update volume stats
91447636
A
3494 *
3495 * On journal volumes this will cause a volume header flush
1c79356b
A
3496 */
3497int
9bccf70c 3498hfs_volupdate(struct hfsmount *hfsmp, enum volop op, int inroot)
1c79356b 3499{
91447636 3500 struct timeval tv;
1c79356b 3501
91447636
A
3502 microtime(&tv);
3503
3504 lck_mtx_lock(&hfsmp->hfs_mutex);
3505
2d21ac55 3506 MarkVCBDirty(hfsmp);
91447636 3507 hfsmp->hfs_mtime = tv.tv_sec;
9bccf70c
A
3508
3509 switch (op) {
3510 case VOL_UPDATE:
3511 break;
3512 case VOL_MKDIR:
91447636
A
3513 if (hfsmp->hfs_dircount != 0xFFFFFFFF)
3514 ++hfsmp->hfs_dircount;
3515 if (inroot && hfsmp->vcbNmRtDirs != 0xFFFF)
3516 ++hfsmp->vcbNmRtDirs;
9bccf70c
A
3517 break;
3518 case VOL_RMDIR:
91447636
A
3519 if (hfsmp->hfs_dircount != 0)
3520 --hfsmp->hfs_dircount;
3521 if (inroot && hfsmp->vcbNmRtDirs != 0xFFFF)
3522 --hfsmp->vcbNmRtDirs;
9bccf70c
A
3523 break;
3524 case VOL_MKFILE:
91447636
A
3525 if (hfsmp->hfs_filecount != 0xFFFFFFFF)
3526 ++hfsmp->hfs_filecount;
3527 if (inroot && hfsmp->vcbNmFls != 0xFFFF)
3528 ++hfsmp->vcbNmFls;
9bccf70c
A
3529 break;
3530 case VOL_RMFILE:
91447636
A
3531 if (hfsmp->hfs_filecount != 0)
3532 --hfsmp->hfs_filecount;
3533 if (inroot && hfsmp->vcbNmFls != 0xFFFF)
3534 --hfsmp->vcbNmFls;
9bccf70c
A
3535 break;
3536 }
b4c24cb9 3537
91447636
A
3538 lck_mtx_unlock(&hfsmp->hfs_mutex);
3539
b4c24cb9
A
3540 if (hfsmp->jnl) {
3541 hfs_flushvolumeheader(hfsmp, 0, 0);
3542 }
3543
9bccf70c 3544 return (0);
1c79356b
A
3545}
3546
9bccf70c
A
3547
3548static int
3549hfs_flushMDB(struct hfsmount *hfsmp, int waitfor, int altflush)
1c79356b 3550{
9bccf70c
A
3551 ExtendedVCB *vcb = HFSTOVCB(hfsmp);
3552 struct filefork *fp;
1c79356b 3553 HFSMasterDirectoryBlock *mdb;
9bccf70c
A
3554 struct buf *bp = NULL;
3555 int retval;
316670eb 3556 int sector_size;
9bccf70c 3557 ByteCount namelen;
1c79356b 3558
316670eb
A
3559 sector_size = hfsmp->hfs_logical_block_size;
3560 retval = (int)buf_bread(hfsmp->hfs_devvp, (daddr64_t)HFS_PRI_SECTOR(sector_size), sector_size, NOCRED, &bp);
1c79356b 3561 if (retval) {
9bccf70c 3562 if (bp)
91447636 3563 buf_brelse(bp);
1c79356b
A
3564 return retval;
3565 }
3566
91447636 3567 lck_mtx_lock(&hfsmp->hfs_mutex);
b4c24cb9 3568
316670eb 3569 mdb = (HFSMasterDirectoryBlock *)(buf_dataptr(bp) + HFS_PRI_OFFSET(sector_size));
1c79356b 3570
6d2010ae 3571 mdb->drCrDate = SWAP_BE32 (UTCToLocal(to_hfs_time(vcb->hfs_itime)));
9bccf70c
A
3572 mdb->drLsMod = SWAP_BE32 (UTCToLocal(to_hfs_time(vcb->vcbLsMod)));
3573 mdb->drAtrb = SWAP_BE16 (vcb->vcbAtrb);
1c79356b
A
3574 mdb->drNmFls = SWAP_BE16 (vcb->vcbNmFls);
3575 mdb->drAllocPtr = SWAP_BE16 (vcb->nextAllocation);
3576 mdb->drClpSiz = SWAP_BE32 (vcb->vcbClpSiz);
3577 mdb->drNxtCNID = SWAP_BE32 (vcb->vcbNxtCNID);
3578 mdb->drFreeBks = SWAP_BE16 (vcb->freeBlocks);
3579
2d21ac55 3580 namelen = strlen((char *)vcb->vcbVN);
1c79356b
A
3581 retval = utf8_to_hfs(vcb, namelen, vcb->vcbVN, mdb->drVN);
3582 /* Retry with MacRoman in case that's how it was exported. */
3583 if (retval)
3584 retval = utf8_to_mac_roman(namelen, vcb->vcbVN, mdb->drVN);
3585
9bccf70c 3586 mdb->drVolBkUp = SWAP_BE32 (UTCToLocal(to_hfs_time(vcb->vcbVolBkUp)));
1c79356b
A
3587 mdb->drWrCnt = SWAP_BE32 (vcb->vcbWrCnt);
3588 mdb->drNmRtDirs = SWAP_BE16 (vcb->vcbNmRtDirs);
3589 mdb->drFilCnt = SWAP_BE32 (vcb->vcbFilCnt);
3590 mdb->drDirCnt = SWAP_BE32 (vcb->vcbDirCnt);
3591
3592 bcopy(vcb->vcbFndrInfo, mdb->drFndrInfo, sizeof(mdb->drFndrInfo));
3593
9bccf70c
A
3594 fp = VTOF(vcb->extentsRefNum);
3595 mdb->drXTExtRec[0].startBlock = SWAP_BE16 (fp->ff_extents[0].startBlock);
3596 mdb->drXTExtRec[0].blockCount = SWAP_BE16 (fp->ff_extents[0].blockCount);
3597 mdb->drXTExtRec[1].startBlock = SWAP_BE16 (fp->ff_extents[1].startBlock);
3598 mdb->drXTExtRec[1].blockCount = SWAP_BE16 (fp->ff_extents[1].blockCount);
3599 mdb->drXTExtRec[2].startBlock = SWAP_BE16 (fp->ff_extents[2].startBlock);
3600 mdb->drXTExtRec[2].blockCount = SWAP_BE16 (fp->ff_extents[2].blockCount);
3601 mdb->drXTFlSize = SWAP_BE32 (fp->ff_blocks * vcb->blockSize);
3602 mdb->drXTClpSiz = SWAP_BE32 (fp->ff_clumpsize);
91447636 3603 FTOC(fp)->c_flag &= ~C_MODIFIED;
1c79356b 3604
9bccf70c
A
3605 fp = VTOF(vcb->catalogRefNum);
3606 mdb->drCTExtRec[0].startBlock = SWAP_BE16 (fp->ff_extents[0].startBlock);
3607 mdb->drCTExtRec[0].blockCount = SWAP_BE16 (fp->ff_extents[0].blockCount);
3608 mdb->drCTExtRec[1].startBlock = SWAP_BE16 (fp->ff_extents[1].startBlock);
3609 mdb->drCTExtRec[1].blockCount = SWAP_BE16 (fp->ff_extents[1].blockCount);
3610 mdb->drCTExtRec[2].startBlock = SWAP_BE16 (fp->ff_extents[2].startBlock);
3611 mdb->drCTExtRec[2].blockCount = SWAP_BE16 (fp->ff_extents[2].blockCount);
3612 mdb->drCTFlSize = SWAP_BE32 (fp->ff_blocks * vcb->blockSize);
3613 mdb->drCTClpSiz = SWAP_BE32 (fp->ff_clumpsize);
91447636
A
3614 FTOC(fp)->c_flag &= ~C_MODIFIED;
3615
3616 MarkVCBClean( vcb );
3617
3618 lck_mtx_unlock(&hfsmp->hfs_mutex);
9bccf70c
A
3619
3620 /* If requested, flush out the alternate MDB */
3621 if (altflush) {
3622 struct buf *alt_bp = NULL;
9bccf70c 3623
316670eb
A
3624 if (buf_meta_bread(hfsmp->hfs_devvp, hfsmp->hfs_alt_id_sector, sector_size, NOCRED, &alt_bp) == 0) {
3625 bcopy(mdb, (char *)buf_dataptr(alt_bp) + HFS_ALT_OFFSET(sector_size), kMDBSize);
b4c24cb9 3626
91447636 3627 (void) VNOP_BWRITE(alt_bp);
9bccf70c 3628 } else if (alt_bp)
91447636 3629 buf_brelse(alt_bp);
9bccf70c 3630 }
1c79356b 3631
9bccf70c 3632 if (waitfor != MNT_WAIT)
91447636 3633 buf_bawrite(bp);
b4c24cb9 3634 else
91447636 3635 retval = VNOP_BWRITE(bp);
1c79356b
A
3636
3637 return (retval);
3638}
3639
55e303ae
A
3640/*
3641 * Flush any dirty in-memory mount data to the on-disk
3642 * volume header.
3643 *
3644 * Note: the on-disk volume signature is intentionally
3645 * not flushed since the on-disk "H+" and "HX" signatures
3646 * are always stored in-memory as "H+".
3647 */
9bccf70c
A
3648int
3649hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush)
1c79356b 3650{
9bccf70c
A
3651 ExtendedVCB *vcb = HFSTOVCB(hfsmp);
3652 struct filefork *fp;
b0d623f7 3653 HFSPlusVolumeHeader *volumeHeader, *altVH;
9bccf70c 3654 int retval;
b0d623f7 3655 struct buf *bp, *alt_bp;
9bccf70c 3656 int i;
91447636 3657 daddr64_t priIDSector;
2d21ac55 3658 int critical;
55e303ae 3659 u_int16_t signature;
91447636 3660 u_int16_t hfsversion;
1c79356b 3661
55e303ae
A
3662 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
3663 return(0);
3664 }
2d21ac55 3665 if (hfsmp->hfs_flags & HFS_STANDARD) {
9bccf70c 3666 return hfs_flushMDB(hfsmp, waitfor, altflush);
2d21ac55
A
3667 }
3668 critical = altflush;
593a1d5f
A
3669 priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
3670 HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size));
d52fe63f 3671
91447636
A
3672 if (hfs_start_transaction(hfsmp) != 0) {
3673 return EINVAL;
b4c24cb9
A
3674 }
3675
b0d623f7
A
3676 bp = NULL;
3677 alt_bp = NULL;
3678
593a1d5f
A
3679 retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
3680 HFS_PHYSBLK_ROUNDDOWN(priIDSector, hfsmp->hfs_log_per_phys),
3681 hfsmp->hfs_physical_block_size, NOCRED, &bp);
1c79356b 3682 if (retval) {
b0d623f7
A
3683 printf("hfs: err %d reading VH blk (%s)\n", retval, vcb->vcbVN);
3684 goto err_exit;
b4c24cb9
A
3685 }
3686
593a1d5f
A
3687 volumeHeader = (HFSPlusVolumeHeader *)((char *)buf_dataptr(bp) +
3688 HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));
1c79356b 3689
55e303ae 3690 /*
b0d623f7
A
3691 * Sanity check what we just read. If it's bad, try the alternate
3692 * instead.
55e303ae
A
3693 */
3694 signature = SWAP_BE16 (volumeHeader->signature);
91447636 3695 hfsversion = SWAP_BE16 (volumeHeader->version);
55e303ae 3696 if ((signature != kHFSPlusSigWord && signature != kHFSXSigWord) ||
91447636 3697 (hfsversion < kHFSPlusVersion) || (hfsversion > 100) ||
55e303ae 3698 (SWAP_BE32 (volumeHeader->blockSize) != vcb->blockSize)) {
b0d623f7 3699 printf("hfs: corrupt VH on %s, sig 0x%04x, ver %d, blksize %d%s\n",
91447636 3700 vcb->vcbVN, signature, hfsversion,
b0d623f7
A
3701 SWAP_BE32 (volumeHeader->blockSize),
3702 hfsmp->hfs_alt_id_sector ? "; trying alternate" : "");
3703 hfs_mark_volume_inconsistent(hfsmp);
3704
3705 if (hfsmp->hfs_alt_id_sector) {
3706 retval = buf_meta_bread(hfsmp->hfs_devvp,
3707 HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys),
3708 hfsmp->hfs_physical_block_size, NOCRED, &alt_bp);
3709 if (retval) {
3710 printf("hfs: err %d reading alternate VH (%s)\n", retval, vcb->vcbVN);
3711 goto err_exit;
3712 }
3713
3714 altVH = (HFSPlusVolumeHeader *)((char *)buf_dataptr(alt_bp) +
3715 HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size));
3716 signature = SWAP_BE16(altVH->signature);
3717 hfsversion = SWAP_BE16(altVH->version);
3718
3719 if ((signature != kHFSPlusSigWord && signature != kHFSXSigWord) ||
3720 (hfsversion < kHFSPlusVersion) || (kHFSPlusVersion > 100) ||
3721 (SWAP_BE32(altVH->blockSize) != vcb->blockSize)) {
3722 printf("hfs: corrupt alternate VH on %s, sig 0x%04x, ver %d, blksize %d\n",
3723 vcb->vcbVN, signature, hfsversion,
3724 SWAP_BE32(altVH->blockSize));
3725 retval = EIO;
3726 goto err_exit;
3727 }
3728
3729 /* The alternate is plausible, so use it. */
3730 bcopy(altVH, volumeHeader, kMDBSize);
3731 buf_brelse(alt_bp);
3732 alt_bp = NULL;
3733 } else {
3734 /* No alternate VH, nothing more we can do. */
3735 retval = EIO;
3736 goto err_exit;
3737 }
3738 }
3739
3740 if (hfsmp->jnl) {
3741 journal_modify_block_start(hfsmp->jnl, bp);
55e303ae
A
3742 }
3743
1c79356b
A
3744 /*
3745 * For embedded HFS+ volumes, update create date if it changed
3746 * (ie from a setattrlist call)
3747 */
9bccf70c
A
3748 if ((vcb->hfsPlusIOPosOffset != 0) &&
3749 (SWAP_BE32 (volumeHeader->createDate) != vcb->localCreateDate)) {
3750 struct buf *bp2;
1c79356b
A
3751 HFSMasterDirectoryBlock *mdb;
3752
593a1d5f
A
3753 retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
3754 HFS_PHYSBLK_ROUNDDOWN(HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size), hfsmp->hfs_log_per_phys),
3755 hfsmp->hfs_physical_block_size, NOCRED, &bp2);
9bccf70c
A
3756 if (retval) {
3757 if (bp2)
91447636 3758 buf_brelse(bp2);
9bccf70c 3759 retval = 0;
1c79356b 3760 } else {
91447636 3761 mdb = (HFSMasterDirectoryBlock *)(buf_dataptr(bp2) +
593a1d5f 3762 HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));
1c79356b
A
3763
3764 if ( SWAP_BE32 (mdb->drCrDate) != vcb->localCreateDate )
3765 {
b4c24cb9
A
3766 if (hfsmp->jnl) {
3767 journal_modify_block_start(hfsmp->jnl, bp2);
3768 }
3769
1c79356b
A
3770 mdb->drCrDate = SWAP_BE32 (vcb->localCreateDate); /* pick up the new create date */
3771
b4c24cb9 3772 if (hfsmp->jnl) {
2d21ac55 3773 journal_modify_block_end(hfsmp->jnl, bp2, NULL, NULL);
b4c24cb9 3774 } else {
91447636 3775 (void) VNOP_BWRITE(bp2); /* write out the changes */
b4c24cb9 3776 }
1c79356b
A
3777 }
3778 else
3779 {
91447636 3780 buf_brelse(bp2); /* just release it */
1c79356b
A
3781 }
3782 }
9bccf70c 3783 }
1c79356b 3784
2d21ac55 3785 lck_mtx_lock(&hfsmp->hfs_mutex);
91447636 3786
1c79356b 3787 /* Note: only update the lower 16 bits worth of attributes */
91447636
A
3788 volumeHeader->attributes = SWAP_BE32 (vcb->vcbAtrb);
3789 volumeHeader->journalInfoBlock = SWAP_BE32 (vcb->vcbJinfoBlock);
b4c24cb9
A
3790 if (hfsmp->jnl) {
3791 volumeHeader->lastMountedVersion = SWAP_BE32 (kHFSJMountVersion);
3792 } else {
3793 volumeHeader->lastMountedVersion = SWAP_BE32 (kHFSPlusMountVersion);
3794 }
9bccf70c
A
3795 volumeHeader->createDate = SWAP_BE32 (vcb->localCreateDate); /* volume create date is in local time */
3796 volumeHeader->modifyDate = SWAP_BE32 (to_hfs_time(vcb->vcbLsMod));
3797 volumeHeader->backupDate = SWAP_BE32 (to_hfs_time(vcb->vcbVolBkUp));
3798 volumeHeader->fileCount = SWAP_BE32 (vcb->vcbFilCnt);
3799 volumeHeader->folderCount = SWAP_BE32 (vcb->vcbDirCnt);
91447636 3800 volumeHeader->totalBlocks = SWAP_BE32 (vcb->totalBlocks);
9bccf70c
A
3801 volumeHeader->freeBlocks = SWAP_BE32 (vcb->freeBlocks);
3802 volumeHeader->nextAllocation = SWAP_BE32 (vcb->nextAllocation);
3803 volumeHeader->rsrcClumpSize = SWAP_BE32 (vcb->vcbClpSiz);
3804 volumeHeader->dataClumpSize = SWAP_BE32 (vcb->vcbClpSiz);
3805 volumeHeader->nextCatalogID = SWAP_BE32 (vcb->vcbNxtCNID);
3806 volumeHeader->writeCount = SWAP_BE32 (vcb->vcbWrCnt);
3807 volumeHeader->encodingsBitmap = SWAP_BE64 (vcb->encodingsBitmap);
3808
91447636
A
3809 if (bcmp(vcb->vcbFndrInfo, volumeHeader->finderInfo, sizeof(volumeHeader->finderInfo)) != 0) {
3810 bcopy(vcb->vcbFndrInfo, volumeHeader->finderInfo, sizeof(volumeHeader->finderInfo));
9bccf70c 3811 critical = 1;
91447636 3812 }
9bccf70c 3813
2d21ac55
A
3814 /*
3815 * System files are only dirty when altflush is set.
3816 */
3817 if (altflush == 0) {
3818 goto done;
3819 }
3820
9bccf70c
A
3821 /* Sync Extents over-flow file meta data */
3822 fp = VTOF(vcb->extentsRefNum);
91447636
A
3823 if (FTOC(fp)->c_flag & C_MODIFIED) {
3824 for (i = 0; i < kHFSPlusExtentDensity; i++) {
3825 volumeHeader->extentsFile.extents[i].startBlock =
3826 SWAP_BE32 (fp->ff_extents[i].startBlock);
3827 volumeHeader->extentsFile.extents[i].blockCount =
3828 SWAP_BE32 (fp->ff_extents[i].blockCount);
3829 }
3830 volumeHeader->extentsFile.logicalSize = SWAP_BE64 (fp->ff_size);
3831 volumeHeader->extentsFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
3832 volumeHeader->extentsFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
3833 FTOC(fp)->c_flag &= ~C_MODIFIED;
9bccf70c 3834 }
9bccf70c
A
3835
3836 /* Sync Catalog file meta data */
3837 fp = VTOF(vcb->catalogRefNum);
91447636
A
3838 if (FTOC(fp)->c_flag & C_MODIFIED) {
3839 for (i = 0; i < kHFSPlusExtentDensity; i++) {
3840 volumeHeader->catalogFile.extents[i].startBlock =
3841 SWAP_BE32 (fp->ff_extents[i].startBlock);
3842 volumeHeader->catalogFile.extents[i].blockCount =
3843 SWAP_BE32 (fp->ff_extents[i].blockCount);
3844 }
3845 volumeHeader->catalogFile.logicalSize = SWAP_BE64 (fp->ff_size);
3846 volumeHeader->catalogFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
3847 volumeHeader->catalogFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
3848 FTOC(fp)->c_flag &= ~C_MODIFIED;
9bccf70c 3849 }
9bccf70c
A
3850
3851 /* Sync Allocation file meta data */
3852 fp = VTOF(vcb->allocationsRefNum);
91447636
A
3853 if (FTOC(fp)->c_flag & C_MODIFIED) {
3854 for (i = 0; i < kHFSPlusExtentDensity; i++) {
3855 volumeHeader->allocationFile.extents[i].startBlock =
3856 SWAP_BE32 (fp->ff_extents[i].startBlock);
3857 volumeHeader->allocationFile.extents[i].blockCount =
3858 SWAP_BE32 (fp->ff_extents[i].blockCount);
3859 }
3860 volumeHeader->allocationFile.logicalSize = SWAP_BE64 (fp->ff_size);
3861 volumeHeader->allocationFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
3862 volumeHeader->allocationFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
3863 FTOC(fp)->c_flag &= ~C_MODIFIED;
3864 }
3865
3866 /* Sync Attribute file meta data */
3867 if (hfsmp->hfs_attribute_vp) {
3868 fp = VTOF(hfsmp->hfs_attribute_vp);
3869 for (i = 0; i < kHFSPlusExtentDensity; i++) {
3870 volumeHeader->attributesFile.extents[i].startBlock =
3871 SWAP_BE32 (fp->ff_extents[i].startBlock);
3872 volumeHeader->attributesFile.extents[i].blockCount =
3873 SWAP_BE32 (fp->ff_extents[i].blockCount);
3874 }
3875 FTOC(fp)->c_flag &= ~C_MODIFIED;
3876 volumeHeader->attributesFile.logicalSize = SWAP_BE64 (fp->ff_size);
3877 volumeHeader->attributesFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
3878 volumeHeader->attributesFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
3879 }
3880
2d21ac55
A
3881 /* Sync Startup file meta data */
3882 if (hfsmp->hfs_startup_vp) {
3883 fp = VTOF(hfsmp->hfs_startup_vp);
3884 if (FTOC(fp)->c_flag & C_MODIFIED) {
3885 for (i = 0; i < kHFSPlusExtentDensity; i++) {
3886 volumeHeader->startupFile.extents[i].startBlock =
3887 SWAP_BE32 (fp->ff_extents[i].startBlock);
3888 volumeHeader->startupFile.extents[i].blockCount =
3889 SWAP_BE32 (fp->ff_extents[i].blockCount);
3890 }
3891 volumeHeader->startupFile.logicalSize = SWAP_BE64 (fp->ff_size);
3892 volumeHeader->startupFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
3893 volumeHeader->startupFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
3894 FTOC(fp)->c_flag &= ~C_MODIFIED;
3895 }
9bccf70c 3896 }
9bccf70c 3897
2d21ac55
A
3898done:
3899 MarkVCBClean(hfsmp);
3900 lck_mtx_unlock(&hfsmp->hfs_mutex);
3901
9bccf70c 3902 /* If requested, flush out the alternate volume header */
91447636 3903 if (altflush && hfsmp->hfs_alt_id_sector) {
593a1d5f
A
3904 if (buf_meta_bread(hfsmp->hfs_devvp,
3905 HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys),
3906 hfsmp->hfs_physical_block_size, NOCRED, &alt_bp) == 0) {
b4c24cb9
A
3907 if (hfsmp->jnl) {
3908 journal_modify_block_start(hfsmp->jnl, alt_bp);
3909 }
3910
593a1d5f
A
3911 bcopy(volumeHeader, (char *)buf_dataptr(alt_bp) +
3912 HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size),
3913 kMDBSize);
b4c24cb9
A
3914
3915 if (hfsmp->jnl) {
2d21ac55 3916 journal_modify_block_end(hfsmp->jnl, alt_bp, NULL, NULL);
b4c24cb9 3917 } else {
91447636 3918 (void) VNOP_BWRITE(alt_bp);
b4c24cb9 3919 }
9bccf70c 3920 } else if (alt_bp)
91447636 3921 buf_brelse(alt_bp);
9bccf70c
A
3922 }
3923
b4c24cb9 3924 if (hfsmp->jnl) {
2d21ac55 3925 journal_modify_block_end(hfsmp->jnl, bp, NULL, NULL);
b4c24cb9
A
3926 } else {
3927 if (waitfor != MNT_WAIT)
91447636 3928 buf_bawrite(bp);
b4c24cb9 3929 else {
91447636 3930 retval = VNOP_BWRITE(bp);
b4c24cb9
A
3931 /* When critical data changes, flush the device cache */
3932 if (critical && (retval == 0)) {
91447636
A
3933 (void) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE,
3934 NULL, FWRITE, NULL);
b4c24cb9 3935 }
9bccf70c
A
3936 }
3937 }
91447636 3938 hfs_end_transaction(hfsmp);
1c79356b 3939
1c79356b 3940 return (retval);
b0d623f7
A
3941
3942err_exit:
3943 if (alt_bp)
3944 buf_brelse(alt_bp);
3945 if (bp)
3946 buf_brelse(bp);
3947 hfs_end_transaction(hfsmp);
3948 return retval;
1c79356b
A
3949}
3950
3951
55e303ae
A
3952/*
3953 * Extend a file system.
3954 */
91447636
A
3955int
3956hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
55e303ae 3957{
91447636
A
3958 struct proc *p = vfs_context_proc(context);
3959 kauth_cred_t cred = vfs_context_ucred(context);
55e303ae
A
3960 struct vnode *vp;
3961 struct vnode *devvp;
3962 struct buf *bp;
55e303ae
A
3963 struct filefork *fp = NULL;
3964 ExtendedVCB *vcb;
3965 struct cat_fork forkdata;
3966 u_int64_t oldsize;
3967 u_int64_t newblkcnt;
91447636 3968 u_int64_t prev_phys_block_count;
55e303ae 3969 u_int32_t addblks;
316670eb
A
3970 u_int64_t sector_count;
3971 u_int32_t sector_size;
3972 u_int32_t phys_sector_size;
3973 u_int32_t overage_blocks;
91447636
A
3974 daddr64_t prev_alt_sector;
3975 daddr_t bitmapblks;
d1ecb069 3976 int lockflags = 0;
55e303ae 3977 int error;
2d21ac55
A
3978 int64_t oldBitmapSize;
3979 Boolean usedExtendFileC = false;
d1ecb069 3980 int transaction_begun = 0;
2d21ac55 3981
55e303ae
A
3982 devvp = hfsmp->hfs_devvp;
3983 vcb = HFSTOVCB(hfsmp);
3984
3985 /*
3986 * - HFS Plus file systems only.
3987 * - Journaling must be enabled.
3988 * - No embedded volumes.
3989 */
3990 if ((vcb->vcbSigWord == kHFSSigWord) ||
3991 (hfsmp->jnl == NULL) ||
3992 (vcb->hfsPlusIOPosOffset != 0)) {
3993 return (EPERM);
3994 }
3995 /*
3996 * If extending file system by non-root, then verify
3997 * ownership and check permissions.
3998 */
91447636 3999 if (suser(cred, NULL)) {
6d2010ae 4000 error = hfs_vget(hfsmp, kHFSRootFolderID, &vp, 0, 0);
91447636 4001
55e303ae
A
4002 if (error)
4003 return (error);
91447636 4004 error = hfs_owner_rights(hfsmp, VTOC(vp)->c_uid, cred, p, 0);
55e303ae 4005 if (error == 0) {
91447636 4006 error = hfs_write_access(vp, cred, p, false);
55e303ae 4007 }
91447636
A
4008 hfs_unlock(VTOC(vp));
4009 vnode_put(vp);
55e303ae
A
4010 if (error)
4011 return (error);
4012
91447636 4013 error = vnode_authorize(devvp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, context);
55e303ae
A
4014 if (error)
4015 return (error);
4016 }
316670eb 4017 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&sector_size, 0, context)) {
55e303ae
A
4018 return (ENXIO);
4019 }
316670eb 4020 if (sector_size != hfsmp->hfs_logical_block_size) {
55e303ae
A
4021 return (ENXIO);
4022 }
316670eb 4023 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&sector_count, 0, context)) {
55e303ae
A
4024 return (ENXIO);
4025 }
316670eb 4026 if ((sector_size * sector_count) < newsize) {
55e303ae
A
4027 printf("hfs_extendfs: not enough space on device\n");
4028 return (ENOSPC);
4029 }
316670eb 4030 error = VNOP_IOCTL(devvp, DKIOCGETPHYSICALBLOCKSIZE, (caddr_t)&phys_sector_size, 0, context);
593a1d5f
A
4031 if (error) {
4032 if ((error != ENOTSUP) && (error != ENOTTY)) {
4033 return (ENXIO);
4034 }
4035 /* If ioctl is not supported, force physical and logical sector size to be same */
316670eb 4036 phys_sector_size = sector_size;
593a1d5f 4037 }
91447636 4038 oldsize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
55e303ae
A
4039
4040 /*
4041 * Validate new size.
4042 */
316670eb 4043 if ((newsize <= oldsize) || (newsize % sector_size) || (newsize % phys_sector_size)) {
55e303ae
A
4044 printf("hfs_extendfs: invalid size\n");
4045 return (EINVAL);
4046 }
4047 newblkcnt = newsize / vcb->blockSize;
4048 if (newblkcnt > (u_int64_t)0xFFFFFFFF)
4049 return (EOVERFLOW);
4050
4051 addblks = newblkcnt - vcb->totalBlocks;
4052
6d2010ae
A
4053 if (hfs_resize_debug) {
4054 printf ("hfs_extendfs: old: size=%qu, blkcnt=%u\n", oldsize, hfsmp->totalBlocks);
4055 printf ("hfs_extendfs: new: size=%qu, blkcnt=%u, addblks=%u\n", newsize, (u_int32_t)newblkcnt, addblks);
4056 }
4057 printf("hfs_extendfs: will extend \"%s\" by %d blocks\n", vcb->vcbVN, addblks);
d1ecb069
A
4058
4059 HFS_MOUNT_LOCK(hfsmp, TRUE);
4060 if (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) {
4061 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
4062 error = EALREADY;
4063 goto out;
4064 }
4065 hfsmp->hfs_flags |= HFS_RESIZE_IN_PROGRESS;
4066 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
316670eb 4067
e2d2fc5c
A
4068 /* Start with a clean journal. */
4069 hfs_journal_flush(hfsmp, TRUE);
4070
55e303ae
A
4071 /*
4072 * Enclose changes inside a transaction.
4073 */
91447636 4074 if (hfs_start_transaction(hfsmp) != 0) {
d1ecb069
A
4075 error = EINVAL;
4076 goto out;
55e303ae 4077 }
d1ecb069 4078 transaction_begun = 1;
55e303ae 4079
316670eb
A
4080
4081 /* Update the hfsmp fields for the physical information about the device */
4082 prev_phys_block_count = hfsmp->hfs_logical_block_count;
4083 prev_alt_sector = hfsmp->hfs_alt_id_sector;
4084
4085 hfsmp->hfs_logical_block_count = sector_count;
4086 /*
4087 * Note that the new AltVH location must be based on the device's EOF rather than the new
4088 * filesystem's EOF, so we use logical_block_count here rather than newsize.
4089 */
4090 hfsmp->hfs_alt_id_sector = (hfsmp->hfsPlusIOPosOffset / sector_size) +
4091 HFS_ALT_SECTOR(sector_size, hfsmp->hfs_logical_block_count);
4092 hfsmp->hfs_logical_bytes = (uint64_t) sector_count * (uint64_t) sector_size;
4093
4094
2d21ac55
A
4095 /*
4096 * Note: we take the attributes lock in case we have an attribute data vnode
4097 * which needs to change size.
4098 */
4099 lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
55e303ae 4100 vp = vcb->allocationsRefNum;
55e303ae
A
4101 fp = VTOF(vp);
4102 bcopy(&fp->ff_data, &forkdata, sizeof(forkdata));
4103
4104 /*
4105 * Calculate additional space required (if any) by allocation bitmap.
4106 */
2d21ac55
A
4107 oldBitmapSize = fp->ff_size;
4108 bitmapblks = roundup((newblkcnt+7) / 8, vcb->vcbVBMIOSize) / vcb->blockSize;
91447636 4109 if (bitmapblks > (daddr_t)fp->ff_blocks)
55e303ae
A
4110 bitmapblks -= fp->ff_blocks;
4111 else
4112 bitmapblks = 0;
4113
6d2010ae
A
4114 /*
4115 * The allocation bitmap can contain unused bits that are beyond end of
4116 * current volume's allocation blocks. Usually they are supposed to be
4117 * zero'ed out but there can be cases where they might be marked as used.
4118 * After extending the file system, those bits can represent valid
4119 * allocation blocks, so we mark all the bits from the end of current
4120 * volume to end of allocation bitmap as "free".
316670eb
A
4121 *
4122 * Figure out the number of overage blocks before proceeding though,
4123 * so we don't add more bytes to our I/O than necessary.
4124 * First figure out the total number of blocks representable by the
4125 * end of the bitmap file vs. the total number of blocks in the new FS.
4126 * Then subtract away the number of blocks in the current FS. This is how much
4127 * we can mark as free right now without having to grow the bitmap file.
6d2010ae 4128 */
316670eb
A
4129 overage_blocks = fp->ff_blocks * vcb->blockSize * 8;
4130 overage_blocks = MIN (overage_blocks, newblkcnt);
4131 overage_blocks -= vcb->totalBlocks;
4132
4133 BlockMarkFreeUnused(vcb, vcb->totalBlocks, overage_blocks);
6d2010ae 4134
55e303ae 4135 if (bitmapblks > 0) {
91447636 4136 daddr64_t blkno;
55e303ae 4137 daddr_t blkcnt;
2d21ac55 4138 off_t bytesAdded;
55e303ae
A
4139
4140 /*
2d21ac55
A
4141 * Get the bitmap's current size (in allocation blocks) so we know
4142 * where to start zero filling once the new space is added. We've
4143 * got to do this before the bitmap is grown.
55e303ae 4144 */
91447636 4145 blkno = (daddr64_t)fp->ff_blocks;
2d21ac55
A
4146
4147 /*
4148 * Try to grow the allocation file in the normal way, using allocation
4149 * blocks already existing in the file system. This way, we might be
4150 * able to grow the bitmap contiguously, or at least in the metadata
4151 * zone.
4152 */
4153 error = ExtendFileC(vcb, fp, bitmapblks * vcb->blockSize, 0,
6d2010ae
A
4154 kEFAllMask | kEFNoClumpMask | kEFReserveMask
4155 | kEFMetadataMask | kEFContigMask, &bytesAdded);
2d21ac55
A
4156
4157 if (error == 0) {
4158 usedExtendFileC = true;
4159 } else {
4160 /*
4161 * If the above allocation failed, fall back to allocating the new
4162 * extent of the bitmap from the space we're going to add. Since those
4163 * blocks don't yet belong to the file system, we have to update the
4164 * extent list directly, and manually adjust the file size.
4165 */
4166 bytesAdded = 0;
4167 error = AddFileExtent(vcb, fp, vcb->totalBlocks, bitmapblks);
4168 if (error) {
4169 printf("hfs_extendfs: error %d adding extents\n", error);
4170 goto out;
4171 }
4172 fp->ff_blocks += bitmapblks;
4173 VTOC(vp)->c_blocks = fp->ff_blocks;
4174 VTOC(vp)->c_flag |= C_MODIFIED;
4175 }
4176
4177 /*
4178 * Update the allocation file's size to include the newly allocated
4179 * blocks. Note that ExtendFileC doesn't do this, which is why this
4180 * statement is outside the above "if" statement.
4181 */
55e303ae 4182 fp->ff_size += (u_int64_t)bitmapblks * (u_int64_t)vcb->blockSize;
2d21ac55 4183
55e303ae
A
4184 /*
4185 * Zero out the new bitmap blocks.
4186 */
4187 {
4188
4189 bp = NULL;
2d21ac55 4190 blkcnt = bitmapblks;
55e303ae 4191 while (blkcnt > 0) {
91447636 4192 error = (int)buf_meta_bread(vp, blkno, vcb->blockSize, NOCRED, &bp);
55e303ae
A
4193 if (error) {
4194 if (bp) {
91447636 4195 buf_brelse(bp);
55e303ae
A
4196 }
4197 break;
4198 }
91447636
A
4199 bzero((char *)buf_dataptr(bp), vcb->blockSize);
4200 buf_markaged(bp);
4201 error = (int)buf_bwrite(bp);
55e303ae
A
4202 if (error)
4203 break;
4204 --blkcnt;
4205 ++blkno;
4206 }
4207 }
4208 if (error) {
4209 printf("hfs_extendfs: error %d clearing blocks\n", error);
4210 goto out;
4211 }
4212 /*
4213 * Mark the new bitmap space as allocated.
2d21ac55
A
4214 *
4215 * Note that ExtendFileC will have marked any blocks it allocated, so
4216 * this is only needed if we used AddFileExtent. Also note that this
4217 * has to come *after* the zero filling of new blocks in the case where
4218 * we used AddFileExtent (since the part of the bitmap we're touching
4219 * is in those newly allocated blocks).
55e303ae 4220 */
2d21ac55
A
4221 if (!usedExtendFileC) {
4222 error = BlockMarkAllocated(vcb, vcb->totalBlocks, bitmapblks);
4223 if (error) {
4224 printf("hfs_extendfs: error %d setting bitmap\n", error);
4225 goto out;
4226 }
4227 vcb->freeBlocks -= bitmapblks;
55e303ae
A
4228 }
4229 }
4230 /*
4231 * Mark the new alternate VH as allocated.
4232 */
4233 if (vcb->blockSize == 512)
4234 error = BlockMarkAllocated(vcb, vcb->totalBlocks + addblks - 2, 2);
4235 else
4236 error = BlockMarkAllocated(vcb, vcb->totalBlocks + addblks - 1, 1);
4237 if (error) {
4238 printf("hfs_extendfs: error %d setting bitmap (VH)\n", error);
4239 goto out;
4240 }
4241 /*
4242 * Mark the old alternate VH as free.
4243 */
4244 if (vcb->blockSize == 512)
4245 (void) BlockMarkFree(vcb, vcb->totalBlocks - 2, 2);
4246 else
4247 (void) BlockMarkFree(vcb, vcb->totalBlocks - 1, 1);
55e303ae
A
4248 /*
4249 * Adjust file system variables for new space.
4250 */
4251 vcb->totalBlocks += addblks;
2d21ac55 4252 vcb->freeBlocks += addblks;
55e303ae
A
4253 MarkVCBDirty(vcb);
4254 error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
4255 if (error) {
4256 printf("hfs_extendfs: couldn't flush volume headers (%d)", error);
4257 /*
4258 * Restore to old state.
4259 */
2d21ac55 4260 if (usedExtendFileC) {
6d2010ae
A
4261 (void) TruncateFileC(vcb, fp, oldBitmapSize, 0, FORK_IS_RSRC(fp),
4262 FTOC(fp)->c_fileid, false);
2d21ac55
A
4263 } else {
4264 fp->ff_blocks -= bitmapblks;
4265 fp->ff_size -= (u_int64_t)bitmapblks * (u_int64_t)vcb->blockSize;
4266 /*
4267 * No need to mark the excess blocks free since those bitmap blocks
4268 * are no longer part of the bitmap. But we do need to undo the
4269 * effect of the "vcb->freeBlocks -= bitmapblks" above.
4270 */
4271 vcb->freeBlocks += bitmapblks;
4272 }
55e303ae 4273 vcb->totalBlocks -= addblks;
2d21ac55 4274 vcb->freeBlocks -= addblks;
593a1d5f 4275 hfsmp->hfs_logical_block_count = prev_phys_block_count;
91447636 4276 hfsmp->hfs_alt_id_sector = prev_alt_sector;
55e303ae 4277 MarkVCBDirty(vcb);
6d2010ae
A
4278 if (vcb->blockSize == 512) {
4279 if (BlockMarkAllocated(vcb, vcb->totalBlocks - 2, 2)) {
4280 hfs_mark_volume_inconsistent(hfsmp);
4281 }
4282 } else {
4283 if (BlockMarkAllocated(vcb, vcb->totalBlocks - 1, 1)) {
4284 hfs_mark_volume_inconsistent(hfsmp);
4285 }
4286 }
55e303ae
A
4287 goto out;
4288 }
4289 /*
4290 * Invalidate the old alternate volume header.
4291 */
4292 bp = NULL;
91447636 4293 if (prev_alt_sector) {
593a1d5f
A
4294 if (buf_meta_bread(hfsmp->hfs_devvp,
4295 HFS_PHYSBLK_ROUNDDOWN(prev_alt_sector, hfsmp->hfs_log_per_phys),
4296 hfsmp->hfs_physical_block_size, NOCRED, &bp) == 0) {
91447636
A
4297 journal_modify_block_start(hfsmp->jnl, bp);
4298
593a1d5f 4299 bzero((char *)buf_dataptr(bp) + HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size), kMDBSize);
91447636 4300
2d21ac55 4301 journal_modify_block_end(hfsmp->jnl, bp, NULL, NULL);
91447636
A
4302 } else if (bp) {
4303 buf_brelse(bp);
4304 }
55e303ae 4305 }
2d21ac55 4306
0b4c1975
A
4307 /*
4308 * Update the metadata zone size based on current volume size
2d21ac55 4309 */
6d2010ae 4310 hfs_metadatazone_init(hfsmp, false);
2d21ac55
A
4311
4312 /*
4313 * Adjust the size of hfsmp->hfs_attrdata_vp
4314 */
4315 if (hfsmp->hfs_attrdata_vp) {
4316 struct cnode *attr_cp;
4317 struct filefork *attr_fp;
4318
4319 if (vnode_get(hfsmp->hfs_attrdata_vp) == 0) {
4320 attr_cp = VTOC(hfsmp->hfs_attrdata_vp);
4321 attr_fp = VTOF(hfsmp->hfs_attrdata_vp);
4322
4323 attr_cp->c_blocks = newblkcnt;
4324 attr_fp->ff_blocks = newblkcnt;
4325 attr_fp->ff_extents[0].blockCount = newblkcnt;
4326 attr_fp->ff_size = (off_t) newblkcnt * hfsmp->blockSize;
4327 ubc_setsize(hfsmp->hfs_attrdata_vp, attr_fp->ff_size);
4328 vnode_put(hfsmp->hfs_attrdata_vp);
4329 }
4330 }
4331
6d2010ae
A
4332 /*
4333 * Update the R/B Tree if necessary. Since we don't have to drop the systemfile
4334 * locks in the middle of these operations like we do in the truncate case
4335 * where we have to relocate files, we can only update the red-black tree
4336 * if there were actual changes made to the bitmap. Also, we can't really scan the
4337 * new portion of the bitmap before it has been allocated. The BlockMarkAllocated
4338 * routines are smart enough to avoid the r/b tree if the portion they are manipulating is
4339 * not currently controlled by the tree.
4340 *
4341 * We only update hfsmp->allocLimit if totalBlocks actually increased.
4342 */
6d2010ae
A
4343 if (error == 0) {
4344 UpdateAllocLimit(hfsmp, hfsmp->totalBlocks);
4345 }
316670eb
A
4346
4347 /* Release all locks and sync up journal content before
4348 * checking and extending, if required, the journal
4349 */
4350 if (lockflags) {
4351 hfs_systemfile_unlock(hfsmp, lockflags);
4352 lockflags = 0;
4353 }
4354 if (transaction_begun) {
4355 hfs_end_transaction(hfsmp);
4356 hfs_journal_flush(hfsmp, TRUE);
4357 transaction_begun = 0;
4358 }
4359
4360 /* Increase the journal size, if required. */
4361 error = hfs_extend_journal(hfsmp, sector_size, sector_count, context);
4362 if (error) {
4363 printf ("hfs_extendfs: Could not extend journal size\n");
4364 goto out_noalloc;
4365 }
4366
6d2010ae
A
4367 /* Log successful extending */
4368 printf("hfs_extendfs: extended \"%s\" to %d blocks (was %d blocks)\n",
4369 hfsmp->vcbVN, hfsmp->totalBlocks, (u_int32_t)(oldsize/hfsmp->blockSize));
4370
55e303ae
A
4371out:
4372 if (error && fp) {
4373 /* Restore allocation fork. */
4374 bcopy(&forkdata, &fp->ff_data, sizeof(forkdata));
4375 VTOC(vp)->c_blocks = fp->ff_blocks;
6d2010ae 4376
55e303ae 4377 }
316670eb
A
4378
4379out_noalloc:
d1ecb069
A
4380 HFS_MOUNT_LOCK(hfsmp, TRUE);
4381 hfsmp->hfs_flags &= ~HFS_RESIZE_IN_PROGRESS;
d1ecb069
A
4382 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
4383 if (lockflags) {
4384 hfs_systemfile_unlock(hfsmp, lockflags);
4385 }
4386 if (transaction_begun) {
4387 hfs_end_transaction(hfsmp);
e2d2fc5c
A
4388 hfs_journal_flush(hfsmp, FALSE);
4389 /* Just to be sure, sync all data to the disk */
4390 (void) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
d1ecb069 4391 }
91447636 4392
6d2010ae 4393 return MacToVFSError(error);
91447636
A
4394}
4395
/* Smallest volume size, in bytes (32 MB), that hfs_truncatefs() will shrink a file system to; smaller requests fail with EINVAL. */
4396#define HFS_MIN_SIZE (32LL * 1024LL * 1024LL)
4397
4398/*
4399 * Truncate a file system (while still mounted).
4400 */
91447636 4401int
2d21ac55 4402hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
91447636 4403{
91447636
A
4404 struct buf *bp = NULL;
4405 u_int64_t oldsize;
4406 u_int32_t newblkcnt;
2d21ac55 4407 u_int32_t reclaimblks = 0;
91447636
A
4408 int lockflags = 0;
4409 int transaction_begun = 0;
b7266188 4410 Boolean updateFreeBlocks = false;
6d2010ae
A
4411 Boolean disable_sparse = false;
4412 int error = 0;
91447636 4413
6d2010ae 4414 lck_mtx_lock(&hfsmp->hfs_mutex);
0c530ab8 4415 if (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) {
6d2010ae 4416 lck_mtx_unlock(&hfsmp->hfs_mutex);
0c530ab8 4417 return (EALREADY);
91447636 4418 }
0c530ab8 4419 hfsmp->hfs_flags |= HFS_RESIZE_IN_PROGRESS;
6d2010ae
A
4420 hfsmp->hfs_resize_blocksmoved = 0;
4421 hfsmp->hfs_resize_totalblocks = 0;
4422 hfsmp->hfs_resize_progress = 0;
4423 lck_mtx_unlock(&hfsmp->hfs_mutex);
0c530ab8 4424
91447636 4425 /*
0c530ab8 4426 * - Journaled HFS Plus volumes only.
91447636
A
4427 * - No embedded volumes.
4428 */
0c530ab8 4429 if ((hfsmp->jnl == NULL) ||
91447636
A
4430 (hfsmp->hfsPlusIOPosOffset != 0)) {
4431 error = EPERM;
4432 goto out;
4433 }
4434 oldsize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
4435 newblkcnt = newsize / hfsmp->blockSize;
4436 reclaimblks = hfsmp->totalBlocks - newblkcnt;
4437
b7266188
A
4438 if (hfs_resize_debug) {
4439 printf ("hfs_truncatefs: old: size=%qu, blkcnt=%u, freeblks=%u\n", oldsize, hfsmp->totalBlocks, hfs_freeblks(hfsmp, 1));
4440 printf ("hfs_truncatefs: new: size=%qu, blkcnt=%u, reclaimblks=%u\n", newsize, newblkcnt, reclaimblks);
4441 }
4442
91447636
A
4443 /* Make sure new size is valid. */
4444 if ((newsize < HFS_MIN_SIZE) ||
4445 (newsize >= oldsize) ||
593a1d5f
A
4446 (newsize % hfsmp->hfs_logical_block_size) ||
4447 (newsize % hfsmp->hfs_physical_block_size)) {
b7266188 4448 printf ("hfs_truncatefs: invalid size (newsize=%qu, oldsize=%qu)\n", newsize, oldsize);
91447636
A
4449 error = EINVAL;
4450 goto out;
4451 }
6d2010ae
A
4452
4453 /*
4454 * Make sure that the file system has enough free blocks reclaim.
4455 *
4456 * Before resize, the disk is divided into four zones -
4457 * A. Allocated_Stationary - These are allocated blocks that exist
4458 * before the new end of disk. These blocks will not be
4459 * relocated or modified during resize.
4460 * B. Free_Stationary - These are free blocks that exist before the
4461 * new end of disk. These blocks can be used for any new
4462 * allocations during resize, including allocation for relocating
4463 * data from the area of disk being reclaimed.
4464 * C. Allocated_To-Reclaim - These are allocated blocks that exist
4465 * beyond the new end of disk. These blocks need to be reclaimed
4466 * during resize by allocating equal number of blocks in Free
4467 * Stationary zone and copying the data.
4468 * D. Free_To-Reclaim - These are free blocks that exist beyond the
4469 * new end of disk. Nothing special needs to be done to reclaim
4470 * them.
4471 *
4472 * Total number of blocks on the disk before resize:
4473 * ------------------------------------------------
4474 * Total Blocks = Allocated_Stationary + Free_Stationary +
4475 * Allocated_To-Reclaim + Free_To-Reclaim
4476 *
4477 * Total number of blocks that need to be reclaimed:
4478 * ------------------------------------------------
4479 * Blocks to Reclaim = Allocated_To-Reclaim + Free_To-Reclaim
4480 *
4481 * Note that the check below also makes sure that we have enough space
4482 * to relocate data from Allocated_To-Reclaim to Free_Stationary.
4483 * Therefore we do not need to check total number of blocks to relocate
4484 * later in the code.
4485 *
4486 * The condition below gets converted to:
4487 *
4488 * Allocated To-Reclaim + Free To-Reclaim >= Free Stationary + Free To-Reclaim
4489 *
4490 * which is equivalent to:
4491 *
4492 * Allocated To-Reclaim >= Free Stationary
4493 */
4494 if (reclaimblks >= hfs_freeblks(hfsmp, 1)) {
4495 printf("hfs_truncatefs: insufficient space (need %u blocks; have %u free blocks)\n", reclaimblks, hfs_freeblks(hfsmp, 1));
4496 error = ENOSPC;
4497 goto out;
4498 }
4499
4500 /* Start with a clean journal. */
4501 hfs_journal_flush(hfsmp, TRUE);
2d21ac55 4502
91447636
A
4503 if (hfs_start_transaction(hfsmp) != 0) {
4504 error = EINVAL;
4505 goto out;
4506 }
4507 transaction_begun = 1;
6d2010ae
A
4508
4509 /* Take the bitmap lock to update the alloc limit field */
4510 lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
4511
91447636 4512 /*
2d21ac55
A
4513 * Prevent new allocations from using the part we're trying to truncate.
4514 *
4515 * NOTE: allocLimit is set to the allocation block number where the new
4516 * alternate volume header will be. That way there will be no files to
4517 * interfere with allocating the new alternate volume header, and no files
4518 * in the allocation blocks beyond (i.e. the blocks we're trying to
4519 * truncate away.
6d2010ae
A
4520 *
4521 * Also shrink the red-black tree if needed.
4522 */
4523 if (hfsmp->blockSize == 512) {
4524 error = UpdateAllocLimit (hfsmp, newblkcnt - 2);
4525 }
4526 else {
4527 error = UpdateAllocLimit (hfsmp, newblkcnt - 1);
4528 }
4529
4530 /* Sparse devices use first fit allocation which is not ideal
4531 * for volume resize which requires best fit allocation. If a
4532 * sparse device is being truncated, disable the sparse device
4533 * property temporarily for the duration of resize. Also reset
4534 * the free extent cache so that it is rebuilt as sorted by
4535 * totalBlocks instead of startBlock.
4536 *
4537 * Note that this will affect all allocations on the volume and
4538 * ideal fix would be just to modify resize-related allocations,
4539 * but it will result in complexity like handling of two free
4540 * extent caches sorted differently, etc. So we stick to this
4541 * solution for now.
2d21ac55 4542 */
b7266188 4543 HFS_MOUNT_LOCK(hfsmp, TRUE);
6d2010ae
A
4544 if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
4545 hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
4546 ResetVCBFreeExtCache(hfsmp);
4547 disable_sparse = true;
4548 }
4549
0b4c1975
A
4550 /*
4551 * Update the volume free block count to reflect the total number
4552 * of free blocks that will exist after a successful resize.
4553 * Relocation of extents will result in no net change in the total
4554 * free space on the disk. Therefore the code that allocates
4555 * space for new extent and deallocates the old extent explicitly
4556 * prevents updating the volume free block count. It will also
4557 * prevent false disk full error when the number of blocks in
4558 * an extent being relocated is more than the free blocks that
4559 * will exist after the volume is resized.
b7266188 4560 */
2d21ac55 4561 hfsmp->freeBlocks -= reclaimblks;
b7266188 4562 updateFreeBlocks = true;
6d2010ae
A
4563 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
4564
4565 if (lockflags) {
4566 hfs_systemfile_unlock(hfsmp, lockflags);
4567 lockflags = 0;
4568 }
4569
0b4c1975 4570 /*
6d2010ae
A
4571 * Update the metadata zone size to match the new volume size,
4572 * and if it too less, metadata zone might be disabled.
0b4c1975 4573 */
6d2010ae 4574 hfs_metadatazone_init(hfsmp, false);
0b4c1975 4575
2d21ac55 4576 /*
6d2010ae
A
4577 * If some files have blocks at or beyond the location of the
4578 * new alternate volume header, recalculate free blocks and
4579 * reclaim blocks. Otherwise just update free blocks count.
4580 *
4581 * The current allocLimit is set to the location of new alternate
4582 * volume header, and reclaimblks are the total number of blocks
4583 * that need to be reclaimed. So the check below is really
4584 * ignoring the blocks allocated for old alternate volume header.
91447636 4585 */
2d21ac55 4586 if (hfs_isallocated(hfsmp, hfsmp->allocLimit, reclaimblks)) {
91447636
A
4587 /*
4588 * hfs_reclaimspace will use separate transactions when
4589 * relocating files (so we don't overwhelm the journal).
4590 */
4591 hfs_end_transaction(hfsmp);
4592 transaction_begun = 0;
4593
4594 /* Attempt to reclaim some space. */
b7266188
A
4595 error = hfs_reclaimspace(hfsmp, hfsmp->allocLimit, reclaimblks, context);
4596 if (error != 0) {
4597 printf("hfs_truncatefs: couldn't reclaim space on %s (error=%d)\n", hfsmp->vcbVN, error);
91447636
A
4598 error = ENOSPC;
4599 goto out;
4600 }
4601 if (hfs_start_transaction(hfsmp) != 0) {
4602 error = EINVAL;
4603 goto out;
4604 }
4605 transaction_begun = 1;
4606
4607 /* Check if we're clear now. */
b7266188
A
4608 error = hfs_isallocated(hfsmp, hfsmp->allocLimit, reclaimblks);
4609 if (error != 0) {
4610 printf("hfs_truncatefs: didn't reclaim enough space on %s (error=%d)\n", hfsmp->vcbVN, error);
0c530ab8 4611 error = EAGAIN; /* tell client to try again */
91447636
A
4612 goto out;
4613 }
6d2010ae
A
4614 }
4615
2d21ac55
A
4616 /*
4617 * Note: we take the attributes lock in case we have an attribute data vnode
4618 * which needs to change size.
4619 */
4620 lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
91447636 4621
91447636 4622 /*
2d21ac55 4623 * Allocate last 1KB for alternate volume header.
91447636 4624 */
2d21ac55 4625 error = BlockMarkAllocated(hfsmp, hfsmp->allocLimit, (hfsmp->blockSize == 512) ? 2 : 1);
91447636 4626 if (error) {
2d21ac55 4627 printf("hfs_truncatefs: Error %d allocating new alternate volume header\n", error);
91447636
A
4628 goto out;
4629 }
55e303ae 4630
6d2010ae
A
4631 /*
4632 * Mark the old alternate volume header as free.
4633 * We don't bother shrinking allocation bitmap file.
4634 */
4635 if (hfsmp->blockSize == 512)
4636 (void) BlockMarkFree(hfsmp, hfsmp->totalBlocks - 2, 2);
4637 else
4638 (void) BlockMarkFree(hfsmp, hfsmp->totalBlocks - 1, 1);
4639
91447636
A
4640 /*
4641 * Invalidate the existing alternate volume header.
0c530ab8 4642 *
2d21ac55 4643 * Don't include this in a transaction (don't call journal_modify_block)
0c530ab8 4644 * since this block will be outside of the truncated file system!
91447636
A
4645 */
4646 if (hfsmp->hfs_alt_id_sector) {
b7266188 4647 error = buf_meta_bread(hfsmp->hfs_devvp,
593a1d5f 4648 HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys),
b7266188
A
4649 hfsmp->hfs_physical_block_size, NOCRED, &bp);
4650 if (error == 0) {
593a1d5f 4651 bzero((void*)((char *)buf_dataptr(bp) + HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size)), kMDBSize);
0c530ab8 4652 (void) VNOP_BWRITE(bp);
b7266188
A
4653 } else {
4654 if (bp) {
4655 buf_brelse(bp);
4656 }
91447636
A
4657 }
4658 bp = NULL;
4659 }
4660
0c530ab8
A
4661 /* Log successful shrinking. */
4662 printf("hfs_truncatefs: shrank \"%s\" to %d blocks (was %d blocks)\n",
4663 hfsmp->vcbVN, newblkcnt, hfsmp->totalBlocks);
4664
91447636
A
4665 /*
4666 * Adjust file system variables and flush them to disk.
4667 */
91447636 4668 hfsmp->totalBlocks = newblkcnt;
593a1d5f 4669 hfsmp->hfs_logical_block_count = newsize / hfsmp->hfs_logical_block_size;
316670eb
A
4670 hfsmp->hfs_logical_bytes = (uint64_t) hfsmp->hfs_logical_block_count * (uint64_t) hfsmp->hfs_logical_block_size;
4671
4672 /*
4673 * Note that although the logical block size is updated here, it is only done for
4674 * the benefit of the partition management software. The logical block count change
4675 * has not yet actually been propagated to the disk device yet.
4676 */
4677
593a1d5f 4678 hfsmp->hfs_alt_id_sector = HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, hfsmp->hfs_logical_block_count);
91447636
A
4679 MarkVCBDirty(hfsmp);
4680 error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
4681 if (error)
4682 panic("hfs_truncatefs: unexpected error flushing volume header (%d)\n", error);
6d2010ae 4683
2d21ac55
A
4684 /*
4685 * Adjust the size of hfsmp->hfs_attrdata_vp
4686 */
4687 if (hfsmp->hfs_attrdata_vp) {
4688 struct cnode *cp;
4689 struct filefork *fp;
4690
4691 if (vnode_get(hfsmp->hfs_attrdata_vp) == 0) {
4692 cp = VTOC(hfsmp->hfs_attrdata_vp);
4693 fp = VTOF(hfsmp->hfs_attrdata_vp);
4694
4695 cp->c_blocks = newblkcnt;
4696 fp->ff_blocks = newblkcnt;
4697 fp->ff_extents[0].blockCount = newblkcnt;
4698 fp->ff_size = (off_t) newblkcnt * hfsmp->blockSize;
4699 ubc_setsize(hfsmp->hfs_attrdata_vp, fp->ff_size);
4700 vnode_put(hfsmp->hfs_attrdata_vp);
4701 }
4702 }
4703
91447636 4704out:
6d2010ae
A
4705 /*
4706 * Update the allocLimit to acknowledge the last one or two blocks now.
4707 * Add it to the tree as well if necessary.
4708 */
4709 UpdateAllocLimit (hfsmp, hfsmp->totalBlocks);
4710
4711 HFS_MOUNT_LOCK(hfsmp, TRUE);
4712 if (disable_sparse == true) {
4713 /* Now that resize is completed, set the volume to be sparse
4714 * device again so that all further allocations will be first
4715 * fit instead of best fit. Reset free extent cache so that
4716 * it is rebuilt.
4717 */
4718 hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
4719 ResetVCBFreeExtCache(hfsmp);
4720 }
4721
4722 if (error && (updateFreeBlocks == true)) {
b7266188 4723 hfsmp->freeBlocks += reclaimblks;
6d2010ae
A
4724 }
4725
4726 if (hfsmp->nextAllocation >= hfsmp->allocLimit) {
2d21ac55 4727 hfsmp->nextAllocation = hfsmp->hfs_metazone_end + 1;
6d2010ae 4728 }
2d21ac55 4729 hfsmp->hfs_flags &= ~HFS_RESIZE_IN_PROGRESS;
6d2010ae
A
4730 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
4731
0b4c1975
A
4732 /* On error, reset the metadata zone for original volume size */
4733 if (error && (updateFreeBlocks == true)) {
6d2010ae 4734 hfs_metadatazone_init(hfsmp, false);
0b4c1975 4735 }
2d21ac55 4736
91447636
A
4737 if (lockflags) {
4738 hfs_systemfile_unlock(hfsmp, lockflags);
4739 }
4740 if (transaction_begun) {
4741 hfs_end_transaction(hfsmp);
6d2010ae 4742 hfs_journal_flush(hfsmp, FALSE);
b7266188
A
4743 /* Just to be sure, sync all data to the disk */
4744 (void) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
91447636 4745 }
0c530ab8 4746
6d2010ae 4747 return MacToVFSError(error);
55e303ae
A
4748}
4749
0c530ab8 4750
91447636 4751/*
2d21ac55
A
4752 * Invalidate the physical block numbers associated with buffer cache blocks
4753 * in the given extent of the given vnode.
91447636 4754 */
2d21ac55
A
/* Device sector range passed as the opaque argument to hfs_invalidate_block_numbers_callback(). */
4755struct hfs_inval_blk_no {
4756 daddr64_t sectorStart; /* First sector of the range (inclusive) */
4757 daddr64_t sectorCount; /* Number of sectors in the range; the range ends just before sectorStart + sectorCount */
4758};
91447636 4759static int
2d21ac55 4760hfs_invalidate_block_numbers_callback(buf_t bp, void *args_in)
91447636 4761{
2d21ac55
A
4762 daddr64_t blkno;
4763 struct hfs_inval_blk_no *args;
4764
4765 blkno = buf_blkno(bp);
4766 args = args_in;
4767
4768 if (blkno >= args->sectorStart && blkno < args->sectorStart+args->sectorCount)
4769 buf_setblkno(bp, buf_lblkno(bp));
89b3af67 4770
2d21ac55
A
4771 return BUF_RETURNED;
4772}
4773static void
4774hfs_invalidate_sectors(struct vnode *vp, daddr64_t sectorStart, daddr64_t sectorCount)
4775{
4776 struct hfs_inval_blk_no args;
4777 args.sectorStart = sectorStart;
4778 args.sectorCount = sectorCount;
4779
4780 buf_iterate(vp, hfs_invalidate_block_numbers_callback, BUF_SCAN_DIRTY|BUF_SCAN_CLEAN, &args);
4781}
0c530ab8 4782
2d21ac55
A
4783
4784/*
4785 * Copy the contents of an extent to a new location. Also invalidates the
4786 * physical block number of any buffer cache block in the copied extent
4787 * (so that if the block is written, it will go through VNOP_BLOCKMAP to
4788 * determine the new physical block number).
316670eb
A
4789 *
4790 * At this point, for regular files, we hold the truncate lock exclusive
4791 * and the cnode lock exclusive.
2d21ac55
A
4792 */
4793static int
4794hfs_copy_extent(
4795 struct hfsmount *hfsmp,
4796 struct vnode *vp, /* The file whose extent is being copied. */
4797 u_int32_t oldStart, /* The start of the source extent. */
4798 u_int32_t newStart, /* The start of the destination extent. */
4799 u_int32_t blockCount, /* The number of allocation blocks to copy. */
4800 vfs_context_t context)
4801{
4802 int err = 0;
4803 size_t bufferSize;
4804 void *buffer = NULL;
4805 struct vfsioattr ioattr;
4806 buf_t bp = NULL;
4807 off_t resid;
4808 size_t ioSize;
4809 u_int32_t ioSizeSectors; /* Device sectors in this I/O */
4810 daddr64_t srcSector, destSector;
593a1d5f 4811 u_int32_t sectorsPerBlock = hfsmp->blockSize / hfsmp->hfs_logical_block_size;
6d2010ae
A
4812#if CONFIG_PROTECT
4813 int cpenabled = 0;
4814#endif
2d21ac55
A
4815
4816 /*
4817 * Sanity check that we have locked the vnode of the file we're copying.
4818 *
4819 * But since hfs_systemfile_lock() doesn't actually take the lock on
4820 * the allocation file if a journal is active, ignore the check if the
4821 * file being copied is the allocation file.
4822 */
4823 struct cnode *cp = VTOC(vp);
4824 if (cp != hfsmp->hfs_allocation_cp && cp->c_lockowner != current_thread())
4825 panic("hfs_copy_extent: vp=%p (cp=%p) not owned?\n", vp, cp);
4826
6d2010ae 4827#if CONFIG_PROTECT
316670eb
A
4828 /*
4829 * Prepare the CP blob and get it ready for use, if necessary.
4830 *
4831 * Note that we specifically *exclude* system vnodes (catalog, bitmap, extents, EAs),
4832 * because they are implicitly protected via the media key on iOS. As such, they
4833 * must not be relocated except with the media key. So it is OK to not pass down
4834 * a special cpentry to the IOMedia/LwVM code for handling.
4835 */
4836 if (!vnode_issystem (vp) && vnode_isreg(vp) && cp_fs_protected (hfsmp->hfs_mp)) {
6d2010ae 4837 int cp_err = 0;
316670eb
A
4838 /*
4839 * Ideally, the file whose extents we are about to manipulate is using the
4840 * newer offset-based IVs so that we can manipulate it regardless of the
4841 * current lock state. However, we must maintain support for older-style
4842 * EAs.
4843 *
4844 * For the older EA case, the IV was tied to the device LBA for file content.
4845 * This means that encrypted data cannot be moved from one location to another
4846 * in the filesystem without garbling the IV data. As a result, we need to
4847 * access the file's plaintext because we cannot do our AES-symmetry trick
4848 * here. This requires that we attempt a key-unwrap here (via cp_handle_relocate)
4849 * to make forward progress. If the keys are unavailable then we will
4850 * simply stop the resize in its tracks here since we cannot move
4851 * this extent at this time.
4852 */
4853 if ((cp->c_cpentry->cp_flags & CP_OFF_IV_ENABLED) == 0) {
4854 cp_err = cp_handle_relocate(cp, hfsmp);
4855 }
4856
6d2010ae 4857 if (cp_err) {
316670eb 4858 printf ("hfs_copy_extent: cp_handle_relocate failed (%d) \n", cp_err);
6d2010ae
A
4859 return cp_err;
4860 }
316670eb
A
4861
4862 cpenabled = 1;
6d2010ae
A
4863 }
4864#endif
4865
316670eb 4866
2d21ac55
A
4867 /*
4868 * Determine the I/O size to use
4869 *
4870 * NOTE: Many external drives will result in an ioSize of 128KB.
4871 * TODO: Should we use a larger buffer, doing several consecutive
4872 * reads, then several consecutive writes?
4873 */
4874 vfs_ioattr(hfsmp->hfs_mp, &ioattr);
4875 bufferSize = MIN(ioattr.io_maxreadcnt, ioattr.io_maxwritecnt);
4876 if (kmem_alloc(kernel_map, (vm_offset_t*) &buffer, bufferSize))
4877 return ENOMEM;
4878
4879 /* Get a buffer for doing the I/O */
4880 bp = buf_alloc(hfsmp->hfs_devvp);
4881 buf_setdataptr(bp, (uintptr_t)buffer);
4882
4883 resid = (off_t) blockCount * (off_t) hfsmp->blockSize;
593a1d5f
A
4884 srcSector = (daddr64_t) oldStart * hfsmp->blockSize / hfsmp->hfs_logical_block_size;
4885 destSector = (daddr64_t) newStart * hfsmp->blockSize / hfsmp->hfs_logical_block_size;
2d21ac55 4886 while (resid > 0) {
b0d623f7 4887 ioSize = MIN(bufferSize, (size_t) resid);
593a1d5f 4888 ioSizeSectors = ioSize / hfsmp->hfs_logical_block_size;
2d21ac55
A
4889
4890 /* Prepare the buffer for reading */
4891 buf_reset(bp, B_READ);
4892 buf_setsize(bp, ioSize);
4893 buf_setcount(bp, ioSize);
4894 buf_setblkno(bp, srcSector);
4895 buf_setlblkno(bp, srcSector);
6d2010ae 4896
316670eb
A
4897 /*
4898 * Note that because this is an I/O to the device vp
4899 * it is correct to have lblkno and blkno both point to the
4900 * start sector being read from. If it were being issued against the
4901 * underlying file then that would be different.
4902 */
4903
4904 /* Attach the new CP blob to the buffer if needed */
6d2010ae
A
4905#if CONFIG_PROTECT
4906 if (cpenabled) {
316670eb
A
4907 if (cp->c_cpentry->cp_flags & CP_OFF_IV_ENABLED) {
4908 /* attach the RELOCATION_INFLIGHT flag for the underlying call to VNOP_STRATEGY */
4909 cp->c_cpentry->cp_flags |= CP_RELOCATION_INFLIGHT;
4910 buf_setcpaddr(bp, hfsmp->hfs_resize_cpentry);
4911 }
4912 else {
4913 /*
4914 * Use the cnode's cp key. This file is tied to the
4915 * LBAs of the physical blocks that it occupies.
4916 */
4917 buf_setcpaddr (bp, cp->c_cpentry);
4918 }
4919
4920 /* Initialize the content protection file offset to start at 0 */
4921 buf_setcpoff (bp, 0);
6d2010ae
A
4922 }
4923#endif
4924
2d21ac55
A
4925 /* Do the read */
4926 err = VNOP_STRATEGY(bp);
4927 if (!err)
4928 err = buf_biowait(bp);
4929 if (err) {
316670eb
A
4930#if CONFIG_PROTECT
4931 /* Turn the flag off in error cases. */
4932 if (cpenabled) {
4933 cp->c_cpentry->cp_flags &= ~CP_RELOCATION_INFLIGHT;
4934 }
4935#endif
2d21ac55
A
4936 printf("hfs_copy_extent: Error %d from VNOP_STRATEGY (read)\n", err);
4937 break;
91447636 4938 }
2d21ac55
A
4939
4940 /* Prepare the buffer for writing */
4941 buf_reset(bp, B_WRITE);
4942 buf_setsize(bp, ioSize);
4943 buf_setcount(bp, ioSize);
4944 buf_setblkno(bp, destSector);
4945 buf_setlblkno(bp, destSector);
b7266188 4946 if (vnode_issystem(vp) && journal_uses_fua(hfsmp->jnl))
2d21ac55 4947 buf_markfua(bp);
6d2010ae
A
4948
4949#if CONFIG_PROTECT
316670eb 4950 /* Attach the CP to the buffer if needed */
6d2010ae 4951 if (cpenabled) {
316670eb
A
4952 if (cp->c_cpentry->cp_flags & CP_OFF_IV_ENABLED) {
4953 buf_setcpaddr(bp, hfsmp->hfs_resize_cpentry);
4954 }
4955 else {
4956 /*
4957 * Use the cnode's CP key. This file is still tied
4958 * to the LBAs of the physical blocks that it occupies.
4959 */
4960 buf_setcpaddr (bp, cp->c_cpentry);
4961 }
4962 /*
4963 * The last STRATEGY call may have updated the cp file offset behind our
4964 * back, so we cannot trust it. Re-initialize the content protection
4965 * file offset back to 0 before initiating the write portion of this I/O.
4966 */
4967 buf_setcpoff (bp, 0);
4968 }
6d2010ae 4969#endif
2d21ac55
A
4970
4971 /* Do the write */
4972 vnode_startwrite(hfsmp->hfs_devvp);
4973 err = VNOP_STRATEGY(bp);
316670eb 4974 if (!err) {
2d21ac55 4975 err = buf_biowait(bp);
316670eb
A
4976 }
4977#if CONFIG_PROTECT
4978 /* Turn the flag off regardless once the strategy call finishes. */
4979 if (cpenabled) {
4980 cp->c_cpentry->cp_flags &= ~CP_RELOCATION_INFLIGHT;
4981 }
4982#endif
2d21ac55
A
4983 if (err) {
4984 printf("hfs_copy_extent: Error %d from VNOP_STRATEGY (write)\n", err);
4985 break;
0c530ab8 4986 }
2d21ac55
A
4987
4988 resid -= ioSize;
4989 srcSector += ioSizeSectors;
4990 destSector += ioSizeSectors;
4991 }
4992 if (bp)
4993 buf_free(bp);
4994 if (buffer)
4995 kmem_free(kernel_map, (vm_offset_t)buffer, bufferSize);
4996
4997 /* Make sure all writes have been flushed to disk. */
b7266188 4998 if (vnode_issystem(vp) && !journal_uses_fua(hfsmp->jnl)) {
2d21ac55
A
4999 err = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
5000 if (err) {
5001 printf("hfs_copy_extent: DKIOCSYNCHRONIZECACHE failed (%d)\n", err);
5002 err = 0; /* Don't fail the copy. */
5003 }
5004 }
5005
5006 if (!err)
5007 hfs_invalidate_sectors(vp, (daddr64_t)oldStart*sectorsPerBlock, (daddr64_t)blockCount*sectorsPerBlock);
5008
5009 return err;
5010}
5011
5012
6d2010ae
A
5013/* Structure to store state of reclaiming extents from a
5014 * given file. hfs_reclaim_file()/hfs_reclaim_xattr()
5015 * initializes the values in this structure which are then
5016 * used by code that reclaims and splits the extents.
5017 */
5018struct hfs_reclaim_extent_info {
5019 struct vnode *vp; /* Vnode of the file whose extents are being processed */
5020 u_int32_t fileID; /* File ID of that file; hfs_split_extent() compares it with kHFSExtentsFileID */
5021 u_int8_t forkType; /* NOTE(review): presumably selects the data or resource fork - confirm against the code that fills it in */
5022 u_int8_t is_dirlink; /* Extent belongs to directory hard link */
5023 u_int8_t is_sysfile; /* Extent belongs to system file */
5024 u_int8_t is_xattr; /* Extent belongs to extent-based xattr */
5025 u_int8_t extent_index; /* Index into 'extents' of the extent entry currently being processed */
5026 int lockflags; /* Locks that reclaim and split code should grab before modifying the extent record */
5027 u_int32_t blocks_relocated; /* Total blocks relocated for this file till now */
5028 u_int32_t recStartBlock; /* File allocation block number (FABN) for current extent record */
5029 u_int32_t cur_blockCount; /* Number of allocation blocks that have been checked for reclaim */
5030 struct filefork *catalog_fp; /* If non-NULL, extent is from catalog record */
5031 union record {
5032 HFSPlusExtentRecord overflow;/* Extent record from overflow extents btree */
5033 HFSPlusAttrRecord xattr; /* Attribute record for large EAs */
5034 } record;
5035 HFSPlusExtentDescriptor *extents; /* Pointer to current extent record being processed.
5036 * For catalog extent record, points to the correct
5037 * extent information in filefork. For overflow extent
5038 * record, or xattr record, points to extent record
5039 * in the structure above
5040 */
5041 struct cat_desc *dirlink_desc; /* NOTE(review): presumably the catalog descriptor used for directory hard links - confirm */
5042 struct cat_attr *dirlink_attr; /* NOTE(review): presumably the catalog attributes used for directory hard links - confirm */
5043 struct filefork *dirlink_fork; /* For directory hard links, fp points actually to this */
5044 struct BTreeIterator *iterator; /* Shared read/write iterator, hfs_reclaim_file/xattr()
5045 * use it for reading and hfs_reclaim_extent()/hfs_split_extent()
5046 * use it for writing updated extent record
5047 */
5048 struct FSBufferDescriptor btdata; /* Shared btdata for reading/writing extent record, same as iterator above */
5049 u_int16_t recordlen; /* NOTE(review): presumably the length of the record transferred through btdata - confirm */
5050 int overflow_count; /* For debugging, counter for overflow extent record */
5051 FCB *fcb; /* Pointer to the current btree being traversed */
5052};
b7266188 5053
6d2010ae
A
5054/*
5055 * Split the current extent into two extents, with first extent
5056 * to contain given number of allocation blocks. Splitting of
5057 * extent creates one new extent entry which can result in
5058 * shifting of many entries through all the extent records of a
5059 * file, and/or creating a new extent record in the overflow
5060 * extent btree.
2d21ac55 5061 *
6d2010ae
A
5062 * Example:
5063 * The diagram below represents two consecutive extent records,
5064 * for simplicity, lets call them record X and X+1 respectively.
5065 * Interesting extent entries have been denoted by letters.
5066 * If the letter is unchanged before and after split, it means
5067 * that the extent entry was not modified during the split.
5068 * A '.' means that the entry remains unchanged after the split
5069 * and is not relevant for our example. A '0' means that the
5070 * extent entry is empty.
2d21ac55 5071 *
6d2010ae
A
5072 * If there isn't sufficient contiguous free space to relocate
5073 * an extent (extent "C" below), we will have to break the one
5074 * extent into multiple smaller extents, and relocate each of
5075 * the smaller extents individually. The way we do this is by
5076 * finding the largest contiguous free space that is currently
5077 * available (N allocation blocks), and then convert extent "C"
5078 * into two extents, C1 and C2, that occupy exactly the same
5079 * allocation blocks as extent C. Extent C1 is the first
5080 * N allocation blocks of extent C, and extent C2 is the remainder
5081 * of extent C. Then we can relocate extent C1 since we know
5082 * we have enough contiguous free space to relocate it in its
5083 * entirety. We then repeat the process starting with extent C2.
5084 *
5085 * In record X, only the entries following entry C are shifted, and
5086 * the original entry C is replaced with two entries C1 and C2 which
5087 * are actually two extent entries for contiguous allocation blocks.
5088 *
5089 * Note that the entry E from record X is shifted into record X+1 as
5090 * the new first entry. Since the first entry of record X+1 is updated,
5091 * the FABN will also get updated with the blockCount of entry E.
5092 * This also results in shifting of all extent entries in record X+1.
5093 * Note that the number of empty entries after the split has been
5094 * changed from 3 to 2.
5095 *
5096 * Before:
5097 * record X record X+1
5098 * ---------------------===--------- ---------------------------------
5099 * | A | . | . | . | B | C | D | E | | F | . | . | . | G | 0 | 0 | 0 |
5100 * ---------------------===--------- ---------------------------------
2d21ac55 5101 *
6d2010ae
A
5102 * After:
5103 * ---------------------=======----- ---------------------------------
5104 * | A | . | . | . | B | C1| C2| D | | E | F | . | . | . | G | 0 | 0 |
5105 * ---------------------=======----- ---------------------------------
b7266188 5106 *
6d2010ae
A
5107 * C1.startBlock = C.startBlock
5108 * C1.blockCount = N
5109 *
5110 * C2.startBlock = C.startBlock + N
5111 * C2.blockCount = C.blockCount - N
5112 *
5113 * FABN = old FABN - E.blockCount
5114 *
5115 * Inputs:
5116 * extent_info - This is the structure that contains state about
5117 * the current file, extent, and extent record that
5118 * is being relocated. This structure is shared
5119 * among code that traverses through all the extents
5120 * of the file, code that relocates extents, and
5121 * code that splits the extent.
5122 * Output:
5123 * Zero on success, non-zero on failure.
2d21ac55 5124 */
6d2010ae
A
5125static int
5126hfs_split_extent(struct hfs_reclaim_extent_info *extent_info, uint32_t newBlockCount)
2d21ac55 5127{
6d2010ae
A
5128 int error = 0;
5129 int index = extent_info->extent_index;
2d21ac55 5130 int i;
13f56ec4 5131 HFSPlusExtentDescriptor shift_extent; /* Extent entry that should be shifted into next extent record */
6d2010ae
A
5132 HFSPlusExtentDescriptor last_extent;
5133 HFSPlusExtentDescriptor *extents; /* Pointer to current extent record being manipulated */
5134 HFSPlusExtentRecord *extents_rec = NULL;
5135 HFSPlusExtentKey *extents_key = NULL;
5136 HFSPlusAttrRecord *xattr_rec = NULL;
5137 HFSPlusAttrKey *xattr_key = NULL;
5138 struct BTreeIterator iterator;
5139 struct FSBufferDescriptor btdata;
5140 uint16_t reclen;
5141 uint32_t read_recStartBlock; /* Starting allocation block number to read old extent record */
5142 uint32_t write_recStartBlock; /* Starting allocation block number to insert newly updated extent record */
5143 Boolean create_record = false;
5144 Boolean is_xattr;
13f56ec4 5145 struct cnode *cp;
6d2010ae
A
5146
5147 is_xattr = extent_info->is_xattr;
5148 extents = extent_info->extents;
13f56ec4 5149 cp = VTOC(extent_info->vp);
2d21ac55 5150
6d2010ae
A
5151 if (hfs_resize_debug) {
5152 printf ("hfs_split_extent: Split record:%u recStartBlock=%u %u:(%u,%u) for %u blocks\n", extent_info->overflow_count, extent_info->recStartBlock, index, extents[index].startBlock, extents[index].blockCount, newBlockCount);
5153 }
b7266188 5154
13f56ec4
A
5155 /* Extents overflow btree can not have more than 8 extents.
5156 * No split allowed if the 8th extent is already used.
5157 */
5158 if ((extent_info->fileID == kHFSExtentsFileID) && (extents[kHFSPlusExtentDensity - 1].blockCount != 0)) {
5159 printf ("hfs_split_extent: Maximum 8 extents allowed for extents overflow btree, cannot split further.\n");
5160 error = ENOSPC;
5161 goto out;
5162 }
5163
6d2010ae
A
5164 /* Determine the starting allocation block number for the following
5165 * overflow extent record, if any, before the current record
5166 * gets modified.
b7266188 5167 */
6d2010ae
A
5168 read_recStartBlock = extent_info->recStartBlock;
5169 for (i = 0; i < kHFSPlusExtentDensity; i++) {
5170 if (extents[i].blockCount == 0) {
5171 break;
5172 }
5173 read_recStartBlock += extents[i].blockCount;
b7266188 5174 }
6d2010ae
A
5175
5176 /* Shift and split */
5177 if (index == kHFSPlusExtentDensity-1) {
5178 /* The new extent created after split will go into following overflow extent record */
5179 shift_extent.startBlock = extents[index].startBlock + newBlockCount;
5180 shift_extent.blockCount = extents[index].blockCount - newBlockCount;
5181
5182 /* Last extent in the record will be split, so nothing to shift */
5183 } else {
5184 /* Splitting of extents can result in at most of one
5185 * extent entry to be shifted into following overflow extent
5186 * record. So, store the last extent entry for later.
5187 */
5188 shift_extent = extents[kHFSPlusExtentDensity-1];
13f56ec4
A
5189 if ((hfs_resize_debug) && (shift_extent.blockCount != 0)) {
5190 printf ("hfs_split_extent: Save 7:(%u,%u) to shift into overflow record\n", shift_extent.startBlock, shift_extent.blockCount);
5191 }
6d2010ae
A
5192
5193 /* Start shifting extent information from the end of the extent
5194 * record to the index where we want to insert the new extent.
5195 * Note that kHFSPlusExtentDensity-1 is already saved above, and
5196 * does not need to be shifted. The extent entry that is being
5197 * split does not get shifted.
5198 */
5199 for (i = kHFSPlusExtentDensity-2; i > index; i--) {
5200 if (hfs_resize_debug) {
5201 if (extents[i].blockCount) {
5202 printf ("hfs_split_extent: Shift %u:(%u,%u) to %u:(%u,%u)\n", i, extents[i].startBlock, extents[i].blockCount, i+1, extents[i].startBlock, extents[i].blockCount);
5203 }
5204 }
5205 extents[i+1] = extents[i];
b7266188 5206 }
6d2010ae 5207 }
b7266188 5208
6d2010ae
A
5209 if (index == kHFSPlusExtentDensity-1) {
5210 /* The second half of the extent being split will be the overflow
5211 * entry that will go into following overflow extent record. The
5212 * value has been stored in 'shift_extent' above, so there is
5213 * nothing to be done here.
5214 */
5215 } else {
5216 /* Update the values in the second half of the extent being split
5217 * before updating the first half of the split. Note that the
5218 * extent to split or first half of the split is at index 'index'
5219 * and a new extent or second half of the split will be inserted at
5220 * 'index+1' or into following overflow extent record.
5221 */
5222 extents[index+1].startBlock = extents[index].startBlock + newBlockCount;
5223 extents[index+1].blockCount = extents[index].blockCount - newBlockCount;
5224 }
5225 /* Update the extent being split, only the block count will change */
5226 extents[index].blockCount = newBlockCount;
5227
5228 if (hfs_resize_debug) {
5229 printf ("hfs_split_extent: Split %u:(%u,%u) and ", index, extents[index].startBlock, extents[index].blockCount);
5230 if (index != kHFSPlusExtentDensity-1) {
5231 printf ("%u:(%u,%u)\n", index+1, extents[index+1].startBlock, extents[index+1].blockCount);
5232 } else {
5233 printf ("overflow:(%u,%u)\n", shift_extent.startBlock, shift_extent.blockCount);
b7266188
A
5234 }
5235 }
5236
13f56ec4
A
5237 /* Write out information about the newly split extent to the disk */
5238 if (extent_info->catalog_fp) {
5239 /* (extent_info->catalog_fp != NULL) means the newly split
5240 * extent exists in the catalog record. This means that
5241 * the cnode was updated. Therefore, to write out the changes,
5242 * mark the cnode as modified. We cannot call hfs_update()
5243 * in this function because the caller hfs_reclaim_extent()
5244 * is holding the catalog lock currently.
5245 */
5246 cp->c_flag |= C_MODIFIED;
5247 } else {
5248 /* The newly split extent is for large EAs or is in overflow
5249 * extent record, so update it directly in the btree using the
5250 * iterator information from the shared extent_info structure
5251 */
6d2010ae
A
5252 error = BTReplaceRecord(extent_info->fcb, extent_info->iterator,
5253 &(extent_info->btdata), extent_info->recordlen);
5254 if (error) {
5255 printf ("hfs_split_extent: fileID=%u BTReplaceRecord returned error=%d\n", extent_info->fileID, error);
5256 goto out;
5257 }
b7266188 5258 }
6d2010ae
A
5259
5260 /* No extent entry to be shifted into another extent overflow record */
5261 if (shift_extent.blockCount == 0) {
5262 if (hfs_resize_debug) {
5263 printf ("hfs_split_extent: No extent entry to be shifted into overflow records\n");
5264 }
5265 error = 0;
5266 goto out;
b7266188
A
5267 }
5268
6d2010ae 5269 /* The overflow extent entry has to be shifted into an extent
13f56ec4
A
5270 * overflow record. This means that we might have to shift
5271 * extent entries from all subsequent overflow records by one.
5272 * We start iteration from the first record to the last record,
6d2010ae 5273 * and shift the extent entry from one record to another.
13f56ec4
A
5274 * We might have to create a new extent record for the last
5275 * extent entry for the file.
6d2010ae 5276 */
2d21ac55 5277
6d2010ae
A
5278 /* Initialize iterator to search the next record */
5279 bzero(&iterator, sizeof(iterator));
5280 if (is_xattr) {
13f56ec4 5281 /* Copy the key from the iterator that was used to update the modified attribute record. */
6d2010ae
A
5282 xattr_key = (HFSPlusAttrKey *)&(iterator.key);
5283 bcopy((HFSPlusAttrKey *)&(extent_info->iterator->key), xattr_key, sizeof(HFSPlusAttrKey));
5284 /* Note: xattr_key->startBlock will be initialized later in the iteration loop */
5285
5286 MALLOC(xattr_rec, HFSPlusAttrRecord *,
5287 sizeof(HFSPlusAttrRecord), M_TEMP, M_WAITOK);
5288 if (xattr_rec == NULL) {
5289 error = ENOMEM;
5290 goto out;
b7266188 5291 }
6d2010ae
A
5292 btdata.bufferAddress = xattr_rec;
5293 btdata.itemSize = sizeof(HFSPlusAttrRecord);
5294 btdata.itemCount = 1;
5295 extents = xattr_rec->overflowExtents.extents;
5296 } else {
13f56ec4 5297 /* Initialize the extent key for the current file */
6d2010ae
A
5298 extents_key = (HFSPlusExtentKey *) &(iterator.key);
5299 extents_key->keyLength = kHFSPlusExtentKeyMaximumLength;
5300 extents_key->forkType = extent_info->forkType;
5301 extents_key->fileID = extent_info->fileID;
5302 /* Note: extents_key->startBlock will be initialized later in the iteration loop */
5303
5304 MALLOC(extents_rec, HFSPlusExtentRecord *,
5305 sizeof(HFSPlusExtentRecord), M_TEMP, M_WAITOK);
5306 if (extents_rec == NULL) {
5307 error = ENOMEM;
5308 goto out;
5309 }
5310 btdata.bufferAddress = extents_rec;
5311 btdata.itemSize = sizeof(HFSPlusExtentRecord);
5312 btdata.itemCount = 1;
5313 extents = extents_rec[0];
0c530ab8 5314 }
2d21ac55 5315
13f56ec4
A
5316 /* The overflow extent entry has to be shifted into an extent
5317 * overflow record. This means that we might have to shift
5318 * extent entries from all subsequent overflow records by one.
5319 * We start iteration from the first record to the last record,
5320 * examine one extent record in each iteration and shift one
5321 * extent entry from one record to another. We might have to
5322 * create a new extent record for the last extent entry for the
5323 * file.
5324 *
5325 * If shift_extent.blockCount is non-zero, it means that there is
5326 * an extent entry that needs to be shifted into the next
5327 * overflow extent record. We keep on going till there are no such
5328 * entries left to be shifted. This will also change the starting
5329 * allocation block number of the extent record which is part of
5330 * the key for the extent record in each iteration. Note that
5331 * because the extent record key is changing while we are searching,
5332 * the record can not be updated directly, instead it has to be
5333 * deleted and inserted again.
6d2010ae
A
5334 */
5335 while (shift_extent.blockCount) {
5336 if (hfs_resize_debug) {
13f56ec4 5337 printf ("hfs_split_extent: Will shift (%u,%u) into overflow record with startBlock=%u\n", shift_extent.startBlock, shift_extent.blockCount, read_recStartBlock);
6d2010ae
A
5338 }
5339
13f56ec4
A
5340 /* Search if there is any existing overflow extent record
5341 * that matches the current file and the logical start block
5342 * number.
5343 *
6d2010ae
A
5344 * For this, the logical start block number in the key is
5345 * the value calculated based on the logical start block
5346 * number of the current extent record and the total number
5347 * of blocks existing in the current extent record.
5348 */
5349 if (is_xattr) {
5350 xattr_key->startBlock = read_recStartBlock;
5351 } else {
5352 extents_key->startBlock = read_recStartBlock;
5353 }
5354 error = BTSearchRecord(extent_info->fcb, &iterator, &btdata, &reclen, &iterator);
5355 if (error) {
5356 if (error != btNotFound) {
5357 printf ("hfs_split_extent: fileID=%u startBlock=%u BTSearchRecord error=%d\n", extent_info->fileID, read_recStartBlock, error);
5358 goto out;
2d21ac55 5359 }
13f56ec4
A
5360 /* No matching record was found, so create a new extent record.
5361 * Note: Since no record was found, we can't rely on the
5362 * btree key in the iterator any longer. This will be initialized
5363 * later before we insert the record.
5364 */
6d2010ae
A
5365 create_record = true;
5366 }
5367
5368 /* The extra extent entry from the previous record is being inserted
5369 * as the first entry in the current extent record. This will change
5370 * the file allocation block number (FABN) of the current extent
5371 * record, which is the startBlock value from the extent record key.
5372 * Since one extra entry is being inserted in the record, the new
5373 * FABN for the record will less than old FABN by the number of blocks
5374 * in the new extent entry being inserted at the start. We have to
5375 * do this before we update read_recStartBlock to point at the
5376 * startBlock of the following record.
5377 */
5378 write_recStartBlock = read_recStartBlock - shift_extent.blockCount;
5379 if (hfs_resize_debug) {
5380 if (create_record) {
5381 printf ("hfs_split_extent: No records found for startBlock=%u, will create new with startBlock=%u\n", read_recStartBlock, write_recStartBlock);
2d21ac55 5382 }
6d2010ae 5383 }
b7266188 5384
6d2010ae
A
5385 /* Now update the read_recStartBlock to account for total number
5386 * of blocks in this extent record. It will now point to the
5387 * starting allocation block number for the next extent record.
5388 */
5389 for (i = 0; i < kHFSPlusExtentDensity; i++) {
5390 if (extents[i].blockCount == 0) {
5391 break;
2d21ac55 5392 }
6d2010ae
A
5393 read_recStartBlock += extents[i].blockCount;
5394 }
b7266188 5395
6d2010ae
A
5396 if (create_record == true) {
5397 /* Initialize new record content with only one extent entry */
5398 bzero(extents, sizeof(HFSPlusExtentRecord));
5399 /* The new record will contain only one extent entry */
5400 extents[0] = shift_extent;
5401 /* There are no more overflow extents to be shifted */
5402 shift_extent.startBlock = shift_extent.blockCount = 0;
5403
5404 if (is_xattr) {
13f56ec4
A
5405 /* BTSearchRecord above returned btNotFound,
5406 * but since the attribute btree is never empty
5407 * if we are trying to insert new overflow
5408 * record for the xattrs, the extents_key will
5409 * contain correct data. So we don't need to
5410 * re-initialize it again like below.
5411 */
5412
5413 /* Initialize the new xattr record */
6d2010ae
A
5414 xattr_rec->recordType = kHFSPlusAttrExtents;
5415 xattr_rec->overflowExtents.reserved = 0;
5416 reclen = sizeof(HFSPlusAttrExtents);
5417 } else {
13f56ec4
A
5418 /* BTSearchRecord above returned btNotFound,
5419 * which means that extents_key content might
5420 * not correspond to the record that we are
5421 * trying to create, especially when the extents
5422 * overflow btree is empty. So we reinitialize
5423 * the extents_key again always.
5424 */
5425 extents_key->keyLength = kHFSPlusExtentKeyMaximumLength;
5426 extents_key->forkType = extent_info->forkType;
5427 extents_key->fileID = extent_info->fileID;
5428
5429 /* Initialize the new extent record */
6d2010ae 5430 reclen = sizeof(HFSPlusExtentRecord);
2d21ac55 5431 }
6d2010ae
A
5432 } else {
5433 /* The overflow extent entry from previous record will be
5434 * the first entry in this extent record. If the last
5435 * extent entry in this record is valid, it will be shifted
5436 * into the following extent record as its first entry. So
5437 * save the last entry before shifting entries in current
5438 * record.
5439 */
5440 last_extent = extents[kHFSPlusExtentDensity-1];
5441
5442 /* Shift all entries by one index towards the end */
5443 for (i = kHFSPlusExtentDensity-2; i >= 0; i--) {
5444 extents[i+1] = extents[i];
b7266188
A
5445 }
5446
6d2010ae
A
5447 /* Overflow extent entry saved from previous record
5448 * is now the first entry in the current record.
5449 */
5450 extents[0] = shift_extent;
5451
b7266188 5452 if (hfs_resize_debug) {
6d2010ae 5453 printf ("hfs_split_extent: Shift overflow=(%u,%u) to record with updated startBlock=%u\n", shift_extent.startBlock, shift_extent.blockCount, write_recStartBlock);
2d21ac55 5454 }
2d21ac55 5455
6d2010ae
A
5456 /* The last entry from current record will be the
5457 * overflow entry which will be the first entry for
5458 * the following extent record.
5459 */
5460 shift_extent = last_extent;
2d21ac55 5461
6d2010ae
A
5462 /* Since the key->startBlock is being changed for this record,
5463 * it should be deleted and inserted with the new key.
5464 */
5465 error = BTDeleteRecord(extent_info->fcb, &iterator);
5466 if (error) {
5467 printf ("hfs_split_extent: fileID=%u startBlock=%u BTDeleteRecord error=%d\n", extent_info->fileID, read_recStartBlock, error);
5468 goto out;
2d21ac55 5469 }
b7266188 5470 if (hfs_resize_debug) {
6d2010ae 5471 printf ("hfs_split_extent: Deleted record with startBlock=%u\n", (is_xattr ? xattr_key->startBlock : extents_key->startBlock));
b7266188 5472 }
6d2010ae 5473 }
b7266188 5474
6d2010ae
A
5475 /* Insert the newly created or modified extent record */
5476 bzero(&iterator.hint, sizeof(iterator.hint));
5477 if (is_xattr) {
5478 xattr_key->startBlock = write_recStartBlock;
5479 } else {
5480 extents_key->startBlock = write_recStartBlock;
5481 }
5482 error = BTInsertRecord(extent_info->fcb, &iterator, &btdata, reclen);
5483 if (error) {
5484 printf ("hfs_split_extent: fileID=%u, startBlock=%u BTInsertRecord error=%d\n", extent_info->fileID, write_recStartBlock, error);
5485 goto out;
5486 }
5487 if (hfs_resize_debug) {
5488 printf ("hfs_split_extent: Inserted extent record with startBlock=%u\n", write_recStartBlock);
5489 }
5490 }
5491 BTFlushPath(extent_info->fcb);
5492out:
5493 if (extents_rec) {
5494 FREE (extents_rec, M_TEMP);
5495 }
5496 if (xattr_rec) {
5497 FREE (xattr_rec, M_TEMP);
5498 }
5499 return error;
5500}
5501
5502
5503/*
5504 * Relocate an extent if it lies beyond the expected end of volume.
5505 *
5506 * This function is called for every extent of the file being relocated.
5507 * It allocates space for relocation, copies the data, deallocates
5508 * the old extent, and update corresponding on-disk extent. If the function
5509 * does not find contiguous space to relocate an extent, it splits the
5510 * extent in smaller size to be able to relocate it out of the area of
5511 * disk being reclaimed. As an optimization, if an extent lies partially
5512 * in the area of the disk being reclaimed, it is split so that we only
5513 * have to relocate the area that was overlapping with the area of disk
5514 * being reclaimed.
5515 *
5516 * Note that every extent is relocated in its own transaction so that
5517 * they do not overwhelm the journal. This function handles the extent
5518 * record that exists in the catalog record, extent record from overflow
5519 * extents btree, and extents for large EAs.
5520 *
5521 * Inputs:
5522 * extent_info - This is the structure that contains state about
5523 * the current file, extent, and extent record that
5524 * is being relocated. This structure is shared
5525 * among code that traverses through all the extents
5526 * of the file, code that relocates extents, and
5527 * code that splits the extent.
5528 */
5529static int
5530hfs_reclaim_extent(struct hfsmount *hfsmp, const u_long allocLimit, struct hfs_reclaim_extent_info *extent_info, vfs_context_t context)
5531{
5532 int error = 0;
5533 int index;
5534 struct cnode *cp;
5535 u_int32_t oldStartBlock;
5536 u_int32_t oldBlockCount;
5537 u_int32_t newStartBlock;
5538 u_int32_t newBlockCount;
13f56ec4
A
5539 u_int32_t roundedBlockCount;
5540 uint16_t node_size;
5541 uint32_t remainder_blocks;
6d2010ae
A
5542 u_int32_t alloc_flags;
5543 int blocks_allocated = false;
5544
5545 index = extent_info->extent_index;
5546 cp = VTOC(extent_info->vp);
5547
5548 oldStartBlock = extent_info->extents[index].startBlock;
5549 oldBlockCount = extent_info->extents[index].blockCount;
5550
5551 if (0 && hfs_resize_debug) {
5552 printf ("hfs_reclaim_extent: Examine record:%u recStartBlock=%u, %u:(%u,%u)\n", extent_info->overflow_count, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount);
5553 }
5554
13f56ec4
A
5555 /* If the current extent lies completely within allocLimit,
5556 * it does not require any relocation.
5557 */
6d2010ae
A
5558 if ((oldStartBlock + oldBlockCount) <= allocLimit) {
5559 extent_info->cur_blockCount += oldBlockCount;
5560 return error;
5561 }
5562
5563 /* Every extent should be relocated in its own transaction
5564 * to make sure that we don't overflow the journal buffer.
5565 */
5566 error = hfs_start_transaction(hfsmp);
5567 if (error) {
5568 return error;
5569 }
5570 extent_info->lockflags = hfs_systemfile_lock(hfsmp, extent_info->lockflags, HFS_EXCLUSIVE_LOCK);
5571
5572 /* Check if the extent lies partially in the area to reclaim,
5573 * i.e. it starts before allocLimit and ends beyond allocLimit.
5574 * We have already skipped extents that lie completely within
5575 * allocLimit in the check above, so we only check for the
5576 * startBlock. If it lies partially, split it so that we
5577 * only relocate part of the extent.
5578 */
5579 if (oldStartBlock < allocLimit) {
5580 newBlockCount = allocLimit - oldStartBlock;
13f56ec4
A
5581
5582 /* If the extent belongs to a btree, check and trim
5583 * it to be multiple of the node size.
5584 */
5585 if (extent_info->is_sysfile) {
5586 node_size = get_btree_nodesize(extent_info->vp);
5587 /* If the btree node size is less than the block size,
5588 * splitting this extent will not split a node across
5589 * different extents. So we only check and trim if
5590 * node size is more than the allocation block size.
5591 */
5592 if (node_size > hfsmp->blockSize) {
5593 remainder_blocks = newBlockCount % (node_size / hfsmp->blockSize);
5594 if (remainder_blocks) {
5595 newBlockCount -= remainder_blocks;
5596 if (hfs_resize_debug) {
5597 printf ("hfs_reclaim_extent: Fixing extent block count, node_blks=%u, old=%u, new=%u\n", node_size/hfsmp->blockSize, newBlockCount + remainder_blocks, newBlockCount);
5598 }
5599 }
5600 }
5601 }
5602
5603 if (hfs_resize_debug) {
5604 int idx = extent_info->extent_index;
5605 printf ("hfs_reclaim_extent: Split straddling extent %u:(%u,%u) for %u blocks\n", idx, extent_info->extents[idx].startBlock, extent_info->extents[idx].blockCount, newBlockCount);
5606 }
5607
5608 /* Split the extents into two parts --- the first extent lies
5609 * completely within allocLimit and therefore does not require
5610 * relocation. The second extent will require relocation which
5611 * will be handled when the caller calls this function again
5612 * for the next extent.
5613 */
6d2010ae
A
5614 error = hfs_split_extent(extent_info, newBlockCount);
5615 if (error == 0) {
13f56ec4 5616 /* Split success, no relocation required */
6d2010ae
A
5617 goto out;
5618 }
13f56ec4
A
5619 /* Split failed, so try to relocate entire extent */
5620 if (hfs_resize_debug) {
5621 printf ("hfs_reclaim_extent: Split straddling extent failed, reclocate full extent\n");
5622 }
6d2010ae
A
5623 }
5624
13f56ec4
A
5625 /* At this point, the current extent requires relocation.
5626 * We will try to allocate space equal to the size of the extent
5627 * being relocated first to try to relocate it without splitting.
5628 * If the allocation fails, we will try to allocate contiguous
5629 * blocks out of metadata zone. If that allocation also fails,
5630 * then we will take a whatever contiguous block run is returned
5631 * by the allocation, split the extent into two parts, and then
5632 * relocate the first splitted extent.
5633 */
6d2010ae
A
5634 alloc_flags = HFS_ALLOC_FORCECONTIG | HFS_ALLOC_SKIPFREEBLKS;
5635 if (extent_info->is_sysfile) {
5636 alloc_flags |= HFS_ALLOC_METAZONE;
5637 }
5638
5639 error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount, alloc_flags,
5640 &newStartBlock, &newBlockCount);
5641 if ((extent_info->is_sysfile == false) &&
5642 ((error == dskFulErr) || (error == ENOSPC))) {
5643 /* For non-system files, try reallocating space in metadata zone */
5644 alloc_flags |= HFS_ALLOC_METAZONE;
5645 error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount,
5646 alloc_flags, &newStartBlock, &newBlockCount);
5647 }
5648 if ((error == dskFulErr) || (error == ENOSPC)) {
5649 /* We did not find desired contiguous space for this extent.
5650 * So try to allocate the maximum contiguous space available.
5651 */
5652 alloc_flags &= ~HFS_ALLOC_FORCECONTIG;
5653
5654 error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount,
5655 alloc_flags, &newStartBlock, &newBlockCount);
5656 if (error) {
5657 printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) BlockAllocate error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error);
5658 goto out;
5659 }
5660 blocks_allocated = true;
5661
13f56ec4
A
5662 /* The number of blocks allocated is less than the requested
5663 * number of blocks. For btree extents, check and trim the
5664 * extent to be multiple of the node size.
5665 */
5666 if (extent_info->is_sysfile) {
5667 node_size = get_btree_nodesize(extent_info->vp);
5668 if (node_size > hfsmp->blockSize) {
5669 remainder_blocks = newBlockCount % (node_size / hfsmp->blockSize);
5670 if (remainder_blocks) {
5671 roundedBlockCount = newBlockCount - remainder_blocks;
5672 /* Free tail-end blocks of the newly allocated extent */
5673 BlockDeallocate(hfsmp, newStartBlock + roundedBlockCount,
5674 newBlockCount - roundedBlockCount,
5675 HFS_ALLOC_SKIPFREEBLKS);
5676 newBlockCount = roundedBlockCount;
5677 if (hfs_resize_debug) {
5678 printf ("hfs_reclaim_extent: Fixing extent block count, node_blks=%u, old=%u, new=%u\n", node_size/hfsmp->blockSize, newBlockCount + remainder_blocks, newBlockCount);
5679 }
5680 if (newBlockCount == 0) {
5681 printf ("hfs_reclaim_extent: Not enough contiguous blocks available to relocate fileID=%d\n", extent_info->fileID);
5682 error = ENOSPC;
5683 goto out;
5684 }
5685 }
5686 }
5687 }
5688
5689 /* The number of blocks allocated is less than the number of
5690 * blocks requested, so split this extent --- the first extent
5691 * will be relocated as part of this function call and the caller
5692 * will handle relocating the second extent by calling this
5693 * function again for the second extent.
5694 */
6d2010ae
A
5695 error = hfs_split_extent(extent_info, newBlockCount);
5696 if (error) {
5697 printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) split error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error);
5698 goto out;
5699 }
5700 oldBlockCount = newBlockCount;
5701 }
5702 if (error) {
5703 printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) contig BlockAllocate error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error);
5704 goto out;
5705 }
5706 blocks_allocated = true;
5707
5708 /* Copy data from old location to new location */
5709 error = hfs_copy_extent(hfsmp, extent_info->vp, oldStartBlock,
5710 newStartBlock, newBlockCount, context);
5711 if (error) {
5712 printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u)=>(%u,%u) hfs_copy_extent error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, newStartBlock, newBlockCount, error);
5713 goto out;
5714 }
5715
5716 /* Update the extent record with the new start block information */
5717 extent_info->extents[index].startBlock = newStartBlock;
5718
5719 /* Sync the content back to the disk */
5720 if (extent_info->catalog_fp) {
5721 /* Update the extents in catalog record */
5722 if (extent_info->is_dirlink) {
5723 error = cat_update_dirlink(hfsmp, extent_info->forkType,
5724 extent_info->dirlink_desc, extent_info->dirlink_attr,
5725 &(extent_info->dirlink_fork->ff_data));
5726 } else {
5727 cp->c_flag |= C_MODIFIED;
5728 /* If this is a system file, sync volume headers on disk */
5729 if (extent_info->is_sysfile) {
5730 error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
2d21ac55
A
5731 }
5732 }
6d2010ae
A
5733 } else {
5734 /* Replace record for extents overflow or extents-based xattrs */
5735 error = BTReplaceRecord(extent_info->fcb, extent_info->iterator,
5736 &(extent_info->btdata), extent_info->recordlen);
0c530ab8 5737 }
6d2010ae
A
5738 if (error) {
5739 printf ("hfs_reclaim_extent: fileID=%u, update record error=%u\n", extent_info->fileID, error);
5740 goto out;
5741 }
5742
5743 /* Deallocate the old extent */
5744 error = BlockDeallocate(hfsmp, oldStartBlock, oldBlockCount, HFS_ALLOC_SKIPFREEBLKS);
5745 if (error) {
5746 printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) BlockDeallocate error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error);
5747 goto out;
2d21ac55 5748 }
6d2010ae 5749 extent_info->blocks_relocated += newBlockCount;
2d21ac55 5750
6d2010ae
A
5751 if (hfs_resize_debug) {
5752 printf ("hfs_reclaim_extent: Relocated record:%u %u:(%u,%u) to (%u,%u)\n", extent_info->overflow_count, index, oldStartBlock, oldBlockCount, newStartBlock, newBlockCount);
5753 }
b7266188 5754
6d2010ae
A
5755out:
5756 if (error != 0) {
5757 if (blocks_allocated == true) {
5758 BlockDeallocate(hfsmp, newStartBlock, newBlockCount, HFS_ALLOC_SKIPFREEBLKS);
5759 }
5760 } else {
5761 /* On success, increment the total allocation blocks processed */
5762 extent_info->cur_blockCount += newBlockCount;
b7266188
A
5763 }
5764
6d2010ae 5765 hfs_systemfile_unlock(hfsmp, extent_info->lockflags);
b7266188 5766
6d2010ae
A
5767 /* For a non-system file, if an extent entry from catalog record
5768 * was modified, sync the in-memory changes to the catalog record
5769 * on disk before ending the transaction.
5770 */
13f56ec4
A
5771 if ((extent_info->catalog_fp) &&
5772 (extent_info->is_sysfile == false)) {
6d2010ae
A
5773 (void) hfs_update(extent_info->vp, MNT_WAIT);
5774 }
5775
5776 hfs_end_transaction(hfsmp);
5777
5778 return error;
5779}
5780
5781/* Report intermediate progress during volume resize */
5782static void
5783hfs_truncatefs_progress(struct hfsmount *hfsmp)
5784{
316670eb 5785 u_int32_t cur_progress = 0;
6d2010ae
A
5786
5787 hfs_resize_progress(hfsmp, &cur_progress);
5788 if (cur_progress > (hfsmp->hfs_resize_progress + 9)) {
5789 printf("hfs_truncatefs: %d%% done...\n", cur_progress);
5790 hfsmp->hfs_resize_progress = cur_progress;
5791 }
5792 return;
5793}
5794
5795/*
5796 * Reclaim space at the end of a volume for given file and forktype.
5797 *
5798 * This routine attempts to move any extent which contains allocation blocks
5799 * at or after "allocLimit." A separate transaction is used for every extent
5800 * that needs to be moved. If there is not contiguous space available for
5801 * moving an extent, it can be split into smaller extents. The contents of
5802 * any moved extents are read and written via the volume's device vnode --
5803 * NOT via "vp." During the move, moved blocks which are part of a transaction
5804 * have their physical block numbers invalidated so they will eventually be
5805 * written to their new locations.
5806 *
5807 * This function is also called for directory hard links. Directory hard links
5808 * are regular files with no data fork and resource fork that contains alias
5809 * information for backward compatibility with pre-Leopard systems. However
5810 * non-Mac OS X implementations can add/modify data fork or resource fork
5811 * information to directory hard links, so we check, and if required, relocate
5812 * both data fork and resource fork.
5813 *
5814 * Inputs:
5815 * hfsmp The volume being resized.
5816 * vp The vnode for the system file.
5817 * fileID ID of the catalog record that needs to be relocated
5818 * forktype The type of fork that needs to be relocated,
5819 * kHFSResourceForkType for resource fork,
5820 * kHFSDataForkType for data fork
5821 * allocLimit Allocation limit for the new volume size,
5822 * do not use this block or beyond. All extents
5823 * that use this block or any blocks beyond this limit
5824 * will be relocated.
5825 *
5826 * Side Effects:
5827 * hfsmp->hfs_resize_blocksmoved is incremented by the number of allocation
5828 * blocks that were relocated.
5829 */
5830static int
5831hfs_reclaim_file(struct hfsmount *hfsmp, struct vnode *vp, u_int32_t fileID,
5832 u_int8_t forktype, u_long allocLimit, vfs_context_t context)
5833{
5834 int error = 0;
5835 struct hfs_reclaim_extent_info *extent_info;
5836 int i;
5837 int lockflags = 0;
5838 struct cnode *cp;
5839 struct filefork *fp;
5840 int took_truncate_lock = false;
5841 int release_desc = false;
5842 HFSPlusExtentKey *key;
5843
5844 /* If there is no vnode for this file, then there's nothing to do. */
5845 if (vp == NULL) {
5846 return 0;
5847 }
5848
5849 cp = VTOC(vp);
5850
316670eb
A
5851 if (hfs_resize_debug) {
5852 const char *filename = (const char *) cp->c_desc.cd_nameptr;
5853 int namelen = cp->c_desc.cd_namelen;
5854
5855 if (filename == NULL) {
5856 filename = "";
5857 namelen = 0;
5858 }
5859 printf("hfs_reclaim_file: reclaiming '%.*s'\n", namelen, filename);
5860 }
5861
6d2010ae
A
5862 MALLOC(extent_info, struct hfs_reclaim_extent_info *,
5863 sizeof(struct hfs_reclaim_extent_info), M_TEMP, M_WAITOK);
5864 if (extent_info == NULL) {
5865 return ENOMEM;
5866 }
5867 bzero(extent_info, sizeof(struct hfs_reclaim_extent_info));
5868 extent_info->vp = vp;
5869 extent_info->fileID = fileID;
5870 extent_info->forkType = forktype;
5871 extent_info->is_sysfile = vnode_issystem(vp);
5872 if (vnode_isdir(vp) && (cp->c_flag & C_HARDLINK)) {
5873 extent_info->is_dirlink = true;
5874 }
5875 /* We always need allocation bitmap and extent btree lock */
5876 lockflags = SFL_BITMAP | SFL_EXTENTS;
5877 if ((fileID == kHFSCatalogFileID) || (extent_info->is_dirlink == true)) {
5878 lockflags |= SFL_CATALOG;
5879 } else if (fileID == kHFSAttributesFileID) {
5880 lockflags |= SFL_ATTRIBUTE;
5881 } else if (fileID == kHFSStartupFileID) {
5882 lockflags |= SFL_STARTUP;
5883 }
5884 extent_info->lockflags = lockflags;
5885 extent_info->fcb = VTOF(hfsmp->hfs_extents_vp);
5886
5887 /* Flush data associated with current file on disk.
5888 *
5889 * If the current vnode is directory hard link, no flushing of
5890 * journal or vnode is required. The current kernel does not
5891 * modify data/resource fork of directory hard links, so nothing
5892 * will be in the cache. If a directory hard link is newly created,
5893 * the resource fork data is written directly using devvp and
5894 * the code that actually relocates data (hfs_copy_extent()) also
5895 * uses devvp for its I/O --- so they will see a consistent copy.
5896 */
5897 if (extent_info->is_sysfile) {
5898 /* If the current vnode is system vnode, flush journal
5899 * to make sure that all data is written to the disk.
5900 */
5901 error = hfs_journal_flush(hfsmp, TRUE);
5902 if (error) {
5903 printf ("hfs_reclaim_file: journal_flush returned %d\n", error);
5904 goto out;
5905 }
5906 } else if (extent_info->is_dirlink == false) {
5907 /* Flush all blocks associated with this regular file vnode.
5908 * Normally there should not be buffer cache blocks for regular
5909 * files, but for objects like symlinks, we can have buffer cache
5910 * blocks associated with the vnode. Therefore we call
5911 * buf_flushdirtyblks() also.
5912 */
5913 buf_flushdirtyblks(vp, 0, BUF_SKIP_LOCKED, "hfs_reclaim_file");
5914
5915 hfs_unlock(cp);
5916 hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK);
5917 took_truncate_lock = true;
5918 (void) cluster_push(vp, 0);
5919 error = hfs_lock(cp, HFS_FORCE_LOCK);
5920 if (error) {
5921 goto out;
5922 }
5923
5924 /* If the file no longer exists, nothing left to do */
5925 if (cp->c_flag & C_NOEXISTS) {
5926 error = 0;
5927 goto out;
5928 }
5929
5930 /* Wait for any in-progress writes to this vnode to complete, so that we'll
5931 * be copying consistent bits. (Otherwise, it's possible that an async
5932 * write will complete to the old extent after we read from it. That
5933 * could lead to corruption.)
5934 */
5935 error = vnode_waitforwrites(vp, 0, 0, 0, "hfs_reclaim_file");
5936 if (error) {
5937 goto out;
5938 }
5939 }
5940
5941 if (hfs_resize_debug) {
5942 printf("hfs_reclaim_file: === Start reclaiming %sfork for %sid=%u ===\n", (forktype ? "rsrc" : "data"), (extent_info->is_dirlink ? "dirlink" : "file"), fileID);
5943 }
5944
5945 if (extent_info->is_dirlink) {
5946 MALLOC(extent_info->dirlink_desc, struct cat_desc *,
5947 sizeof(struct cat_desc), M_TEMP, M_WAITOK);
5948 MALLOC(extent_info->dirlink_attr, struct cat_attr *,
5949 sizeof(struct cat_attr), M_TEMP, M_WAITOK);
5950 MALLOC(extent_info->dirlink_fork, struct filefork *,
5951 sizeof(struct filefork), M_TEMP, M_WAITOK);
5952 if ((extent_info->dirlink_desc == NULL) ||
5953 (extent_info->dirlink_attr == NULL) ||
5954 (extent_info->dirlink_fork == NULL)) {
5955 error = ENOMEM;
5956 goto out;
5957 }
5958
5959 /* Lookup catalog record for directory hard link and
5960 * create a fake filefork for the value looked up from
5961 * the disk.
5962 */
5963 fp = extent_info->dirlink_fork;
5964 bzero(extent_info->dirlink_fork, sizeof(struct filefork));
5965 extent_info->dirlink_fork->ff_cp = cp;
5966 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
5967 error = cat_lookup_dirlink(hfsmp, fileID, forktype,
5968 extent_info->dirlink_desc, extent_info->dirlink_attr,
5969 &(extent_info->dirlink_fork->ff_data));
5970 hfs_systemfile_unlock(hfsmp, lockflags);
5971 if (error) {
5972 printf ("hfs_reclaim_file: cat_lookup_dirlink for fileID=%u returned error=%u\n", fileID, error);
5973 goto out;
5974 }
5975 release_desc = true;
5976 } else {
5977 fp = VTOF(vp);
5978 }
5979
5980 extent_info->catalog_fp = fp;
5981 extent_info->recStartBlock = 0;
5982 extent_info->extents = extent_info->catalog_fp->ff_extents;
5983 /* Relocate extents from the catalog record */
5984 for (i = 0; i < kHFSPlusExtentDensity; ++i) {
5985 if (fp->ff_extents[i].blockCount == 0) {
5986 break;
5987 }
5988 extent_info->extent_index = i;
5989 error = hfs_reclaim_extent(hfsmp, allocLimit, extent_info, context);
5990 if (error) {
5991 printf ("hfs_reclaim_file: fileID=%u #%d %u:(%u,%u) hfs_reclaim_extent error=%d\n", fileID, extent_info->overflow_count, i, fp->ff_extents[i].startBlock, fp->ff_extents[i].blockCount, error);
5992 goto out;
5993 }
5994 }
5995
5996 /* If the number of allocation blocks processed for reclaiming
5997 * are less than total number of blocks for the file, continuing
5998 * working on overflow extents record.
5999 */
6000 if (fp->ff_blocks <= extent_info->cur_blockCount) {
6001 if (0 && hfs_resize_debug) {
6002 printf ("hfs_reclaim_file: Nothing more to relocate, offset=%d, ff_blocks=%u, cur_blockCount=%u\n", i, fp->ff_blocks, extent_info->cur_blockCount);
6003 }
6004 goto out;
6005 }
6006
6007 if (hfs_resize_debug) {
6008 printf ("hfs_reclaim_file: Will check overflow records, offset=%d, ff_blocks=%u, cur_blockCount=%u\n", i, fp->ff_blocks, extent_info->cur_blockCount);
6009 }
6010
6011 MALLOC(extent_info->iterator, struct BTreeIterator *, sizeof(struct BTreeIterator), M_TEMP, M_WAITOK);
6012 if (extent_info->iterator == NULL) {
6013 error = ENOMEM;
6014 goto out;
b7266188 6015 }
6d2010ae
A
6016 bzero(extent_info->iterator, sizeof(struct BTreeIterator));
6017 key = (HFSPlusExtentKey *) &(extent_info->iterator->key);
6018 key->keyLength = kHFSPlusExtentKeyMaximumLength;
6019 key->forkType = forktype;
6020 key->fileID = fileID;
6021 key->startBlock = extent_info->cur_blockCount;
6022
6023 extent_info->btdata.bufferAddress = extent_info->record.overflow;
6024 extent_info->btdata.itemSize = sizeof(HFSPlusExtentRecord);
6025 extent_info->btdata.itemCount = 1;
b7266188 6026
6d2010ae
A
6027 extent_info->catalog_fp = NULL;
6028
6029 /* Search the first overflow extent with expected startBlock as 'cur_blockCount' */
6030 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
6031 error = BTSearchRecord(extent_info->fcb, extent_info->iterator,
6032 &(extent_info->btdata), &(extent_info->recordlen),
6033 extent_info->iterator);
6034 hfs_systemfile_unlock(hfsmp, lockflags);
6035 while (error == 0) {
6036 extent_info->overflow_count++;
6037 extent_info->recStartBlock = key->startBlock;
6038 extent_info->extents = extent_info->record.overflow;
6039 for (i = 0; i < kHFSPlusExtentDensity; i++) {
6040 if (extent_info->record.overflow[i].blockCount == 0) {
6041 goto out;
6042 }
6043 extent_info->extent_index = i;
6044 error = hfs_reclaim_extent(hfsmp, allocLimit, extent_info, context);
6045 if (error) {
6046 printf ("hfs_reclaim_file: fileID=%u #%d %u:(%u,%u) hfs_reclaim_extent error=%d\n", fileID, extent_info->overflow_count, i, extent_info->record.overflow[i].startBlock, extent_info->record.overflow[i].blockCount, error);
6047 goto out;
6048 }
6049 }
6050
6051 /* Look for more overflow records */
6052 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
6053 error = BTIterateRecord(extent_info->fcb, kBTreeNextRecord,
6054 extent_info->iterator, &(extent_info->btdata),
6055 &(extent_info->recordlen));
6056 hfs_systemfile_unlock(hfsmp, lockflags);
6057 if (error) {
6058 break;
6059 }
6060 /* Stop when we encounter a different file or fork. */
6061 if ((key->fileID != fileID) || (key->forkType != forktype)) {
6062 break;
6063 }
6064 }
6065 if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) {
6066 error = 0;
6067 }
6068
6069out:
6070 /* If any blocks were relocated, account them and report progress */
6071 if (extent_info->blocks_relocated) {
6072 hfsmp->hfs_resize_blocksmoved += extent_info->blocks_relocated;
6073 hfs_truncatefs_progress(hfsmp);
6074 if (fileID < kHFSFirstUserCatalogNodeID) {
6075 printf ("hfs_reclaim_file: Relocated %u blocks from fileID=%u on \"%s\"\n",
6076 extent_info->blocks_relocated, fileID, hfsmp->vcbVN);
6077 }
6078 }
6079 if (extent_info->iterator) {
6080 FREE(extent_info->iterator, M_TEMP);
6081 }
6082 if (release_desc == true) {
6083 cat_releasedesc(extent_info->dirlink_desc);
6084 }
6085 if (extent_info->dirlink_desc) {
6086 FREE(extent_info->dirlink_desc, M_TEMP);
6087 }
6088 if (extent_info->dirlink_attr) {
6089 FREE(extent_info->dirlink_attr, M_TEMP);
6090 }
6091 if (extent_info->dirlink_fork) {
6092 FREE(extent_info->dirlink_fork, M_TEMP);
6093 }
6094 if ((extent_info->blocks_relocated != 0) && (extent_info->is_sysfile == false)) {
6095 (void) hfs_update(vp, MNT_WAIT);
6096 }
6097 if (took_truncate_lock) {
6098 hfs_unlock_truncate(cp, 0);
6099 }
6100 if (extent_info) {
6101 FREE(extent_info, M_TEMP);
6102 }
b7266188 6103 if (hfs_resize_debug) {
6d2010ae 6104 printf("hfs_reclaim_file: === Finished relocating %sfork for fileid=%u (error=%d) ===\n", (forktype ? "rsrc" : "data"), fileID, error);
b7266188
A
6105 }
6106
2d21ac55
A
6107 return error;
6108}
6109
6110
6111/*
6112 * This journal_relocate callback updates the journal info block to point
6113 * at the new journal location. This write must NOT be done using the
6114 * transaction. We must write the block immediately. We must also force
6115 * it to get to the media so that the new journal location will be seen by
6116 * the replay code before we can safely let journaled blocks be written
6117 * to their normal locations.
6118 *
6119 * The tests for journal_uses_fua below are mildly hacky. Since the journal
6120 * and the file system are both on the same device, I'm leveraging what
6121 * the journal has decided about FUA.
6122 */
6123struct hfs_journal_relocate_args {
6124 struct hfsmount *hfsmp;
6125 vfs_context_t context;
6126 u_int32_t newStartBlock;
316670eb 6127 u_int32_t newBlockCount;
2d21ac55
A
6128};
6129
6130static errno_t
6131hfs_journal_relocate_callback(void *_args)
6132{
6133 int error;
6134 struct hfs_journal_relocate_args *args = _args;
6135 struct hfsmount *hfsmp = args->hfsmp;
6136 buf_t bp;
6137 JournalInfoBlock *jibp;
6138
6139 error = buf_meta_bread(hfsmp->hfs_devvp,
593a1d5f 6140 hfsmp->vcbJinfoBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size),
2d21ac55
A
6141 hfsmp->blockSize, vfs_context_ucred(args->context), &bp);
6142 if (error) {
316670eb 6143 printf("hfs_journal_relocate_callback: failed to read JIB (%d)\n", error);
6d2010ae
A
6144 if (bp) {
6145 buf_brelse(bp);
6146 }
2d21ac55
A
6147 return error;
6148 }
6149 jibp = (JournalInfoBlock*) buf_dataptr(bp);
6150 jibp->offset = SWAP_BE64((u_int64_t)args->newStartBlock * hfsmp->blockSize);
316670eb 6151 jibp->size = SWAP_BE64((u_int64_t)args->newBlockCount * hfsmp->blockSize);
2d21ac55
A
6152 if (journal_uses_fua(hfsmp->jnl))
6153 buf_markfua(bp);
6154 error = buf_bwrite(bp);
6155 if (error) {
316670eb 6156 printf("hfs_journal_relocate_callback: failed to write JIB (%d)\n", error);
2d21ac55
A
6157 return error;
6158 }
6159 if (!journal_uses_fua(hfsmp->jnl)) {
6160 error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, args->context);
6161 if (error) {
316670eb 6162 printf("hfs_journal_relocate_callback: DKIOCSYNCHRONIZECACHE failed (%d)\n", error);
2d21ac55 6163 error = 0; /* Don't fail the operation. */
0c530ab8 6164 }
91447636
A
6165 }
6166
2d21ac55
A
6167 return error;
6168}
6169
6170
316670eb
A
6171/* Type of resize operation in progress */
6172#define HFS_RESIZE_TRUNCATE 1
6173#define HFS_RESIZE_EXTEND 2
6174
6175/*
6176 * Core function to relocate the journal file. This function takes the
6177 * journal size of the newly relocated journal --- the caller can
6178 * provide a new journal size if they want to change the size of
6179 * the journal. The function takes care of updating the journal info
6180 * block and all other data structures correctly.
6181 *
6182 * Note: This function starts a transaction and grabs the btree locks.
6183 */
2d21ac55 6184static int
316670eb 6185hfs_relocate_journal_file(struct hfsmount *hfsmp, u_int32_t jnl_size, int resize_type, vfs_context_t context)
2d21ac55
A
6186{
6187 int error;
6d2010ae 6188 int journal_err;
2d21ac55 6189 int lockflags;
b7266188 6190 u_int32_t oldStartBlock;
2d21ac55
A
6191 u_int32_t newStartBlock;
6192 u_int32_t oldBlockCount;
6193 u_int32_t newBlockCount;
316670eb
A
6194 u_int32_t jnlBlockCount;
6195 u_int32_t alloc_skipfreeblks;
2d21ac55
A
6196 struct cat_desc journal_desc;
6197 struct cat_attr journal_attr;
6198 struct cat_fork journal_fork;
6199 struct hfs_journal_relocate_args callback_args;
6200
316670eb
A
6201 /* Calculate the number of allocation blocks required for the journal */
6202 jnlBlockCount = howmany(jnl_size, hfsmp->blockSize);
6203
6204 /*
6205 * During truncatefs(), the volume free block count is updated
6206 * before relocating data and reflects the total number of free
6207 * blocks that will exist on volume after the resize is successful.
6208 * This means that the allocation blocks required for relocation
6209 * have already been reserved and accounted for in the free block
6210 * count. Therefore, block allocation and deallocation routines
6211 * can skip the free block check by passing HFS_ALLOC_SKIPFREEBLKS
6212 * flag.
6213 *
6214 * This special handling is not required when the file system
6215 * is being extended as we want all the allocated and deallocated
6216 * blocks to be accounted for correctly.
6217 */
6218 if (resize_type == HFS_RESIZE_TRUNCATE) {
6219 alloc_skipfreeblks = HFS_ALLOC_SKIPFREEBLKS;
6220 } else {
6221 alloc_skipfreeblks = 0;
6d2010ae
A
6222 }
6223
2d21ac55
A
6224 error = hfs_start_transaction(hfsmp);
6225 if (error) {
316670eb 6226 printf("hfs_relocate_journal_file: hfs_start_transaction returned %d\n", error);
2d21ac55
A
6227 return error;
6228 }
6229 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
6230
316670eb
A
6231 error = BlockAllocate(hfsmp, 1, jnlBlockCount, jnlBlockCount,
6232 HFS_ALLOC_METAZONE | HFS_ALLOC_FORCECONTIG | alloc_skipfreeblks,
0b4c1975 6233 &newStartBlock, &newBlockCount);
2d21ac55 6234 if (error) {
316670eb 6235 printf("hfs_relocate_journal_file: BlockAllocate returned %d\n", error);
2d21ac55
A
6236 goto fail;
6237 }
316670eb
A
6238 if (newBlockCount != jnlBlockCount) {
6239 printf("hfs_relocate_journal_file: newBlockCount != jnlBlockCount (%u, %u)\n", newBlockCount, jnlBlockCount);
2d21ac55
A
6240 goto free_fail;
6241 }
6242
316670eb 6243 error = cat_idlookup(hfsmp, hfsmp->hfs_jnlfileid, 1, &journal_desc, &journal_attr, &journal_fork);
2d21ac55 6244 if (error) {
316670eb 6245 printf("hfs_relocate_journal_file: cat_idlookup returned %d\n", error);
2d21ac55
A
6246 goto free_fail;
6247 }
6248
316670eb
A
6249 oldStartBlock = journal_fork.cf_extents[0].startBlock;
6250 oldBlockCount = journal_fork.cf_extents[0].blockCount;
6251 error = BlockDeallocate(hfsmp, oldStartBlock, oldBlockCount, alloc_skipfreeblks);
2d21ac55 6252 if (error) {
316670eb 6253 printf("hfs_relocate_journal_file: BlockDeallocate returned %d\n", error);
2d21ac55
A
6254 goto free_fail;
6255 }
316670eb
A
6256
6257 /* Update the catalog record for .journal */
2d21ac55
A
6258 journal_fork.cf_size = newBlockCount * hfsmp->blockSize;
6259 journal_fork.cf_extents[0].startBlock = newStartBlock;
6260 journal_fork.cf_extents[0].blockCount = newBlockCount;
6261 journal_fork.cf_blocks = newBlockCount;
6262 error = cat_update(hfsmp, &journal_desc, &journal_attr, &journal_fork, NULL);
4a3eedf9 6263 cat_releasedesc(&journal_desc); /* all done with cat descriptor */
2d21ac55 6264 if (error) {
316670eb 6265 printf("hfs_relocate_journal_file: cat_update returned %d\n", error);
2d21ac55
A
6266 goto free_fail;
6267 }
2d21ac55 6268
316670eb
A
6269 /*
6270 * If the journal is part of the file system, then tell the journal
6271 * code about the new location. If the journal is on an external
6272 * device, then just keep using it as-is.
6273 */
6274 if (hfsmp->jvp == hfsmp->hfs_devvp) {
6275 callback_args.hfsmp = hfsmp;
6276 callback_args.context = context;
6277 callback_args.newStartBlock = newStartBlock;
6278 callback_args.newBlockCount = newBlockCount;
6279
6280 error = journal_relocate(hfsmp->jnl, (off_t)newStartBlock*hfsmp->blockSize,
6281 (off_t)newBlockCount*hfsmp->blockSize, 0,
6282 hfs_journal_relocate_callback, &callback_args);
6283 if (error) {
6284 /* NOTE: journal_relocate will mark the journal invalid. */
6285 printf("hfs_relocate_journal_file: journal_relocate returned %d\n", error);
6286 goto fail;
6287 }
6288 if (hfs_resize_debug) {
6289 printf ("hfs_relocate_journal_file: Successfully relocated journal from (%u,%u) to (%u,%u)\n", oldStartBlock, oldBlockCount, newStartBlock, newBlockCount);
6290 }
6291 hfsmp->jnl_start = newStartBlock;
6292 hfsmp->jnl_size = (off_t)newBlockCount * hfsmp->blockSize;
2d21ac55 6293 }
2d21ac55
A
6294
6295 hfs_systemfile_unlock(hfsmp, lockflags);
6296 error = hfs_end_transaction(hfsmp);
6297 if (error) {
316670eb 6298 printf("hfs_relocate_journal_file: hfs_end_transaction returned %d\n", error);
b7266188 6299 }
316670eb 6300
2d21ac55
A
6301 return error;
6302
6303free_fail:
6d2010ae
A
6304 journal_err = BlockDeallocate(hfsmp, newStartBlock, newBlockCount, HFS_ALLOC_SKIPFREEBLKS);
6305 if (journal_err) {
316670eb 6306 printf("hfs_relocate_journal_file: BlockDeallocate returned %d\n", error);
6d2010ae
A
6307 hfs_mark_volume_inconsistent(hfsmp);
6308 }
2d21ac55
A
6309fail:
6310 hfs_systemfile_unlock(hfsmp, lockflags);
6311 (void) hfs_end_transaction(hfsmp);
b7266188 6312 if (hfs_resize_debug) {
316670eb
A
6313 printf ("hfs_relocate_journal_file: Error relocating journal file (error=%d)\n", error);
6314 }
6315 return error;
6316}
6317
6318
6319/*
6320 * Relocate the journal file when the file system is being truncated.
6321 * We do not down-size the journal when the file system size is
6322 * reduced, so we always provide the current journal size to the
6323 * relocate code.
6324 */
6325static int
6326hfs_reclaim_journal_file(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context)
6327{
6328 int error = 0;
6329 u_int32_t startBlock;
6330 u_int32_t blockCount = hfsmp->jnl_size / hfsmp->blockSize;
6331
6332 /*
6333 * Figure out the location of the .journal file. When the journal
6334 * is on an external device, we need to look up the .journal file.
6335 */
6336 if (hfsmp->jvp == hfsmp->hfs_devvp) {
6337 startBlock = hfsmp->jnl_start;
6338 blockCount = hfsmp->jnl_size / hfsmp->blockSize;
6339 } else {
6340 u_int32_t fileid;
6341 u_int32_t old_jnlfileid;
6342 struct cat_attr attr;
6343 struct cat_fork fork;
6344
6345 /*
6346 * The cat_lookup inside GetFileInfo will fail because hfs_jnlfileid
6347 * is set, and it is trying to hide the .journal file. So temporarily
6348 * unset the field while calling GetFileInfo.
6349 */
6350 old_jnlfileid = hfsmp->hfs_jnlfileid;
6351 hfsmp->hfs_jnlfileid = 0;
6352 fileid = GetFileInfo(hfsmp, kHFSRootFolderID, ".journal", &attr, &fork);
6353 hfsmp->hfs_jnlfileid = old_jnlfileid;
6354 if (fileid != old_jnlfileid) {
6355 printf("hfs_reclaim_journal_file: cannot find .journal file!\n");
6356 return EIO;
6357 }
6358
6359 startBlock = fork.cf_extents[0].startBlock;
6360 blockCount = fork.cf_extents[0].blockCount;
b7266188 6361 }
316670eb
A
6362
6363 if (startBlock + blockCount <= allocLimit) {
6364 /* The journal file does not require relocation */
6365 return 0;
6366 }
6367
6368 error = hfs_relocate_journal_file(hfsmp, blockCount * hfsmp->blockSize, HFS_RESIZE_TRUNCATE, context);
6369 if (error == 0) {
6370 hfsmp->hfs_resize_blocksmoved += blockCount;
6371 hfs_truncatefs_progress(hfsmp);
6372 printf ("hfs_reclaim_journal_file: Relocated %u blocks from journal on \"%s\"\n",
6373 blockCount, hfsmp->vcbVN);
6374 }
6375
2d21ac55
A
6376 return error;
6377}
6378
6379
6380/*
6381 * Move the journal info block to a new location. We have to make sure the
6382 * new copy of the journal info block gets to the media first, then change
6383 * the field in the volume header and the catalog record.
6384 */
6385static int
6d2010ae 6386hfs_reclaim_journal_info_block(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context)
2d21ac55
A
6387{
6388 int error;
6d2010ae 6389 int journal_err;
2d21ac55 6390 int lockflags;
b7266188 6391 u_int32_t oldBlock;
2d21ac55
A
6392 u_int32_t newBlock;
6393 u_int32_t blockCount;
6394 struct cat_desc jib_desc;
6395 struct cat_attr jib_attr;
6396 struct cat_fork jib_fork;
6397 buf_t old_bp, new_bp;
6d2010ae
A
6398
6399 if (hfsmp->vcbJinfoBlock <= allocLimit) {
6400 /* The journal info block does not require relocation */
6401 return 0;
6402 }
2d21ac55
A
6403
6404 error = hfs_start_transaction(hfsmp);
6405 if (error) {
6406 printf("hfs_reclaim_journal_info_block: hfs_start_transaction returned %d\n", error);
6407 return error;
6408 }
6409 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
6410
0b4c1975
A
6411 error = BlockAllocate(hfsmp, 1, 1, 1,
6412 HFS_ALLOC_METAZONE | HFS_ALLOC_FORCECONTIG | HFS_ALLOC_SKIPFREEBLKS,
6413 &newBlock, &blockCount);
2d21ac55
A
6414 if (error) {
6415 printf("hfs_reclaim_journal_info_block: BlockAllocate returned %d\n", error);
6416 goto fail;
6417 }
6418 if (blockCount != 1) {
6419 printf("hfs_reclaim_journal_info_block: blockCount != 1 (%u)\n", blockCount);
6420 goto free_fail;
6421 }
0b4c1975 6422 error = BlockDeallocate(hfsmp, hfsmp->vcbJinfoBlock, 1, HFS_ALLOC_SKIPFREEBLKS);
2d21ac55
A
6423 if (error) {
6424 printf("hfs_reclaim_journal_info_block: BlockDeallocate returned %d\n", error);
6425 goto free_fail;
6426 }
6427
6428 /* Copy the old journal info block content to the new location */
6429 error = buf_meta_bread(hfsmp->hfs_devvp,
593a1d5f 6430 hfsmp->vcbJinfoBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size),
2d21ac55
A
6431 hfsmp->blockSize, vfs_context_ucred(context), &old_bp);
6432 if (error) {
6433 printf("hfs_reclaim_journal_info_block: failed to read JIB (%d)\n", error);
6d2010ae
A
6434 if (old_bp) {
6435 buf_brelse(old_bp);
6436 }
2d21ac55
A
6437 goto free_fail;
6438 }
6439 new_bp = buf_getblk(hfsmp->hfs_devvp,
593a1d5f 6440 newBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size),
2d21ac55
A
6441 hfsmp->blockSize, 0, 0, BLK_META);
6442 bcopy((char*)buf_dataptr(old_bp), (char*)buf_dataptr(new_bp), hfsmp->blockSize);
6443 buf_brelse(old_bp);
6444 if (journal_uses_fua(hfsmp->jnl))
6445 buf_markfua(new_bp);
6446 error = buf_bwrite(new_bp);
6447 if (error) {
6448 printf("hfs_reclaim_journal_info_block: failed to write new JIB (%d)\n", error);
6449 goto free_fail;
6450 }
6451 if (!journal_uses_fua(hfsmp->jnl)) {
6452 error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
6453 if (error) {
6454 printf("hfs_reclaim_journal_info_block: DKIOCSYNCHRONIZECACHE failed (%d)\n", error);
6455 /* Don't fail the operation. */
6456 }
6457 }
6458
6459 /* Update the catalog record for .journal_info_block */
6460 error = cat_idlookup(hfsmp, hfsmp->hfs_jnlinfoblkid, 1, &jib_desc, &jib_attr, &jib_fork);
6461 if (error) {
316670eb 6462 printf("hfs_reclaim_journal_info_block: cat_idlookup returned %d\n", error);
2d21ac55
A
6463 goto fail;
6464 }
b7266188 6465 oldBlock = jib_fork.cf_extents[0].startBlock;
2d21ac55
A
6466 jib_fork.cf_size = hfsmp->blockSize;
6467 jib_fork.cf_extents[0].startBlock = newBlock;
6468 jib_fork.cf_extents[0].blockCount = 1;
6469 jib_fork.cf_blocks = 1;
6470 error = cat_update(hfsmp, &jib_desc, &jib_attr, &jib_fork, NULL);
4a3eedf9 6471 cat_releasedesc(&jib_desc); /* all done with cat descriptor */
2d21ac55
A
6472 if (error) {
6473 printf("hfs_reclaim_journal_info_block: cat_update returned %d\n", error);
6474 goto fail;
6475 }
6476
6477 /* Update the pointer to the journal info block in the volume header. */
6478 hfsmp->vcbJinfoBlock = newBlock;
6479 error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
6480 if (error) {
6481 printf("hfs_reclaim_journal_info_block: hfs_flushvolumeheader returned %d\n", error);
6482 goto fail;
6483 }
6d2010ae
A
6484 hfs_systemfile_unlock(hfsmp, lockflags);
6485 error = hfs_end_transaction(hfsmp);
6486 if (error) {
6487 printf("hfs_reclaim_journal_info_block: hfs_end_transaction returned %d\n", error);
6488 }
6489 error = hfs_journal_flush(hfsmp, FALSE);
6490 if (error) {
6491 printf("hfs_reclaim_journal_info_block: journal_flush returned %d\n", error);
6492 }
6493
6494 /* Account for the block relocated and print progress */
6495 hfsmp->hfs_resize_blocksmoved += 1;
6496 hfs_truncatefs_progress(hfsmp);
6497 if (!error) {
6498 printf ("hfs_reclaim_journal_info: Relocated 1 block from journal info on \"%s\"\n",
6499 hfsmp->vcbVN);
6500 if (hfs_resize_debug) {
6501 printf ("hfs_reclaim_journal_info_block: Successfully relocated journal info block from (%u,%u) to (%u,%u)\n", oldBlock, blockCount, newBlock, blockCount);
6502 }
6503 }
6504 return error;
6505
6506free_fail:
6507 journal_err = BlockDeallocate(hfsmp, newBlock, blockCount, HFS_ALLOC_SKIPFREEBLKS);
6508 if (journal_err) {
6509 printf("hfs_reclaim_journal_info_block: BlockDeallocate returned %d\n", error);
6510 hfs_mark_volume_inconsistent(hfsmp);
6511 }
6512
6513fail:
6514 hfs_systemfile_unlock(hfsmp, lockflags);
6515 (void) hfs_end_transaction(hfsmp);
6516 if (hfs_resize_debug) {
6517 printf ("hfs_reclaim_journal_info_block: Error relocating journal info block (error=%d)\n", error);
6518 }
6519 return error;
6520}
6521
6522
316670eb
A
6523static u_int64_t
6524calculate_journal_size(struct hfsmount *hfsmp, u_int32_t sector_size, u_int64_t sector_count)
6525{
6526 u_int64_t journal_size;
6527 u_int32_t journal_scale;
6528
6529#define DEFAULT_JOURNAL_SIZE (8*1024*1024)
6530#define MAX_JOURNAL_SIZE (512*1024*1024)
6531
6532 /* Calculate the journal size for this volume. We want
6533 * at least 8 MB of journal for each 100 GB of disk space.
6534 * We cap the size at 512 MB, unless the allocation block
6535 * size is larger, in which case, we use one allocation
6536 * block.
6537 */
6538 journal_scale = (sector_size * sector_count) / ((u_int64_t)100 * 1024 * 1024 * 1024);
6539 journal_size = DEFAULT_JOURNAL_SIZE * (journal_scale + 1);
6540 if (journal_size > MAX_JOURNAL_SIZE) {
6541 journal_size = MAX_JOURNAL_SIZE;
6542 }
6543 if (journal_size < hfsmp->blockSize) {
6544 journal_size = hfsmp->blockSize;
6545 }
6546 return journal_size;
6547}
6548
6549
6550/*
6551 * Calculate the expected journal size based on current partition size.
6552 * If the size of the current journal is less than the calculated size,
6553 * force journal relocation with the new journal size.
6554 */
6555static int
6556hfs_extend_journal(struct hfsmount *hfsmp, u_int32_t sector_size, u_int64_t sector_count, vfs_context_t context)
6557{
6558 int error = 0;
6559 u_int64_t calc_journal_size;
6560
6561 if (hfsmp->jvp != hfsmp->hfs_devvp) {
6562 if (hfs_resize_debug) {
6563 printf("hfs_extend_journal: not resizing the journal because it is on an external device.\n");
6564 }
6565 return 0;
6566 }
6567
6568 calc_journal_size = calculate_journal_size(hfsmp, sector_size, sector_count);
6569 if (calc_journal_size <= hfsmp->jnl_size) {
6570 /* The journal size requires no modification */
6571 goto out;
6572 }
6573
6574 if (hfs_resize_debug) {
6575 printf ("hfs_extend_journal: journal old=%u, new=%qd\n", hfsmp->jnl_size, calc_journal_size);
6576 }
6577
6578 /* Extend the journal to the new calculated size */
6579 error = hfs_relocate_journal_file(hfsmp, calc_journal_size, HFS_RESIZE_EXTEND, context);
6580 if (error == 0) {
6581 printf ("hfs_extend_journal: Extended journal size to %u bytes on \"%s\"\n",
6582 hfsmp->jnl_size, hfsmp->vcbVN);
6583 }
6584out:
6585 return error;
6586}
6587
6588
6d2010ae
A
6589/*
6590 * This function traverses through all extended attribute records for a given
6591 * fileID, and calls function that reclaims data blocks that exist in the
6592 * area of the disk being reclaimed which in turn is responsible for allocating
6593 * new space, copying extent data, deallocating new space, and if required,
6594 * splitting the extent.
6595 *
6596 * Note: The caller has already acquired the cnode lock on the file. Therefore
6597 * we are assured that no other thread would be creating/deleting/modifying
6598 * extended attributes for this file.
6599 *
6600 * Side Effects:
6601 * hfsmp->hfs_resize_blocksmoved is incremented by the number of allocation
6602 * blocks that were relocated.
6603 *
6604 * Returns:
6605 * 0 on success, non-zero on failure.
6606 */
6607static int
6608hfs_reclaim_xattr(struct hfsmount *hfsmp, struct vnode *vp, u_int32_t fileID, u_int32_t allocLimit, vfs_context_t context)
6609{
6610 int error = 0;
6611 struct hfs_reclaim_extent_info *extent_info;
6612 int i;
6613 HFSPlusAttrKey *key;
6614 int *lockflags;
6615
6616 if (hfs_resize_debug) {
6617 printf("hfs_reclaim_xattr: === Start reclaiming xattr for id=%u ===\n", fileID);
6618 }
6619
6620 MALLOC(extent_info, struct hfs_reclaim_extent_info *,
6621 sizeof(struct hfs_reclaim_extent_info), M_TEMP, M_WAITOK);
6622 if (extent_info == NULL) {
6623 return ENOMEM;
6624 }
6625 bzero(extent_info, sizeof(struct hfs_reclaim_extent_info));
6626 extent_info->vp = vp;
6627 extent_info->fileID = fileID;
6628 extent_info->is_xattr = true;
6629 extent_info->is_sysfile = vnode_issystem(vp);
6630 extent_info->fcb = VTOF(hfsmp->hfs_attribute_vp);
6631 lockflags = &(extent_info->lockflags);
6632 *lockflags = SFL_ATTRIBUTE | SFL_BITMAP;
6633
6634 /* Initialize iterator from the extent_info structure */
6635 MALLOC(extent_info->iterator, struct BTreeIterator *,
6636 sizeof(struct BTreeIterator), M_TEMP, M_WAITOK);
6637 if (extent_info->iterator == NULL) {
6638 error = ENOMEM;
6639 goto out;
6640 }
6641 bzero(extent_info->iterator, sizeof(struct BTreeIterator));
6642
6643 /* Build attribute key */
6644 key = (HFSPlusAttrKey *)&(extent_info->iterator->key);
6645 error = hfs_buildattrkey(fileID, NULL, key);
6646 if (error) {
6647 goto out;
6648 }
6649
6650 /* Initialize btdata from extent_info structure. Note that the
6651 * buffer pointer actually points to the xattr record from the
6652 * extent_info structure itself.
6653 */
6654 extent_info->btdata.bufferAddress = &(extent_info->record.xattr);
6655 extent_info->btdata.itemSize = sizeof(HFSPlusAttrRecord);
6656 extent_info->btdata.itemCount = 1;
6657
6658 /*
6659 * Sync all extent-based attribute data to the disk.
6660 *
6661 * All extent-based attribute data I/O is performed via cluster
6662 * I/O using a virtual file that spans across entire file system
6663 * space.
6664 */
6665 hfs_lock_truncate(VTOC(hfsmp->hfs_attrdata_vp), HFS_EXCLUSIVE_LOCK);
6666 (void)cluster_push(hfsmp->hfs_attrdata_vp, 0);
6667 error = vnode_waitforwrites(hfsmp->hfs_attrdata_vp, 0, 0, 0, "hfs_reclaim_xattr");
6668 hfs_unlock_truncate(VTOC(hfsmp->hfs_attrdata_vp), 0);
6669 if (error) {
6670 goto out;
6671 }
6672
6673 /* Search for extended attribute for current file. This
6674 * will place the iterator before the first matching record.
6675 */
6676 *lockflags = hfs_systemfile_lock(hfsmp, *lockflags, HFS_EXCLUSIVE_LOCK);
6677 error = BTSearchRecord(extent_info->fcb, extent_info->iterator,
6678 &(extent_info->btdata), &(extent_info->recordlen),
6679 extent_info->iterator);
6680 hfs_systemfile_unlock(hfsmp, *lockflags);
6681 if (error) {
6682 if (error != btNotFound) {
6683 goto out;
6684 }
6685 /* btNotFound is expected here, so just mask it */
6686 error = 0;
6687 }
6688
6689 while (1) {
6690 /* Iterate to the next record */
6691 *lockflags = hfs_systemfile_lock(hfsmp, *lockflags, HFS_EXCLUSIVE_LOCK);
6692 error = BTIterateRecord(extent_info->fcb, kBTreeNextRecord,
6693 extent_info->iterator, &(extent_info->btdata),
6694 &(extent_info->recordlen));
6695 hfs_systemfile_unlock(hfsmp, *lockflags);
6696
6697 /* Stop the iteration if we encounter end of btree or xattr with different fileID */
6698 if (error || key->fileID != fileID) {
6699 if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) {
6700 error = 0;
6701 }
6702 break;
6703 }
6704
6705 /* We only care about extent-based EAs */
6706 if ((extent_info->record.xattr.recordType != kHFSPlusAttrForkData) &&
6707 (extent_info->record.xattr.recordType != kHFSPlusAttrExtents)) {
6708 continue;
6709 }
6710
6711 if (extent_info->record.xattr.recordType == kHFSPlusAttrForkData) {
6712 extent_info->overflow_count = 0;
6713 extent_info->extents = extent_info->record.xattr.forkData.theFork.extents;
6714 } else if (extent_info->record.xattr.recordType == kHFSPlusAttrExtents) {
6715 extent_info->overflow_count++;
6716 extent_info->extents = extent_info->record.xattr.overflowExtents.extents;
6717 }
6718
6719 extent_info->recStartBlock = key->startBlock;
6720 for (i = 0; i < kHFSPlusExtentDensity; i++) {
6721 if (extent_info->extents[i].blockCount == 0) {
6722 break;
6723 }
6724 extent_info->extent_index = i;
6725 error = hfs_reclaim_extent(hfsmp, allocLimit, extent_info, context);
6726 if (error) {
6727 printf ("hfs_reclaim_xattr: fileID=%u hfs_reclaim_extent error=%d\n", fileID, error);
6728 goto out;
6729 }
6730 }
6731 }
6732
6733out:
6734 /* If any blocks were relocated, account them and report progress */
6735 if (extent_info->blocks_relocated) {
6736 hfsmp->hfs_resize_blocksmoved += extent_info->blocks_relocated;
6737 hfs_truncatefs_progress(hfsmp);
6738 }
6739 if (extent_info->iterator) {
6740 FREE(extent_info->iterator, M_TEMP);
6741 }
6742 if (extent_info) {
6743 FREE(extent_info, M_TEMP);
6744 }
6745 if (hfs_resize_debug) {
6746 printf("hfs_reclaim_xattr: === Finished relocating xattr for fileid=%u (error=%d) ===\n", fileID, error);
6747 }
6748 return error;
6749}
6750
6751/*
6752 * Reclaim any extent-based extended attributes allocation blocks from
6753 * the area of the disk that is being truncated.
6754 *
6755 * The function traverses the attribute btree to find out the fileIDs
6756 * of the extended attributes that need to be relocated. For every
6757 * file whose large EA requires relocation, it looks up the cnode and
6758 * calls hfs_reclaim_xattr() to do all the work for allocating
6759 * new space, copying data, deallocating old space, and if required,
6760 * splitting the extents.
6761 *
6762 * Inputs:
6763 * allocLimit - starting block of the area being reclaimed
6764 *
6765 * Returns:
6766 * returns 0 on success, non-zero on failure.
6767 */
6768static int
6769hfs_reclaim_xattrspace(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context)
6770{
6771 int error = 0;
6772 FCB *fcb;
6773 struct BTreeIterator *iterator = NULL;
6774 struct FSBufferDescriptor btdata;
6775 HFSPlusAttrKey *key;
6776 HFSPlusAttrRecord rec;
6777 int lockflags = 0;
6778 cnid_t prev_fileid = 0;
6779 struct vnode *vp;
6780 int need_relocate;
6781 int btree_operation;
6782 u_int32_t files_moved = 0;
6783 u_int32_t prev_blocksmoved;
6784 int i;
6785
6786 fcb = VTOF(hfsmp->hfs_attribute_vp);
6787 /* Store the value to print total blocks moved by this function in end */
6788 prev_blocksmoved = hfsmp->hfs_resize_blocksmoved;
6789
6790 if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) {
6791 return ENOMEM;
6792 }
6793 bzero(iterator, sizeof(*iterator));
6794 key = (HFSPlusAttrKey *)&iterator->key;
6795 btdata.bufferAddress = &rec;
6796 btdata.itemSize = sizeof(rec);
6797 btdata.itemCount = 1;
6798
6799 need_relocate = false;
6800 btree_operation = kBTreeFirstRecord;
6801 /* Traverse the attribute btree to find extent-based EAs to reclaim */
6802 while (1) {
6803 lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE, HFS_SHARED_LOCK);
6804 error = BTIterateRecord(fcb, btree_operation, iterator, &btdata, NULL);
6805 hfs_systemfile_unlock(hfsmp, lockflags);
6806 if (error) {
6807 if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) {
6808 error = 0;
6809 }
6810 break;
6811 }
6812 btree_operation = kBTreeNextRecord;
6813
6814 /* If the extents of current fileID were already relocated, skip it */
6815 if (prev_fileid == key->fileID) {
6816 continue;
6817 }
6818
6819 /* Check if any of the extents in the current record need to be relocated */
6820 need_relocate = false;
6821 switch(rec.recordType) {
6822 case kHFSPlusAttrForkData:
6823 for (i = 0; i < kHFSPlusExtentDensity; i++) {
6824 if (rec.forkData.theFork.extents[i].blockCount == 0) {
6825 break;
6826 }
6827 if ((rec.forkData.theFork.extents[i].startBlock +
6828 rec.forkData.theFork.extents[i].blockCount) > allocLimit) {
6829 need_relocate = true;
6830 break;
6831 }
6832 }
6833 break;
6834
6835 case kHFSPlusAttrExtents:
6836 for (i = 0; i < kHFSPlusExtentDensity; i++) {
6837 if (rec.overflowExtents.extents[i].blockCount == 0) {
6838 break;
6839 }
6840 if ((rec.overflowExtents.extents[i].startBlock +
6841 rec.overflowExtents.extents[i].blockCount) > allocLimit) {
6842 need_relocate = true;
6843 break;
6844 }
6845 }
6846 break;
6847 };
6848
6849 /* Continue iterating to next attribute record */
6850 if (need_relocate == false) {
6851 continue;
6852 }
6853
6854 /* Look up the vnode for corresponding file. The cnode
6855 * will be locked which will ensure that no one modifies
6856 * the xattrs when we are relocating them.
6857 *
6858 * We want to allow open-unlinked files to be moved,
6859 * so provide allow_deleted == 1 for hfs_vget().
6860 */
6861 if (hfs_vget(hfsmp, key->fileID, &vp, 0, 1) != 0) {
6862 continue;
6863 }
6864
6865 error = hfs_reclaim_xattr(hfsmp, vp, key->fileID, allocLimit, context);
6866 hfs_unlock(VTOC(vp));
6867 vnode_put(vp);
6868 if (error) {
6869 printf ("hfs_reclaim_xattrspace: Error relocating xattrs for fileid=%u (error=%d)\n", key->fileID, error);
6870 break;
6871 }
6872 prev_fileid = key->fileID;
6873 files_moved++;
6874 }
6875
6876 if (files_moved) {
6877 printf("hfs_reclaim_xattrspace: Relocated %u xattr blocks from %u files on \"%s\"\n",
6878 (hfsmp->hfs_resize_blocksmoved - prev_blocksmoved),
6879 files_moved, hfsmp->vcbVN);
2d21ac55 6880 }
6d2010ae
A
6881
6882 kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator));
6883 return error;
6884}
6885
6886/*
6887 * Reclaim blocks from regular files.
6888 *
6889 * This function iterates over all the record in catalog btree looking
6890 * for files with extents that overlap into the space we're trying to
6891 * free up. If a file extent requires relocation, it looks up the vnode
6892 * and calls function to relocate the data.
6893 *
6894 * Returns:
6895 * Zero on success, non-zero on failure.
6896 */
6897static int
6898hfs_reclaim_filespace(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context)
6899{
6900 int error;
6901 FCB *fcb;
6902 struct BTreeIterator *iterator = NULL;
6903 struct FSBufferDescriptor btdata;
6904 int btree_operation;
6905 int lockflags;
6906 struct HFSPlusCatalogFile filerec;
6907 struct vnode *vp;
6908 struct vnode *rvp;
6909 struct filefork *datafork;
6910 u_int32_t files_moved = 0;
6911 u_int32_t prev_blocksmoved;
6912
6913 fcb = VTOF(hfsmp->hfs_catalog_vp);
6914 /* Store the value to print total blocks moved by this function at the end */
6915 prev_blocksmoved = hfsmp->hfs_resize_blocksmoved;
6916
6917 if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) {
316670eb
A
6918 error = ENOMEM;
6919 goto reclaim_filespace_done;
6920 }
6921
6922#if CONFIG_PROTECT
6923 int keys_generated = 0;
6924 /*
6925 * For content-protected filesystems, we may need to relocate files that
6926 * are encrypted. If they use the new-style offset-based IVs, then
6927 * we can move them regardless of the lock state. We create a temporary
6928 * key here that we use to read/write the data, then we discard it at the
6929 * end of the function.
6930 */
6931 if (cp_fs_protected (hfsmp->hfs_mp)) {
6932 error = cp_entry_gentempkeys(&hfsmp->hfs_resize_cpentry, hfsmp);
6933 if (error) {
6934 printf("hfs_reclaimspace: Error generating temporary keys for resize (%d)\n", error);
6935 goto reclaim_filespace_done;
6936 }
2d21ac55 6937 }
316670eb
A
6938#endif
6939
6d2010ae
A
6940 bzero(iterator, sizeof(*iterator));
6941
6942 btdata.bufferAddress = &filerec;
6943 btdata.itemSize = sizeof(filerec);
6944 btdata.itemCount = 1;
6945
6946 btree_operation = kBTreeFirstRecord;
6947 while (1) {
6948 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
6949 error = BTIterateRecord(fcb, btree_operation, iterator, &btdata, NULL);
6950 hfs_systemfile_unlock(hfsmp, lockflags);
6951 if (error) {
6952 if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) {
6953 error = 0;
6954 }
6955 break;
6956 }
6957 btree_operation = kBTreeNextRecord;
6958
6959 if (filerec.recordType != kHFSPlusFileRecord) {
6960 continue;
6961 }
6962
6963 /* Check if any of the extents require relocation */
6964 if (hfs_file_extent_overlaps(hfsmp, allocLimit, &filerec) == false) {
6965 continue;
6966 }
b7266188 6967
6d2010ae
A
6968 /* We want to allow open-unlinked files to be moved, so allow_deleted == 1 */
6969 if (hfs_vget(hfsmp, filerec.fileID, &vp, 0, 1) != 0) {
316670eb
A
6970 if (hfs_resize_debug) {
6971 printf("hfs_reclaim_filespace: hfs_vget(%u) failed.\n", filerec.fileID);
6972 }
6d2010ae
A
6973 continue;
6974 }
6975
6976 /* If data fork exists or item is a directory hard link, relocate blocks */
6977 datafork = VTOF(vp);
6978 if ((datafork && datafork->ff_blocks > 0) || vnode_isdir(vp)) {
6979 error = hfs_reclaim_file(hfsmp, vp, filerec.fileID,
6980 kHFSDataForkType, allocLimit, context);
6981 if (error) {
6982 printf ("hfs_reclaimspace: Error reclaiming datafork blocks of fileid=%u (error=%d)\n", filerec.fileID, error);
6983 hfs_unlock(VTOC(vp));
6984 vnode_put(vp);
6985 break;
6986 }
6987 }
6988
6989 /* If resource fork exists or item is a directory hard link, relocate blocks */
6990 if (((VTOC(vp)->c_blocks - (datafork ? datafork->ff_blocks : 0)) > 0) || vnode_isdir(vp)) {
6991 if (vnode_isdir(vp)) {
6992 /* Resource fork vnode lookup is invalid for directory hard link.
6993 * So we fake data fork vnode as resource fork vnode.
6994 */
6995 rvp = vp;
6996 } else {
6997 error = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE, FALSE);
6998 if (error) {
6999 printf ("hfs_reclaimspace: Error looking up rvp for fileid=%u (error=%d)\n", filerec.fileID, error);
7000 hfs_unlock(VTOC(vp));
7001 vnode_put(vp);
7002 break;
7003 }
7004 VTOC(rvp)->c_flag |= C_NEED_RVNODE_PUT;
7005 }
7006
7007 error = hfs_reclaim_file(hfsmp, rvp, filerec.fileID,
7008 kHFSResourceForkType, allocLimit, context);
7009 if (error) {
7010 printf ("hfs_reclaimspace: Error reclaiming rsrcfork blocks of fileid=%u (error=%d)\n", filerec.fileID, error);
7011 hfs_unlock(VTOC(vp));
7012 vnode_put(vp);
7013 break;
7014 }
7015 }
7016
7017 /* The file forks were relocated successfully, now drop the
7018 * cnode lock and vnode reference, and continue iterating to
7019 * next catalog record.
7020 */
7021 hfs_unlock(VTOC(vp));
7022 vnode_put(vp);
7023 files_moved++;
b7266188 7024 }
2d21ac55 7025
6d2010ae
A
7026 if (files_moved) {
7027 printf("hfs_reclaim_filespace: Relocated %u blocks from %u files on \"%s\"\n",
7028 (hfsmp->hfs_resize_blocksmoved - prev_blocksmoved),
7029 files_moved, hfsmp->vcbVN);
b7266188 7030 }
6d2010ae 7031
316670eb
A
7032reclaim_filespace_done:
7033 if (iterator) {
7034 kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator));
7035 }
7036
7037#if CONFIG_PROTECT
7038 if (keys_generated) {
7039 cp_entry_destroy(&hfsmp->hfs_resize_cpentry);
7040 }
7041#endif
2d21ac55
A
7042 return error;
7043}
7044
2d21ac55
A
7045/*
7046 * Reclaim space at the end of a file system.
b7266188
A
7047 *
7048 * Inputs -
6d2010ae 7049 * allocLimit - start block of the space being reclaimed
b7266188 7050 * reclaimblks - number of allocation blocks to reclaim
2d21ac55
A
7051 */
7052static int
6d2010ae 7053hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t allocLimit, u_int32_t reclaimblks, vfs_context_t context)
2d21ac55 7054{
6d2010ae
A
7055 int error = 0;
7056
7057 /*
7058 * Preflight the bitmap to find out total number of blocks that need
7059 * relocation.
7060 *
7061 * Note: Since allocLimit is set to the location of new alternate volume
7062 * header, the check below does not account for blocks allocated for old
7063 * alternate volume header.
7064 */
7065 error = hfs_count_allocated(hfsmp, allocLimit, reclaimblks, &(hfsmp->hfs_resize_totalblocks));
7066 if (error) {
7067 printf ("hfs_reclaimspace: Unable to determine total blocks to reclaim error=%d\n", error);
7068 return error;
7069 }
7070 if (hfs_resize_debug) {
7071 printf ("hfs_reclaimspace: Total number of blocks to reclaim = %u\n", hfsmp->hfs_resize_totalblocks);
7072 }
2d21ac55 7073
13f56ec4
A
7074 /* Just to be safe, sync the content of the journal to the disk before we proceed */
7075 hfs_journal_flush(hfsmp, TRUE);
7076
7077 /* First, relocate journal file blocks if they're in the way.
7078 * Doing this first will make sure that journal relocate code
7079 * gets access to contiguous blocks on disk first. The journal
7080 * file has to be contiguous on the disk, otherwise resize will
7081 * fail.
7082 */
7083 error = hfs_reclaim_journal_file(hfsmp, allocLimit, context);
2d21ac55 7084 if (error) {
13f56ec4
A
7085 printf("hfs_reclaimspace: hfs_reclaim_journal_file failed (%d)\n", error);
7086 return error;
7087 }
7088
7089 /* Relocate journal info block blocks if they're in the way. */
7090 error = hfs_reclaim_journal_info_block(hfsmp, allocLimit, context);
7091 if (error) {
7092 printf("hfs_reclaimspace: hfs_reclaim_journal_info_block failed (%d)\n", error);
2d21ac55
A
7093 return error;
7094 }
b7266188 7095
13f56ec4
A
7096 /* Relocate extents of the Extents B-tree if they're in the way.
7097 * Relocating extents btree before other btrees is important as
7098 * this will provide access to largest contiguous block range on
7099 * the disk for relocating extents btree. Note that extents btree
7100 * can only have maximum of 8 extents.
7101 */
6d2010ae
A
7102 error = hfs_reclaim_file(hfsmp, hfsmp->hfs_extents_vp, kHFSExtentsFileID,
7103 kHFSDataForkType, allocLimit, context);
2d21ac55
A
7104 if (error) {
7105 printf("hfs_reclaimspace: reclaim extents b-tree returned %d\n", error);
7106 return error;
7107 }
b7266188 7108
13f56ec4
A
7109 /* Relocate extents of the Allocation file if they're in the way. */
7110 error = hfs_reclaim_file(hfsmp, hfsmp->hfs_allocation_vp, kHFSAllocationFileID,
7111 kHFSDataForkType, allocLimit, context);
7112 if (error) {
7113 printf("hfs_reclaimspace: reclaim allocation file returned %d\n", error);
7114 return error;
7115 }
7116
2d21ac55 7117 /* Relocate extents of the Catalog B-tree if they're in the way. */
6d2010ae
A
7118 error = hfs_reclaim_file(hfsmp, hfsmp->hfs_catalog_vp, kHFSCatalogFileID,
7119 kHFSDataForkType, allocLimit, context);
2d21ac55
A
7120 if (error) {
7121 printf("hfs_reclaimspace: reclaim catalog b-tree returned %d\n", error);
7122 return error;
7123 }
b7266188 7124
2d21ac55 7125 /* Relocate extents of the Attributes B-tree if they're in the way. */
6d2010ae
A
7126 error = hfs_reclaim_file(hfsmp, hfsmp->hfs_attribute_vp, kHFSAttributesFileID,
7127 kHFSDataForkType, allocLimit, context);
2d21ac55
A
7128 if (error) {
7129 printf("hfs_reclaimspace: reclaim attribute b-tree returned %d\n", error);
7130 return error;
7131 }
b7266188 7132
2d21ac55 7133 /* Relocate extents of the Startup File if there is one and they're in the way. */
6d2010ae
A
7134 error = hfs_reclaim_file(hfsmp, hfsmp->hfs_startup_vp, kHFSStartupFileID,
7135 kHFSDataForkType, allocLimit, context);
2d21ac55
A
7136 if (error) {
7137 printf("hfs_reclaimspace: reclaim startup file returned %d\n", error);
7138 return error;
7139 }
7140
7141 /*
7142 * We need to make sure the alternate volume header gets flushed if we moved
7143 * any extents in the volume header. But we need to do that before
7144 * shrinking the size of the volume, or else the journal code will panic
7145 * with an invalid (too large) block number.
7146 *
6d2010ae 7147 * Note that blks_moved will be set if ANY extent was moved, even
2d21ac55
A
7148 * if it was just an overflow extent. In this case, the journal_flush isn't
7149 * strictly required, but shouldn't hurt.
7150 */
6d2010ae 7151 if (hfsmp->hfs_resize_blocksmoved) {
13f56ec4 7152 hfs_journal_flush(hfsmp, TRUE);
0c530ab8
A
7153 }
7154
6d2010ae
A
7155 /* Reclaim extents from catalog file records */
7156 error = hfs_reclaim_filespace(hfsmp, allocLimit, context);
7157 if (error) {
7158 printf ("hfs_reclaimspace: hfs_reclaim_filespace returned error=%d\n", error);
7159 return error;
91447636 7160 }
0c530ab8 7161
6d2010ae
A
7162 /* Reclaim extents from extent-based extended attributes, if any */
7163 error = hfs_reclaim_xattrspace(hfsmp, allocLimit, context);
7164 if (error) {
7165 printf ("hfs_reclaimspace: hfs_reclaim_xattrspace returned error=%d\n", error);
7166 return error;
0c530ab8 7167 }
91447636 7168
6d2010ae 7169 return error;
91447636
A
7170}
7171
7172
0c530ab8 7173/*
6d2010ae 7174 * Check if there are any extents (including overflow extents) that overlap
b7266188
A
7175 * into the disk space that is being reclaimed.
7176 *
7177 * Output -
6d2010ae
A
7178 * true - One of the extents need to be relocated
7179 * false - No overflow extents need to be relocated, or there was an error
0c530ab8
A
7180 */
7181static int
6d2010ae 7182hfs_file_extent_overlaps(struct hfsmount *hfsmp, u_int32_t allocLimit, struct HFSPlusCatalogFile *filerec)
0c530ab8
A
7183{
7184 struct BTreeIterator * iterator = NULL;
7185 struct FSBufferDescriptor btdata;
7186 HFSPlusExtentRecord extrec;
7187 HFSPlusExtentKey *extkeyptr;
7188 FCB *fcb;
6d2010ae
A
7189 int overlapped = false;
7190 int i, j;
0c530ab8 7191 int error;
6d2010ae
A
7192 int lockflags = 0;
7193 u_int32_t endblock;
7194
7195 /* Check if data fork overlaps the target space */
7196 for (i = 0; i < kHFSPlusExtentDensity; ++i) {
7197 if (filerec->dataFork.extents[i].blockCount == 0) {
7198 break;
7199 }
7200 endblock = filerec->dataFork.extents[i].startBlock +
7201 filerec->dataFork.extents[i].blockCount;
7202 if (endblock > allocLimit) {
7203 overlapped = true;
7204 goto out;
7205 }
7206 }
7207
7208 /* Check if resource fork overlaps the target space */
7209 for (j = 0; j < kHFSPlusExtentDensity; ++j) {
7210 if (filerec->resourceFork.extents[j].blockCount == 0) {
7211 break;
7212 }
7213 endblock = filerec->resourceFork.extents[j].startBlock +
7214 filerec->resourceFork.extents[j].blockCount;
7215 if (endblock > allocLimit) {
7216 overlapped = true;
7217 goto out;
7218 }
7219 }
7220
7221 /* Return back if there are no overflow extents for this file */
7222 if ((i < kHFSPlusExtentDensity) && (j < kHFSPlusExtentDensity)) {
7223 goto out;
7224 }
0c530ab8 7225
0c530ab8 7226 if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) {
b7266188 7227 return 0;
0c530ab8
A
7228 }
7229 bzero(iterator, sizeof(*iterator));
7230 extkeyptr = (HFSPlusExtentKey *)&iterator->key;
7231 extkeyptr->keyLength = kHFSPlusExtentKeyMaximumLength;
b7266188 7232 extkeyptr->forkType = 0;
6d2010ae 7233 extkeyptr->fileID = filerec->fileID;
b7266188 7234 extkeyptr->startBlock = 0;
0c530ab8
A
7235
7236 btdata.bufferAddress = &extrec;
7237 btdata.itemSize = sizeof(extrec);
7238 btdata.itemCount = 1;
7239
7240 fcb = VTOF(hfsmp->hfs_extents_vp);
7241
6d2010ae
A
7242 lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_SHARED_LOCK);
7243
b7266188
A
7244 /* This will position the iterator just before the first overflow
7245 * extent record for given fileID. It will always return btNotFound,
7246 * so we special case the error code.
7247 */
0c530ab8 7248 error = BTSearchRecord(fcb, iterator, &btdata, NULL, iterator);
b7266188
A
7249 if (error && (error != btNotFound)) {
7250 goto out;
7251 }
7252
7253 /* BTIterateRecord() might return error if the btree is empty, and
7254 * therefore we return that the extent does not overflow to the caller
7255 */
7256 error = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL);
0c530ab8
A
7257 while (error == 0) {
7258 /* Stop when we encounter a different file. */
6d2010ae 7259 if (extkeyptr->fileID != filerec->fileID) {
0c530ab8
A
7260 break;
7261 }
b7266188 7262 /* Check if any of the forks exist in the target space. */
0c530ab8
A
7263 for (i = 0; i < kHFSPlusExtentDensity; ++i) {
7264 if (extrec[i].blockCount == 0) {
7265 break;
7266 }
6d2010ae
A
7267 endblock = extrec[i].startBlock + extrec[i].blockCount;
7268 if (endblock > allocLimit) {
7269 overlapped = true;
b7266188 7270 goto out;
0c530ab8
A
7271 }
7272 }
7273 /* Look for more records. */
7274 error = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL);
7275 }
7276
b7266188 7277out:
6d2010ae
A
7278 if (lockflags) {
7279 hfs_systemfile_unlock(hfsmp, lockflags);
7280 }
7281 if (iterator) {
7282 kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator));
7283 }
b7266188 7284 return overlapped;
0c530ab8
A
7285}
7286
7287
7288/*
7289 * Calculate the progress of a file system resize operation.
7290 */
7291__private_extern__
7292int
7293hfs_resize_progress(struct hfsmount *hfsmp, u_int32_t *progress)
7294{
7295 if ((hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) == 0) {
7296 return (ENXIO);
7297 }
7298
6d2010ae
A
7299 if (hfsmp->hfs_resize_totalblocks > 0) {
7300 *progress = (u_int32_t)((hfsmp->hfs_resize_blocksmoved * 100ULL) / hfsmp->hfs_resize_totalblocks);
7301 } else {
0c530ab8 7302 *progress = 0;
6d2010ae 7303 }
0c530ab8
A
7304
7305 return (0);
7306}
7307
7308
b0d623f7
A
7309/*
7310 * Creates a UUID from a unique "name" in the HFS UUID Name space.
7311 * See version 3 UUID.
7312 */
7313static void
7314hfs_getvoluuid(struct hfsmount *hfsmp, uuid_t result)
7315{
7316 MD5_CTX md5c;
7317 uint8_t rawUUID[8];
7318
7319 ((uint32_t *)rawUUID)[0] = hfsmp->vcbFndrInfo[6];
7320 ((uint32_t *)rawUUID)[1] = hfsmp->vcbFndrInfo[7];
7321
7322 MD5Init( &md5c );
7323 MD5Update( &md5c, HFS_UUID_NAMESPACE_ID, sizeof( uuid_t ) );
7324 MD5Update( &md5c, rawUUID, sizeof (rawUUID) );
7325 MD5Final( result, &md5c );
7326
7327 result[6] = 0x30 | ( result[6] & 0x0F );
7328 result[8] = 0x80 | ( result[8] & 0x3F );
7329}
7330
91447636
A
7331/*
7332 * Get file system attributes.
7333 */
7334static int
7335hfs_vfs_getattr(struct mount *mp, struct vfs_attr *fsap, __unused vfs_context_t context)
7336{
2d21ac55
A
7337#define HFS_ATTR_CMN_VALIDMASK (ATTR_CMN_VALIDMASK & ~(ATTR_CMN_NAMEDATTRCOUNT | ATTR_CMN_NAMEDATTRLIST))
7338#define HFS_ATTR_FILE_VALIDMASK (ATTR_FILE_VALIDMASK & ~(ATTR_FILE_FILETYPE | ATTR_FILE_FORKCOUNT | ATTR_FILE_FORKLIST))
6d2010ae 7339#define HFS_ATTR_CMN_VOL_VALIDMASK (ATTR_CMN_VALIDMASK & ~(ATTR_CMN_NAMEDATTRCOUNT | ATTR_CMN_NAMEDATTRLIST | ATTR_CMN_ACCTIME))
2d21ac55 7340
91447636
A
7341 ExtendedVCB *vcb = VFSTOVCB(mp);
7342 struct hfsmount *hfsmp = VFSTOHFS(mp);
b0d623f7 7343 u_int32_t freeCNIDs;
91447636 7344
b0d623f7 7345 freeCNIDs = (u_int32_t)0xFFFFFFFF - (u_int32_t)hfsmp->vcbNxtCNID;
91447636 7346
2d21ac55
A
7347 VFSATTR_RETURN(fsap, f_objcount, (u_int64_t)hfsmp->vcbFilCnt + (u_int64_t)hfsmp->vcbDirCnt);
7348 VFSATTR_RETURN(fsap, f_filecount, (u_int64_t)hfsmp->vcbFilCnt);
7349 VFSATTR_RETURN(fsap, f_dircount, (u_int64_t)hfsmp->vcbDirCnt);
7350 VFSATTR_RETURN(fsap, f_maxobjcount, (u_int64_t)0xFFFFFFFF);
cf7d32b8 7351 VFSATTR_RETURN(fsap, f_iosize, (size_t)cluster_max_io_size(mp, 0));
2d21ac55
A
7352 VFSATTR_RETURN(fsap, f_blocks, (u_int64_t)hfsmp->totalBlocks);
7353 VFSATTR_RETURN(fsap, f_bfree, (u_int64_t)hfs_freeblks(hfsmp, 0));
7354 VFSATTR_RETURN(fsap, f_bavail, (u_int64_t)hfs_freeblks(hfsmp, 1));
7355 VFSATTR_RETURN(fsap, f_bsize, (u_int32_t)vcb->blockSize);
91447636
A
7356 /* XXX needs clarification */
7357 VFSATTR_RETURN(fsap, f_bused, hfsmp->totalBlocks - hfs_freeblks(hfsmp, 1));
7358 /* Maximum files is constrained by total blocks. */
2d21ac55
A
7359 VFSATTR_RETURN(fsap, f_files, (u_int64_t)(hfsmp->totalBlocks - 2));
7360 VFSATTR_RETURN(fsap, f_ffree, MIN((u_int64_t)freeCNIDs, (u_int64_t)hfs_freeblks(hfsmp, 1)));
91447636
A
7361
7362 fsap->f_fsid.val[0] = hfsmp->hfs_raw_dev;
7363 fsap->f_fsid.val[1] = vfs_typenum(mp);
7364 VFSATTR_SET_SUPPORTED(fsap, f_fsid);
7365
7366 VFSATTR_RETURN(fsap, f_signature, vcb->vcbSigWord);
7367 VFSATTR_RETURN(fsap, f_carbon_fsid, 0);
7368
7369 if (VFSATTR_IS_ACTIVE(fsap, f_capabilities)) {
7370 vol_capabilities_attr_t *cap;
7371
7372 cap = &fsap->f_capabilities;
7373
7374 if (hfsmp->hfs_flags & HFS_STANDARD) {
7375 cap->capabilities[VOL_CAPABILITIES_FORMAT] =
7376 VOL_CAP_FMT_PERSISTENTOBJECTIDS |
7377 VOL_CAP_FMT_CASE_PRESERVING |
2d21ac55
A
7378 VOL_CAP_FMT_FAST_STATFS |
7379 VOL_CAP_FMT_HIDDEN_FILES |
7380 VOL_CAP_FMT_PATH_FROM_ID;
91447636
A
7381 } else {
7382 cap->capabilities[VOL_CAPABILITIES_FORMAT] =
7383 VOL_CAP_FMT_PERSISTENTOBJECTIDS |
7384 VOL_CAP_FMT_SYMBOLICLINKS |
7385 VOL_CAP_FMT_HARDLINKS |
7386 VOL_CAP_FMT_JOURNAL |
2d21ac55 7387 VOL_CAP_FMT_ZERO_RUNS |
91447636
A
7388 (hfsmp->jnl ? VOL_CAP_FMT_JOURNAL_ACTIVE : 0) |
7389 (hfsmp->hfs_flags & HFS_CASE_SENSITIVE ? VOL_CAP_FMT_CASE_SENSITIVE : 0) |
7390 VOL_CAP_FMT_CASE_PRESERVING |
7391 VOL_CAP_FMT_FAST_STATFS |
2d21ac55
A
7392 VOL_CAP_FMT_2TB_FILESIZE |
7393 VOL_CAP_FMT_HIDDEN_FILES |
b0d623f7
A
7394#if HFS_COMPRESSION
7395 VOL_CAP_FMT_PATH_FROM_ID |
7396 VOL_CAP_FMT_DECMPFS_COMPRESSION;
7397#else
2d21ac55 7398 VOL_CAP_FMT_PATH_FROM_ID;
b0d623f7 7399#endif
91447636
A
7400 }
7401 cap->capabilities[VOL_CAPABILITIES_INTERFACES] =
7402 VOL_CAP_INT_SEARCHFS |
7403 VOL_CAP_INT_ATTRLIST |
7404 VOL_CAP_INT_NFSEXPORT |
7405 VOL_CAP_INT_READDIRATTR |
7406 VOL_CAP_INT_EXCHANGEDATA |
7407 VOL_CAP_INT_ALLOCATE |
7408 VOL_CAP_INT_VOL_RENAME |
7409 VOL_CAP_INT_ADVLOCK |
2d21ac55
A
7410 VOL_CAP_INT_FLOCK |
7411#if NAMEDSTREAMS
7412 VOL_CAP_INT_EXTENDED_ATTR |
7413 VOL_CAP_INT_NAMEDSTREAMS;
7414#else
7415 VOL_CAP_INT_EXTENDED_ATTR;
7416#endif
91447636
A
7417 cap->capabilities[VOL_CAPABILITIES_RESERVED1] = 0;
7418 cap->capabilities[VOL_CAPABILITIES_RESERVED2] = 0;
7419
7420 cap->valid[VOL_CAPABILITIES_FORMAT] =
7421 VOL_CAP_FMT_PERSISTENTOBJECTIDS |
7422 VOL_CAP_FMT_SYMBOLICLINKS |
7423 VOL_CAP_FMT_HARDLINKS |
7424 VOL_CAP_FMT_JOURNAL |
7425 VOL_CAP_FMT_JOURNAL_ACTIVE |
7426 VOL_CAP_FMT_NO_ROOT_TIMES |
7427 VOL_CAP_FMT_SPARSE_FILES |
7428 VOL_CAP_FMT_ZERO_RUNS |
7429 VOL_CAP_FMT_CASE_SENSITIVE |
7430 VOL_CAP_FMT_CASE_PRESERVING |
7431 VOL_CAP_FMT_FAST_STATFS |
2d21ac55
A
7432 VOL_CAP_FMT_2TB_FILESIZE |
7433 VOL_CAP_FMT_OPENDENYMODES |
7434 VOL_CAP_FMT_HIDDEN_FILES |
b0d623f7
A
7435#if HFS_COMPRESSION
7436 VOL_CAP_FMT_PATH_FROM_ID |
7437 VOL_CAP_FMT_DECMPFS_COMPRESSION;
7438#else
2d21ac55 7439 VOL_CAP_FMT_PATH_FROM_ID;
b0d623f7 7440#endif
91447636
A
7441 cap->valid[VOL_CAPABILITIES_INTERFACES] =
7442 VOL_CAP_INT_SEARCHFS |
7443 VOL_CAP_INT_ATTRLIST |
7444 VOL_CAP_INT_NFSEXPORT |
7445 VOL_CAP_INT_READDIRATTR |
7446 VOL_CAP_INT_EXCHANGEDATA |
7447 VOL_CAP_INT_COPYFILE |
7448 VOL_CAP_INT_ALLOCATE |
7449 VOL_CAP_INT_VOL_RENAME |
7450 VOL_CAP_INT_ADVLOCK |
2d21ac55
A
7451 VOL_CAP_INT_FLOCK |
7452 VOL_CAP_INT_MANLOCK |
7453#if NAMEDSTREAMS
7454 VOL_CAP_INT_EXTENDED_ATTR |
7455 VOL_CAP_INT_NAMEDSTREAMS;
7456#else
7457 VOL_CAP_INT_EXTENDED_ATTR;
7458#endif
91447636
A
7459 cap->valid[VOL_CAPABILITIES_RESERVED1] = 0;
7460 cap->valid[VOL_CAPABILITIES_RESERVED2] = 0;
7461 VFSATTR_SET_SUPPORTED(fsap, f_capabilities);
7462 }
7463 if (VFSATTR_IS_ACTIVE(fsap, f_attributes)) {
7464 vol_attributes_attr_t *attrp = &fsap->f_attributes;
7465
6d2010ae 7466 attrp->validattr.commonattr = HFS_ATTR_CMN_VOL_VALIDMASK;
91447636
A
7467 attrp->validattr.volattr = ATTR_VOL_VALIDMASK & ~ATTR_VOL_INFO;
7468 attrp->validattr.dirattr = ATTR_DIR_VALIDMASK;
2d21ac55 7469 attrp->validattr.fileattr = HFS_ATTR_FILE_VALIDMASK;
91447636
A
7470 attrp->validattr.forkattr = 0;
7471
6d2010ae 7472 attrp->nativeattr.commonattr = HFS_ATTR_CMN_VOL_VALIDMASK;
91447636
A
7473 attrp->nativeattr.volattr = ATTR_VOL_VALIDMASK & ~ATTR_VOL_INFO;
7474 attrp->nativeattr.dirattr = ATTR_DIR_VALIDMASK;
2d21ac55 7475 attrp->nativeattr.fileattr = HFS_ATTR_FILE_VALIDMASK;
91447636
A
7476 attrp->nativeattr.forkattr = 0;
7477 VFSATTR_SET_SUPPORTED(fsap, f_attributes);
7478 }
6d2010ae 7479 fsap->f_create_time.tv_sec = hfsmp->hfs_itime;
91447636
A
7480 fsap->f_create_time.tv_nsec = 0;
7481 VFSATTR_SET_SUPPORTED(fsap, f_create_time);
7482 fsap->f_modify_time.tv_sec = hfsmp->vcbLsMod;
7483 fsap->f_modify_time.tv_nsec = 0;
7484 VFSATTR_SET_SUPPORTED(fsap, f_modify_time);
7485
7486 fsap->f_backup_time.tv_sec = hfsmp->vcbVolBkUp;
7487 fsap->f_backup_time.tv_nsec = 0;
7488 VFSATTR_SET_SUPPORTED(fsap, f_backup_time);
7489 if (VFSATTR_IS_ACTIVE(fsap, f_fssubtype)) {
2d21ac55 7490 u_int16_t subtype = 0;
91447636
A
7491
7492 /*
7493 * Subtypes (flavors) for HFS
7494 * 0: Mac OS Extended
7495 * 1: Mac OS Extended (Journaled)
7496 * 2: Mac OS Extended (Case Sensitive)
7497 * 3: Mac OS Extended (Case Sensitive, Journaled)
7498 * 4 - 127: Reserved
7499 * 128: Mac OS Standard
7500 *
7501 */
7502 if (hfsmp->hfs_flags & HFS_STANDARD) {
7503 subtype = HFS_SUBTYPE_STANDARDHFS;
7504 } else /* HFS Plus */ {
7505 if (hfsmp->jnl)
7506 subtype |= HFS_SUBTYPE_JOURNALED;
7507 if (hfsmp->hfs_flags & HFS_CASE_SENSITIVE)
7508 subtype |= HFS_SUBTYPE_CASESENSITIVE;
7509 }
7510 fsap->f_fssubtype = subtype;
7511 VFSATTR_SET_SUPPORTED(fsap, f_fssubtype);
7512 }
7513
7514 if (VFSATTR_IS_ACTIVE(fsap, f_vol_name)) {
2d21ac55 7515 strlcpy(fsap->f_vol_name, (char *) hfsmp->vcbVN, MAXPATHLEN);
91447636
A
7516 VFSATTR_SET_SUPPORTED(fsap, f_vol_name);
7517 }
b0d623f7
A
7518 if (VFSATTR_IS_ACTIVE(fsap, f_uuid)) {
7519 hfs_getvoluuid(hfsmp, fsap->f_uuid);
7520 VFSATTR_SET_SUPPORTED(fsap, f_uuid);
7521 }
91447636
A
7522 return (0);
7523}
7524
7525/*
7526 * Perform a volume rename. Requires the FS' root vp.
7527 */
7528static int
7529hfs_rename_volume(struct vnode *vp, const char *name, proc_t p)
7530{
7531 ExtendedVCB *vcb = VTOVCB(vp);
7532 struct cnode *cp = VTOC(vp);
7533 struct hfsmount *hfsmp = VTOHFS(vp);
7534 struct cat_desc to_desc;
7535 struct cat_desc todir_desc;
7536 struct cat_desc new_desc;
7537 cat_cookie_t cookie;
7538 int lockflags;
7539 int error = 0;
6d2010ae
A
7540 char converted_volname[256];
7541 size_t volname_length = 0;
7542 size_t conv_volname_length = 0;
7543
91447636
A
7544
7545 /*
7546 * Ignore attempts to rename a volume to a zero-length name.
7547 */
7548 if (name[0] == 0)
7549 return(0);
7550
7551 bzero(&to_desc, sizeof(to_desc));
7552 bzero(&todir_desc, sizeof(todir_desc));
7553 bzero(&new_desc, sizeof(new_desc));
7554 bzero(&cookie, sizeof(cookie));
7555
7556 todir_desc.cd_parentcnid = kHFSRootParentID;
7557 todir_desc.cd_cnid = kHFSRootFolderID;
7558 todir_desc.cd_flags = CD_ISDIR;
7559
2d21ac55 7560 to_desc.cd_nameptr = (const u_int8_t *)name;
91447636
A
7561 to_desc.cd_namelen = strlen(name);
7562 to_desc.cd_parentcnid = kHFSRootParentID;
7563 to_desc.cd_cnid = cp->c_cnid;
7564 to_desc.cd_flags = CD_ISDIR;
7565
7566 if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK)) == 0) {
7567 if ((error = hfs_start_transaction(hfsmp)) == 0) {
7568 if ((error = cat_preflight(hfsmp, CAT_RENAME, &cookie, p)) == 0) {
7569 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
7570
7571 error = cat_rename(hfsmp, &cp->c_desc, &todir_desc, &to_desc, &new_desc);
7572
7573 /*
7574 * If successful, update the name in the VCB, ensure it's terminated.
7575 */
7576 if (!error) {
2d21ac55 7577 strlcpy((char *)vcb->vcbVN, name, sizeof(vcb->vcbVN));
6d2010ae 7578 volname_length = strlen ((const char*)vcb->vcbVN);
316670eb 7579#define DKIOCCSSETLVNAME _IOW('d', 198, char[256])
6d2010ae
A
7580 /* Send the volume name down to CoreStorage if necessary */
7581 error = utf8_normalizestr(vcb->vcbVN, volname_length, (u_int8_t*)converted_volname, &conv_volname_length, 256, UTF_PRECOMPOSED);
7582 if (error == 0) {
7583 (void) VNOP_IOCTL (hfsmp->hfs_devvp, DKIOCCSSETLVNAME, converted_volname, 0, vfs_context_current());
7584 }
7585 error = 0;
91447636 7586 }
6d2010ae 7587
91447636
A
7588 hfs_systemfile_unlock(hfsmp, lockflags);
7589 cat_postflight(hfsmp, &cookie, p);
7590
7591 if (error)
2d21ac55 7592 MarkVCBDirty(vcb);
91447636
A
7593 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
7594 }
7595 hfs_end_transaction(hfsmp);
7596 }
7597 if (!error) {
7598 /* Release old allocated name buffer */
7599 if (cp->c_desc.cd_flags & CD_HASBUF) {
2d21ac55 7600 const char *tmp_name = (const char *)cp->c_desc.cd_nameptr;
91447636
A
7601
7602 cp->c_desc.cd_nameptr = 0;
7603 cp->c_desc.cd_namelen = 0;
7604 cp->c_desc.cd_flags &= ~CD_HASBUF;
2d21ac55 7605 vfs_removename(tmp_name);
91447636
A
7606 }
7607 /* Update cnode's catalog descriptor */
7608 replace_desc(cp, &new_desc);
7609 vcb->volumeNameEncodingHint = new_desc.cd_encoding;
7610 cp->c_touch_chgtime = TRUE;
7611 }
7612
7613 hfs_unlock(cp);
7614 }
7615
7616 return(error);
7617}
7618
7619/*
7620 * Get file system attributes.
7621 */
7622static int
7623hfs_vfs_setattr(struct mount *mp, struct vfs_attr *fsap, __unused vfs_context_t context)
7624{
7625 kauth_cred_t cred = vfs_context_ucred(context);
7626 int error = 0;
7627
7628 /*
7629 * Must be superuser or owner of filesystem to change volume attributes
7630 */
7631 if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(mp)->f_owner))
7632 return(EACCES);
7633
7634 if (VFSATTR_IS_ACTIVE(fsap, f_vol_name)) {
7635 vnode_t root_vp;
7636
7637 error = hfs_vfs_root(mp, &root_vp, context);
7638 if (error)
7639 goto out;
7640
7641 error = hfs_rename_volume(root_vp, fsap->f_vol_name, vfs_context_proc(context));
7642 (void) vnode_put(root_vp);
7643 if (error)
7644 goto out;
7645
7646 VFSATTR_SET_SUPPORTED(fsap, f_vol_name);
7647 }
7648
7649out:
7650 return error;
7651}
7652
2d21ac55
A
7653/* If a runtime corruption is detected, set the volume inconsistent
7654 * bit in the volume attributes. The volume inconsistent bit is a persistent
7655 * bit which represents that the volume is corrupt and needs repair.
7656 * The volume inconsistent bit can be set from the kernel when it detects
7657 * runtime corruption or from file system repair utilities like fsck_hfs when
7658 * a repair operation fails. The bit should be cleared only from file system
7659 * verify/repair utility like fsck_hfs when a verify/repair succeeds.
7660 */
7661void hfs_mark_volume_inconsistent(struct hfsmount *hfsmp)
7662{
7663 HFS_MOUNT_LOCK(hfsmp, TRUE);
7664 if ((hfsmp->vcbAtrb & kHFSVolumeInconsistentMask) == 0) {
7665 hfsmp->vcbAtrb |= kHFSVolumeInconsistentMask;
7666 MarkVCBDirty(hfsmp);
7667 }
b0d623f7
A
7668 if ((hfsmp->hfs_flags & HFS_READ_ONLY)==0) {
7669 /* Log information to ASL log */
7670 fslog_fs_corrupt(hfsmp->hfs_mp);
7671 printf("hfs: Runtime corruption detected on %s, fsck will be forced on next mount.\n", hfsmp->vcbVN);
7672 }
2d21ac55
A
7673 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
7674}
7675
7676/* Replay the journal on the device node provided. Returns zero if
7677 * journal replay succeeded or no journal was supposed to be replayed.
7678 */
b0d623f7 7679static int hfs_journal_replay(vnode_t devvp, vfs_context_t context)
2d21ac55
A
7680{
7681 int retval = 0;
316670eb 7682 int error = 0;
2d21ac55
A
7683 struct mount *mp = NULL;
7684 struct hfs_mount_args *args = NULL;
7685
2d21ac55 7686 /* Replay allowed only on raw devices */
6d2010ae 7687 if (!vnode_ischr(devvp) && !vnode_isblk(devvp)) {
2d21ac55
A
7688 retval = EINVAL;
7689 goto out;
7690 }
7691
7692 /* Create dummy mount structures */
7693 MALLOC(mp, struct mount *, sizeof(struct mount), M_TEMP, M_WAITOK);
b0d623f7
A
7694 if (mp == NULL) {
7695 retval = ENOMEM;
7696 goto out;
7697 }
2d21ac55
A
7698 bzero(mp, sizeof(struct mount));
7699 mount_lock_init(mp);
7700
7701 MALLOC(args, struct hfs_mount_args *, sizeof(struct hfs_mount_args), M_TEMP, M_WAITOK);
b0d623f7
A
7702 if (args == NULL) {
7703 retval = ENOMEM;
7704 goto out;
7705 }
2d21ac55
A
7706 bzero(args, sizeof(struct hfs_mount_args));
7707
7708 retval = hfs_mountfs(devvp, mp, args, 1, context);
6d2010ae
A
7709 buf_flushdirtyblks(devvp, TRUE, 0, "hfs_journal_replay");
7710
7711 /* FSYNC the devnode to be sure all data has been flushed */
316670eb
A
7712 error = VNOP_FSYNC(devvp, MNT_WAIT, context);
7713 if (error) {
7714 retval = error;
7715 }
2d21ac55
A
7716
7717out:
7718 if (mp) {
7719 mount_lock_destroy(mp);
7720 FREE(mp, M_TEMP);
7721 }
7722 if (args) {
7723 FREE(args, M_TEMP);
7724 }
2d21ac55
A
7725 return retval;
7726}
55e303ae 7727
1c79356b
A
/*
 * hfs vfs operations.
 *
 * This is a positional initializer: each entry fills the next member of
 * struct vfsops in declaration order, so the list must stay in step with
 * that structure (declared outside this file).  Do not reorder entries.
 */
struct vfsops hfs_vfsops = {
	hfs_mount,
	hfs_start,
	hfs_unmount,
	hfs_vfs_root,
	hfs_quotactl,
	hfs_vfs_getattr, /* was hfs_statfs */
	hfs_sync,
	hfs_vfs_vget,
	hfs_fhtovp,
	hfs_vptofh,
	hfs_init,
	hfs_sysctl,
	hfs_vfs_setattr,
	/* NOTE(review): presumably the trailing reserved slots, left NULL -- confirm against struct vfsops */
	{NULL}
};