]> git.saurik.com Git - apple/xnu.git/blob - bsd/hfs/hfs_vfsops.c
a07a4c38b74c64a415aa775fdc7ea3f3154954be
[apple/xnu.git] / bsd / hfs / hfs_vfsops.c
1 /*
2 * Copyright (c) 1999-2013 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1991, 1993, 1994
30 * The Regents of the University of California. All rights reserved.
31 * (c) UNIX System Laboratories, Inc.
32 * All or some portions of this file are derived from material licensed
33 * to the University of California by American Telephone and Telegraph
34 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35 * the permission of UNIX System Laboratories, Inc.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * hfs_vfsops.c
66 * derived from @(#)ufs_vfsops.c 8.8 (Berkeley) 5/20/95
67 *
68 * (c) Copyright 1997-2002 Apple Computer, Inc. All rights reserved.
69 *
70 * hfs_vfsops.c -- VFS layer for loadable HFS file system.
71 *
72 */
73 #include <sys/param.h>
74 #include <sys/systm.h>
75 #include <sys/kauth.h>
76
77 #include <sys/ubc.h>
78 #include <sys/ubc_internal.h>
79 #include <sys/vnode_internal.h>
80 #include <sys/mount_internal.h>
81 #include <sys/sysctl.h>
82 #include <sys/malloc.h>
83 #include <sys/stat.h>
84 #include <sys/quota.h>
85 #include <sys/disk.h>
86 #include <sys/paths.h>
87 #include <sys/utfconv.h>
88 #include <sys/kdebug.h>
89 #include <sys/fslog.h>
90 #include <sys/ubc.h>
91 #include <sys/buf_internal.h>
92
93 #include <kern/locks.h>
94
95 #include <vfs/vfs_journal.h>
96
97 #include <miscfs/specfs/specdev.h>
98 #include <hfs/hfs_mount.h>
99
100 #include <libkern/crypto/md5.h>
101 #include <uuid/uuid.h>
102
103 #include "hfs.h"
104 #include "hfs_catalog.h"
105 #include "hfs_cnode.h"
106 #include "hfs_dbg.h"
107 #include "hfs_endian.h"
108 #include "hfs_hotfiles.h"
109 #include "hfs_quota.h"
110 #include "hfs_btreeio.h"
111
112 #include "hfscommon/headers/FileMgrInternal.h"
113 #include "hfscommon/headers/BTreesInternal.h"
114
115 #if CONFIG_PROTECT
116 #include <sys/cprotect.h>
117 #endif
118
119 #if CONFIG_HFS_ALLOC_RBTREE
120 #include "hfscommon/headers/HybridAllocator.h"
121 #endif
122
/*
 * When non-zero, the mount/remount paths in this file printf() a one-line
 * diagnostic for each failure they encounter (see the
 * "if (HFS_MOUNT_DEBUG)" tests in hfs_mountroot and hfs_mount).
 */
123 #define HFS_MOUNT_DEBUG 1
124
/* Debug switches, only compiled in when HFS_DIAGNOSTIC is set. */
125 #if HFS_DIAGNOSTIC
126 int hfs_dbg_all = 0;
127 int hfs_dbg_err = 0;
128 #endif
129
130 /* Enable/disable debugging code for live volume resizing */
131 int hfs_resize_debug = 0;
132
/*
 * Lock attributes and lock groups shared by the HFS code.
 * NOTE(review): presumably allocated once in hfs_init() -- that code is
 * not visible from this part of the file; confirm.
 */
133 lck_grp_attr_t * hfs_group_attr;
134 lck_attr_t * hfs_lock_attr;
135 lck_grp_t * hfs_mutex_group;
136 lck_grp_t * hfs_rwlock_group;
137 lck_grp_t * hfs_spinlock_group;
138
/* Vnode operation vectors; defined outside this file. */
139 extern struct vnodeopv_desc hfs_vnodeop_opv_desc;
140 extern struct vnodeopv_desc hfs_std_vnodeop_opv_desc;
141
142 /* not static so we can re-use in hfs_readwrite.c for build_path calls */
143 int hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context);
144
/* Forward declarations of helpers private to this file. */
145 static int hfs_changefs(struct mount *mp, struct hfs_mount_args *args);
146 static int hfs_fhtovp(struct mount *mp, int fhlen, unsigned char *fhp, struct vnode **vpp, vfs_context_t context);
147 static int hfs_flushfiles(struct mount *, int, struct proc *);
148 static int hfs_flushMDB(struct hfsmount *hfsmp, int waitfor, int altflush);
149 static int hfs_getmountpoint(struct vnode *vp, struct hfsmount **hfsmpp);
150 static int hfs_init(struct vfsconf *vfsp);
151 static int hfs_vfs_root(struct mount *mp, struct vnode **vpp, vfs_context_t context);
152 static int hfs_quotactl(struct mount *, int, uid_t, caddr_t, vfs_context_t context);
153 static int hfs_start(struct mount *mp, int flags, vfs_context_t context);
154 static int hfs_vptofh(struct vnode *vp, int *fhlenp, unsigned char *fhp, vfs_context_t context);
155 static int hfs_file_extent_overlaps(struct hfsmount *hfsmp, u_int32_t allocLimit, struct HFSPlusCatalogFile *filerec);
156 static int hfs_journal_replay(vnode_t devvp, vfs_context_t context);
157 static int hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t allocLimit, u_int32_t reclaimblks, vfs_context_t context);
158 static int hfs_extend_journal(struct hfsmount *hfsmp, u_int32_t sector_size, u_int64_t sector_count, vfs_context_t context);
159
/*
 * Allocator helpers with external linkage.  hfs_mount() hands
 * hfs_initialize_allocator to kernel_thread_start() and calls
 * hfs_teardown_allocator() when CONFIG_HFS_ALLOC_RBTREE is set.
 */
160 void hfs_initialize_allocator (struct hfsmount *hfsmp);
161 int hfs_teardown_allocator (struct hfsmount *hfsmp);
162 void hfs_unmap_blocks (struct hfsmount *hfsmp);
163
/* VFS entry points with external linkage. */
164 int hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t context);
165 int hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, int journal_replay_only, vfs_context_t context);
166 int hfs_reload(struct mount *mp);
167 int hfs_statfs(struct mount *mp, register struct vfsstatfs *sbp, vfs_context_t context);
168 int hfs_sync(struct mount *mp, int waitfor, vfs_context_t context);
169 int hfs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp,
170 user_addr_t newp, size_t newlen, vfs_context_t context);
171 int hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context);
172
173 /*
174 * Called by vfs_mountroot when mounting HFS Plus as root.
175 */
176
177 int
178 hfs_mountroot(mount_t mp, vnode_t rvp, vfs_context_t context)
179 {
180 struct hfsmount *hfsmp;
181 ExtendedVCB *vcb;
182 struct vfsstatfs *vfsp;
183 int error;
184
185 if ((error = hfs_mountfs(rvp, mp, NULL, 0, context))) {
186 if (HFS_MOUNT_DEBUG) {
187 printf("hfs_mountroot: hfs_mountfs returned %d, rvp (%p) name (%s) \n",
188 error, rvp, (rvp->v_name ? rvp->v_name : "unknown device"));
189 }
190 return (error);
191 }
192
193 /* Init hfsmp */
194 hfsmp = VFSTOHFS(mp);
195
196 hfsmp->hfs_uid = UNKNOWNUID;
197 hfsmp->hfs_gid = UNKNOWNGID;
198 hfsmp->hfs_dir_mask = (S_IRWXU | S_IRGRP|S_IXGRP | S_IROTH|S_IXOTH); /* 0755 */
199 hfsmp->hfs_file_mask = (S_IRWXU | S_IRGRP|S_IXGRP | S_IROTH|S_IXOTH); /* 0755 */
200
201 /* Establish the free block reserve. */
202 vcb = HFSTOVCB(hfsmp);
203 vcb->reserveBlocks = ((u_int64_t)vcb->totalBlocks * HFS_MINFREE) / 100;
204 vcb->reserveBlocks = MIN(vcb->reserveBlocks, HFS_MAXRESERVE / vcb->blockSize);
205
206 vfsp = vfs_statfs(mp);
207 (void)hfs_statfs(mp, vfsp, NULL);
208
209 return (0);
210 }
211
212
213 /*
214 * VFS Operations.
215 *
216 * mount system call
 *
 * hfs_mount -- mount a new HFS volume or update an existing mount.
 *
 * mp      - mount structure being mounted or updated
 * devvp   - device vnode; only passed on to hfs_mountfs() for a new mount
 * data    - user-space address of a struct hfs_mount_args (copied in)
 * context - caller's VFS context
 *
 * With MNT_UPDATE set in the mount's command flags this handles, in order:
 *   - MNT_RELOAD: re-read in-core state via hfs_reload(); only accepted
 *     when the mount is read-only, otherwise EINVAL.
 *   - read-write -> read-only downgrade: sync, flush files, mark the
 *     volume cleanly unmounted, close the journal, fsync the device.
 *   - read-only -> read-write upgrade: re-open the journal if needed,
 *     clear the clean-unmount bit and re-initialize writable state.
 *   - hfs_changefs() to apply the remaining mount arguments.
 * Without MNT_UPDATE it performs a fresh mount through hfs_mountfs() and,
 * under CONFIG_PROTECT, validates the content-protection root EA.
 *
 * Returns 0 on success (after refreshing the statfs data) or an errno.
217 */
218
219 int
220 hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t context)
221 {
222 struct proc *p = vfs_context_proc(context);
223 struct hfsmount *hfsmp = NULL;
224 struct hfs_mount_args args;
225 int retval = E_NONE;
226 u_int32_t cmdflags;
227
/* Fetch the caller's mount arguments from user space. */
228 if ((retval = copyin(data, (caddr_t)&args, sizeof(args)))) {
229 if (HFS_MOUNT_DEBUG) {
230 printf("hfs_mount: copyin returned %d for fs\n", retval);
231 }
232 return (retval);
233 }
234 cmdflags = (u_int32_t)vfs_flags(mp) & MNT_CMDFLAGS;
235 if (cmdflags & MNT_UPDATE) {
236 hfsmp = VFSTOHFS(mp);
237
238 /* Reload incore data after an fsck. */
239 if (cmdflags & MNT_RELOAD) {
240 if (vfs_isrdonly(mp)) {
241 int error = hfs_reload(mp);
242 if (error && HFS_MOUNT_DEBUG) {
243 printf("hfs_mount: hfs_reload returned %d on %s \n", error, hfsmp->vcbVN);
244 }
/* Returns directly: the statfs refresh at "out" is skipped for a reload. */
245 return error;
246 }
247 else {
248 if (HFS_MOUNT_DEBUG) {
249 printf("hfs_mount: MNT_RELOAD not supported on rdwr filesystem %s\n", hfsmp->vcbVN);
250 }
251 return (EINVAL);
252 }
253 }
254
255 /* Change to a read-only file system. */
256 if (((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) &&
257 vfs_isrdonly(mp)) {
258 int flags;
259
260 /* Set flag to indicate that a downgrade to read-only
261 * is in progress and therefore block any further
262 * modifications to the file system.
263 */
264 hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);
265 hfsmp->hfs_flags |= HFS_RDONLY_DOWNGRADE;
266 hfsmp->hfs_downgrading_proc = current_thread();
267 hfs_unlock_global (hfsmp);
268
269 /* use VFS_SYNC to push out System (btree) files */
270 retval = VFS_SYNC(mp, MNT_WAIT, context);
/* A sync failure aborts the downgrade unless MNT_FORCE was requested. */
271 if (retval && ((cmdflags & MNT_FORCE) == 0)) {
272 hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE;
273 hfsmp->hfs_downgrading_proc = NULL;
274 if (HFS_MOUNT_DEBUG) {
275 printf("hfs_mount: VFS_SYNC returned %d during b-tree sync of %s \n", retval, hfsmp->vcbVN);
276 }
277 goto out;
278 }
279
/* Flush vnodes open for writing; with MNT_FORCE close them forcibly. */
280 flags = WRITECLOSE;
281 if (cmdflags & MNT_FORCE)
282 flags |= FORCECLOSE;
283
284 if ((retval = hfs_flushfiles(mp, flags, p))) {
285 hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE;
286 hfsmp->hfs_downgrading_proc = NULL;
287 if (HFS_MOUNT_DEBUG) {
288 printf("hfs_mount: hfs_flushfiles returned %d on %s \n", retval, hfsmp->vcbVN);
289 }
290 goto out;
291 }
292
293 /* mark the volume cleanly unmounted */
294 hfsmp->vcbAtrb |= kHFSVolumeUnmountedMask;
/*
 * NOTE(review): retval from this flush is not tested here.  A failure
 * skips the device fsync below and is then reported by the
 * "FSYNC on devvp returned" message, since both share retval.
 */
295 retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
296 hfsmp->hfs_flags |= HFS_READ_ONLY;
297
298 /*
299 * Close down the journal.
300 *
301 * NOTE: It is critically important to close down the journal
302 * and have it issue all pending I/O prior to calling VNOP_FSYNC below.
303 * In a journaled environment it is expected that the journal be
304 * the only actor permitted to issue I/O for metadata blocks in HFS.
305 * If we were to call VNOP_FSYNC prior to closing down the journal,
306 * we would inadvertently issue (and wait for) the I/O we just
307 * initiated above as part of the flushvolumeheader call.
308 *
309 * To avoid this, we follow the same order of operations as in
310 * unmount and issue the journal_close prior to calling VNOP_FSYNC.
311 */
312
313 if (hfsmp->jnl) {
314 hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);
315
316 journal_close(hfsmp->jnl);
317 hfsmp->jnl = NULL;
318
319 // Note: we explicitly don't want to shutdown
320 // access to the jvp because we may need
321 // it later if we go back to being read-write.
322
323 hfs_unlock_global (hfsmp);
324 }
325
326
327 /*
328 * Write out any pending I/O still outstanding against the device node
329 * now that the journal has been closed.
330 */
331 if (!retval) {
/*
 * Use hfs_fsync() when the device vnode belongs to this mount;
 * otherwise take an iocount and go through VNOP_FSYNC.
 */
332 if (vnode_mount(hfsmp->hfs_devvp) == mp) {
333 retval = hfs_fsync(hfsmp->hfs_devvp, MNT_WAIT, 0, p);
334 } else {
335 vnode_get(hfsmp->hfs_devvp);
336 retval = VNOP_FSYNC(hfsmp->hfs_devvp, MNT_WAIT, context);
337 vnode_put(hfsmp->hfs_devvp);
338 }
339 }
340
/*
 * Failure: undo the downgrade flags and return to read-write.
 * NOTE(review): the journal closed above is not re-opened and
 * kHFSVolumeUnmountedMask stays set on this path -- confirm intended.
 */
341 if (retval) {
342 if (HFS_MOUNT_DEBUG) {
343 printf("hfs_mount: FSYNC on devvp returned %d for fs %s\n", retval, hfsmp->vcbVN);
344 }
345 hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE;
346 hfsmp->hfs_downgrading_proc = NULL;
347 hfsmp->hfs_flags &= ~HFS_READ_ONLY;
348 goto out;
349 }
350
351 #if CONFIG_HFS_ALLOC_RBTREE
352 (void) hfs_teardown_allocator(hfsmp);
353 #endif
/*
 * Downgrade complete.  HFS_RDONLY_DOWNGRADE is left set on purpose;
 * it is cleared again by the read-write upgrade path below.
 */
354 hfsmp->hfs_downgrading_proc = NULL;
355 }
356
357 /* Change to a writable file system. */
358 if (vfs_iswriteupgrade(mp)) {
359 #if CONFIG_HFS_ALLOC_RBTREE
360 thread_t allocator_thread;
361 #endif
362
363 /*
364 * On inconsistent disks, do not allow read-write mount
365 * unless it is the boot volume being mounted.
366 */
367 if (!(vfs_flags(mp) & MNT_ROOTFS) &&
368 (hfsmp->vcbAtrb & kHFSVolumeInconsistentMask)) {
369 if (HFS_MOUNT_DEBUG) {
370 printf("hfs_mount: attempting to mount inconsistent non-root volume %s\n", (hfsmp->vcbVN));
371 }
372 retval = EINVAL;
373 goto out;
374 }
375
376 // If the journal was shut-down previously because we were
377 // asked to be read-only, let's start it back up again now
378
379 if ( (HFSTOVCB(hfsmp)->vcbAtrb & kHFSVolumeJournaledMask)
380 && hfsmp->jnl == NULL
381 && hfsmp->jvp != NULL) {
382 int jflags;
383
/* Ask journal_open to reset the journal if a reset was flagged earlier. */
384 if (hfsmp->hfs_flags & HFS_NEED_JNL_RESET) {
385 jflags = JOURNAL_RESET;
386 } else {
387 jflags = 0;
388 }
389
390 hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);
391
/*
 * The journal offset is its start block scaled by the allocation
 * block size, plus the volume's hfsPlusIOPosOffset.
 */
392 hfsmp->jnl = journal_open(hfsmp->jvp,
393 (hfsmp->jnl_start * HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset,
394 hfsmp->jnl_size,
395 hfsmp->hfs_devvp,
396 hfsmp->hfs_logical_block_size,
397 jflags,
398 0,
399 hfs_sync_metadata, hfsmp->hfs_mp);
400
401 /*
402 * Set up the trim callback function so that we can add
403 * recently freed extents to the free extent cache once
404 * the transaction that freed them is written to the
405 * journal on disk.
406 */
407 if (hfsmp->jnl)
408 journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);
409
410 hfs_unlock_global (hfsmp);
411
412 if (hfsmp->jnl == NULL) {
413 if (HFS_MOUNT_DEBUG) {
414 printf("hfs_mount: journal_open == NULL; couldn't be opened on %s \n", (hfsmp->vcbVN));
415 }
416 retval = EINVAL;
417 goto out;
418 } else {
419 hfsmp->hfs_flags &= ~HFS_NEED_JNL_RESET;
420 }
421
422 }
423
424 /* See if we need to erase unused Catalog nodes due to <rdar://problem/6947811>. */
425 retval = hfs_erase_unused_nodes(hfsmp);
426 if (retval != E_NONE) {
427 if (HFS_MOUNT_DEBUG) {
428 printf("hfs_mount: hfs_erase_unused_nodes returned %d for fs %s\n", retval, hfsmp->vcbVN);
429 }
430 goto out;
431 }
432
433 /* If this mount point was downgraded from read-write
434 * to read-only, clear that information as we are now
435 * moving back to read-write.
436 */
437 hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE;
438 hfsmp->hfs_downgrading_proc = NULL;
439
440 /* mark the volume dirty (clear clean unmount bit) */
441 hfsmp->vcbAtrb &= ~kHFSVolumeUnmountedMask;
442
443 retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
444 if (retval != E_NONE) {
445 if (HFS_MOUNT_DEBUG) {
446 printf("hfs_mount: hfs_flushvolumeheader returned %d for fs %s\n", retval, hfsmp->vcbVN);
447 }
448 goto out;
449 }
450
451 /* Only clear HFS_READ_ONLY after a successful write */
452 hfsmp->hfs_flags &= ~HFS_READ_ONLY;
453
454
/*
 * HFS_READ_ONLY was just cleared, so this test effectively selects
 * volumes that are not HFS_STANDARD (plain HFS).
 */
455 if (!(hfsmp->hfs_flags & (HFS_READ_ONLY | HFS_STANDARD))) {
456 /* Setup private/hidden directories for hardlinks. */
457 hfs_privatedir_init(hfsmp, FILE_HARDLINKS);
458 hfs_privatedir_init(hfsmp, DIR_HARDLINKS);
459
460 hfs_remove_orphans(hfsmp);
461
462 /*
463 * Allow hot file clustering if conditions allow.
464 */
465 if ((hfsmp->hfs_flags & HFS_METADATA_ZONE) &&
466 ((hfsmp->hfs_mp->mnt_kern_flag & MNTK_SSD) == 0)) {
467 (void) hfs_recording_init(hfsmp);
468 }
469 /* Force ACLs on HFS+ file systems. */
470 if (vfs_extendedsecurity(HFSTOVFS(hfsmp)) == 0) {
471 vfs_setextendedsecurity(HFSTOVFS(hfsmp));
472 }
473 }
474
475 #if CONFIG_HFS_ALLOC_RBTREE
476 /*
477 * Like the normal mount case, we need to handle creation of the allocation red-black tree
478 * if we're upgrading from read-only to read-write.
479 *
480 * We spawn a thread to create the pair of red-black trees for this volume.
481 * However, in so doing, we must be careful to ensure that if this thread is still
482 * running after mount has finished, it doesn't interfere with an unmount. Specifically,
483 * we'll need to set a bit that indicates we're in progress building the trees here.
484 * Unmount will check for this bit, and then if it's set, mark a corresponding bit that
485 * notifies the tree generation code that an unmount is waiting. Also, mark the extent
486 * tree flags that the allocator is enabled for use before we spawn the thread that will start
487 * scanning the RB tree.
488 *
489 * Only do this if we're operating on a read-write mount (we wouldn't care for read-only),
490 * which has not previously encountered a bad error on the red-black tree code. Also, don't
491 * try to re-build a tree that already exists.
492 *
493 * When this is enabled, we must re-integrate the above function into our bitmap iteration
494 * so that we accurately send TRIMs down to the underlying disk device as needed.
495 */
496
497 if (hfsmp->extent_tree_flags == 0) {
498 hfsmp->extent_tree_flags |= (HFS_ALLOC_TREEBUILD_INFLIGHT | HFS_ALLOC_RB_ENABLED);
499 /* Initialize EOF counter so that the thread can assume it started at initial values */
500 hfsmp->offset_block_end = 0;
501
502 InitTree(hfsmp);
503
/*
 * NOTE(review): the result of kernel_thread_start() is not checked
 * before allocator_thread is handed to thread_deallocate().
 */
504 kernel_thread_start ((thread_continue_t) hfs_initialize_allocator , hfsmp, &allocator_thread);
505 thread_deallocate(allocator_thread);
506 }
507
508 #endif
509 }
510
511 /* Update file system parameters. */
512 retval = hfs_changefs(mp, &args);
513 if (retval && HFS_MOUNT_DEBUG) {
514 printf("hfs_mount: hfs_changefs returned %d for %s\n", retval, hfsmp->vcbVN);
515 }
516
517 } else /* not an update request */ {
518
519 /* Set the mount flag to indicate that we support volfs */
520 vfs_setflags(mp, (u_int64_t)((unsigned int)MNT_DOVOLFS));
521
522 retval = hfs_mountfs(devvp, mp, &args, 0, context);
523 if (retval && HFS_MOUNT_DEBUG) {
524 printf("hfs_mount: hfs_mountfs returned %d\n", retval);
525 }
526 #if CONFIG_PROTECT
527 /*
528 * If above mount call was successful, and this mount is content protection
529 * enabled, then verify the on-disk EA on the root to ensure that the filesystem
530 * is of a suitable vintage to allow the mount to proceed.
531 */
532 if ((retval == 0) && (cp_fs_protected (mp))) {
533 int err = 0;
534
535 struct cp_root_xattr *xattr = NULL;
536 MALLOC (xattr, struct cp_root_xattr*, sizeof(struct cp_root_xattr), M_TEMP, M_WAITOK);
537 if (xattr == NULL) {
538 err = ENOMEM;
/*
 * NOTE(review): this jump leaves retval at 0, so the mount is still
 * reported as successful -- confirm that is intended.
 */
539 goto badalloc;
540 }
541 bzero (xattr, sizeof(struct cp_root_xattr));
542 hfsmp = vfs_fsprivate(mp);
543
544 /* go get the EA to get the version information */
545 err = cp_getrootxattr (hfsmp, xattr);
546 /*
547 * If there was no EA there, then write one out.
548 * Assuming EA is not present on the root means
549 * this is an erase install or a very old FS
550 */
551 if (err == ENOATTR) {
552 printf("No root EA set, creating new EA with new version: %d\n", CP_NEW_MAJOR_VERS);
553 bzero(xattr, sizeof(struct cp_root_xattr));
554 xattr->major_version = CP_NEW_MAJOR_VERS;
555 xattr->minor_version = CP_MINOR_VERS;
556 xattr->flags = 0;
557
558 err = cp_setrootxattr (hfsmp, xattr);
559 }
560
561 /*
562 * For any other error, including having an out of date CP version in the
563 * EA, or for an error out of cp_setrootxattr, deny the mount
564 * and do not proceed further.
565 */
566 if (err || (xattr->major_version != CP_NEW_MAJOR_VERS && xattr->major_version != CP_PREV_MAJOR_VERS)) {
567 /* Deny the mount and tear down. */
568 retval = EPERM;
569 (void) hfs_unmount (mp, MNT_FORCE, context);
570 }
/* NOTE(review): this message is printed even when the mount was just denied. */
571 printf("Running with CP root xattr: %d.%d\n", xattr->major_version, xattr->minor_version);
572 badalloc:
573 if(xattr) {
574 FREE(xattr, M_TEMP);
575 }
576 }
577 #endif
578 }
/* Common exit: refresh the statfs data only when the request succeeded. */
579 out:
580 if (retval == 0) {
581 (void)hfs_statfs(mp, vfs_statfs(mp), context);
582 }
583 return (retval);
584 }
585
586
/*
 * Argument bundle that hfs_changefs() passes to hfs_changefs_callback()
 * through vnode_iterate().
 */
587 struct hfs_changefs_cargs {
588 struct hfsmount *hfsmp; /* mount whose parameters are being changed */
589 int namefix; /* non-zero when the HFS text encoding changed */
590 int permfix; /* non-zero when the default uid, gid or mask changed on HFS Plus */
591 int permswitch; /* non-zero when the "unknown permissions" mode was toggled */
592 };
593
594 static int
595 hfs_changefs_callback(struct vnode *vp, void *cargs)
596 {
597 ExtendedVCB *vcb;
598 struct cnode *cp;
599 struct cat_desc cndesc;
600 struct cat_attr cnattr;
601 struct hfs_changefs_cargs *args;
602 int lockflags;
603 int error;
604
605 args = (struct hfs_changefs_cargs *)cargs;
606
607 cp = VTOC(vp);
608 vcb = HFSTOVCB(args->hfsmp);
609
610 lockflags = hfs_systemfile_lock(args->hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
611 error = cat_lookup(args->hfsmp, &cp->c_desc, 0, &cndesc, &cnattr, NULL, NULL);
612 hfs_systemfile_unlock(args->hfsmp, lockflags);
613 if (error) {
614 /*
615 * If we couldn't find this guy skip to the next one
616 */
617 if (args->namefix)
618 cache_purge(vp);
619
620 return (VNODE_RETURNED);
621 }
622 /*
623 * Get the real uid/gid and perm mask from disk.
624 */
625 if (args->permswitch || args->permfix) {
626 cp->c_uid = cnattr.ca_uid;
627 cp->c_gid = cnattr.ca_gid;
628 cp->c_mode = cnattr.ca_mode;
629 }
630 /*
631 * If we're switching name converters then...
632 * Remove the existing entry from the namei cache.
633 * Update name to one based on new encoder.
634 */
635 if (args->namefix) {
636 cache_purge(vp);
637 replace_desc(cp, &cndesc);
638
639 if (cndesc.cd_cnid == kHFSRootFolderID) {
640 strlcpy((char *)vcb->vcbVN, (const char *)cp->c_desc.cd_nameptr, NAME_MAX+1);
641 cp->c_desc.cd_encoding = args->hfsmp->hfs_encoding;
642 }
643 } else {
644 cat_releasedesc(&cndesc);
645 }
646 return (VNODE_RETURNED);
647 }
648
649 /* Change fs mount parameters */
650 static int
651 hfs_changefs(struct mount *mp, struct hfs_mount_args *args)
652 {
653 int retval = 0;
654 int namefix, permfix, permswitch;
655 struct hfsmount *hfsmp;
656 ExtendedVCB *vcb;
657 hfs_to_unicode_func_t get_unicode_func;
658 unicode_to_hfs_func_t get_hfsname_func;
659 u_int32_t old_encoding = 0;
660 struct hfs_changefs_cargs cargs;
661 u_int32_t mount_flags;
662
663 hfsmp = VFSTOHFS(mp);
664 vcb = HFSTOVCB(hfsmp);
665 mount_flags = (unsigned int)vfs_flags(mp);
666
667 hfsmp->hfs_flags |= HFS_IN_CHANGEFS;
668
669 permswitch = (((hfsmp->hfs_flags & HFS_UNKNOWN_PERMS) &&
670 ((mount_flags & MNT_UNKNOWNPERMISSIONS) == 0)) ||
671 (((hfsmp->hfs_flags & HFS_UNKNOWN_PERMS) == 0) &&
672 (mount_flags & MNT_UNKNOWNPERMISSIONS)));
673
674 /* The root filesystem must operate with actual permissions: */
675 if (permswitch && (mount_flags & MNT_ROOTFS) && (mount_flags & MNT_UNKNOWNPERMISSIONS)) {
676 vfs_clearflags(mp, (u_int64_t)((unsigned int)MNT_UNKNOWNPERMISSIONS)); /* Just say "No". */
677 retval = EINVAL;
678 goto exit;
679 }
680 if (mount_flags & MNT_UNKNOWNPERMISSIONS)
681 hfsmp->hfs_flags |= HFS_UNKNOWN_PERMS;
682 else
683 hfsmp->hfs_flags &= ~HFS_UNKNOWN_PERMS;
684
685 namefix = permfix = 0;
686
687 /*
688 * Tracking of hot files requires up-to-date access times. So if
689 * access time updates are disabled, we must also disable hot files.
690 */
691 if (mount_flags & MNT_NOATIME) {
692 (void) hfs_recording_suspend(hfsmp);
693 }
694
695 /* Change the timezone (Note: this affects all hfs volumes and hfs+ volume create dates) */
696 if (args->hfs_timezone.tz_minuteswest != VNOVAL) {
697 gTimeZone = args->hfs_timezone;
698 }
699
700 /* Change the default uid, gid and/or mask */
701 if ((args->hfs_uid != (uid_t)VNOVAL) && (hfsmp->hfs_uid != args->hfs_uid)) {
702 hfsmp->hfs_uid = args->hfs_uid;
703 if (vcb->vcbSigWord == kHFSPlusSigWord)
704 ++permfix;
705 }
706 if ((args->hfs_gid != (gid_t)VNOVAL) && (hfsmp->hfs_gid != args->hfs_gid)) {
707 hfsmp->hfs_gid = args->hfs_gid;
708 if (vcb->vcbSigWord == kHFSPlusSigWord)
709 ++permfix;
710 }
711 if (args->hfs_mask != (mode_t)VNOVAL) {
712 if (hfsmp->hfs_dir_mask != (args->hfs_mask & ALLPERMS)) {
713 hfsmp->hfs_dir_mask = args->hfs_mask & ALLPERMS;
714 hfsmp->hfs_file_mask = args->hfs_mask & ALLPERMS;
715 if ((args->flags != VNOVAL) && (args->flags & HFSFSMNT_NOXONFILES))
716 hfsmp->hfs_file_mask = (args->hfs_mask & DEFFILEMODE);
717 if (vcb->vcbSigWord == kHFSPlusSigWord)
718 ++permfix;
719 }
720 }
721
722 /* Change the hfs encoding value (hfs only) */
723 if ((vcb->vcbSigWord == kHFSSigWord) &&
724 (args->hfs_encoding != (u_int32_t)VNOVAL) &&
725 (hfsmp->hfs_encoding != args->hfs_encoding)) {
726
727 retval = hfs_getconverter(args->hfs_encoding, &get_unicode_func, &get_hfsname_func);
728 if (retval)
729 goto exit;
730
731 /*
732 * Connect the new hfs_get_unicode converter but leave
733 * the old hfs_get_hfsname converter in place so that
734 * we can lookup existing vnodes to get their correctly
735 * encoded names.
736 *
737 * When we're all finished, we can then connect the new
738 * hfs_get_hfsname converter and release our interest
739 * in the old converters.
740 */
741 hfsmp->hfs_get_unicode = get_unicode_func;
742 old_encoding = hfsmp->hfs_encoding;
743 hfsmp->hfs_encoding = args->hfs_encoding;
744 ++namefix;
745 }
746
747 if (!(namefix || permfix || permswitch))
748 goto exit;
749
750 /* XXX 3762912 hack to support HFS filesystem 'owner' */
751 if (permfix)
752 vfs_setowner(mp,
753 hfsmp->hfs_uid == UNKNOWNUID ? KAUTH_UID_NONE : hfsmp->hfs_uid,
754 hfsmp->hfs_gid == UNKNOWNGID ? KAUTH_GID_NONE : hfsmp->hfs_gid);
755
756 /*
757 * For each active vnode fix things that changed
758 *
759 * Note that we can visit a vnode more than once
760 * and we can race with fsync.
761 *
762 * hfs_changefs_callback will be called for each vnode
763 * hung off of this mount point
764 *
765 * The vnode will be properly referenced and unreferenced
766 * around the callback
767 */
768 cargs.hfsmp = hfsmp;
769 cargs.namefix = namefix;
770 cargs.permfix = permfix;
771 cargs.permswitch = permswitch;
772
773 vnode_iterate(mp, 0, hfs_changefs_callback, (void *)&cargs);
774
775 /*
776 * If we're switching name converters we can now
777 * connect the new hfs_get_hfsname converter and
778 * release our interest in the old converters.
779 */
780 if (namefix) {
781 hfsmp->hfs_get_hfsname = get_hfsname_func;
782 vcb->volumeNameEncodingHint = args->hfs_encoding;
783 (void) hfs_relconverter(old_encoding);
784 }
785 exit:
786 hfsmp->hfs_flags &= ~HFS_IN_CHANGEFS;
787 return (retval);
788 }
789
790
/*
 * Argument bundle that hfs_reload() passes to hfs_reload_callback()
 * through vnode_iterate().
 */
791 struct hfs_reload_cargs {
792 struct hfsmount *hfsmp; /* mount being reloaded */
793 int error; /* result of the callback's last cat_idlookup(); non-zero stops the iteration */
794 };
795
796 static int
797 hfs_reload_callback(struct vnode *vp, void *cargs)
798 {
799 struct cnode *cp;
800 struct hfs_reload_cargs *args;
801 int lockflags;
802
803 args = (struct hfs_reload_cargs *)cargs;
804 /*
805 * flush all the buffers associated with this node
806 */
807 (void) buf_invalidateblks(vp, 0, 0, 0);
808
809 cp = VTOC(vp);
810 /*
811 * Remove any directory hints
812 */
813 if (vnode_isdir(vp))
814 hfs_reldirhints(cp, 0);
815
816 /*
817 * Re-read cnode data for all active vnodes (non-metadata files).
818 */
819 if (!vnode_issystem(vp) && !VNODE_IS_RSRC(vp) && (cp->c_fileid >= kHFSFirstUserCatalogNodeID)) {
820 struct cat_fork *datafork;
821 struct cat_desc desc;
822
823 datafork = cp->c_datafork ? &cp->c_datafork->ff_data : NULL;
824
825 /* lookup by fileID since name could have changed */
826 lockflags = hfs_systemfile_lock(args->hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
827 args->error = cat_idlookup(args->hfsmp, cp->c_fileid, 0, 0, &desc, &cp->c_attr, datafork);
828 hfs_systemfile_unlock(args->hfsmp, lockflags);
829 if (args->error) {
830 return (VNODE_RETURNED_DONE);
831 }
832
833 /* update cnode's catalog descriptor */
834 (void) replace_desc(cp, &desc);
835 }
836 return (VNODE_RETURNED);
837 }
838
839 /*
840 * Reload all incore data for a filesystem (used after running fsck on
841 * the root filesystem and finding things to fix). The filesystem must
842 * be mounted read-only.
843 *
844 * Things to do to update the mount:
845 * invalidate all cached meta-data.
846 * invalidate all inactive vnodes.
847 * invalidate all cached file data.
848 * re-read volume header from disk.
849 * re-load meta-file info (extents, file size).
850 * re-load B-tree header data.
851 * re-read cnode data for all active vnodes.
852 */
853 int
854 hfs_reload(struct mount *mountp)
855 {
856 register struct vnode *devvp;
857 struct buf *bp;
858 int error, i;
859 struct hfsmount *hfsmp;
860 struct HFSPlusVolumeHeader *vhp;
861 ExtendedVCB *vcb;
862 struct filefork *forkp;
863 struct cat_desc cndesc;
864 struct hfs_reload_cargs args;
865 daddr64_t priIDSector;
866
867 hfsmp = VFSTOHFS(mountp);
868 vcb = HFSTOVCB(hfsmp);
869
870 if (vcb->vcbSigWord == kHFSSigWord)
871 return (EINVAL); /* rooting from HFS is not supported! */
872
873 /*
874 * Invalidate all cached meta-data.
875 */
876 devvp = hfsmp->hfs_devvp;
877 if (buf_invalidateblks(devvp, 0, 0, 0))
878 panic("hfs_reload: dirty1");
879
880 args.hfsmp = hfsmp;
881 args.error = 0;
882 /*
883 * hfs_reload_callback will be called for each vnode
884 * hung off of this mount point that can't be recycled...
885 * vnode_iterate will recycle those that it can (the VNODE_RELOAD option)
886 * the vnode will be in an 'unbusy' state (VNODE_WAIT) and
887 * properly referenced and unreferenced around the callback
888 */
889 vnode_iterate(mountp, VNODE_RELOAD | VNODE_WAIT, hfs_reload_callback, (void *)&args);
890
891 if (args.error)
892 return (args.error);
893
894 /*
895 * Re-read VolumeHeader from disk.
896 */
897 priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
898 HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size));
899
900 error = (int)buf_meta_bread(hfsmp->hfs_devvp,
901 HFS_PHYSBLK_ROUNDDOWN(priIDSector, hfsmp->hfs_log_per_phys),
902 hfsmp->hfs_physical_block_size, NOCRED, &bp);
903 if (error) {
904 if (bp != NULL)
905 buf_brelse(bp);
906 return (error);
907 }
908
909 vhp = (HFSPlusVolumeHeader *) (buf_dataptr(bp) + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));
910
911 /* Do a quick sanity check */
912 if ((SWAP_BE16(vhp->signature) != kHFSPlusSigWord &&
913 SWAP_BE16(vhp->signature) != kHFSXSigWord) ||
914 (SWAP_BE16(vhp->version) != kHFSPlusVersion &&
915 SWAP_BE16(vhp->version) != kHFSXVersion) ||
916 SWAP_BE32(vhp->blockSize) != vcb->blockSize) {
917 buf_brelse(bp);
918 return (EIO);
919 }
920
921 vcb->vcbLsMod = to_bsd_time(SWAP_BE32(vhp->modifyDate));
922 vcb->vcbAtrb = SWAP_BE32 (vhp->attributes);
923 vcb->vcbJinfoBlock = SWAP_BE32(vhp->journalInfoBlock);
924 vcb->vcbClpSiz = SWAP_BE32 (vhp->rsrcClumpSize);
925 vcb->vcbNxtCNID = SWAP_BE32 (vhp->nextCatalogID);
926 vcb->vcbVolBkUp = to_bsd_time(SWAP_BE32(vhp->backupDate));
927 vcb->vcbWrCnt = SWAP_BE32 (vhp->writeCount);
928 vcb->vcbFilCnt = SWAP_BE32 (vhp->fileCount);
929 vcb->vcbDirCnt = SWAP_BE32 (vhp->folderCount);
930 HFS_UPDATE_NEXT_ALLOCATION(vcb, SWAP_BE32 (vhp->nextAllocation));
931 vcb->totalBlocks = SWAP_BE32 (vhp->totalBlocks);
932 vcb->freeBlocks = SWAP_BE32 (vhp->freeBlocks);
933 vcb->encodingsBitmap = SWAP_BE64 (vhp->encodingsBitmap);
934 bcopy(vhp->finderInfo, vcb->vcbFndrInfo, sizeof(vhp->finderInfo));
935 vcb->localCreateDate = SWAP_BE32 (vhp->createDate); /* hfs+ create date is in local time */
936
937 /*
938 * Re-load meta-file vnode data (extent info, file size, etc).
939 */
940 forkp = VTOF((struct vnode *)vcb->extentsRefNum);
941 for (i = 0; i < kHFSPlusExtentDensity; i++) {
942 forkp->ff_extents[i].startBlock =
943 SWAP_BE32 (vhp->extentsFile.extents[i].startBlock);
944 forkp->ff_extents[i].blockCount =
945 SWAP_BE32 (vhp->extentsFile.extents[i].blockCount);
946 }
947 forkp->ff_size = SWAP_BE64 (vhp->extentsFile.logicalSize);
948 forkp->ff_blocks = SWAP_BE32 (vhp->extentsFile.totalBlocks);
949 forkp->ff_clumpsize = SWAP_BE32 (vhp->extentsFile.clumpSize);
950
951
952 forkp = VTOF((struct vnode *)vcb->catalogRefNum);
953 for (i = 0; i < kHFSPlusExtentDensity; i++) {
954 forkp->ff_extents[i].startBlock =
955 SWAP_BE32 (vhp->catalogFile.extents[i].startBlock);
956 forkp->ff_extents[i].blockCount =
957 SWAP_BE32 (vhp->catalogFile.extents[i].blockCount);
958 }
959 forkp->ff_size = SWAP_BE64 (vhp->catalogFile.logicalSize);
960 forkp->ff_blocks = SWAP_BE32 (vhp->catalogFile.totalBlocks);
961 forkp->ff_clumpsize = SWAP_BE32 (vhp->catalogFile.clumpSize);
962
963 if (hfsmp->hfs_attribute_vp) {
964 forkp = VTOF(hfsmp->hfs_attribute_vp);
965 for (i = 0; i < kHFSPlusExtentDensity; i++) {
966 forkp->ff_extents[i].startBlock =
967 SWAP_BE32 (vhp->attributesFile.extents[i].startBlock);
968 forkp->ff_extents[i].blockCount =
969 SWAP_BE32 (vhp->attributesFile.extents[i].blockCount);
970 }
971 forkp->ff_size = SWAP_BE64 (vhp->attributesFile.logicalSize);
972 forkp->ff_blocks = SWAP_BE32 (vhp->attributesFile.totalBlocks);
973 forkp->ff_clumpsize = SWAP_BE32 (vhp->attributesFile.clumpSize);
974 }
975
976 forkp = VTOF((struct vnode *)vcb->allocationsRefNum);
977 for (i = 0; i < kHFSPlusExtentDensity; i++) {
978 forkp->ff_extents[i].startBlock =
979 SWAP_BE32 (vhp->allocationFile.extents[i].startBlock);
980 forkp->ff_extents[i].blockCount =
981 SWAP_BE32 (vhp->allocationFile.extents[i].blockCount);
982 }
983 forkp->ff_size = SWAP_BE64 (vhp->allocationFile.logicalSize);
984 forkp->ff_blocks = SWAP_BE32 (vhp->allocationFile.totalBlocks);
985 forkp->ff_clumpsize = SWAP_BE32 (vhp->allocationFile.clumpSize);
986
987 buf_brelse(bp);
988 vhp = NULL;
989
990 /*
991 * Re-load B-tree header data
992 */
993 forkp = VTOF((struct vnode *)vcb->extentsRefNum);
994 if ( (error = MacToVFSError( BTReloadData((FCB*)forkp) )) )
995 return (error);
996
997 forkp = VTOF((struct vnode *)vcb->catalogRefNum);
998 if ( (error = MacToVFSError( BTReloadData((FCB*)forkp) )) )
999 return (error);
1000
1001 if (hfsmp->hfs_attribute_vp) {
1002 forkp = VTOF(hfsmp->hfs_attribute_vp);
1003 if ( (error = MacToVFSError( BTReloadData((FCB*)forkp) )) )
1004 return (error);
1005 }
1006
1007 /* Reload the volume name */
1008 if ((error = cat_idlookup(hfsmp, kHFSRootFolderID, 0, 0, &cndesc, NULL, NULL)))
1009 return (error);
1010 vcb->volumeNameEncodingHint = cndesc.cd_encoding;
1011 bcopy(cndesc.cd_nameptr, vcb->vcbVN, min(255, cndesc.cd_namelen));
1012 cat_releasedesc(&cndesc);
1013
1014 /* Re-establish private/hidden directories. */
1015 hfs_privatedir_init(hfsmp, FILE_HARDLINKS);
1016 hfs_privatedir_init(hfsmp, DIR_HARDLINKS);
1017
1018 /* In case any volume information changed to trigger a notification */
1019 hfs_generate_volume_notifications(hfsmp);
1020
1021 return (0);
1022 }
1023
1024
1025
1026 static void
1027 hfs_syncer(void *arg0, void *unused)
1028 {
1029 #pragma unused(unused)
1030
1031 struct hfsmount *hfsmp = arg0;
1032 clock_sec_t secs;
1033 clock_usec_t usecs;
1034 uint32_t delay = HFS_META_DELAY;
1035 uint64_t now;
1036 static int no_max=1;
1037
1038 clock_get_calendar_microtime(&secs, &usecs);
1039 now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs;
1040
1041 //
1042 // If the amount of pending writes is more than our limit, wait
1043 // for 2/3 of it to drain and then flush the journal.
1044 //
1045 if (hfsmp->hfs_mp->mnt_pending_write_size > hfsmp->hfs_max_pending_io) {
1046 int counter=0;
1047 uint64_t pending_io, start, rate = 0;
1048
1049 no_max = 0;
1050
1051 hfs_start_transaction(hfsmp); // so we hold off any new i/o's
1052
1053 pending_io = hfsmp->hfs_mp->mnt_pending_write_size;
1054
1055 clock_get_calendar_microtime(&secs, &usecs);
1056 start = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs;
1057
1058 while(hfsmp->hfs_mp->mnt_pending_write_size > (pending_io/3) && counter++ < 500) {
1059 tsleep((caddr_t)hfsmp, PRIBIO, "hfs-wait-for-io-to-drain", 10);
1060 }
1061
1062 if (counter >= 500) {
1063 printf("hfs: timed out waiting for io to drain (%lld)\n", (int64_t)hfsmp->hfs_mp->mnt_pending_write_size);
1064 }
1065
1066 if (hfsmp->jnl) {
1067 journal_flush(hfsmp->jnl, FALSE);
1068 } else {
1069 hfs_sync(hfsmp->hfs_mp, MNT_WAIT, vfs_context_kernel());
1070 }
1071
1072 clock_get_calendar_microtime(&secs, &usecs);
1073 now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs;
1074 hfsmp->hfs_last_sync_time = now;
1075 if (now != start) {
1076 rate = ((pending_io * 1000000ULL) / (now - start)); // yields bytes per second
1077 }
1078
1079 hfs_end_transaction(hfsmp);
1080
1081 //
1082 // If a reasonable amount of time elapsed then check the
1083 // i/o rate. If it's taking less than 1 second or more
1084 // than 2 seconds, adjust hfs_max_pending_io so that we
1085 // will allow about 1.5 seconds of i/o to queue up.
1086 //
1087 if (((now - start) >= 300000) && (rate != 0)) {
1088 uint64_t scale = (pending_io * 100) / rate;
1089
1090 if (scale < 100 || scale > 200) {
1091 // set it so that it should take about 1.5 seconds to drain
1092 hfsmp->hfs_max_pending_io = (rate * 150ULL) / 100ULL;
1093 }
1094 }
1095
1096 } else if ( ((now - hfsmp->hfs_last_sync_time) >= 5000000ULL)
1097 || (((now - hfsmp->hfs_last_sync_time) >= 100000LL)
1098 && ((now - hfsmp->hfs_last_sync_request_time) >= 100000LL)
1099 && (hfsmp->hfs_active_threads == 0)
1100 && (hfsmp->hfs_global_lock_nesting == 0))) {
1101
1102 //
1103 // Flush the journal if more than 5 seconds elapsed since
1104 // the last sync OR we have not sync'ed recently and the
1105 // last sync request time was more than 100 milliseconds
1106 // ago and no one is in the middle of a transaction right
1107 // now. Else we defer the sync and reschedule it.
1108 //
1109 if (hfsmp->jnl) {
1110 hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
1111
1112 journal_flush(hfsmp->jnl, FALSE);
1113
1114 hfs_unlock_global (hfsmp);
1115 } else {
1116 hfs_sync(hfsmp->hfs_mp, MNT_WAIT, vfs_context_kernel());
1117 }
1118
1119 clock_get_calendar_microtime(&secs, &usecs);
1120 now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs;
1121 hfsmp->hfs_last_sync_time = now;
1122
1123 } else if (hfsmp->hfs_active_threads == 0) {
1124 uint64_t deadline;
1125
1126 clock_interval_to_deadline(delay, HFS_MILLISEC_SCALE, &deadline);
1127 thread_call_enter_delayed(hfsmp->hfs_syncer, deadline);
1128
1129 // note: we intentionally return early here and do not
1130 // decrement the sync_scheduled and sync_incomplete
1131 // variables because we rescheduled the timer.
1132
1133 return;
1134 }
1135
1136 //
1137 // NOTE: we decrement these *after* we're done the journal_flush() since
1138 // it can take a significant amount of time and so we don't want more
1139 // callbacks scheduled until we're done this one.
1140 //
1141 OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_scheduled);
1142 OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_incomplete);
1143 wakeup((caddr_t)&hfsmp->hfs_sync_incomplete);
1144 }
1145
1146
1147 extern int IOBSDIsMediaEjectable( const char *cdev_name );
1148
/*
 * hfs_initialize_allocator
 *
 * Initialization code for the Red-Black Tree Allocator.
 *
 * Takes the allocation bitmap lock exclusively and asks GenerateTree() to
 * build the extent tree covering all of the volume's blocks.  On success
 * the tree is marked active (HFS_ALLOC_RB_ACTIVE).  In every case the
 * HFS_ALLOC_TREEBUILD_INFLIGHT bit is cleared and anyone sleeping on
 * hfsmp->extent_tree_flags is woken before the lock is dropped.
 *
 * This function is intended to be run in a separate thread so as not to
 * slow down mount.
 *
 * When CONFIG_HFS_ALLOC_RBTREE is not set this function does nothing.
 */

void
hfs_initialize_allocator (struct hfsmount *hfsmp) {

#if CONFIG_HFS_ALLOC_RBTREE
	u_int32_t err;

	/*
	 * Take the allocation file lock.  Journal transactions will block until
	 * we're done here.
	 */
	int flags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);

	/*
	 * GenerateTree assumes that the bitmap lock is held when you call the function.
	 * It will drop and re-acquire the lock periodically as needed to let other allocations
	 * through.  It returns with the bitmap lock held.  Since we only maintain one tree,
	 * we don't need to specify a start block (always starts at 0).
	 */
	err = GenerateTree(hfsmp, hfsmp->totalBlocks, &flags, 1);
	if (err == 0) {
		/* Mark offset tree as built */
		hfsmp->extent_tree_flags |= HFS_ALLOC_RB_ACTIVE;
	}

	/*
	 * GenerateTree always returns with the lock held, so there is no need
	 * to re-grab it in order to update the TREEBUILD_INFLIGHT bit.
	 */
	hfsmp->extent_tree_flags &= ~HFS_ALLOC_TREEBUILD_INFLIGHT;

	/*
	 * hfs_teardown_allocator() sleeps on extent_tree_flags for as long as
	 * TREEBUILD_INFLIGHT is set, so wake it whenever the bit is cleared,
	 * not only when the build failed.  A wakeup with no sleepers is harmless.
	 */
	wakeup((caddr_t)&hfsmp->extent_tree_flags);

	hfs_systemfile_unlock(hfsmp, flags);
#else
#pragma unused (hfsmp)
#endif
}
1201
1202 void hfs_unmap_blocks (struct hfsmount *hfsmp) {
1203 /*
1204 * Take the allocation file lock. Journal transactions will block until
1205 * we're done here.
1206 */
1207 int flags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
1208
1209 /*
1210 * UnmapBlocks assumes that the bitmap lock is held when you call the function.
1211 * We don't care if there were any error issuing unmaps yet.
1212 */
1213 (void) UnmapBlocks(hfsmp);
1214
1215 hfs_systemfile_unlock(hfsmp, flags);
1216 }
1217
1218
/*
 * hfs_teardown_allocator
 *
 * Teardown code for the Red-Black Tree allocator.
 *
 * Serializes against a thread that may still be building the tree: while
 * HFS_ALLOC_TREEBUILD_INFLIGHT is set, HFS_ALLOC_TEARDOWN_INFLIGHT is
 * raised and the caller sleeps (uninterruptibly) on extent_tree_flags.
 * Once the build is no longer in flight, the trees are destroyed if they
 * are active.
 *
 * Returns:
 *	1 -> Tree was live and has been destroyed.
 *	0 -> Tree was not active at time of call (always the case when
 *	     CONFIG_HFS_ALLOC_RBTREE is not set).
 */

int
hfs_teardown_allocator (struct hfsmount *hfsmp) {
	int tree_was_live = 0;

#if CONFIG_HFS_ALLOC_RBTREE

	/* Hold the bitmap exclusively for the whole teardown. */
	int lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);

	/*
	 * If tree generation is still on-going, flag the teardown and block
	 * until it is done.  The sleep is keyed on extent_tree_flags and
	 * uses the allocation file cnode's rwlock in exclusive mode.
	 */
	while (hfsmp->extent_tree_flags & HFS_ALLOC_TREEBUILD_INFLIGHT) {
		hfsmp->extent_tree_flags |= HFS_ALLOC_TEARDOWN_INFLIGHT;

		lck_rw_sleep(&(VTOC(hfsmp->hfs_allocation_vp))->c_rwlock, LCK_SLEEP_EXCLUSIVE,
		             &hfsmp->extent_tree_flags, THREAD_UNINT);
	}

	if (hfs_isrbtree_active (hfsmp)) {
		tree_was_live = 1;

		/* Tear down the RB Trees while we have the bitmap locked */
		DestroyTrees(hfsmp);
	}

	hfs_systemfile_unlock(hfsmp, lockflags);
#else
#pragma unused (hfsmp)
#endif

	return tree_was_live;
}
1267
1268 static int hfs_root_unmounted_cleanly = 0;
1269
1270 SYSCTL_DECL(_vfs_generic);
1271 SYSCTL_INT(_vfs_generic, OID_AUTO, root_unmounted_cleanly, CTLFLAG_RD, &hfs_root_unmounted_cleanly, 0, "Root filesystem was unmounted cleanly");
1272
1273 /*
1274 * Common code for mount and mountroot
1275 */
1276 int
1277 hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
1278 int journal_replay_only, vfs_context_t context)
1279 {
1280 struct proc *p = vfs_context_proc(context);
1281 int retval = E_NONE;
1282 struct hfsmount *hfsmp = NULL;
1283 struct buf *bp;
1284 dev_t dev;
1285 HFSMasterDirectoryBlock *mdbp = NULL;
1286 int ronly;
1287 #if QUOTA
1288 int i;
1289 #endif
1290 int mntwrapper;
1291 kauth_cred_t cred;
1292 u_int64_t disksize;
1293 daddr64_t log_blkcnt;
1294 u_int32_t log_blksize;
1295 u_int32_t phys_blksize;
1296 u_int32_t minblksize;
1297 u_int32_t iswritable;
1298 daddr64_t mdb_offset;
1299 int isvirtual = 0;
1300 int isroot = 0;
1301 u_int32_t device_features = 0;
1302 int isssd;
1303 #if CONFIG_HFS_ALLOC_RBTREE
1304 thread_t allocator_thread;
1305 #endif
1306
1307 if (args == NULL) {
1308 /* only hfs_mountroot passes us NULL as the 'args' argument */
1309 isroot = 1;
1310 }
1311
1312 ronly = vfs_isrdonly(mp);
1313 dev = vnode_specrdev(devvp);
1314 cred = p ? vfs_context_ucred(context) : NOCRED;
1315 mntwrapper = 0;
1316
1317 bp = NULL;
1318 hfsmp = NULL;
1319 mdbp = NULL;
1320 minblksize = kHFSBlockSize;
1321
1322 /* Advisory locking should be handled at the VFS layer */
1323 vfs_setlocklocal(mp);
1324
1325 /* Get the logical block size (treated as physical block size everywhere) */
1326 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&log_blksize, 0, context)) {
1327 if (HFS_MOUNT_DEBUG) {
1328 printf("hfs_mountfs: DKIOCGETBLOCKSIZE failed\n");
1329 }
1330 retval = ENXIO;
1331 goto error_exit;
1332 }
1333 if (log_blksize == 0 || log_blksize > 1024*1024*1024) {
1334 printf("hfs: logical block size 0x%x looks bad. Not mounting.\n", log_blksize);
1335 retval = ENXIO;
1336 goto error_exit;
1337 }
1338
1339 /* Get the physical block size. */
1340 retval = VNOP_IOCTL(devvp, DKIOCGETPHYSICALBLOCKSIZE, (caddr_t)&phys_blksize, 0, context);
1341 if (retval) {
1342 if ((retval != ENOTSUP) && (retval != ENOTTY)) {
1343 if (HFS_MOUNT_DEBUG) {
1344 printf("hfs_mountfs: DKIOCGETPHYSICALBLOCKSIZE failed\n");
1345 }
1346 retval = ENXIO;
1347 goto error_exit;
1348 }
1349 /* If device does not support this ioctl, assume that physical
1350 * block size is same as logical block size
1351 */
1352 phys_blksize = log_blksize;
1353 }
1354 if (phys_blksize == 0 || phys_blksize > 1024*1024*1024) {
1355 printf("hfs: physical block size 0x%x looks bad. Not mounting.\n", phys_blksize);
1356 retval = ENXIO;
1357 goto error_exit;
1358 }
1359
1360 /* Switch to 512 byte sectors (temporarily) */
1361 if (log_blksize > 512) {
1362 u_int32_t size512 = 512;
1363
1364 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&size512, FWRITE, context)) {
1365 if (HFS_MOUNT_DEBUG) {
1366 printf("hfs_mountfs: DKIOCSETBLOCKSIZE failed \n");
1367 }
1368 retval = ENXIO;
1369 goto error_exit;
1370 }
1371 }
1372 /* Get the number of 512 byte physical blocks. */
1373 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) {
1374 /* resetting block size may fail if getting block count did */
1375 (void)VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context);
1376 if (HFS_MOUNT_DEBUG) {
1377 printf("hfs_mountfs: DKIOCGETBLOCKCOUNT failed\n");
1378 }
1379 retval = ENXIO;
1380 goto error_exit;
1381 }
1382 /* Compute an accurate disk size (i.e. within 512 bytes) */
1383 disksize = (u_int64_t)log_blkcnt * (u_int64_t)512;
1384
1385 /*
1386 * On Tiger it is not necessary to switch the device
1387 * block size to be 4k if there are more than 31-bits
1388 * worth of blocks but to insure compatibility with
1389 * pre-Tiger systems we have to do it.
1390 *
1391 * If the device size is not a multiple of 4K (8 * 512), then
1392 * switching the logical block size isn't going to help because
1393 * we will be unable to write the alternate volume header.
1394 * In this case, just leave the logical block size unchanged.
1395 */
1396 if (log_blkcnt > 0x000000007fffffff && (log_blkcnt & 7) == 0) {
1397 minblksize = log_blksize = 4096;
1398 if (phys_blksize < log_blksize)
1399 phys_blksize = log_blksize;
1400 }
1401
1402 /*
1403 * The cluster layer is not currently prepared to deal with a logical
1404 * block size larger than the system's page size. (It can handle
1405 * blocks per page, but not multiple pages per block.) So limit the
1406 * logical block size to the page size.
1407 */
1408 if (log_blksize > PAGE_SIZE)
1409 log_blksize = PAGE_SIZE;
1410
1411 /* Now switch to our preferred physical block size. */
1412 if (log_blksize > 512) {
1413 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context)) {
1414 if (HFS_MOUNT_DEBUG) {
1415 printf("hfs_mountfs: DKIOCSETBLOCKSIZE (2) failed\n");
1416 }
1417 retval = ENXIO;
1418 goto error_exit;
1419 }
1420 /* Get the count of physical blocks. */
1421 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) {
1422 if (HFS_MOUNT_DEBUG) {
1423 printf("hfs_mountfs: DKIOCGETBLOCKCOUNT (2) failed\n");
1424 }
1425 retval = ENXIO;
1426 goto error_exit;
1427 }
1428 }
1429 /*
1430 * At this point:
1431 * minblksize is the minimum physical block size
1432 * log_blksize has our preferred physical block size
1433 * log_blkcnt has the total number of physical blocks
1434 */
1435
1436 mdb_offset = (daddr64_t)HFS_PRI_SECTOR(log_blksize);
1437 if ((retval = (int)buf_meta_bread(devvp,
1438 HFS_PHYSBLK_ROUNDDOWN(mdb_offset, (phys_blksize/log_blksize)),
1439 phys_blksize, cred, &bp))) {
1440 if (HFS_MOUNT_DEBUG) {
1441 printf("hfs_mountfs: buf_meta_bread failed with %d\n", retval);
1442 }
1443 goto error_exit;
1444 }
1445 MALLOC(mdbp, HFSMasterDirectoryBlock *, kMDBSize, M_TEMP, M_WAITOK);
1446 if (mdbp == NULL) {
1447 retval = ENOMEM;
1448 if (HFS_MOUNT_DEBUG) {
1449 printf("hfs_mountfs: MALLOC failed\n");
1450 }
1451 goto error_exit;
1452 }
1453 bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize), mdbp, kMDBSize);
1454 buf_brelse(bp);
1455 bp = NULL;
1456
1457 MALLOC(hfsmp, struct hfsmount *, sizeof(struct hfsmount), M_HFSMNT, M_WAITOK);
1458 if (hfsmp == NULL) {
1459 if (HFS_MOUNT_DEBUG) {
1460 printf("hfs_mountfs: MALLOC (2) failed\n");
1461 }
1462 retval = ENOMEM;
1463 goto error_exit;
1464 }
1465 bzero(hfsmp, sizeof(struct hfsmount));
1466
1467 hfs_chashinit_finish(hfsmp);
1468
1469 /*
1470 * See if the disk supports unmap (trim).
1471 *
1472 * NOTE: vfs_init_io_attributes has not been called yet, so we can't use the io_flags field
1473 * returned by vfs_ioattr. We need to call VNOP_IOCTL ourselves.
1474 */
1475 if (VNOP_IOCTL(devvp, DKIOCGETFEATURES, (caddr_t)&device_features, 0, context) == 0) {
1476 if (device_features & DK_FEATURE_UNMAP) {
1477 hfsmp->hfs_flags |= HFS_UNMAP;
1478 }
1479 }
1480
1481 /*
1482 * See if the disk is a solid state device, too. We need this to decide what to do about
1483 * hotfiles.
1484 */
1485 if (VNOP_IOCTL(devvp, DKIOCISSOLIDSTATE, (caddr_t)&isssd, 0, context) == 0) {
1486 if (isssd) {
1487 hfsmp->hfs_flags |= HFS_SSD;
1488 }
1489 }
1490
1491
1492 /*
1493 * Init the volume information structure
1494 */
1495
1496 lck_mtx_init(&hfsmp->hfs_mutex, hfs_mutex_group, hfs_lock_attr);
1497 lck_mtx_init(&hfsmp->hfc_mutex, hfs_mutex_group, hfs_lock_attr);
1498 lck_rw_init(&hfsmp->hfs_global_lock, hfs_rwlock_group, hfs_lock_attr);
1499 lck_rw_init(&hfsmp->hfs_insync, hfs_rwlock_group, hfs_lock_attr);
1500 lck_spin_init(&hfsmp->vcbFreeExtLock, hfs_spinlock_group, hfs_lock_attr);
1501
1502 vfs_setfsprivate(mp, hfsmp);
1503 hfsmp->hfs_mp = mp; /* Make VFSTOHFS work */
1504 hfsmp->hfs_raw_dev = vnode_specrdev(devvp);
1505 hfsmp->hfs_devvp = devvp;
1506 vnode_ref(devvp); /* Hold a ref on the device, dropped when hfsmp is freed. */
1507 hfsmp->hfs_logical_block_size = log_blksize;
1508 hfsmp->hfs_logical_block_count = log_blkcnt;
1509 hfsmp->hfs_logical_bytes = (uint64_t) log_blksize * (uint64_t) log_blkcnt;
1510 hfsmp->hfs_physical_block_size = phys_blksize;
1511 hfsmp->hfs_log_per_phys = (phys_blksize / log_blksize);
1512 hfsmp->hfs_flags |= HFS_WRITEABLE_MEDIA;
1513 if (ronly)
1514 hfsmp->hfs_flags |= HFS_READ_ONLY;
1515 if (((unsigned int)vfs_flags(mp)) & MNT_UNKNOWNPERMISSIONS)
1516 hfsmp->hfs_flags |= HFS_UNKNOWN_PERMS;
1517
1518 #if QUOTA
1519 for (i = 0; i < MAXQUOTAS; i++)
1520 dqfileinit(&hfsmp->hfs_qfiles[i]);
1521 #endif
1522
1523 if (args) {
1524 hfsmp->hfs_uid = (args->hfs_uid == (uid_t)VNOVAL) ? UNKNOWNUID : args->hfs_uid;
1525 if (hfsmp->hfs_uid == 0xfffffffd) hfsmp->hfs_uid = UNKNOWNUID;
1526 hfsmp->hfs_gid = (args->hfs_gid == (gid_t)VNOVAL) ? UNKNOWNGID : args->hfs_gid;
1527 if (hfsmp->hfs_gid == 0xfffffffd) hfsmp->hfs_gid = UNKNOWNGID;
1528 vfs_setowner(mp, hfsmp->hfs_uid, hfsmp->hfs_gid); /* tell the VFS */
1529 if (args->hfs_mask != (mode_t)VNOVAL) {
1530 hfsmp->hfs_dir_mask = args->hfs_mask & ALLPERMS;
1531 if (args->flags & HFSFSMNT_NOXONFILES) {
1532 hfsmp->hfs_file_mask = (args->hfs_mask & DEFFILEMODE);
1533 } else {
1534 hfsmp->hfs_file_mask = args->hfs_mask & ALLPERMS;
1535 }
1536 } else {
1537 hfsmp->hfs_dir_mask = UNKNOWNPERMISSIONS & ALLPERMS; /* 0777: rwx---rwx */
1538 hfsmp->hfs_file_mask = UNKNOWNPERMISSIONS & DEFFILEMODE; /* 0666: no --x by default? */
1539 }
1540 if ((args->flags != (int)VNOVAL) && (args->flags & HFSFSMNT_WRAPPER))
1541 mntwrapper = 1;
1542 } else {
1543 /* Even w/o explicit mount arguments, MNT_UNKNOWNPERMISSIONS requires setting up uid, gid, and mask: */
1544 if (((unsigned int)vfs_flags(mp)) & MNT_UNKNOWNPERMISSIONS) {
1545 hfsmp->hfs_uid = UNKNOWNUID;
1546 hfsmp->hfs_gid = UNKNOWNGID;
1547 vfs_setowner(mp, hfsmp->hfs_uid, hfsmp->hfs_gid); /* tell the VFS */
1548 hfsmp->hfs_dir_mask = UNKNOWNPERMISSIONS & ALLPERMS; /* 0777: rwx---rwx */
1549 hfsmp->hfs_file_mask = UNKNOWNPERMISSIONS & DEFFILEMODE; /* 0666: no --x by default? */
1550 }
1551 }
1552
1553 /* Find out if disk media is writable. */
1554 if (VNOP_IOCTL(devvp, DKIOCISWRITABLE, (caddr_t)&iswritable, 0, context) == 0) {
1555 if (iswritable)
1556 hfsmp->hfs_flags |= HFS_WRITEABLE_MEDIA;
1557 else
1558 hfsmp->hfs_flags &= ~HFS_WRITEABLE_MEDIA;
1559 }
1560
1561 // record the current time at which we're mounting this volume
1562 struct timeval tv;
1563 microtime(&tv);
1564 hfsmp->hfs_mount_time = tv.tv_sec;
1565
1566 /* Mount a standard HFS disk */
1567 if ((SWAP_BE16(mdbp->drSigWord) == kHFSSigWord) &&
1568 (mntwrapper || (SWAP_BE16(mdbp->drEmbedSigWord) != kHFSPlusSigWord))) {
1569 #if CONFIG_HFS_STD
1570 /* On 10.6 and beyond, non read-only mounts for HFS standard vols get rejected */
1571 if (vfs_isrdwr(mp)) {
1572 retval = EROFS;
1573 goto error_exit;
1574 }
1575
1576 printf("hfs_mountfs: Mounting HFS Standard volumes was deprecated in Mac OS 10.7 \n");
1577
1578 /* Treat it as if it's read-only and not writeable */
1579 hfsmp->hfs_flags |= HFS_READ_ONLY;
1580 hfsmp->hfs_flags &= ~HFS_WRITEABLE_MEDIA;
1581
1582 /* If only journal replay is requested, exit immediately */
1583 if (journal_replay_only) {
1584 retval = 0;
1585 goto error_exit;
1586 }
1587
1588 if ((vfs_flags(mp) & MNT_ROOTFS)) {
1589 retval = EINVAL; /* Cannot root from HFS standard disks */
1590 goto error_exit;
1591 }
1592 /* HFS disks can only use 512 byte physical blocks */
1593 if (log_blksize > kHFSBlockSize) {
1594 log_blksize = kHFSBlockSize;
1595 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context)) {
1596 retval = ENXIO;
1597 goto error_exit;
1598 }
1599 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) {
1600 retval = ENXIO;
1601 goto error_exit;
1602 }
1603 hfsmp->hfs_logical_block_size = log_blksize;
1604 hfsmp->hfs_logical_block_count = log_blkcnt;
1605 hfsmp->hfs_logical_bytes = (uint64_t) log_blksize * (uint64_t) log_blkcnt;
1606 hfsmp->hfs_physical_block_size = log_blksize;
1607 hfsmp->hfs_log_per_phys = 1;
1608 }
1609 if (args) {
1610 hfsmp->hfs_encoding = args->hfs_encoding;
1611 HFSTOVCB(hfsmp)->volumeNameEncodingHint = args->hfs_encoding;
1612
1613 /* establish the timezone */
1614 gTimeZone = args->hfs_timezone;
1615 }
1616
1617 retval = hfs_getconverter(hfsmp->hfs_encoding, &hfsmp->hfs_get_unicode,
1618 &hfsmp->hfs_get_hfsname);
1619 if (retval)
1620 goto error_exit;
1621
1622 retval = hfs_MountHFSVolume(hfsmp, mdbp, p);
1623 if (retval)
1624 (void) hfs_relconverter(hfsmp->hfs_encoding);
1625 #else
1626 /* On platforms where HFS Standard is not supported, deny the mount altogether */
1627 retval = EINVAL;
1628 goto error_exit;
1629 #endif
1630
1631 } else /* Mount an HFS Plus disk */ {
1632 HFSPlusVolumeHeader *vhp;
1633 off_t embeddedOffset;
1634 int jnl_disable = 0;
1635
1636 /* Get the embedded Volume Header */
1637 if (SWAP_BE16(mdbp->drEmbedSigWord) == kHFSPlusSigWord) {
1638 embeddedOffset = SWAP_BE16(mdbp->drAlBlSt) * kHFSBlockSize;
1639 embeddedOffset += (u_int64_t)SWAP_BE16(mdbp->drEmbedExtent.startBlock) *
1640 (u_int64_t)SWAP_BE32(mdbp->drAlBlkSiz);
1641
1642 /*
1643 * If the embedded volume doesn't start on a block
1644 * boundary, then switch the device to a 512-byte
1645 * block size so everything will line up on a block
1646 * boundary.
1647 */
1648 if ((embeddedOffset % log_blksize) != 0) {
1649 printf("hfs_mountfs: embedded volume offset not"
1650 " a multiple of physical block size (%d);"
1651 " switching to 512\n", log_blksize);
1652 log_blksize = 512;
1653 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE,
1654 (caddr_t)&log_blksize, FWRITE, context)) {
1655
1656 if (HFS_MOUNT_DEBUG) {
1657 printf("hfs_mountfs: DKIOCSETBLOCKSIZE (3) failed\n");
1658 }
1659 retval = ENXIO;
1660 goto error_exit;
1661 }
1662 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT,
1663 (caddr_t)&log_blkcnt, 0, context)) {
1664 if (HFS_MOUNT_DEBUG) {
1665 printf("hfs_mountfs: DKIOCGETBLOCKCOUNT (3) failed\n");
1666 }
1667 retval = ENXIO;
1668 goto error_exit;
1669 }
1670 /* Note: relative block count adjustment */
1671 hfsmp->hfs_logical_block_count *=
1672 hfsmp->hfs_logical_block_size / log_blksize;
1673
1674 /* Update logical /physical block size */
1675 hfsmp->hfs_logical_block_size = log_blksize;
1676 hfsmp->hfs_physical_block_size = log_blksize;
1677
1678 phys_blksize = log_blksize;
1679 hfsmp->hfs_log_per_phys = 1;
1680 }
1681
1682 disksize = (u_int64_t)SWAP_BE16(mdbp->drEmbedExtent.blockCount) *
1683 (u_int64_t)SWAP_BE32(mdbp->drAlBlkSiz);
1684
1685 hfsmp->hfs_logical_block_count = disksize / log_blksize;
1686
1687 hfsmp->hfs_logical_bytes = (uint64_t) hfsmp->hfs_logical_block_count * (uint64_t) hfsmp->hfs_logical_block_size;
1688
1689 mdb_offset = (daddr64_t)((embeddedOffset / log_blksize) + HFS_PRI_SECTOR(log_blksize));
1690 retval = (int)buf_meta_bread(devvp, HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
1691 phys_blksize, cred, &bp);
1692 if (retval) {
1693 if (HFS_MOUNT_DEBUG) {
1694 printf("hfs_mountfs: buf_meta_bread (2) failed with %d\n", retval);
1695 }
1696 goto error_exit;
1697 }
1698 bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize), mdbp, 512);
1699 buf_brelse(bp);
1700 bp = NULL;
1701 vhp = (HFSPlusVolumeHeader*) mdbp;
1702
1703 } else /* pure HFS+ */ {
1704 embeddedOffset = 0;
1705 vhp = (HFSPlusVolumeHeader*) mdbp;
1706 }
1707
1708 if (isroot) {
1709 hfs_root_unmounted_cleanly = ((SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) != 0);
1710 }
1711
1712 /*
1713 * On inconsistent disks, do not allow read-write mount
1714 * unless it is the boot volume being mounted. We also
1715 * always want to replay the journal if the journal_replay_only
1716 * flag is set because that will (most likely) get the
1717 * disk into a consistent state before fsck_hfs starts
1718 * looking at it.
1719 */
1720 if ( !(vfs_flags(mp) & MNT_ROOTFS)
1721 && (SWAP_BE32(vhp->attributes) & kHFSVolumeInconsistentMask)
1722 && !journal_replay_only
1723 && !(hfsmp->hfs_flags & HFS_READ_ONLY)) {
1724
1725 if (HFS_MOUNT_DEBUG) {
1726 printf("hfs_mountfs: failed to mount non-root inconsistent disk\n");
1727 }
1728 retval = EINVAL;
1729 goto error_exit;
1730 }
1731
1732
1733 // XXXdbg
1734 //
1735 hfsmp->jnl = NULL;
1736 hfsmp->jvp = NULL;
1737 if (args != NULL && (args->flags & HFSFSMNT_EXTENDED_ARGS) &&
1738 args->journal_disable) {
1739 jnl_disable = 1;
1740 }
1741
1742 //
1743 // We only initialize the journal here if the last person
1744 // to mount this volume was journaling aware. Otherwise
1745 // we delay journal initialization until later at the end
1746 // of hfs_MountHFSPlusVolume() because the last person who
1747 // mounted it could have messed things up behind our back
1748 // (so we need to go find the .journal file, make sure it's
1749 // the right size, re-sync up if it was moved, etc).
1750 //
1751 if ( (SWAP_BE32(vhp->lastMountedVersion) == kHFSJMountVersion)
1752 && (SWAP_BE32(vhp->attributes) & kHFSVolumeJournaledMask)
1753 && !jnl_disable) {
1754
1755 // if we're able to init the journal, mark the mount
1756 // point as journaled.
1757 //
1758 if ((retval = hfs_early_journal_init(hfsmp, vhp, args, embeddedOffset, mdb_offset, mdbp, cred)) == 0) {
1759 vfs_setflags(mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
1760 } else {
1761 if (retval == EROFS) {
1762 // EROFS is a special error code that means the volume has an external
1763 // journal which we couldn't find. in that case we do not want to
1764 // rewrite the volume header - we'll just refuse to mount the volume.
1765 if (HFS_MOUNT_DEBUG) {
1766 printf("hfs_mountfs: hfs_early_journal_init indicated external jnl \n");
1767 }
1768 retval = EINVAL;
1769 goto error_exit;
1770 }
1771
1772 // if the journal failed to open, then set the lastMountedVersion
1773 // to be "FSK!" which fsck_hfs will see and force the fsck instead
1774 // of just bailing out because the volume is journaled.
1775 if (!ronly) {
1776 if (HFS_MOUNT_DEBUG) {
1777 printf("hfs_mountfs: hfs_early_journal_init failed, setting to FSK \n");
1778 }
1779
1780 HFSPlusVolumeHeader *jvhp;
1781
1782 hfsmp->hfs_flags |= HFS_NEED_JNL_RESET;
1783
1784 if (mdb_offset == 0) {
1785 mdb_offset = (daddr64_t)((embeddedOffset / log_blksize) + HFS_PRI_SECTOR(log_blksize));
1786 }
1787
1788 bp = NULL;
1789 retval = (int)buf_meta_bread(devvp,
1790 HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
1791 phys_blksize, cred, &bp);
1792 if (retval == 0) {
1793 jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize));
1794
1795 if (SWAP_BE16(jvhp->signature) == kHFSPlusSigWord || SWAP_BE16(jvhp->signature) == kHFSXSigWord) {
1796 printf ("hfs(1): Journal replay fail. Writing lastMountVersion as FSK!\n");
1797 jvhp->lastMountedVersion = SWAP_BE32(kFSKMountVersion);
1798 buf_bwrite(bp);
1799 } else {
1800 buf_brelse(bp);
1801 }
1802 bp = NULL;
1803 } else if (bp) {
1804 buf_brelse(bp);
1805 // clear this so the error exit path won't try to use it
1806 bp = NULL;
1807 }
1808 }
1809
1810 // if this isn't the root device just bail out.
1811 // If it is the root device we just continue on
1812 // in the hopes that fsck_hfs will be able to
1813 // fix any damage that exists on the volume.
1814 if ( !(vfs_flags(mp) & MNT_ROOTFS)) {
1815 if (HFS_MOUNT_DEBUG) {
1816 printf("hfs_mountfs: hfs_early_journal_init failed, erroring out \n");
1817 }
1818 retval = EINVAL;
1819 goto error_exit;
1820 }
1821 }
1822 }
1823 // XXXdbg
1824
1825 /* Either the journal is replayed successfully, or there
1826 * was nothing to replay, or no journal exists. In any case,
1827 * return success.
1828 */
1829 if (journal_replay_only) {
1830 retval = 0;
1831 goto error_exit;
1832 }
1833
1834 (void) hfs_getconverter(0, &hfsmp->hfs_get_unicode, &hfsmp->hfs_get_hfsname);
1835
1836 retval = hfs_MountHFSPlusVolume(hfsmp, vhp, embeddedOffset, disksize, p, args, cred);
1837 /*
1838 * If the backend didn't like our physical blocksize
1839 * then retry with physical blocksize of 512.
1840 */
1841 if ((retval == ENXIO) && (log_blksize > 512) && (log_blksize != minblksize)) {
1842 printf("hfs_mountfs: could not use physical block size "
1843 "(%d) switching to 512\n", log_blksize);
1844 log_blksize = 512;
1845 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context)) {
1846 if (HFS_MOUNT_DEBUG) {
1847 printf("hfs_mountfs: DKIOCSETBLOCKSIZE (4) failed \n");
1848 }
1849 retval = ENXIO;
1850 goto error_exit;
1851 }
1852 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) {
1853 if (HFS_MOUNT_DEBUG) {
1854 printf("hfs_mountfs: DKIOCGETBLOCKCOUNT (4) failed \n");
1855 }
1856 retval = ENXIO;
1857 goto error_exit;
1858 }
1859 devvp->v_specsize = log_blksize;
1860 /* Note: relative block count adjustment (in case this is an embedded volume). */
1861 hfsmp->hfs_logical_block_count *= hfsmp->hfs_logical_block_size / log_blksize;
1862 hfsmp->hfs_logical_block_size = log_blksize;
1863 hfsmp->hfs_log_per_phys = hfsmp->hfs_physical_block_size / log_blksize;
1864
1865 hfsmp->hfs_logical_bytes = (uint64_t) hfsmp->hfs_logical_block_count * (uint64_t) hfsmp->hfs_logical_block_size;
1866
1867 if (hfsmp->jnl && hfsmp->jvp == devvp) {
1868 // close and re-open this with the new block size
1869 journal_close(hfsmp->jnl);
1870 hfsmp->jnl = NULL;
1871 if (hfs_early_journal_init(hfsmp, vhp, args, embeddedOffset, mdb_offset, mdbp, cred) == 0) {
1872 vfs_setflags(mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
1873 } else {
1874 // if the journal failed to open, then set the lastMountedVersion
1875 // to be "FSK!" which fsck_hfs will see and force the fsck instead
1876 // of just bailing out because the volume is journaled.
1877 if (!ronly) {
1878 if (HFS_MOUNT_DEBUG) {
1879 printf("hfs_mountfs: hfs_early_journal_init (2) resetting.. \n");
1880 }
1881 HFSPlusVolumeHeader *jvhp;
1882
1883 hfsmp->hfs_flags |= HFS_NEED_JNL_RESET;
1884
1885 if (mdb_offset == 0) {
1886 mdb_offset = (daddr64_t)((embeddedOffset / log_blksize) + HFS_PRI_SECTOR(log_blksize));
1887 }
1888
1889 bp = NULL;
1890 retval = (int)buf_meta_bread(devvp, HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
1891 phys_blksize, cred, &bp);
1892 if (retval == 0) {
1893 jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize));
1894
1895 if (SWAP_BE16(jvhp->signature) == kHFSPlusSigWord || SWAP_BE16(jvhp->signature) == kHFSXSigWord) {
1896 printf ("hfs(2): Journal replay fail. Writing lastMountVersion as FSK!\n");
1897 jvhp->lastMountedVersion = SWAP_BE32(kFSKMountVersion);
1898 buf_bwrite(bp);
1899 } else {
1900 buf_brelse(bp);
1901 }
1902 bp = NULL;
1903 } else if (bp) {
1904 buf_brelse(bp);
1905 // clear this so the error exit path won't try to use it
1906 bp = NULL;
1907 }
1908 }
1909
1910 // if this isn't the root device just bail out.
1911 // If it is the root device we just continue on
1912 // in the hopes that fsck_hfs will be able to
1913 // fix any damage that exists on the volume.
1914 if ( !(vfs_flags(mp) & MNT_ROOTFS)) {
1915 if (HFS_MOUNT_DEBUG) {
1916 printf("hfs_mountfs: hfs_early_journal_init (2) failed \n");
1917 }
1918 retval = EINVAL;
1919 goto error_exit;
1920 }
1921 }
1922 }
1923
1924 /* Try again with a smaller block size... */
1925 retval = hfs_MountHFSPlusVolume(hfsmp, vhp, embeddedOffset, disksize, p, args, cred);
1926 if (retval && HFS_MOUNT_DEBUG) {
1927 printf("hfs_MountHFSPlusVolume (late) returned %d\n",retval);
1928 }
1929 }
1930 if (retval)
1931 (void) hfs_relconverter(0);
1932 }
1933
1934 // save off a snapshot of the mtime from the previous mount
1935 // (for matador).
1936 hfsmp->hfs_last_mounted_mtime = hfsmp->hfs_mtime;
1937
1938 if ( retval ) {
1939 if (HFS_MOUNT_DEBUG) {
1940 printf("hfs_mountfs: encountered failure %d \n", retval);
1941 }
1942 goto error_exit;
1943 }
1944
1945 mp->mnt_vfsstat.f_fsid.val[0] = (long)dev;
1946 mp->mnt_vfsstat.f_fsid.val[1] = vfs_typenum(mp);
1947 vfs_setmaxsymlen(mp, 0);
1948
1949 mp->mnt_vtable->vfc_vfsflags |= VFC_VFSNATIVEXATTR;
1950 #if NAMEDSTREAMS
1951 mp->mnt_kern_flag |= MNTK_NAMED_STREAMS;
1952 #endif
1953 if (!(hfsmp->hfs_flags & HFS_STANDARD)) {
1954 /* Tell VFS that we support directory hard links. */
1955 mp->mnt_vtable->vfc_vfsflags |= VFC_VFSDIRLINKS;
1956 } else {
1957 /* HFS standard doesn't support extended readdir! */
1958 mount_set_noreaddirext (mp);
1959 }
1960
1961 if (args) {
1962 /*
1963 * Set the free space warning levels for a non-root volume:
1964 *
1965 * Set the "danger" limit to 1% of the volume size or 100MB, whichever
1966 * is less. Set the "warning" limit to 2% of the volume size or 150MB,
1967 * whichever is less. And last, set the "desired" freespace level to
1968 * to 3% of the volume size or 200MB, whichever is less.
1969 */
1970 hfsmp->hfs_freespace_notify_dangerlimit =
1971 MIN(HFS_VERYLOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize,
1972 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_VERYLOWDISKTRIGGERFRACTION);
1973 hfsmp->hfs_freespace_notify_warninglimit =
1974 MIN(HFS_LOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize,
1975 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_LOWDISKTRIGGERFRACTION);
1976 hfsmp->hfs_freespace_notify_desiredlevel =
1977 MIN(HFS_LOWDISKSHUTOFFLEVEL / HFSTOVCB(hfsmp)->blockSize,
1978 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_LOWDISKSHUTOFFFRACTION);
1979 } else {
1980 /*
1981 * Set the free space warning levels for the root volume:
1982 *
1983 * Set the "danger" limit to 5% of the volume size or 512MB, whichever
1984 * is less. Set the "warning" limit to 10% of the volume size or 1GB,
1985 * whichever is less. And last, set the "desired" freespace level to
1986 * to 11% of the volume size or 1.25GB, whichever is less.
1987 */
1988 hfsmp->hfs_freespace_notify_dangerlimit =
1989 MIN(HFS_ROOTVERYLOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize,
1990 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_ROOTVERYLOWDISKTRIGGERFRACTION);
1991 hfsmp->hfs_freespace_notify_warninglimit =
1992 MIN(HFS_ROOTLOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize,
1993 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_ROOTLOWDISKTRIGGERFRACTION);
1994 hfsmp->hfs_freespace_notify_desiredlevel =
1995 MIN(HFS_ROOTLOWDISKSHUTOFFLEVEL / HFSTOVCB(hfsmp)->blockSize,
1996 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_ROOTLOWDISKSHUTOFFFRACTION);
1997 };
1998
1999 /* Check if the file system exists on virtual device, like disk image */
2000 if (VNOP_IOCTL(devvp, DKIOCISVIRTUAL, (caddr_t)&isvirtual, 0, context) == 0) {
2001 if (isvirtual) {
2002 hfsmp->hfs_flags |= HFS_VIRTUAL_DEVICE;
2003 }
2004 }
2005
2006 /* do not allow ejectability checks on the root device */
2007 if (isroot == 0) {
2008 if ((hfsmp->hfs_flags & HFS_VIRTUAL_DEVICE) == 0 &&
2009 IOBSDIsMediaEjectable(mp->mnt_vfsstat.f_mntfromname)) {
2010 hfsmp->hfs_max_pending_io = 4096*1024; // a reasonable value to start with.
2011 hfsmp->hfs_syncer = thread_call_allocate(hfs_syncer, hfsmp);
2012 if (hfsmp->hfs_syncer == NULL) {
2013 printf("hfs: failed to allocate syncer thread callback for %s (%s)\n",
2014 mp->mnt_vfsstat.f_mntfromname, mp->mnt_vfsstat.f_mntonname);
2015 }
2016 }
2017 }
2018
2019 #if CONFIG_HFS_MOUNT_UNMAP
2020 /* Enable UNMAPs for embedded SSDs only for now */
2021 /*
2022 * TODO: Should we enable this for CoreStorage volumes, too?
2023 */
2024 if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) {
2025 if (hfsmp->hfs_flags & HFS_UNMAP) {
2026 hfs_unmap_blocks(hfsmp);
2027 }
2028 }
2029 #endif
2030
2031
2032 #if CONFIG_HFS_ALLOC_RBTREE
2033 /*
2034 * We spawn a thread to create the pair of red-black trees for this volume.
2035 * However, in so doing, we must be careful to ensure that if this thread is still
2036 * running after mount has finished, it doesn't interfere with an unmount. Specifically,
2037 * we'll need to set a bit that indicates we're in progress building the trees here.
2038 * Unmount will check for this bit, and then if it's set, mark a corresponding bit that
2039 * notifies the tree generation code that an unmount is waiting. Also mark the bit that
2040 * indicates the tree is live and operating.
2041 *
2042 * Only do this if we're operating on a read-write mount (we wouldn't care for read-only).
2043 */
2044
2045 if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) {
2046 hfsmp->extent_tree_flags |= (HFS_ALLOC_TREEBUILD_INFLIGHT | HFS_ALLOC_RB_ENABLED);
2047
2048 /* Initialize EOF counter so that the thread can assume it started at initial values */
2049 hfsmp->offset_block_end = 0;
2050 InitTree(hfsmp);
2051
2052 kernel_thread_start ((thread_continue_t) hfs_initialize_allocator , hfsmp, &allocator_thread);
2053 thread_deallocate(allocator_thread);
2054 }
2055
2056 #endif
2057
2058 /*
2059 * Start looking for free space to drop below this level and generate a
2060 * warning immediately if needed:
2061 */
2062 hfsmp->hfs_notification_conditions = 0;
2063 hfs_generate_volume_notifications(hfsmp);
2064
2065 if (ronly == 0) {
2066 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
2067 }
2068 FREE(mdbp, M_TEMP);
2069 return (0);
2070
2071 error_exit:
2072 if (bp)
2073 buf_brelse(bp);
2074 if (mdbp)
2075 FREE(mdbp, M_TEMP);
2076
2077 if (hfsmp && hfsmp->jvp && hfsmp->jvp != hfsmp->hfs_devvp) {
2078 vnode_clearmountedon(hfsmp->jvp);
2079 (void)VNOP_CLOSE(hfsmp->jvp, ronly ? FREAD : FREAD|FWRITE, vfs_context_kernel());
2080 hfsmp->jvp = NULL;
2081 }
2082 if (hfsmp) {
2083 if (hfsmp->hfs_devvp) {
2084 vnode_rele(hfsmp->hfs_devvp);
2085 }
2086 hfs_delete_chash(hfsmp);
2087
2088 FREE(hfsmp, M_HFSMNT);
2089 vfs_setfsprivate(mp, NULL);
2090 }
2091 return (retval);
2092 }
2093
2094
2095 /*
2096 * Make a filesystem operational.
2097 * Nothing to do at the moment.
2098 */
2099 /* ARGSUSED */
2100 static int
2101 hfs_start(__unused struct mount *mp, __unused int flags, __unused vfs_context_t context)
2102 {
2103 return (0);
2104 }
2105
2106
2107 /*
2108 * unmount system call
2109 */
2110 int
2111 hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context)
2112 {
2113 struct proc *p = vfs_context_proc(context);
2114 struct hfsmount *hfsmp = VFSTOHFS(mp);
2115 int retval = E_NONE;
2116 int flags;
2117 int force;
2118 int started_tr = 0;
2119 int rb_used = 0;
2120
2121 flags = 0;
2122 force = 0;
2123 if (mntflags & MNT_FORCE) {
2124 flags |= FORCECLOSE;
2125 force = 1;
2126 }
2127
2128 if ((retval = hfs_flushfiles(mp, flags, p)) && !force)
2129 return (retval);
2130
2131 if (hfsmp->hfs_flags & HFS_METADATA_ZONE)
2132 (void) hfs_recording_suspend(hfsmp);
2133
2134 /*
2135 * Cancel any pending timers for this volume. Then wait for any timers
2136 * which have fired, but whose callbacks have not yet completed.
2137 */
2138 if (hfsmp->hfs_syncer)
2139 {
2140 struct timespec ts = {0, 100000000}; /* 0.1 seconds */
2141
2142 /*
2143 * Cancel any timers that have been scheduled, but have not
2144 * fired yet. NOTE: The kernel considers a timer complete as
2145 * soon as it starts your callback, so the kernel does not
2146 * keep track of the number of callbacks in progress.
2147 */
2148 if (thread_call_cancel(hfsmp->hfs_syncer))
2149 OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_incomplete);
2150 thread_call_free(hfsmp->hfs_syncer);
2151 hfsmp->hfs_syncer = NULL;
2152
2153 /*
2154 * This waits for all of the callbacks that were entered before
2155 * we did thread_call_cancel above, but have not completed yet.
2156 */
2157 while(hfsmp->hfs_sync_incomplete > 0)
2158 {
2159 msleep((caddr_t)&hfsmp->hfs_sync_incomplete, NULL, PWAIT, "hfs_unmount", &ts);
2160 }
2161
2162 if (hfsmp->hfs_sync_incomplete < 0)
2163 panic("hfs_unmount: pm_sync_incomplete underflow!\n");
2164 }
2165
2166 #if CONFIG_HFS_ALLOC_RBTREE
2167 rb_used = hfs_teardown_allocator(hfsmp);
2168 #endif
2169
2170 /*
2171 * Flush out the b-trees, volume bitmap and Volume Header
2172 */
2173 if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) {
2174 retval = hfs_start_transaction(hfsmp);
2175 if (retval == 0) {
2176 started_tr = 1;
2177 } else if (!force) {
2178 goto err_exit;
2179 }
2180
2181 if (hfsmp->hfs_startup_vp) {
2182 (void) hfs_lock(VTOC(hfsmp->hfs_startup_vp), HFS_EXCLUSIVE_LOCK);
2183 retval = hfs_fsync(hfsmp->hfs_startup_vp, MNT_WAIT, 0, p);
2184 hfs_unlock(VTOC(hfsmp->hfs_startup_vp));
2185 if (retval && !force)
2186 goto err_exit;
2187 }
2188
2189 if (hfsmp->hfs_attribute_vp) {
2190 (void) hfs_lock(VTOC(hfsmp->hfs_attribute_vp), HFS_EXCLUSIVE_LOCK);
2191 retval = hfs_fsync(hfsmp->hfs_attribute_vp, MNT_WAIT, 0, p);
2192 hfs_unlock(VTOC(hfsmp->hfs_attribute_vp));
2193 if (retval && !force)
2194 goto err_exit;
2195 }
2196
2197 (void) hfs_lock(VTOC(hfsmp->hfs_catalog_vp), HFS_EXCLUSIVE_LOCK);
2198 retval = hfs_fsync(hfsmp->hfs_catalog_vp, MNT_WAIT, 0, p);
2199 hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
2200 if (retval && !force)
2201 goto err_exit;
2202
2203 (void) hfs_lock(VTOC(hfsmp->hfs_extents_vp), HFS_EXCLUSIVE_LOCK);
2204 retval = hfs_fsync(hfsmp->hfs_extents_vp, MNT_WAIT, 0, p);
2205 hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
2206 if (retval && !force)
2207 goto err_exit;
2208
2209 if (hfsmp->hfs_allocation_vp) {
2210 (void) hfs_lock(VTOC(hfsmp->hfs_allocation_vp), HFS_EXCLUSIVE_LOCK);
2211 retval = hfs_fsync(hfsmp->hfs_allocation_vp, MNT_WAIT, 0, p);
2212 hfs_unlock(VTOC(hfsmp->hfs_allocation_vp));
2213 if (retval && !force)
2214 goto err_exit;
2215 }
2216
2217 if (hfsmp->hfc_filevp && vnode_issystem(hfsmp->hfc_filevp)) {
2218 retval = hfs_fsync(hfsmp->hfc_filevp, MNT_WAIT, 0, p);
2219 if (retval && !force)
2220 goto err_exit;
2221 }
2222
2223 /* If runtime corruption was detected, indicate that the volume
2224 * was not unmounted cleanly.
2225 */
2226 if (hfsmp->vcbAtrb & kHFSVolumeInconsistentMask) {
2227 HFSTOVCB(hfsmp)->vcbAtrb &= ~kHFSVolumeUnmountedMask;
2228 } else {
2229 HFSTOVCB(hfsmp)->vcbAtrb |= kHFSVolumeUnmountedMask;
2230 }
2231
2232
2233 if (rb_used) {
2234 /* If the rb-tree was live, just set min_start to 0 */
2235 hfsmp->nextAllocation = 0;
2236 }
2237 else {
2238 if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
2239 int i;
2240 u_int32_t min_start = hfsmp->totalBlocks;
2241
2242 // set the nextAllocation pointer to the smallest free block number
2243 // we've seen so on the next mount we won't rescan unnecessarily
2244 lck_spin_lock(&hfsmp->vcbFreeExtLock);
2245 for(i=0; i < (int)hfsmp->vcbFreeExtCnt; i++) {
2246 if (hfsmp->vcbFreeExt[i].startBlock < min_start) {
2247 min_start = hfsmp->vcbFreeExt[i].startBlock;
2248 }
2249 }
2250 lck_spin_unlock(&hfsmp->vcbFreeExtLock);
2251 if (min_start < hfsmp->nextAllocation) {
2252 hfsmp->nextAllocation = min_start;
2253 }
2254 }
2255 }
2256
2257
2258 retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
2259 if (retval) {
2260 HFSTOVCB(hfsmp)->vcbAtrb &= ~kHFSVolumeUnmountedMask;
2261 if (!force)
2262 goto err_exit; /* could not flush everything */
2263 }
2264
2265 if (started_tr) {
2266 hfs_end_transaction(hfsmp);
2267 started_tr = 0;
2268 }
2269 }
2270
2271 if (hfsmp->jnl) {
2272 hfs_journal_flush(hfsmp, FALSE);
2273 }
2274
2275 /*
2276 * Invalidate our caches and release metadata vnodes
2277 */
2278 (void) hfsUnmount(hfsmp, p);
2279
2280 if (HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord)
2281 (void) hfs_relconverter(hfsmp->hfs_encoding);
2282
2283 // XXXdbg
2284 if (hfsmp->jnl) {
2285 journal_close(hfsmp->jnl);
2286 hfsmp->jnl = NULL;
2287 }
2288
2289 VNOP_FSYNC(hfsmp->hfs_devvp, MNT_WAIT, context);
2290
2291 if (hfsmp->jvp && hfsmp->jvp != hfsmp->hfs_devvp) {
2292 vnode_clearmountedon(hfsmp->jvp);
2293 retval = VNOP_CLOSE(hfsmp->jvp,
2294 hfsmp->hfs_flags & HFS_READ_ONLY ? FREAD : FREAD|FWRITE,
2295 vfs_context_kernel());
2296 vnode_put(hfsmp->jvp);
2297 hfsmp->jvp = NULL;
2298 }
2299 // XXXdbg
2300
2301 /*
2302 * Last chance to dump unreferenced system files.
2303 */
2304 (void) vflush(mp, NULLVP, FORCECLOSE);
2305
2306 #if HFS_SPARSE_DEV
2307 /* Drop our reference on the backing fs (if any). */
2308 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) && hfsmp->hfs_backingfs_rootvp) {
2309 struct vnode * tmpvp;
2310
2311 hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
2312 tmpvp = hfsmp->hfs_backingfs_rootvp;
2313 hfsmp->hfs_backingfs_rootvp = NULLVP;
2314 vnode_rele(tmpvp);
2315 }
2316 #endif /* HFS_SPARSE_DEV */
2317 lck_mtx_destroy(&hfsmp->hfc_mutex, hfs_mutex_group);
2318 lck_spin_destroy(&hfsmp->vcbFreeExtLock, hfs_spinlock_group);
2319 vnode_rele(hfsmp->hfs_devvp);
2320
2321 hfs_delete_chash(hfsmp);
2322 FREE(hfsmp, M_HFSMNT);
2323
2324 return (0);
2325
2326 err_exit:
2327 if (started_tr) {
2328 hfs_end_transaction(hfsmp);
2329 }
2330 return retval;
2331 }
2332
2333
2334 /*
2335 * Return the root of a filesystem.
2336 */
2337 static int
2338 hfs_vfs_root(struct mount *mp, struct vnode **vpp, __unused vfs_context_t context)
2339 {
2340 return hfs_vget(VFSTOHFS(mp), (cnid_t)kHFSRootFolderID, vpp, 1, 0);
2341 }
2342
2343
2344 /*
2345 * Do operations associated with quotas
2346 */
2347 #if !QUOTA
2348 static int
2349 hfs_quotactl(__unused struct mount *mp, __unused int cmds, __unused uid_t uid, __unused caddr_t datap, __unused vfs_context_t context)
2350 {
2351 return (ENOTSUP);
2352 }
2353 #else
2354 static int
2355 hfs_quotactl(struct mount *mp, int cmds, uid_t uid, caddr_t datap, vfs_context_t context)
2356 {
2357 struct proc *p = vfs_context_proc(context);
2358 int cmd, type, error;
2359
2360 if (uid == ~0U)
2361 uid = kauth_cred_getuid(vfs_context_ucred(context));
2362 cmd = cmds >> SUBCMDSHIFT;
2363
2364 switch (cmd) {
2365 case Q_SYNC:
2366 case Q_QUOTASTAT:
2367 break;
2368 case Q_GETQUOTA:
2369 if (uid == kauth_cred_getuid(vfs_context_ucred(context)))
2370 break;
2371 /* fall through */
2372 default:
2373 if ( (error = vfs_context_suser(context)) )
2374 return (error);
2375 }
2376
2377 type = cmds & SUBCMDMASK;
2378 if ((u_int)type >= MAXQUOTAS)
2379 return (EINVAL);
2380 if (vfs_busy(mp, LK_NOWAIT))
2381 return (0);
2382
2383 switch (cmd) {
2384
2385 case Q_QUOTAON:
2386 error = hfs_quotaon(p, mp, type, datap);
2387 break;
2388
2389 case Q_QUOTAOFF:
2390 error = hfs_quotaoff(p, mp, type);
2391 break;
2392
2393 case Q_SETQUOTA:
2394 error = hfs_setquota(mp, uid, type, datap);
2395 break;
2396
2397 case Q_SETUSE:
2398 error = hfs_setuse(mp, uid, type, datap);
2399 break;
2400
2401 case Q_GETQUOTA:
2402 error = hfs_getquota(mp, uid, type, datap);
2403 break;
2404
2405 case Q_SYNC:
2406 error = hfs_qsync(mp);
2407 break;
2408
2409 case Q_QUOTASTAT:
2410 error = hfs_quotastat(mp, type, datap);
2411 break;
2412
2413 default:
2414 error = EINVAL;
2415 break;
2416 }
2417 vfs_unbusy(mp);
2418
2419 return (error);
2420 }
2421 #endif /* QUOTA */
2422
/*
 * The file system subtype reported by hfs_statfs() (f_fssubtype) is a
 * composite of the bit flags below.
 */
#define HFS_SUBTYPE_JOURNALED      (1 << 0)
#define HFS_SUBTYPE_CASESENSITIVE  (1 << 1)
/* bits 2 - 6 reserved */
#define HFS_SUBTYPE_STANDARDHFS    (1 << 7)
2428
2429 /*
2430 * Get file system statistics.
2431 */
2432 int
2433 hfs_statfs(struct mount *mp, register struct vfsstatfs *sbp, __unused vfs_context_t context)
2434 {
2435 ExtendedVCB *vcb = VFSTOVCB(mp);
2436 struct hfsmount *hfsmp = VFSTOHFS(mp);
2437 u_int32_t freeCNIDs;
2438 u_int16_t subtype = 0;
2439
2440 freeCNIDs = (u_int32_t)0xFFFFFFFF - (u_int32_t)vcb->vcbNxtCNID;
2441
2442 sbp->f_bsize = (u_int32_t)vcb->blockSize;
2443 sbp->f_iosize = (size_t)cluster_max_io_size(mp, 0);
2444 sbp->f_blocks = (u_int64_t)((u_int32_t)vcb->totalBlocks);
2445 sbp->f_bfree = (u_int64_t)((u_int32_t )hfs_freeblks(hfsmp, 0));
2446 sbp->f_bavail = (u_int64_t)((u_int32_t )hfs_freeblks(hfsmp, 1));
2447 sbp->f_files = (u_int64_t)((u_int32_t )(vcb->totalBlocks - 2)); /* max files is constrained by total blocks */
2448 sbp->f_ffree = (u_int64_t)((u_int32_t )(MIN(freeCNIDs, sbp->f_bavail)));
2449
2450 /*
2451 * Subtypes (flavors) for HFS
2452 * 0: Mac OS Extended
2453 * 1: Mac OS Extended (Journaled)
2454 * 2: Mac OS Extended (Case Sensitive)
2455 * 3: Mac OS Extended (Case Sensitive, Journaled)
2456 * 4 - 127: Reserved
2457 * 128: Mac OS Standard
2458 *
2459 */
2460 if (hfsmp->hfs_flags & HFS_STANDARD) {
2461 subtype = HFS_SUBTYPE_STANDARDHFS;
2462 } else /* HFS Plus */ {
2463 if (hfsmp->jnl)
2464 subtype |= HFS_SUBTYPE_JOURNALED;
2465 if (hfsmp->hfs_flags & HFS_CASE_SENSITIVE)
2466 subtype |= HFS_SUBTYPE_CASESENSITIVE;
2467 }
2468 sbp->f_fssubtype = subtype;
2469
2470 return (0);
2471 }
2472
2473
2474 //
2475 // XXXdbg -- this is a callback to be used by the journal to
2476 // get meta data blocks flushed out to disk.
2477 //
2478 // XXXdbg -- be smarter and don't flush *every* block on each
2479 // call. try to only flush some so we don't wind up
2480 // being too synchronous.
2481 //
2482 __private_extern__
2483 void
2484 hfs_sync_metadata(void *arg)
2485 {
2486 struct mount *mp = (struct mount *)arg;
2487 struct hfsmount *hfsmp;
2488 ExtendedVCB *vcb;
2489 buf_t bp;
2490 int retval;
2491 daddr64_t priIDSector;
2492 hfsmp = VFSTOHFS(mp);
2493 vcb = HFSTOVCB(hfsmp);
2494
2495 // now make sure the super block is flushed
2496 priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
2497 HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size));
2498
2499 retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
2500 HFS_PHYSBLK_ROUNDDOWN(priIDSector, hfsmp->hfs_log_per_phys),
2501 hfsmp->hfs_physical_block_size, NOCRED, &bp);
2502 if ((retval != 0 ) && (retval != ENXIO)) {
2503 printf("hfs_sync_metadata: can't read volume header at %d! (retval 0x%x)\n",
2504 (int)priIDSector, retval);
2505 }
2506
2507 if (retval == 0 && ((buf_flags(bp) & (B_DELWRI | B_LOCKED)) == B_DELWRI)) {
2508 buf_bwrite(bp);
2509 } else if (bp) {
2510 buf_brelse(bp);
2511 }
2512
2513 // the alternate super block...
2514 // XXXdbg - we probably don't need to do this each and every time.
2515 // hfs_btreeio.c:FlushAlternate() should flag when it was
2516 // written...
2517 if (hfsmp->hfs_alt_id_sector) {
2518 retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
2519 HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys),
2520 hfsmp->hfs_physical_block_size, NOCRED, &bp);
2521 if (retval == 0 && ((buf_flags(bp) & (B_DELWRI | B_LOCKED)) == B_DELWRI)) {
2522 buf_bwrite(bp);
2523 } else if (bp) {
2524 buf_brelse(bp);
2525 }
2526 }
2527 }
2528
2529
2530 struct hfs_sync_cargs {
2531 kauth_cred_t cred;
2532 struct proc *p;
2533 int waitfor;
2534 int error;
2535 };
2536
2537
2538 static int
2539 hfs_sync_callback(struct vnode *vp, void *cargs)
2540 {
2541 struct cnode *cp;
2542 struct hfs_sync_cargs *args;
2543 int error;
2544
2545 args = (struct hfs_sync_cargs *)cargs;
2546
2547 if (hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK) != 0) {
2548 return (VNODE_RETURNED);
2549 }
2550 cp = VTOC(vp);
2551
2552 if ((cp->c_flag & C_MODIFIED) ||
2553 (cp->c_touch_acctime | cp->c_touch_chgtime | cp->c_touch_modtime) ||
2554 vnode_hasdirtyblks(vp)) {
2555 error = hfs_fsync(vp, args->waitfor, 0, args->p);
2556
2557 if (error)
2558 args->error = error;
2559 }
2560 hfs_unlock(cp);
2561 return (VNODE_RETURNED);
2562 }
2563
2564
2565
2566 /*
2567 * Go through the disk queues to initiate sandbagged IO;
2568 * go through the inodes to write those that have been modified;
2569 * initiate the writing of the super block if it has been modified.
2570 *
2571 * Note: we are always called with the filesystem marked `MPBUSY'.
2572 */
2573 int
2574 hfs_sync(struct mount *mp, int waitfor, vfs_context_t context)
2575 {
2576 struct proc *p = vfs_context_proc(context);
2577 struct cnode *cp;
2578 struct hfsmount *hfsmp;
2579 ExtendedVCB *vcb;
2580 struct vnode *meta_vp[4];
2581 int i;
2582 int error, allerror = 0;
2583 struct hfs_sync_cargs args;
2584
2585 hfsmp = VFSTOHFS(mp);
2586
2587 /*
2588 * hfs_changefs might be manipulating vnodes so back off
2589 */
2590 if (hfsmp->hfs_flags & HFS_IN_CHANGEFS)
2591 return (0);
2592
2593 if (hfsmp->hfs_flags & HFS_READ_ONLY)
2594 return (EROFS);
2595
2596 /* skip over frozen volumes */
2597 if (!lck_rw_try_lock_shared(&hfsmp->hfs_insync))
2598 return 0;
2599
2600 args.cred = kauth_cred_get();
2601 args.waitfor = waitfor;
2602 args.p = p;
2603 args.error = 0;
2604 /*
2605 * hfs_sync_callback will be called for each vnode
2606 * hung off of this mount point... the vnode will be
2607 * properly referenced and unreferenced around the callback
2608 */
2609 vnode_iterate(mp, 0, hfs_sync_callback, (void *)&args);
2610
2611 if (args.error)
2612 allerror = args.error;
2613
2614 vcb = HFSTOVCB(hfsmp);
2615
2616 meta_vp[0] = vcb->extentsRefNum;
2617 meta_vp[1] = vcb->catalogRefNum;
2618 meta_vp[2] = vcb->allocationsRefNum; /* This is NULL for standard HFS */
2619 meta_vp[3] = hfsmp->hfs_attribute_vp; /* Optional file */
2620
2621 /* Now sync our three metadata files */
2622 for (i = 0; i < 4; ++i) {
2623 struct vnode *btvp;
2624
2625 btvp = meta_vp[i];;
2626 if ((btvp==0) || (vnode_mount(btvp) != mp))
2627 continue;
2628
2629 /* XXX use hfs_systemfile_lock instead ? */
2630 (void) hfs_lock(VTOC(btvp), HFS_EXCLUSIVE_LOCK);
2631 cp = VTOC(btvp);
2632
2633 if (((cp->c_flag & C_MODIFIED) == 0) &&
2634 (cp->c_touch_acctime == 0) &&
2635 (cp->c_touch_chgtime == 0) &&
2636 (cp->c_touch_modtime == 0) &&
2637 vnode_hasdirtyblks(btvp) == 0) {
2638 hfs_unlock(VTOC(btvp));
2639 continue;
2640 }
2641 error = vnode_get(btvp);
2642 if (error) {
2643 hfs_unlock(VTOC(btvp));
2644 continue;
2645 }
2646 if ((error = hfs_fsync(btvp, waitfor, 0, p)))
2647 allerror = error;
2648
2649 hfs_unlock(cp);
2650 vnode_put(btvp);
2651 };
2652
2653 /*
2654 * Force stale file system control information to be flushed.
2655 */
2656 if (vcb->vcbSigWord == kHFSSigWord) {
2657 if ((error = VNOP_FSYNC(hfsmp->hfs_devvp, waitfor, context))) {
2658 allerror = error;
2659 }
2660 }
2661 #if QUOTA
2662 hfs_qsync(mp);
2663 #endif /* QUOTA */
2664
2665 hfs_hotfilesync(hfsmp, vfs_context_kernel());
2666
2667 /*
2668 * Write back modified superblock.
2669 */
2670 if (IsVCBDirty(vcb)) {
2671 error = hfs_flushvolumeheader(hfsmp, waitfor, 0);
2672 if (error)
2673 allerror = error;
2674 }
2675
2676 if (hfsmp->jnl) {
2677 hfs_journal_flush(hfsmp, FALSE);
2678 }
2679
2680 {
2681 clock_sec_t secs;
2682 clock_usec_t usecs;
2683 uint64_t now;
2684
2685 clock_get_calendar_microtime(&secs, &usecs);
2686 now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs;
2687 hfsmp->hfs_last_sync_time = now;
2688 }
2689
2690 lck_rw_unlock_shared(&hfsmp->hfs_insync);
2691 return (allerror);
2692 }
2693
2694
2695 /*
2696 * File handle to vnode
2697 *
2698 * Have to be really careful about stale file handles:
2699 * - check that the cnode id is valid
2700 * - call hfs_vget() to get the locked cnode
2701 * - check for an unallocated cnode (i_mode == 0)
2702 * - check that the given client host has export rights and return
2703 * those rights via. exflagsp and credanonp
2704 */
2705 static int
2706 hfs_fhtovp(struct mount *mp, int fhlen, unsigned char *fhp, struct vnode **vpp, __unused vfs_context_t context)
2707 {
2708 struct hfsfid *hfsfhp;
2709 struct vnode *nvp;
2710 int result;
2711
2712 *vpp = NULL;
2713 hfsfhp = (struct hfsfid *)fhp;
2714
2715 if (fhlen < (int)sizeof(struct hfsfid))
2716 return (EINVAL);
2717
2718 result = hfs_vget(VFSTOHFS(mp), ntohl(hfsfhp->hfsfid_cnid), &nvp, 0, 0);
2719 if (result) {
2720 if (result == ENOENT)
2721 result = ESTALE;
2722 return result;
2723 }
2724
2725 /*
2726 * We used to use the create time as the gen id of the file handle,
2727 * but it is not static enough because it can change at any point
2728 * via system calls. We still don't have another volume ID or other
2729 * unique identifier to use for a generation ID across reboots that
2730 * persists until the file is removed. Using only the CNID exposes
2731 * us to the potential wrap-around case, but as of 2/2008, it would take
2732 * over 2 months to wrap around if the machine did nothing but allocate
2733 * CNIDs. Using some kind of wrap counter would only be effective if
2734 * each file had the wrap counter associated with it. For now,
2735 * we use only the CNID to identify the file as it's good enough.
2736 */
2737
2738 *vpp = nvp;
2739
2740 hfs_unlock(VTOC(nvp));
2741 return (0);
2742 }
2743
2744
2745 /*
2746 * Vnode pointer to File handle
2747 */
2748 /* ARGSUSED */
2749 static int
2750 hfs_vptofh(struct vnode *vp, int *fhlenp, unsigned char *fhp, __unused vfs_context_t context)
2751 {
2752 struct cnode *cp;
2753 struct hfsfid *hfsfhp;
2754
2755 if (ISHFS(VTOVCB(vp)))
2756 return (ENOTSUP); /* hfs standard is not exportable */
2757
2758 if (*fhlenp < (int)sizeof(struct hfsfid))
2759 return (EOVERFLOW);
2760
2761 cp = VTOC(vp);
2762 hfsfhp = (struct hfsfid *)fhp;
2763 /* only the CNID is used to identify the file now */
2764 hfsfhp->hfsfid_cnid = htonl(cp->c_fileid);
2765 hfsfhp->hfsfid_gen = htonl(cp->c_fileid);
2766 *fhlenp = sizeof(struct hfsfid);
2767
2768 return (0);
2769 }
2770
2771
2772 /*
2773 * Initial HFS filesystems, done only once.
2774 */
2775 static int
2776 hfs_init(__unused struct vfsconf *vfsp)
2777 {
2778 static int done = 0;
2779
2780 if (done)
2781 return (0);
2782 done = 1;
2783 hfs_chashinit();
2784 hfs_converterinit();
2785
2786 BTReserveSetup();
2787
2788
2789 hfs_lock_attr = lck_attr_alloc_init();
2790 hfs_group_attr = lck_grp_attr_alloc_init();
2791 hfs_mutex_group = lck_grp_alloc_init("hfs-mutex", hfs_group_attr);
2792 hfs_rwlock_group = lck_grp_alloc_init("hfs-rwlock", hfs_group_attr);
2793 hfs_spinlock_group = lck_grp_alloc_init("hfs-spinlock", hfs_group_attr);
2794
2795 #if HFS_COMPRESSION
2796 decmpfs_init();
2797 #endif
2798
2799 return (0);
2800 }
2801
2802 static int
2803 hfs_getmountpoint(struct vnode *vp, struct hfsmount **hfsmpp)
2804 {
2805 struct hfsmount * hfsmp;
2806 char fstypename[MFSNAMELEN];
2807
2808 if (vp == NULL)
2809 return (EINVAL);
2810
2811 if (!vnode_isvroot(vp))
2812 return (EINVAL);
2813
2814 vnode_vfsname(vp, fstypename);
2815 if (strncmp(fstypename, "hfs", sizeof(fstypename)) != 0)
2816 return (EINVAL);
2817
2818 hfsmp = VTOHFS(vp);
2819
2820 if (HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord)
2821 return (EINVAL);
2822
2823 *hfsmpp = hfsmp;
2824
2825 return (0);
2826 }
2827
2828 // XXXdbg
2829 #include <sys/filedesc.h>
2830
2831 /*
2832 * HFS filesystem related variables.
2833 */
2834 int
2835 hfs_sysctl(int *name, __unused u_int namelen, user_addr_t oldp, size_t *oldlenp,
2836 user_addr_t newp, size_t newlen, vfs_context_t context)
2837 {
2838 struct proc *p = vfs_context_proc(context);
2839 int error;
2840 struct hfsmount *hfsmp;
2841
2842 /* all sysctl names at this level are terminal */
2843
2844 if (name[0] == HFS_ENCODINGBIAS) {
2845 int bias;
2846
2847 bias = hfs_getencodingbias();
2848 error = sysctl_int(oldp, oldlenp, newp, newlen, &bias);
2849 if (error == 0 && newp)
2850 hfs_setencodingbias(bias);
2851 return (error);
2852
2853 } else if (name[0] == HFS_EXTEND_FS) {
2854 u_int64_t newsize;
2855 vnode_t vp = vfs_context_cwd(context);
2856
2857 if (newp == USER_ADDR_NULL || vp == NULLVP)
2858 return (EINVAL);
2859 if ((error = hfs_getmountpoint(vp, &hfsmp)))
2860 return (error);
2861 error = sysctl_quad(oldp, oldlenp, newp, newlen, (quad_t *)&newsize);
2862 if (error)
2863 return (error);
2864
2865 error = hfs_extendfs(hfsmp, newsize, context);
2866 return (error);
2867
2868 } else if (name[0] == HFS_ENCODINGHINT) {
2869 size_t bufsize;
2870 size_t bytes;
2871 u_int32_t hint;
2872 u_int16_t *unicode_name = NULL;
2873 char *filename = NULL;
2874
2875 if ((newlen <= 0) || (newlen > MAXPATHLEN))
2876 return (EINVAL);
2877
2878 bufsize = MAX(newlen * 3, MAXPATHLEN);
2879 MALLOC(filename, char *, newlen, M_TEMP, M_WAITOK);
2880 if (filename == NULL) {
2881 error = ENOMEM;
2882 goto encodinghint_exit;
2883 }
2884 MALLOC(unicode_name, u_int16_t *, bufsize, M_TEMP, M_WAITOK);
2885 if (filename == NULL) {
2886 error = ENOMEM;
2887 goto encodinghint_exit;
2888 }
2889
2890 error = copyin(newp, (caddr_t)filename, newlen);
2891 if (error == 0) {
2892 error = utf8_decodestr((u_int8_t *)filename, newlen - 1, unicode_name,
2893 &bytes, bufsize, 0, UTF_DECOMPOSED);
2894 if (error == 0) {
2895 hint = hfs_pickencoding(unicode_name, bytes / 2);
2896 error = sysctl_int(oldp, oldlenp, USER_ADDR_NULL, 0, (int32_t *)&hint);
2897 }
2898 }
2899
2900 encodinghint_exit:
2901 if (unicode_name)
2902 FREE(unicode_name, M_TEMP);
2903 if (filename)
2904 FREE(filename, M_TEMP);
2905 return (error);
2906
2907 } else if (name[0] == HFS_ENABLE_JOURNALING) {
2908 // make the file system journaled...
2909 vnode_t vp = vfs_context_cwd(context);
2910 vnode_t jvp;
2911 ExtendedVCB *vcb;
2912 struct cat_attr jnl_attr, jinfo_attr;
2913 struct cat_fork jnl_fork, jinfo_fork;
2914 void *jnl = NULL;
2915 int lockflags;
2916
2917 /* Only root can enable journaling */
2918 if (!is_suser()) {
2919 return (EPERM);
2920 }
2921 if (vp == NULLVP)
2922 return EINVAL;
2923
2924 hfsmp = VTOHFS(vp);
2925 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2926 return EROFS;
2927 }
2928 if (HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord) {
2929 printf("hfs: can't make a plain hfs volume journaled.\n");
2930 return EINVAL;
2931 }
2932
2933 if (hfsmp->jnl) {
2934 printf("hfs: volume @ mp %p is already journaled!\n", vnode_mount(vp));
2935 return EAGAIN;
2936 }
2937
2938 vcb = HFSTOVCB(hfsmp);
2939 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_EXTENTS, HFS_EXCLUSIVE_LOCK);
2940 if (BTHasContiguousNodes(VTOF(vcb->catalogRefNum)) == 0 ||
2941 BTHasContiguousNodes(VTOF(vcb->extentsRefNum)) == 0) {
2942
2943 printf("hfs: volume has a btree w/non-contiguous nodes. can not enable journaling.\n");
2944 hfs_systemfile_unlock(hfsmp, lockflags);
2945 return EINVAL;
2946 }
2947 hfs_systemfile_unlock(hfsmp, lockflags);
2948
2949 // make sure these both exist!
2950 if ( GetFileInfo(vcb, kHFSRootFolderID, ".journal_info_block", &jinfo_attr, &jinfo_fork) == 0
2951 || GetFileInfo(vcb, kHFSRootFolderID, ".journal", &jnl_attr, &jnl_fork) == 0) {
2952
2953 return EINVAL;
2954 }
2955
2956 hfs_sync(hfsmp->hfs_mp, MNT_WAIT, context);
2957
2958 printf("hfs: Initializing the journal (joffset 0x%llx sz 0x%llx)...\n",
2959 (off_t)name[2], (off_t)name[3]);
2960
2961 //
2962 // XXXdbg - note that currently (Sept, 08) hfs_util does not support
2963 // enabling the journal on a separate device so it is safe
2964 // to just copy hfs_devvp here. If hfs_util gets the ability
2965 // to dynamically enable the journal on a separate device then
2966 // we will have to do the same thing as hfs_early_journal_init()
2967 // to locate and open the journal device.
2968 //
2969 jvp = hfsmp->hfs_devvp;
2970 jnl = journal_create(jvp,
2971 (off_t)name[2] * (off_t)HFSTOVCB(hfsmp)->blockSize
2972 + HFSTOVCB(hfsmp)->hfsPlusIOPosOffset,
2973 (off_t)((unsigned)name[3]),
2974 hfsmp->hfs_devvp,
2975 hfsmp->hfs_logical_block_size,
2976 0,
2977 0,
2978 hfs_sync_metadata, hfsmp->hfs_mp);
2979
2980 /*
2981 * Set up the trim callback function so that we can add
2982 * recently freed extents to the free extent cache once
2983 * the transaction that freed them is written to the
2984 * journal on disk.
2985 */
2986 if (jnl)
2987 journal_trim_set_callback(jnl, hfs_trim_callback, hfsmp);
2988
2989 if (jnl == NULL) {
2990 printf("hfs: FAILED to create the journal!\n");
2991 if (jvp && jvp != hfsmp->hfs_devvp) {
2992 vnode_clearmountedon(jvp);
2993 VNOP_CLOSE(jvp, hfsmp->hfs_flags & HFS_READ_ONLY ? FREAD : FREAD|FWRITE, vfs_context_kernel());
2994 }
2995 jvp = NULL;
2996
2997 return EINVAL;
2998 }
2999
3000 hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);
3001
3002 /*
3003 * Flush all dirty metadata buffers.
3004 */
3005 buf_flushdirtyblks(hfsmp->hfs_devvp, TRUE, 0, "hfs_sysctl");
3006 buf_flushdirtyblks(hfsmp->hfs_extents_vp, TRUE, 0, "hfs_sysctl");
3007 buf_flushdirtyblks(hfsmp->hfs_catalog_vp, TRUE, 0, "hfs_sysctl");
3008 buf_flushdirtyblks(hfsmp->hfs_allocation_vp, TRUE, 0, "hfs_sysctl");
3009 if (hfsmp->hfs_attribute_vp)
3010 buf_flushdirtyblks(hfsmp->hfs_attribute_vp, TRUE, 0, "hfs_sysctl");
3011
3012 HFSTOVCB(hfsmp)->vcbJinfoBlock = name[1];
3013 HFSTOVCB(hfsmp)->vcbAtrb |= kHFSVolumeJournaledMask;
3014 hfsmp->jvp = jvp;
3015 hfsmp->jnl = jnl;
3016
3017 // save this off for the hack-y check in hfs_remove()
3018 hfsmp->jnl_start = (u_int32_t)name[2];
3019 hfsmp->jnl_size = (off_t)((unsigned)name[3]);
3020 hfsmp->hfs_jnlinfoblkid = jinfo_attr.ca_fileid;
3021 hfsmp->hfs_jnlfileid = jnl_attr.ca_fileid;
3022
3023 vfs_setflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
3024
3025 hfs_unlock_global (hfsmp);
3026 hfs_flushvolumeheader(hfsmp, MNT_WAIT, 1);
3027
3028 {
3029 fsid_t fsid;
3030
3031 fsid.val[0] = (int32_t)hfsmp->hfs_raw_dev;
3032 fsid.val[1] = (int32_t)vfs_typenum(HFSTOVFS(hfsmp));
3033 vfs_event_signal(&fsid, VQ_UPDATE, (intptr_t)NULL);
3034 }
3035 return 0;
3036 } else if (name[0] == HFS_DISABLE_JOURNALING) {
3037 // clear the journaling bit
3038 vnode_t vp = vfs_context_cwd(context);
3039
3040 /* Only root can disable journaling */
3041 if (!is_suser()) {
3042 return (EPERM);
3043 }
3044 if (vp == NULLVP)
3045 return EINVAL;
3046
3047 hfsmp = VTOHFS(vp);
3048
3049 /*
3050 * Disabling journaling is disallowed on volumes with directory hard links
3051 * because we have not tested the relevant code path.
3052 */
3053 if (hfsmp->hfs_private_attr[DIR_HARDLINKS].ca_entries != 0){
3054 printf("hfs: cannot disable journaling on volumes with directory hardlinks\n");
3055 return EPERM;
3056 }
3057
3058 printf("hfs: disabling journaling for mount @ %p\n", vnode_mount(vp));
3059
3060 hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);
3061
3062 // Lights out for you buddy!
3063 journal_close(hfsmp->jnl);
3064 hfsmp->jnl = NULL;
3065
3066 if (hfsmp->jvp && hfsmp->jvp != hfsmp->hfs_devvp) {
3067 vnode_clearmountedon(hfsmp->jvp);
3068 VNOP_CLOSE(hfsmp->jvp, hfsmp->hfs_flags & HFS_READ_ONLY ? FREAD : FREAD|FWRITE, vfs_context_kernel());
3069 vnode_put(hfsmp->jvp);
3070 }
3071 hfsmp->jvp = NULL;
3072 vfs_clearflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
3073 hfsmp->jnl_start = 0;
3074 hfsmp->hfs_jnlinfoblkid = 0;
3075 hfsmp->hfs_jnlfileid = 0;
3076
3077 HFSTOVCB(hfsmp)->vcbAtrb &= ~kHFSVolumeJournaledMask;
3078
3079 hfs_unlock_global (hfsmp);
3080
3081 hfs_flushvolumeheader(hfsmp, MNT_WAIT, 1);
3082
3083 {
3084 fsid_t fsid;
3085
3086 fsid.val[0] = (int32_t)hfsmp->hfs_raw_dev;
3087 fsid.val[1] = (int32_t)vfs_typenum(HFSTOVFS(hfsmp));
3088 vfs_event_signal(&fsid, VQ_UPDATE, (intptr_t)NULL);
3089 }
3090 return 0;
3091 } else if (name[0] == HFS_GET_JOURNAL_INFO) {
3092 vnode_t vp = vfs_context_cwd(context);
3093 off_t jnl_start, jnl_size;
3094
3095 if (vp == NULLVP)
3096 return EINVAL;
3097
3098 /* 64-bit processes won't work with this sysctl -- can't fit a pointer into an int! */
3099 if (proc_is64bit(current_proc()))
3100 return EINVAL;
3101
3102 hfsmp = VTOHFS(vp);
3103 if (hfsmp->jnl == NULL) {
3104 jnl_start = 0;
3105 jnl_size = 0;
3106 } else {
3107 jnl_start = (off_t)(hfsmp->jnl_start * HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset;
3108 jnl_size = (off_t)hfsmp->jnl_size;
3109 }
3110
3111 if ((error = copyout((caddr_t)&jnl_start, CAST_USER_ADDR_T(name[1]), sizeof(off_t))) != 0) {
3112 return error;
3113 }
3114 if ((error = copyout((caddr_t)&jnl_size, CAST_USER_ADDR_T(name[2]), sizeof(off_t))) != 0) {
3115 return error;
3116 }
3117
3118 return 0;
3119 } else if (name[0] == HFS_SET_PKG_EXTENSIONS) {
3120
3121 return set_package_extensions_table((user_addr_t)((unsigned)name[1]), name[2], name[3]);
3122
3123 } else if (name[0] == VFS_CTL_QUERY) {
3124 struct sysctl_req *req;
3125 union union_vfsidctl vc;
3126 struct mount *mp;
3127 struct vfsquery vq;
3128
3129 req = CAST_DOWN(struct sysctl_req *, oldp); /* we're new style vfs sysctl. */
3130
3131 error = SYSCTL_IN(req, &vc, proc_is64bit(p)? sizeof(vc.vc64):sizeof(vc.vc32));
3132 if (error) return (error);
3133
3134 mp = vfs_getvfs(&vc.vc32.vc_fsid); /* works for 32 and 64 */
3135 if (mp == NULL) return (ENOENT);
3136
3137 hfsmp = VFSTOHFS(mp);
3138 bzero(&vq, sizeof(vq));
3139 vq.vq_flags = hfsmp->hfs_notification_conditions;
3140 return SYSCTL_OUT(req, &vq, sizeof(vq));;
3141 } else if (name[0] == HFS_REPLAY_JOURNAL) {
3142 vnode_t devvp = NULL;
3143 int device_fd;
3144 if (namelen != 2) {
3145 return (EINVAL);
3146 }
3147 device_fd = name[1];
3148 error = file_vnode(device_fd, &devvp);
3149 if (error) {
3150 return error;
3151 }
3152 error = vnode_getwithref(devvp);
3153 if (error) {
3154 file_drop(device_fd);
3155 return error;
3156 }
3157 error = hfs_journal_replay(devvp, context);
3158 file_drop(device_fd);
3159 vnode_put(devvp);
3160 return error;
3161 } else if (name[0] == HFS_ENABLE_RESIZE_DEBUG) {
3162 hfs_resize_debug = 1;
3163 printf ("hfs_sysctl: Enabled volume resize debugging.\n");
3164 return 0;
3165 }
3166
3167 return (ENOTSUP);
3168 }
3169
3170 /*
3171 * hfs_vfs_vget is not static since it is used in hfs_readwrite.c to support
3172 * the build_path ioctl. We use it to leverage the code below that updates
3173 * the origin list cache if necessary
3174 */
3175
3176 int
3177 hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, __unused vfs_context_t context)
3178 {
3179 int error;
3180 int lockflags;
3181 struct hfsmount *hfsmp;
3182
3183 hfsmp = VFSTOHFS(mp);
3184
3185 error = hfs_vget(hfsmp, (cnid_t)ino, vpp, 1, 0);
3186 if (error)
3187 return (error);
3188
3189 /*
3190 * ADLs may need to have their origin state updated
3191 * since build_path needs a valid parent. The same is true
3192 * for hardlinked files as well. There isn't a race window here
3193 * in re-acquiring the cnode lock since we aren't pulling any data
3194 * out of the cnode; instead, we're going to the catalog.
3195 */
3196 if ((VTOC(*vpp)->c_flag & C_HARDLINK) &&
3197 (hfs_lock(VTOC(*vpp), HFS_EXCLUSIVE_LOCK) == 0)) {
3198 cnode_t *cp = VTOC(*vpp);
3199 struct cat_desc cdesc;
3200
3201 if (!hfs_haslinkorigin(cp)) {
3202 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
3203 error = cat_findname(hfsmp, (cnid_t)ino, &cdesc);
3204 hfs_systemfile_unlock(hfsmp, lockflags);
3205 if (error == 0) {
3206 if ((cdesc.cd_parentcnid != hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) &&
3207 (cdesc.cd_parentcnid != hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid)) {
3208 hfs_savelinkorigin(cp, cdesc.cd_parentcnid);
3209 }
3210 cat_releasedesc(&cdesc);
3211 }
3212 }
3213 hfs_unlock(cp);
3214 }
3215 return (0);
3216 }
3217
3218
3219 /*
3220 * Look up an HFS object by ID.
3221 *
3222 * The object is returned with an iocount reference and the cnode locked.
3223 *
3224 * If the object is a file then it will represent the data fork.
3225 */
3226 int
3227 hfs_vget(struct hfsmount *hfsmp, cnid_t cnid, struct vnode **vpp, int skiplock, int allow_deleted)
3228 {
3229 struct vnode *vp = NULLVP;
3230 struct cat_desc cndesc;
3231 struct cat_attr cnattr;
3232 struct cat_fork cnfork;
3233 u_int32_t linkref = 0;
3234 int error;
3235
3236 /* Check for cnids that should't be exported. */
3237 if ((cnid < kHFSFirstUserCatalogNodeID) &&
3238 (cnid != kHFSRootFolderID && cnid != kHFSRootParentID)) {
3239 return (ENOENT);
3240 }
3241 /* Don't export our private directories. */
3242 if (cnid == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid ||
3243 cnid == hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) {
3244 return (ENOENT);
3245 }
3246 /*
3247 * Check the hash first
3248 */
3249 vp = hfs_chash_getvnode(hfsmp, cnid, 0, skiplock, allow_deleted);
3250 if (vp) {
3251 *vpp = vp;
3252 return(0);
3253 }
3254
3255 bzero(&cndesc, sizeof(cndesc));
3256 bzero(&cnattr, sizeof(cnattr));
3257 bzero(&cnfork, sizeof(cnfork));
3258
3259 /*
3260 * Not in hash, lookup in catalog
3261 */
3262 if (cnid == kHFSRootParentID) {
3263 static char hfs_rootname[] = "/";
3264
3265 cndesc.cd_nameptr = (const u_int8_t *)&hfs_rootname[0];
3266 cndesc.cd_namelen = 1;
3267 cndesc.cd_parentcnid = kHFSRootParentID;
3268 cndesc.cd_cnid = kHFSRootFolderID;
3269 cndesc.cd_flags = CD_ISDIR;
3270
3271 cnattr.ca_fileid = kHFSRootFolderID;
3272 cnattr.ca_linkcount = 1;
3273 cnattr.ca_entries = 1;
3274 cnattr.ca_dircount = 1;
3275 cnattr.ca_mode = (S_IFDIR | S_IRWXU | S_IRWXG | S_IRWXO);
3276 } else {
3277 int lockflags;
3278 cnid_t pid;
3279 const char *nameptr;
3280
3281 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
3282 error = cat_idlookup(hfsmp, cnid, 0, 0, &cndesc, &cnattr, &cnfork);
3283 hfs_systemfile_unlock(hfsmp, lockflags);
3284
3285 if (error) {
3286 *vpp = NULL;
3287 return (error);
3288 }
3289
3290 /*
3291 * Check for a raw hardlink inode and save its linkref.
3292 */
3293 pid = cndesc.cd_parentcnid;
3294 nameptr = (const char *)cndesc.cd_nameptr;
3295
3296 if ((pid == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid) &&
3297 (bcmp(nameptr, HFS_INODE_PREFIX, HFS_INODE_PREFIX_LEN) == 0)) {
3298 linkref = strtoul(&nameptr[HFS_INODE_PREFIX_LEN], NULL, 10);
3299
3300 } else if ((pid == hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) &&
3301 (bcmp(nameptr, HFS_DIRINODE_PREFIX, HFS_DIRINODE_PREFIX_LEN) == 0)) {
3302 linkref = strtoul(&nameptr[HFS_DIRINODE_PREFIX_LEN], NULL, 10);
3303
3304 } else if ((pid == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid) &&
3305 (bcmp(nameptr, HFS_DELETE_PREFIX, HFS_DELETE_PREFIX_LEN) == 0)) {
3306 *vpp = NULL;
3307 cat_releasedesc(&cndesc);
3308 return (ENOENT); /* open unlinked file */
3309 }
3310 }
3311
3312 /*
3313 * Finish initializing cnode descriptor for hardlinks.
3314 *
3315 * We need a valid name and parent for reverse lookups.
3316 */
3317 if (linkref) {
3318 cnid_t nextlinkid;
3319 cnid_t prevlinkid;
3320 struct cat_desc linkdesc;
3321 int lockflags;
3322
3323 cnattr.ca_linkref = linkref;
3324
3325 /*
3326 * Pick up the first link in the chain and get a descriptor for it.
3327 * This allows blind volfs paths to work for hardlinks.
3328 */
3329 if ((hfs_lookup_siblinglinks(hfsmp, linkref, &prevlinkid, &nextlinkid) == 0) &&
3330 (nextlinkid != 0)) {
3331 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
3332 error = cat_findname(hfsmp, nextlinkid, &linkdesc);
3333 hfs_systemfile_unlock(hfsmp, lockflags);
3334 if (error == 0) {
3335 cat_releasedesc(&cndesc);
3336 bcopy(&linkdesc, &cndesc, sizeof(linkdesc));
3337 }
3338 }
3339 }
3340
3341 if (linkref) {
3342 int newvnode_flags = 0;
3343
3344 error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr,
3345 &cnfork, &vp, &newvnode_flags);
3346 if (error == 0) {
3347 VTOC(vp)->c_flag |= C_HARDLINK;
3348 vnode_setmultipath(vp);
3349 }
3350 } else {
3351 struct componentname cn;
3352 int newvnode_flags = 0;
3353
3354 /* Supply hfs_getnewvnode with a component name. */
3355 MALLOC_ZONE(cn.cn_pnbuf, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK);
3356 cn.cn_nameiop = LOOKUP;
3357 cn.cn_flags = ISLASTCN | HASBUF;
3358 cn.cn_context = NULL;
3359 cn.cn_pnlen = MAXPATHLEN;
3360 cn.cn_nameptr = cn.cn_pnbuf;
3361 cn.cn_namelen = cndesc.cd_namelen;
3362 cn.cn_hash = 0;
3363 cn.cn_consume = 0;
3364 bcopy(cndesc.cd_nameptr, cn.cn_nameptr, cndesc.cd_namelen + 1);
3365
3366 error = hfs_getnewvnode(hfsmp, NULLVP, &cn, &cndesc, 0, &cnattr,
3367 &cnfork, &vp, &newvnode_flags);
3368
3369 if (error == 0 && (VTOC(vp)->c_flag & C_HARDLINK)) {
3370 hfs_savelinkorigin(VTOC(vp), cndesc.cd_parentcnid);
3371 }
3372 FREE_ZONE(cn.cn_pnbuf, cn.cn_pnlen, M_NAMEI);
3373 }
3374 cat_releasedesc(&cndesc);
3375
3376 *vpp = vp;
3377 if (vp && skiplock) {
3378 hfs_unlock(VTOC(vp));
3379 }
3380 return (error);
3381 }
3382
3383
3384 /*
3385 * Flush out all the files in a filesystem.
3386 */
3387 static int
3388 #if QUOTA
3389 hfs_flushfiles(struct mount *mp, int flags, struct proc *p)
3390 #else
3391 hfs_flushfiles(struct mount *mp, int flags, __unused struct proc *p)
3392 #endif /* QUOTA */
3393 {
3394 struct hfsmount *hfsmp;
3395 struct vnode *skipvp = NULLVP;
3396 int error;
3397 #if QUOTA
3398 int quotafilecnt;
3399 int i;
3400 #endif
3401
3402 hfsmp = VFSTOHFS(mp);
3403
3404 #if QUOTA
3405 /*
3406 * The open quota files have an indirect reference on
3407 * the root directory vnode. We must account for this
3408 * extra reference when doing the intial vflush.
3409 */
3410 quotafilecnt = 0;
3411 if (((unsigned int)vfs_flags(mp)) & MNT_QUOTA) {
3412
3413 /* Find out how many quota files we have open. */
3414 for (i = 0; i < MAXQUOTAS; i++) {
3415 if (hfsmp->hfs_qfiles[i].qf_vp != NULLVP)
3416 ++quotafilecnt;
3417 }
3418
3419 /* Obtain the root vnode so we can skip over it. */
3420 skipvp = hfs_chash_getvnode(hfsmp, kHFSRootFolderID, 0, 0, 0);
3421 }
3422 #endif /* QUOTA */
3423
3424 error = vflush(mp, skipvp, SKIPSYSTEM | SKIPSWAP | flags);
3425 if (error != 0)
3426 return(error);
3427
3428 error = vflush(mp, skipvp, SKIPSYSTEM | flags);
3429
3430 #if QUOTA
3431 if (((unsigned int)vfs_flags(mp)) & MNT_QUOTA) {
3432 if (skipvp) {
3433 /*
3434 * See if there are additional references on the
3435 * root vp besides the ones obtained from the open
3436 * quota files and the hfs_chash_getvnode call above.
3437 */
3438 if ((error == 0) &&
3439 (vnode_isinuse(skipvp, quotafilecnt))) {
3440 error = EBUSY; /* root directory is still open */
3441 }
3442 hfs_unlock(VTOC(skipvp));
3443 vnode_put(skipvp);
3444 }
3445 if (error && (flags & FORCECLOSE) == 0)
3446 return (error);
3447
3448 for (i = 0; i < MAXQUOTAS; i++) {
3449 if (hfsmp->hfs_qfiles[i].qf_vp == NULLVP)
3450 continue;
3451 hfs_quotaoff(p, mp, i);
3452 }
3453 error = vflush(mp, NULLVP, SKIPSYSTEM | flags);
3454 }
3455 #endif /* QUOTA */
3456
3457 return (error);
3458 }
3459
3460 /*
3461 * Update volume encoding bitmap (HFS Plus only)
3462 */
3463 __private_extern__
3464 void
3465 hfs_setencodingbits(struct hfsmount *hfsmp, u_int32_t encoding)
3466 {
3467 #define kIndexMacUkrainian 48 /* MacUkrainian encoding is 152 */
3468 #define kIndexMacFarsi 49 /* MacFarsi encoding is 140 */
3469
3470 u_int32_t index;
3471
3472 switch (encoding) {
3473 case kTextEncodingMacUkrainian:
3474 index = kIndexMacUkrainian;
3475 break;
3476 case kTextEncodingMacFarsi:
3477 index = kIndexMacFarsi;
3478 break;
3479 default:
3480 index = encoding;
3481 break;
3482 }
3483
3484 if (index < 64 && (hfsmp->encodingsBitmap & (u_int64_t)(1ULL << index)) == 0) {
3485 HFS_MOUNT_LOCK(hfsmp, TRUE)
3486 hfsmp->encodingsBitmap |= (u_int64_t)(1ULL << index);
3487 MarkVCBDirty(hfsmp);
3488 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
3489 }
3490 }
3491
3492 /*
3493 * Update volume stats
3494 *
3495 * On journal volumes this will cause a volume header flush
3496 */
3497 int
3498 hfs_volupdate(struct hfsmount *hfsmp, enum volop op, int inroot)
3499 {
3500 struct timeval tv;
3501
3502 microtime(&tv);
3503
3504 lck_mtx_lock(&hfsmp->hfs_mutex);
3505
3506 MarkVCBDirty(hfsmp);
3507 hfsmp->hfs_mtime = tv.tv_sec;
3508
3509 switch (op) {
3510 case VOL_UPDATE:
3511 break;
3512 case VOL_MKDIR:
3513 if (hfsmp->hfs_dircount != 0xFFFFFFFF)
3514 ++hfsmp->hfs_dircount;
3515 if (inroot && hfsmp->vcbNmRtDirs != 0xFFFF)
3516 ++hfsmp->vcbNmRtDirs;
3517 break;
3518 case VOL_RMDIR:
3519 if (hfsmp->hfs_dircount != 0)
3520 --hfsmp->hfs_dircount;
3521 if (inroot && hfsmp->vcbNmRtDirs != 0xFFFF)
3522 --hfsmp->vcbNmRtDirs;
3523 break;
3524 case VOL_MKFILE:
3525 if (hfsmp->hfs_filecount != 0xFFFFFFFF)
3526 ++hfsmp->hfs_filecount;
3527 if (inroot && hfsmp->vcbNmFls != 0xFFFF)
3528 ++hfsmp->vcbNmFls;
3529 break;
3530 case VOL_RMFILE:
3531 if (hfsmp->hfs_filecount != 0)
3532 --hfsmp->hfs_filecount;
3533 if (inroot && hfsmp->vcbNmFls != 0xFFFF)
3534 --hfsmp->vcbNmFls;
3535 break;
3536 }
3537
3538 lck_mtx_unlock(&hfsmp->hfs_mutex);
3539
3540 if (hfsmp->jnl) {
3541 hfs_flushvolumeheader(hfsmp, 0, 0);
3542 }
3543
3544 return (0);
3545 }
3546
3547
3548 static int
3549 hfs_flushMDB(struct hfsmount *hfsmp, int waitfor, int altflush)
3550 {
3551 ExtendedVCB *vcb = HFSTOVCB(hfsmp);
3552 struct filefork *fp;
3553 HFSMasterDirectoryBlock *mdb;
3554 struct buf *bp = NULL;
3555 int retval;
3556 int sector_size;
3557 ByteCount namelen;
3558
3559 sector_size = hfsmp->hfs_logical_block_size;
3560 retval = (int)buf_bread(hfsmp->hfs_devvp, (daddr64_t)HFS_PRI_SECTOR(sector_size), sector_size, NOCRED, &bp);
3561 if (retval) {
3562 if (bp)
3563 buf_brelse(bp);
3564 return retval;
3565 }
3566
3567 lck_mtx_lock(&hfsmp->hfs_mutex);
3568
3569 mdb = (HFSMasterDirectoryBlock *)(buf_dataptr(bp) + HFS_PRI_OFFSET(sector_size));
3570
3571 mdb->drCrDate = SWAP_BE32 (UTCToLocal(to_hfs_time(vcb->hfs_itime)));
3572 mdb->drLsMod = SWAP_BE32 (UTCToLocal(to_hfs_time(vcb->vcbLsMod)));
3573 mdb->drAtrb = SWAP_BE16 (vcb->vcbAtrb);
3574 mdb->drNmFls = SWAP_BE16 (vcb->vcbNmFls);
3575 mdb->drAllocPtr = SWAP_BE16 (vcb->nextAllocation);
3576 mdb->drClpSiz = SWAP_BE32 (vcb->vcbClpSiz);
3577 mdb->drNxtCNID = SWAP_BE32 (vcb->vcbNxtCNID);
3578 mdb->drFreeBks = SWAP_BE16 (vcb->freeBlocks);
3579
3580 namelen = strlen((char *)vcb->vcbVN);
3581 retval = utf8_to_hfs(vcb, namelen, vcb->vcbVN, mdb->drVN);
3582 /* Retry with MacRoman in case that's how it was exported. */
3583 if (retval)
3584 retval = utf8_to_mac_roman(namelen, vcb->vcbVN, mdb->drVN);
3585
3586 mdb->drVolBkUp = SWAP_BE32 (UTCToLocal(to_hfs_time(vcb->vcbVolBkUp)));
3587 mdb->drWrCnt = SWAP_BE32 (vcb->vcbWrCnt);
3588 mdb->drNmRtDirs = SWAP_BE16 (vcb->vcbNmRtDirs);
3589 mdb->drFilCnt = SWAP_BE32 (vcb->vcbFilCnt);
3590 mdb->drDirCnt = SWAP_BE32 (vcb->vcbDirCnt);
3591
3592 bcopy(vcb->vcbFndrInfo, mdb->drFndrInfo, sizeof(mdb->drFndrInfo));
3593
3594 fp = VTOF(vcb->extentsRefNum);
3595 mdb->drXTExtRec[0].startBlock = SWAP_BE16 (fp->ff_extents[0].startBlock);
3596 mdb->drXTExtRec[0].blockCount = SWAP_BE16 (fp->ff_extents[0].blockCount);
3597 mdb->drXTExtRec[1].startBlock = SWAP_BE16 (fp->ff_extents[1].startBlock);
3598 mdb->drXTExtRec[1].blockCount = SWAP_BE16 (fp->ff_extents[1].blockCount);
3599 mdb->drXTExtRec[2].startBlock = SWAP_BE16 (fp->ff_extents[2].startBlock);
3600 mdb->drXTExtRec[2].blockCount = SWAP_BE16 (fp->ff_extents[2].blockCount);
3601 mdb->drXTFlSize = SWAP_BE32 (fp->ff_blocks * vcb->blockSize);
3602 mdb->drXTClpSiz = SWAP_BE32 (fp->ff_clumpsize);
3603 FTOC(fp)->c_flag &= ~C_MODIFIED;
3604
3605 fp = VTOF(vcb->catalogRefNum);
3606 mdb->drCTExtRec[0].startBlock = SWAP_BE16 (fp->ff_extents[0].startBlock);
3607 mdb->drCTExtRec[0].blockCount = SWAP_BE16 (fp->ff_extents[0].blockCount);
3608 mdb->drCTExtRec[1].startBlock = SWAP_BE16 (fp->ff_extents[1].startBlock);
3609 mdb->drCTExtRec[1].blockCount = SWAP_BE16 (fp->ff_extents[1].blockCount);
3610 mdb->drCTExtRec[2].startBlock = SWAP_BE16 (fp->ff_extents[2].startBlock);
3611 mdb->drCTExtRec[2].blockCount = SWAP_BE16 (fp->ff_extents[2].blockCount);
3612 mdb->drCTFlSize = SWAP_BE32 (fp->ff_blocks * vcb->blockSize);
3613 mdb->drCTClpSiz = SWAP_BE32 (fp->ff_clumpsize);
3614 FTOC(fp)->c_flag &= ~C_MODIFIED;
3615
3616 MarkVCBClean( vcb );
3617
3618 lck_mtx_unlock(&hfsmp->hfs_mutex);
3619
3620 /* If requested, flush out the alternate MDB */
3621 if (altflush) {
3622 struct buf *alt_bp = NULL;
3623
3624 if (buf_meta_bread(hfsmp->hfs_devvp, hfsmp->hfs_alt_id_sector, sector_size, NOCRED, &alt_bp) == 0) {
3625 bcopy(mdb, (char *)buf_dataptr(alt_bp) + HFS_ALT_OFFSET(sector_size), kMDBSize);
3626
3627 (void) VNOP_BWRITE(alt_bp);
3628 } else if (alt_bp)
3629 buf_brelse(alt_bp);
3630 }
3631
3632 if (waitfor != MNT_WAIT)
3633 buf_bawrite(bp);
3634 else
3635 retval = VNOP_BWRITE(bp);
3636
3637 return (retval);
3638 }
3639
3640 /*
3641 * Flush any dirty in-memory mount data to the on-disk
3642 * volume header.
3643 *
3644 * Note: the on-disk volume signature is intentionally
3645 * not flushed since the on-disk "H+" and "HX" signatures
3646 * are always stored in-memory as "H+".
3647 */
3648 int
3649 hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush)
3650 {
3651 ExtendedVCB *vcb = HFSTOVCB(hfsmp);
3652 struct filefork *fp;
3653 HFSPlusVolumeHeader *volumeHeader, *altVH;
3654 int retval;
3655 struct buf *bp, *alt_bp;
3656 int i;
3657 daddr64_t priIDSector;
3658 int critical;
3659 u_int16_t signature;
3660 u_int16_t hfsversion;
3661
3662 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
3663 return(0);
3664 }
3665 if (hfsmp->hfs_flags & HFS_STANDARD) {
3666 return hfs_flushMDB(hfsmp, waitfor, altflush);
3667 }
3668 critical = altflush;
3669 priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
3670 HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size));
3671
3672 if (hfs_start_transaction(hfsmp) != 0) {
3673 return EINVAL;
3674 }
3675
3676 bp = NULL;
3677 alt_bp = NULL;
3678
3679 retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
3680 HFS_PHYSBLK_ROUNDDOWN(priIDSector, hfsmp->hfs_log_per_phys),
3681 hfsmp->hfs_physical_block_size, NOCRED, &bp);
3682 if (retval) {
3683 printf("hfs: err %d reading VH blk (%s)\n", retval, vcb->vcbVN);
3684 goto err_exit;
3685 }
3686
3687 volumeHeader = (HFSPlusVolumeHeader *)((char *)buf_dataptr(bp) +
3688 HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));
3689
3690 /*
3691 * Sanity check what we just read. If it's bad, try the alternate
3692 * instead.
3693 */
3694 signature = SWAP_BE16 (volumeHeader->signature);
3695 hfsversion = SWAP_BE16 (volumeHeader->version);
3696 if ((signature != kHFSPlusSigWord && signature != kHFSXSigWord) ||
3697 (hfsversion < kHFSPlusVersion) || (hfsversion > 100) ||
3698 (SWAP_BE32 (volumeHeader->blockSize) != vcb->blockSize)) {
3699 printf("hfs: corrupt VH on %s, sig 0x%04x, ver %d, blksize %d%s\n",
3700 vcb->vcbVN, signature, hfsversion,
3701 SWAP_BE32 (volumeHeader->blockSize),
3702 hfsmp->hfs_alt_id_sector ? "; trying alternate" : "");
3703 hfs_mark_volume_inconsistent(hfsmp);
3704
3705 if (hfsmp->hfs_alt_id_sector) {
3706 retval = buf_meta_bread(hfsmp->hfs_devvp,
3707 HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys),
3708 hfsmp->hfs_physical_block_size, NOCRED, &alt_bp);
3709 if (retval) {
3710 printf("hfs: err %d reading alternate VH (%s)\n", retval, vcb->vcbVN);
3711 goto err_exit;
3712 }
3713
3714 altVH = (HFSPlusVolumeHeader *)((char *)buf_dataptr(alt_bp) +
3715 HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size));
3716 signature = SWAP_BE16(altVH->signature);
3717 hfsversion = SWAP_BE16(altVH->version);
3718
3719 if ((signature != kHFSPlusSigWord && signature != kHFSXSigWord) ||
3720 (hfsversion < kHFSPlusVersion) || (kHFSPlusVersion > 100) ||
3721 (SWAP_BE32(altVH->blockSize) != vcb->blockSize)) {
3722 printf("hfs: corrupt alternate VH on %s, sig 0x%04x, ver %d, blksize %d\n",
3723 vcb->vcbVN, signature, hfsversion,
3724 SWAP_BE32(altVH->blockSize));
3725 retval = EIO;
3726 goto err_exit;
3727 }
3728
3729 /* The alternate is plausible, so use it. */
3730 bcopy(altVH, volumeHeader, kMDBSize);
3731 buf_brelse(alt_bp);
3732 alt_bp = NULL;
3733 } else {
3734 /* No alternate VH, nothing more we can do. */
3735 retval = EIO;
3736 goto err_exit;
3737 }
3738 }
3739
3740 if (hfsmp->jnl) {
3741 journal_modify_block_start(hfsmp->jnl, bp);
3742 }
3743
3744 /*
3745 * For embedded HFS+ volumes, update create date if it changed
3746 * (ie from a setattrlist call)
3747 */
3748 if ((vcb->hfsPlusIOPosOffset != 0) &&
3749 (SWAP_BE32 (volumeHeader->createDate) != vcb->localCreateDate)) {
3750 struct buf *bp2;
3751 HFSMasterDirectoryBlock *mdb;
3752
3753 retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
3754 HFS_PHYSBLK_ROUNDDOWN(HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size), hfsmp->hfs_log_per_phys),
3755 hfsmp->hfs_physical_block_size, NOCRED, &bp2);
3756 if (retval) {
3757 if (bp2)
3758 buf_brelse(bp2);
3759 retval = 0;
3760 } else {
3761 mdb = (HFSMasterDirectoryBlock *)(buf_dataptr(bp2) +
3762 HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));
3763
3764 if ( SWAP_BE32 (mdb->drCrDate) != vcb->localCreateDate )
3765 {
3766 if (hfsmp->jnl) {
3767 journal_modify_block_start(hfsmp->jnl, bp2);
3768 }
3769
3770 mdb->drCrDate = SWAP_BE32 (vcb->localCreateDate); /* pick up the new create date */
3771
3772 if (hfsmp->jnl) {
3773 journal_modify_block_end(hfsmp->jnl, bp2, NULL, NULL);
3774 } else {
3775 (void) VNOP_BWRITE(bp2); /* write out the changes */
3776 }
3777 }
3778 else
3779 {
3780 buf_brelse(bp2); /* just release it */
3781 }
3782 }
3783 }
3784
3785 lck_mtx_lock(&hfsmp->hfs_mutex);
3786
3787 /* Note: only update the lower 16 bits worth of attributes */
3788 volumeHeader->attributes = SWAP_BE32 (vcb->vcbAtrb);
3789 volumeHeader->journalInfoBlock = SWAP_BE32 (vcb->vcbJinfoBlock);
3790 if (hfsmp->jnl) {
3791 volumeHeader->lastMountedVersion = SWAP_BE32 (kHFSJMountVersion);
3792 } else {
3793 volumeHeader->lastMountedVersion = SWAP_BE32 (kHFSPlusMountVersion);
3794 }
3795 volumeHeader->createDate = SWAP_BE32 (vcb->localCreateDate); /* volume create date is in local time */
3796 volumeHeader->modifyDate = SWAP_BE32 (to_hfs_time(vcb->vcbLsMod));
3797 volumeHeader->backupDate = SWAP_BE32 (to_hfs_time(vcb->vcbVolBkUp));
3798 volumeHeader->fileCount = SWAP_BE32 (vcb->vcbFilCnt);
3799 volumeHeader->folderCount = SWAP_BE32 (vcb->vcbDirCnt);
3800 volumeHeader->totalBlocks = SWAP_BE32 (vcb->totalBlocks);
3801 volumeHeader->freeBlocks = SWAP_BE32 (vcb->freeBlocks);
3802 volumeHeader->nextAllocation = SWAP_BE32 (vcb->nextAllocation);
3803 volumeHeader->rsrcClumpSize = SWAP_BE32 (vcb->vcbClpSiz);
3804 volumeHeader->dataClumpSize = SWAP_BE32 (vcb->vcbClpSiz);
3805 volumeHeader->nextCatalogID = SWAP_BE32 (vcb->vcbNxtCNID);
3806 volumeHeader->writeCount = SWAP_BE32 (vcb->vcbWrCnt);
3807 volumeHeader->encodingsBitmap = SWAP_BE64 (vcb->encodingsBitmap);
3808
3809 if (bcmp(vcb->vcbFndrInfo, volumeHeader->finderInfo, sizeof(volumeHeader->finderInfo)) != 0) {
3810 bcopy(vcb->vcbFndrInfo, volumeHeader->finderInfo, sizeof(volumeHeader->finderInfo));
3811 critical = 1;
3812 }
3813
3814 /*
3815 * System files are only dirty when altflush is set.
3816 */
3817 if (altflush == 0) {
3818 goto done;
3819 }
3820
3821 /* Sync Extents over-flow file meta data */
3822 fp = VTOF(vcb->extentsRefNum);
3823 if (FTOC(fp)->c_flag & C_MODIFIED) {
3824 for (i = 0; i < kHFSPlusExtentDensity; i++) {
3825 volumeHeader->extentsFile.extents[i].startBlock =
3826 SWAP_BE32 (fp->ff_extents[i].startBlock);
3827 volumeHeader->extentsFile.extents[i].blockCount =
3828 SWAP_BE32 (fp->ff_extents[i].blockCount);
3829 }
3830 volumeHeader->extentsFile.logicalSize = SWAP_BE64 (fp->ff_size);
3831 volumeHeader->extentsFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
3832 volumeHeader->extentsFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
3833 FTOC(fp)->c_flag &= ~C_MODIFIED;
3834 }
3835
3836 /* Sync Catalog file meta data */
3837 fp = VTOF(vcb->catalogRefNum);
3838 if (FTOC(fp)->c_flag & C_MODIFIED) {
3839 for (i = 0; i < kHFSPlusExtentDensity; i++) {
3840 volumeHeader->catalogFile.extents[i].startBlock =
3841 SWAP_BE32 (fp->ff_extents[i].startBlock);
3842 volumeHeader->catalogFile.extents[i].blockCount =
3843 SWAP_BE32 (fp->ff_extents[i].blockCount);
3844 }
3845 volumeHeader->catalogFile.logicalSize = SWAP_BE64 (fp->ff_size);
3846 volumeHeader->catalogFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
3847 volumeHeader->catalogFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
3848 FTOC(fp)->c_flag &= ~C_MODIFIED;
3849 }
3850
3851 /* Sync Allocation file meta data */
3852 fp = VTOF(vcb->allocationsRefNum);
3853 if (FTOC(fp)->c_flag & C_MODIFIED) {
3854 for (i = 0; i < kHFSPlusExtentDensity; i++) {
3855 volumeHeader->allocationFile.extents[i].startBlock =
3856 SWAP_BE32 (fp->ff_extents[i].startBlock);
3857 volumeHeader->allocationFile.extents[i].blockCount =
3858 SWAP_BE32 (fp->ff_extents[i].blockCount);
3859 }
3860 volumeHeader->allocationFile.logicalSize = SWAP_BE64 (fp->ff_size);
3861 volumeHeader->allocationFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
3862 volumeHeader->allocationFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
3863 FTOC(fp)->c_flag &= ~C_MODIFIED;
3864 }
3865
3866 /* Sync Attribute file meta data */
3867 if (hfsmp->hfs_attribute_vp) {
3868 fp = VTOF(hfsmp->hfs_attribute_vp);
3869 for (i = 0; i < kHFSPlusExtentDensity; i++) {
3870 volumeHeader->attributesFile.extents[i].startBlock =
3871 SWAP_BE32 (fp->ff_extents[i].startBlock);
3872 volumeHeader->attributesFile.extents[i].blockCount =
3873 SWAP_BE32 (fp->ff_extents[i].blockCount);
3874 }
3875 FTOC(fp)->c_flag &= ~C_MODIFIED;
3876 volumeHeader->attributesFile.logicalSize = SWAP_BE64 (fp->ff_size);
3877 volumeHeader->attributesFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
3878 volumeHeader->attributesFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
3879 }
3880
3881 /* Sync Startup file meta data */
3882 if (hfsmp->hfs_startup_vp) {
3883 fp = VTOF(hfsmp->hfs_startup_vp);
3884 if (FTOC(fp)->c_flag & C_MODIFIED) {
3885 for (i = 0; i < kHFSPlusExtentDensity; i++) {
3886 volumeHeader->startupFile.extents[i].startBlock =
3887 SWAP_BE32 (fp->ff_extents[i].startBlock);
3888 volumeHeader->startupFile.extents[i].blockCount =
3889 SWAP_BE32 (fp->ff_extents[i].blockCount);
3890 }
3891 volumeHeader->startupFile.logicalSize = SWAP_BE64 (fp->ff_size);
3892 volumeHeader->startupFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
3893 volumeHeader->startupFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
3894 FTOC(fp)->c_flag &= ~C_MODIFIED;
3895 }
3896 }
3897
3898 done:
3899 MarkVCBClean(hfsmp);
3900 lck_mtx_unlock(&hfsmp->hfs_mutex);
3901
3902 /* If requested, flush out the alternate volume header */
3903 if (altflush && hfsmp->hfs_alt_id_sector) {
3904 if (buf_meta_bread(hfsmp->hfs_devvp,
3905 HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys),
3906 hfsmp->hfs_physical_block_size, NOCRED, &alt_bp) == 0) {
3907 if (hfsmp->jnl) {
3908 journal_modify_block_start(hfsmp->jnl, alt_bp);
3909 }
3910
3911 bcopy(volumeHeader, (char *)buf_dataptr(alt_bp) +
3912 HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size),
3913 kMDBSize);
3914
3915 if (hfsmp->jnl) {
3916 journal_modify_block_end(hfsmp->jnl, alt_bp, NULL, NULL);
3917 } else {
3918 (void) VNOP_BWRITE(alt_bp);
3919 }
3920 } else if (alt_bp)
3921 buf_brelse(alt_bp);
3922 }
3923
3924 if (hfsmp->jnl) {
3925 journal_modify_block_end(hfsmp->jnl, bp, NULL, NULL);
3926 } else {
3927 if (waitfor != MNT_WAIT)
3928 buf_bawrite(bp);
3929 else {
3930 retval = VNOP_BWRITE(bp);
3931 /* When critical data changes, flush the device cache */
3932 if (critical && (retval == 0)) {
3933 (void) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE,
3934 NULL, FWRITE, NULL);
3935 }
3936 }
3937 }
3938 hfs_end_transaction(hfsmp);
3939
3940 return (retval);
3941
3942 err_exit:
3943 if (alt_bp)
3944 buf_brelse(alt_bp);
3945 if (bp)
3946 buf_brelse(bp);
3947 hfs_end_transaction(hfsmp);
3948 return retval;
3949 }
3950
3951
3952 /*
3953 * Extend a file system.
3954 */
3955 int
3956 hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
3957 {
3958 struct proc *p = vfs_context_proc(context);
3959 kauth_cred_t cred = vfs_context_ucred(context);
3960 struct vnode *vp;
3961 struct vnode *devvp;
3962 struct buf *bp;
3963 struct filefork *fp = NULL;
3964 ExtendedVCB *vcb;
3965 struct cat_fork forkdata;
3966 u_int64_t oldsize;
3967 u_int64_t newblkcnt;
3968 u_int64_t prev_phys_block_count;
3969 u_int32_t addblks;
3970 u_int64_t sector_count;
3971 u_int32_t sector_size;
3972 u_int32_t phys_sector_size;
3973 u_int32_t overage_blocks;
3974 daddr64_t prev_alt_sector;
3975 daddr_t bitmapblks;
3976 int lockflags = 0;
3977 int error;
3978 int64_t oldBitmapSize;
3979 Boolean usedExtendFileC = false;
3980 int transaction_begun = 0;
3981
3982 devvp = hfsmp->hfs_devvp;
3983 vcb = HFSTOVCB(hfsmp);
3984
3985 /*
3986 * - HFS Plus file systems only.
3987 * - Journaling must be enabled.
3988 * - No embedded volumes.
3989 */
3990 if ((vcb->vcbSigWord == kHFSSigWord) ||
3991 (hfsmp->jnl == NULL) ||
3992 (vcb->hfsPlusIOPosOffset != 0)) {
3993 return (EPERM);
3994 }
3995 /*
3996 * If extending file system by non-root, then verify
3997 * ownership and check permissions.
3998 */
3999 if (suser(cred, NULL)) {
4000 error = hfs_vget(hfsmp, kHFSRootFolderID, &vp, 0, 0);
4001
4002 if (error)
4003 return (error);
4004 error = hfs_owner_rights(hfsmp, VTOC(vp)->c_uid, cred, p, 0);
4005 if (error == 0) {
4006 error = hfs_write_access(vp, cred, p, false);
4007 }
4008 hfs_unlock(VTOC(vp));
4009 vnode_put(vp);
4010 if (error)
4011 return (error);
4012
4013 error = vnode_authorize(devvp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, context);
4014 if (error)
4015 return (error);
4016 }
4017 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&sector_size, 0, context)) {
4018 return (ENXIO);
4019 }
4020 if (sector_size != hfsmp->hfs_logical_block_size) {
4021 return (ENXIO);
4022 }
4023 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&sector_count, 0, context)) {
4024 return (ENXIO);
4025 }
4026 if ((sector_size * sector_count) < newsize) {
4027 printf("hfs_extendfs: not enough space on device\n");
4028 return (ENOSPC);
4029 }
4030 error = VNOP_IOCTL(devvp, DKIOCGETPHYSICALBLOCKSIZE, (caddr_t)&phys_sector_size, 0, context);
4031 if (error) {
4032 if ((error != ENOTSUP) && (error != ENOTTY)) {
4033 return (ENXIO);
4034 }
4035 /* If ioctl is not supported, force physical and logical sector size to be same */
4036 phys_sector_size = sector_size;
4037 }
4038 oldsize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
4039
4040 /*
4041 * Validate new size.
4042 */
4043 if ((newsize <= oldsize) || (newsize % sector_size) || (newsize % phys_sector_size)) {
4044 printf("hfs_extendfs: invalid size\n");
4045 return (EINVAL);
4046 }
4047 newblkcnt = newsize / vcb->blockSize;
4048 if (newblkcnt > (u_int64_t)0xFFFFFFFF)
4049 return (EOVERFLOW);
4050
4051 addblks = newblkcnt - vcb->totalBlocks;
4052
4053 if (hfs_resize_debug) {
4054 printf ("hfs_extendfs: old: size=%qu, blkcnt=%u\n", oldsize, hfsmp->totalBlocks);
4055 printf ("hfs_extendfs: new: size=%qu, blkcnt=%u, addblks=%u\n", newsize, (u_int32_t)newblkcnt, addblks);
4056 }
4057 printf("hfs_extendfs: will extend \"%s\" by %d blocks\n", vcb->vcbVN, addblks);
4058
4059 HFS_MOUNT_LOCK(hfsmp, TRUE);
4060 if (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) {
4061 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
4062 error = EALREADY;
4063 goto out;
4064 }
4065 hfsmp->hfs_flags |= HFS_RESIZE_IN_PROGRESS;
4066 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
4067
4068 /* Start with a clean journal. */
4069 hfs_journal_flush(hfsmp, TRUE);
4070
4071 /*
4072 * Enclose changes inside a transaction.
4073 */
4074 if (hfs_start_transaction(hfsmp) != 0) {
4075 error = EINVAL;
4076 goto out;
4077 }
4078 transaction_begun = 1;
4079
4080
4081 /* Update the hfsmp fields for the physical information about the device */
4082 prev_phys_block_count = hfsmp->hfs_logical_block_count;
4083 prev_alt_sector = hfsmp->hfs_alt_id_sector;
4084
4085 hfsmp->hfs_logical_block_count = sector_count;
4086 /*
4087 * Note that the new AltVH location must be based on the device's EOF rather than the new
4088 * filesystem's EOF, so we use logical_block_count here rather than newsize.
4089 */
4090 hfsmp->hfs_alt_id_sector = (hfsmp->hfsPlusIOPosOffset / sector_size) +
4091 HFS_ALT_SECTOR(sector_size, hfsmp->hfs_logical_block_count);
4092 hfsmp->hfs_logical_bytes = (uint64_t) sector_count * (uint64_t) sector_size;
4093
4094
4095 /*
4096 * Note: we take the attributes lock in case we have an attribute data vnode
4097 * which needs to change size.
4098 */
4099 lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
4100 vp = vcb->allocationsRefNum;
4101 fp = VTOF(vp);
4102 bcopy(&fp->ff_data, &forkdata, sizeof(forkdata));
4103
4104 /*
4105 * Calculate additional space required (if any) by allocation bitmap.
4106 */
4107 oldBitmapSize = fp->ff_size;
4108 bitmapblks = roundup((newblkcnt+7) / 8, vcb->vcbVBMIOSize) / vcb->blockSize;
4109 if (bitmapblks > (daddr_t)fp->ff_blocks)
4110 bitmapblks -= fp->ff_blocks;
4111 else
4112 bitmapblks = 0;
4113
4114 /*
4115 * The allocation bitmap can contain unused bits that are beyond end of
4116 * current volume's allocation blocks. Usually they are supposed to be
4117 * zero'ed out but there can be cases where they might be marked as used.
4118 * After extending the file system, those bits can represent valid
4119 * allocation blocks, so we mark all the bits from the end of current
4120 * volume to end of allocation bitmap as "free".
4121 *
4122 * Figure out the number of overage blocks before proceeding though,
4123 * so we don't add more bytes to our I/O than necessary.
4124 * First figure out the total number of blocks representable by the
4125 * end of the bitmap file vs. the total number of blocks in the new FS.
4126 * Then subtract away the number of blocks in the current FS. This is how much
4127 * we can mark as free right now without having to grow the bitmap file.
4128 */
4129 overage_blocks = fp->ff_blocks * vcb->blockSize * 8;
4130 overage_blocks = MIN (overage_blocks, newblkcnt);
4131 overage_blocks -= vcb->totalBlocks;
4132
4133 BlockMarkFreeUnused(vcb, vcb->totalBlocks, overage_blocks);
4134
4135 if (bitmapblks > 0) {
4136 daddr64_t blkno;
4137 daddr_t blkcnt;
4138 off_t bytesAdded;
4139
4140 /*
4141 * Get the bitmap's current size (in allocation blocks) so we know
4142 * where to start zero filling once the new space is added. We've
4143 * got to do this before the bitmap is grown.
4144 */
4145 blkno = (daddr64_t)fp->ff_blocks;
4146
4147 /*
4148 * Try to grow the allocation file in the normal way, using allocation
4149 * blocks already existing in the file system. This way, we might be
4150 * able to grow the bitmap contiguously, or at least in the metadata
4151 * zone.
4152 */
4153 error = ExtendFileC(vcb, fp, bitmapblks * vcb->blockSize, 0,
4154 kEFAllMask | kEFNoClumpMask | kEFReserveMask
4155 | kEFMetadataMask | kEFContigMask, &bytesAdded);
4156
4157 if (error == 0) {
4158 usedExtendFileC = true;
4159 } else {
4160 /*
4161 * If the above allocation failed, fall back to allocating the new
4162 * extent of the bitmap from the space we're going to add. Since those
4163 * blocks don't yet belong to the file system, we have to update the
4164 * extent list directly, and manually adjust the file size.
4165 */
4166 bytesAdded = 0;
4167 error = AddFileExtent(vcb, fp, vcb->totalBlocks, bitmapblks);
4168 if (error) {
4169 printf("hfs_extendfs: error %d adding extents\n", error);
4170 goto out;
4171 }
4172 fp->ff_blocks += bitmapblks;
4173 VTOC(vp)->c_blocks = fp->ff_blocks;
4174 VTOC(vp)->c_flag |= C_MODIFIED;
4175 }
4176
4177 /*
4178 * Update the allocation file's size to include the newly allocated
4179 * blocks. Note that ExtendFileC doesn't do this, which is why this
4180 * statement is outside the above "if" statement.
4181 */
4182 fp->ff_size += (u_int64_t)bitmapblks * (u_int64_t)vcb->blockSize;
4183
4184 /*
4185 * Zero out the new bitmap blocks.
4186 */
4187 {
4188
4189 bp = NULL;
4190 blkcnt = bitmapblks;
4191 while (blkcnt > 0) {
4192 error = (int)buf_meta_bread(vp, blkno, vcb->blockSize, NOCRED, &bp);
4193 if (error) {
4194 if (bp) {
4195 buf_brelse(bp);
4196 }
4197 break;
4198 }
4199 bzero((char *)buf_dataptr(bp), vcb->blockSize);
4200 buf_markaged(bp);
4201 error = (int)buf_bwrite(bp);
4202 if (error)
4203 break;
4204 --blkcnt;
4205 ++blkno;
4206 }
4207 }
4208 if (error) {
4209 printf("hfs_extendfs: error %d clearing blocks\n", error);
4210 goto out;
4211 }
4212 /*
4213 * Mark the new bitmap space as allocated.
4214 *
4215 * Note that ExtendFileC will have marked any blocks it allocated, so
4216 * this is only needed if we used AddFileExtent. Also note that this
4217 * has to come *after* the zero filling of new blocks in the case where
4218 * we used AddFileExtent (since the part of the bitmap we're touching
4219 * is in those newly allocated blocks).
4220 */
4221 if (!usedExtendFileC) {
4222 error = BlockMarkAllocated(vcb, vcb->totalBlocks, bitmapblks);
4223 if (error) {
4224 printf("hfs_extendfs: error %d setting bitmap\n", error);
4225 goto out;
4226 }
4227 vcb->freeBlocks -= bitmapblks;
4228 }
4229 }
4230 /*
4231 * Mark the new alternate VH as allocated.
4232 */
4233 if (vcb->blockSize == 512)
4234 error = BlockMarkAllocated(vcb, vcb->totalBlocks + addblks - 2, 2);
4235 else
4236 error = BlockMarkAllocated(vcb, vcb->totalBlocks + addblks - 1, 1);
4237 if (error) {
4238 printf("hfs_extendfs: error %d setting bitmap (VH)\n", error);
4239 goto out;
4240 }
4241 /*
4242 * Mark the old alternate VH as free.
4243 */
4244 if (vcb->blockSize == 512)
4245 (void) BlockMarkFree(vcb, vcb->totalBlocks - 2, 2);
4246 else
4247 (void) BlockMarkFree(vcb, vcb->totalBlocks - 1, 1);
4248 /*
4249 * Adjust file system variables for new space.
4250 */
4251 vcb->totalBlocks += addblks;
4252 vcb->freeBlocks += addblks;
4253 MarkVCBDirty(vcb);
4254 error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
4255 if (error) {
4256 printf("hfs_extendfs: couldn't flush volume headers (%d)", error);
4257 /*
4258 * Restore to old state.
4259 */
4260 if (usedExtendFileC) {
4261 (void) TruncateFileC(vcb, fp, oldBitmapSize, 0, FORK_IS_RSRC(fp),
4262 FTOC(fp)->c_fileid, false);
4263 } else {
4264 fp->ff_blocks -= bitmapblks;
4265 fp->ff_size -= (u_int64_t)bitmapblks * (u_int64_t)vcb->blockSize;
4266 /*
4267 * No need to mark the excess blocks free since those bitmap blocks
4268 * are no longer part of the bitmap. But we do need to undo the
4269 * effect of the "vcb->freeBlocks -= bitmapblks" above.
4270 */
4271 vcb->freeBlocks += bitmapblks;
4272 }
4273 vcb->totalBlocks -= addblks;
4274 vcb->freeBlocks -= addblks;
4275 hfsmp->hfs_logical_block_count = prev_phys_block_count;
4276 hfsmp->hfs_alt_id_sector = prev_alt_sector;
4277 MarkVCBDirty(vcb);
4278 if (vcb->blockSize == 512) {
4279 if (BlockMarkAllocated(vcb, vcb->totalBlocks - 2, 2)) {
4280 hfs_mark_volume_inconsistent(hfsmp);
4281 }
4282 } else {
4283 if (BlockMarkAllocated(vcb, vcb->totalBlocks - 1, 1)) {
4284 hfs_mark_volume_inconsistent(hfsmp);
4285 }
4286 }
4287 goto out;
4288 }
4289 /*
4290 * Invalidate the old alternate volume header.
4291 */
4292 bp = NULL;
4293 if (prev_alt_sector) {
4294 if (buf_meta_bread(hfsmp->hfs_devvp,
4295 HFS_PHYSBLK_ROUNDDOWN(prev_alt_sector, hfsmp->hfs_log_per_phys),
4296 hfsmp->hfs_physical_block_size, NOCRED, &bp) == 0) {
4297 journal_modify_block_start(hfsmp->jnl, bp);
4298
4299 bzero((char *)buf_dataptr(bp) + HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size), kMDBSize);
4300
4301 journal_modify_block_end(hfsmp->jnl, bp, NULL, NULL);
4302 } else if (bp) {
4303 buf_brelse(bp);
4304 }
4305 }
4306
4307 /*
4308 * Update the metadata zone size based on current volume size
4309 */
4310 hfs_metadatazone_init(hfsmp, false);
4311
4312 /*
4313 * Adjust the size of hfsmp->hfs_attrdata_vp
4314 */
4315 if (hfsmp->hfs_attrdata_vp) {
4316 struct cnode *attr_cp;
4317 struct filefork *attr_fp;
4318
4319 if (vnode_get(hfsmp->hfs_attrdata_vp) == 0) {
4320 attr_cp = VTOC(hfsmp->hfs_attrdata_vp);
4321 attr_fp = VTOF(hfsmp->hfs_attrdata_vp);
4322
4323 attr_cp->c_blocks = newblkcnt;
4324 attr_fp->ff_blocks = newblkcnt;
4325 attr_fp->ff_extents[0].blockCount = newblkcnt;
4326 attr_fp->ff_size = (off_t) newblkcnt * hfsmp->blockSize;
4327 ubc_setsize(hfsmp->hfs_attrdata_vp, attr_fp->ff_size);
4328 vnode_put(hfsmp->hfs_attrdata_vp);
4329 }
4330 }
4331
4332 /*
4333 * Update the R/B Tree if necessary. Since we don't have to drop the systemfile
4334 * locks in the middle of these operations like we do in the truncate case
4335 * where we have to relocate files, we can only update the red-black tree
4336 * if there were actual changes made to the bitmap. Also, we can't really scan the
4337 * new portion of the bitmap before it has been allocated. The BlockMarkAllocated
4338 * routines are smart enough to avoid the r/b tree if the portion they are manipulating is
4339 * not currently controlled by the tree.
4340 *
4341 * We only update hfsmp->allocLimit if totalBlocks actually increased.
4342 */
4343 if (error == 0) {
4344 UpdateAllocLimit(hfsmp, hfsmp->totalBlocks);
4345 }
4346
4347 /* Release all locks and sync up journal content before
4348 * checking and extending, if required, the journal
4349 */
4350 if (lockflags) {
4351 hfs_systemfile_unlock(hfsmp, lockflags);
4352 lockflags = 0;
4353 }
4354 if (transaction_begun) {
4355 hfs_end_transaction(hfsmp);
4356 hfs_journal_flush(hfsmp, TRUE);
4357 transaction_begun = 0;
4358 }
4359
4360 /* Increase the journal size, if required. */
4361 error = hfs_extend_journal(hfsmp, sector_size, sector_count, context);
4362 if (error) {
4363 printf ("hfs_extendfs: Could not extend journal size\n");
4364 goto out_noalloc;
4365 }
4366
4367 /* Log successful extending */
4368 printf("hfs_extendfs: extended \"%s\" to %d blocks (was %d blocks)\n",
4369 hfsmp->vcbVN, hfsmp->totalBlocks, (u_int32_t)(oldsize/hfsmp->blockSize));
4370
4371 out:
4372 if (error && fp) {
4373 /* Restore allocation fork. */
4374 bcopy(&forkdata, &fp->ff_data, sizeof(forkdata));
4375 VTOC(vp)->c_blocks = fp->ff_blocks;
4376
4377 }
4378
4379 out_noalloc:
4380 HFS_MOUNT_LOCK(hfsmp, TRUE);
4381 hfsmp->hfs_flags &= ~HFS_RESIZE_IN_PROGRESS;
4382 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
4383 if (lockflags) {
4384 hfs_systemfile_unlock(hfsmp, lockflags);
4385 }
4386 if (transaction_begun) {
4387 hfs_end_transaction(hfsmp);
4388 hfs_journal_flush(hfsmp, FALSE);
4389 /* Just to be sure, sync all data to the disk */
4390 (void) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
4391 }
4392
4393 return MacToVFSError(error);
4394 }
4395
4396 #define HFS_MIN_SIZE (32LL * 1024LL * 1024LL)
4397
4398 /*
4399 * Truncate a file system (while still mounted).
4400 */
4401 int
4402 hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
4403 {
4404 struct buf *bp = NULL;
4405 u_int64_t oldsize;
4406 u_int32_t newblkcnt;
4407 u_int32_t reclaimblks = 0;
4408 int lockflags = 0;
4409 int transaction_begun = 0;
4410 Boolean updateFreeBlocks = false;
4411 Boolean disable_sparse = false;
4412 int error = 0;
4413
4414 lck_mtx_lock(&hfsmp->hfs_mutex);
4415 if (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) {
4416 lck_mtx_unlock(&hfsmp->hfs_mutex);
4417 return (EALREADY);
4418 }
4419 hfsmp->hfs_flags |= HFS_RESIZE_IN_PROGRESS;
4420 hfsmp->hfs_resize_blocksmoved = 0;
4421 hfsmp->hfs_resize_totalblocks = 0;
4422 hfsmp->hfs_resize_progress = 0;
4423 lck_mtx_unlock(&hfsmp->hfs_mutex);
4424
4425 /*
4426 * - Journaled HFS Plus volumes only.
4427 * - No embedded volumes.
4428 */
4429 if ((hfsmp->jnl == NULL) ||
4430 (hfsmp->hfsPlusIOPosOffset != 0)) {
4431 error = EPERM;
4432 goto out;
4433 }
4434 oldsize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
4435 newblkcnt = newsize / hfsmp->blockSize;
4436 reclaimblks = hfsmp->totalBlocks - newblkcnt;
4437
4438 if (hfs_resize_debug) {
4439 printf ("hfs_truncatefs: old: size=%qu, blkcnt=%u, freeblks=%u\n", oldsize, hfsmp->totalBlocks, hfs_freeblks(hfsmp, 1));
4440 printf ("hfs_truncatefs: new: size=%qu, blkcnt=%u, reclaimblks=%u\n", newsize, newblkcnt, reclaimblks);
4441 }
4442
4443 /* Make sure new size is valid. */
4444 if ((newsize < HFS_MIN_SIZE) ||
4445 (newsize >= oldsize) ||
4446 (newsize % hfsmp->hfs_logical_block_size) ||
4447 (newsize % hfsmp->hfs_physical_block_size)) {
4448 printf ("hfs_truncatefs: invalid size (newsize=%qu, oldsize=%qu)\n", newsize, oldsize);
4449 error = EINVAL;
4450 goto out;
4451 }
4452
4453 /*
4454 * Make sure that the file system has enough free blocks reclaim.
4455 *
4456 * Before resize, the disk is divided into four zones -
4457 * A. Allocated_Stationary - These are allocated blocks that exist
4458 * before the new end of disk. These blocks will not be
4459 * relocated or modified during resize.
4460 * B. Free_Stationary - These are free blocks that exist before the
4461 * new end of disk. These blocks can be used for any new
4462 * allocations during resize, including allocation for relocating
4463 * data from the area of disk being reclaimed.
4464 * C. Allocated_To-Reclaim - These are allocated blocks that exist
4465 * beyond the new end of disk. These blocks need to be reclaimed
4466 * during resize by allocating equal number of blocks in Free
4467 * Stationary zone and copying the data.
4468 * D. Free_To-Reclaim - These are free blocks that exist beyond the
4469 * new end of disk. Nothing special needs to be done to reclaim
4470 * them.
4471 *
4472 * Total number of blocks on the disk before resize:
4473 * ------------------------------------------------
4474 * Total Blocks = Allocated_Stationary + Free_Stationary +
4475 * Allocated_To-Reclaim + Free_To-Reclaim
4476 *
4477 * Total number of blocks that need to be reclaimed:
4478 * ------------------------------------------------
4479 * Blocks to Reclaim = Allocated_To-Reclaim + Free_To-Reclaim
4480 *
4481 * Note that the check below also makes sure that we have enough space
4482 * to relocate data from Allocated_To-Reclaim to Free_Stationary.
4483 * Therefore we do not need to check total number of blocks to relocate
4484 * later in the code.
4485 *
4486 * The condition below gets converted to:
4487 *
4488 * Allocated To-Reclaim + Free To-Reclaim >= Free Stationary + Free To-Reclaim
4489 *
4490 * which is equivalent to:
4491 *
4492 * Allocated To-Reclaim >= Free Stationary
4493 */
4494 if (reclaimblks >= hfs_freeblks(hfsmp, 1)) {
4495 printf("hfs_truncatefs: insufficient space (need %u blocks; have %u free blocks)\n", reclaimblks, hfs_freeblks(hfsmp, 1));
4496 error = ENOSPC;
4497 goto out;
4498 }
4499
4500 /* Start with a clean journal. */
4501 hfs_journal_flush(hfsmp, TRUE);
4502
4503 if (hfs_start_transaction(hfsmp) != 0) {
4504 error = EINVAL;
4505 goto out;
4506 }
4507 transaction_begun = 1;
4508
4509 /* Take the bitmap lock to update the alloc limit field */
4510 lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
4511
4512 /*
4513 * Prevent new allocations from using the part we're trying to truncate.
4514 *
4515 * NOTE: allocLimit is set to the allocation block number where the new
4516 * alternate volume header will be. That way there will be no files to
4517 * interfere with allocating the new alternate volume header, and no files
4518 * in the allocation blocks beyond (i.e. the blocks we're trying to
4519 * truncate away.
4520 *
4521 * Also shrink the red-black tree if needed.
4522 */
4523 if (hfsmp->blockSize == 512) {
4524 error = UpdateAllocLimit (hfsmp, newblkcnt - 2);
4525 }
4526 else {
4527 error = UpdateAllocLimit (hfsmp, newblkcnt - 1);
4528 }
4529
4530 /* Sparse devices use first fit allocation which is not ideal
4531 * for volume resize which requires best fit allocation. If a
4532 * sparse device is being truncated, disable the sparse device
4533 * property temporarily for the duration of resize. Also reset
4534 * the free extent cache so that it is rebuilt as sorted by
4535 * totalBlocks instead of startBlock.
4536 *
4537 * Note that this will affect all allocations on the volume and
4538 * ideal fix would be just to modify resize-related allocations,
4539 * but it will result in complexity like handling of two free
4540 * extent caches sorted differently, etc. So we stick to this
4541 * solution for now.
4542 */
4543 HFS_MOUNT_LOCK(hfsmp, TRUE);
4544 if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
4545 hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
4546 ResetVCBFreeExtCache(hfsmp);
4547 disable_sparse = true;
4548 }
4549
4550 /*
4551 * Update the volume free block count to reflect the total number
4552 * of free blocks that will exist after a successful resize.
4553 * Relocation of extents will result in no net change in the total
4554 * free space on the disk. Therefore the code that allocates
4555 * space for new extent and deallocates the old extent explicitly
4556 * prevents updating the volume free block count. It will also
4557 * prevent false disk full error when the number of blocks in
4558 * an extent being relocated is more than the free blocks that
4559 * will exist after the volume is resized.
4560 */
4561 hfsmp->freeBlocks -= reclaimblks;
4562 updateFreeBlocks = true;
4563 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
4564
4565 if (lockflags) {
4566 hfs_systemfile_unlock(hfsmp, lockflags);
4567 lockflags = 0;
4568 }
4569
4570 /*
4571 * Update the metadata zone size to match the new volume size,
4572 * and if it too less, metadata zone might be disabled.
4573 */
4574 hfs_metadatazone_init(hfsmp, false);
4575
4576 /*
4577 * If some files have blocks at or beyond the location of the
4578 * new alternate volume header, recalculate free blocks and
4579 * reclaim blocks. Otherwise just update free blocks count.
4580 *
4581 * The current allocLimit is set to the location of new alternate
4582 * volume header, and reclaimblks are the total number of blocks
4583 * that need to be reclaimed. So the check below is really
4584 * ignoring the blocks allocated for old alternate volume header.
4585 */
4586 if (hfs_isallocated(hfsmp, hfsmp->allocLimit, reclaimblks)) {
4587 /*
4588 * hfs_reclaimspace will use separate transactions when
4589 * relocating files (so we don't overwhelm the journal).
4590 */
4591 hfs_end_transaction(hfsmp);
4592 transaction_begun = 0;
4593
4594 /* Attempt to reclaim some space. */
4595 error = hfs_reclaimspace(hfsmp, hfsmp->allocLimit, reclaimblks, context);
4596 if (error != 0) {
4597 printf("hfs_truncatefs: couldn't reclaim space on %s (error=%d)\n", hfsmp->vcbVN, error);
4598 error = ENOSPC;
4599 goto out;
4600 }
4601 if (hfs_start_transaction(hfsmp) != 0) {
4602 error = EINVAL;
4603 goto out;
4604 }
4605 transaction_begun = 1;
4606
4607 /* Check if we're clear now. */
4608 error = hfs_isallocated(hfsmp, hfsmp->allocLimit, reclaimblks);
4609 if (error != 0) {
4610 printf("hfs_truncatefs: didn't reclaim enough space on %s (error=%d)\n", hfsmp->vcbVN, error);
4611 error = EAGAIN; /* tell client to try again */
4612 goto out;
4613 }
4614 }
4615
4616 /*
4617 * Note: we take the attributes lock in case we have an attribute data vnode
4618 * which needs to change size.
4619 */
4620 lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
4621
4622 /*
4623 * Allocate last 1KB for alternate volume header.
4624 */
4625 error = BlockMarkAllocated(hfsmp, hfsmp->allocLimit, (hfsmp->blockSize == 512) ? 2 : 1);
4626 if (error) {
4627 printf("hfs_truncatefs: Error %d allocating new alternate volume header\n", error);
4628 goto out;
4629 }
4630
4631 /*
4632 * Mark the old alternate volume header as free.
4633 * We don't bother shrinking allocation bitmap file.
4634 */
4635 if (hfsmp->blockSize == 512)
4636 (void) BlockMarkFree(hfsmp, hfsmp->totalBlocks - 2, 2);
4637 else
4638 (void) BlockMarkFree(hfsmp, hfsmp->totalBlocks - 1, 1);
4639
4640 /*
4641 * Invalidate the existing alternate volume header.
4642 *
4643 * Don't include this in a transaction (don't call journal_modify_block)
4644 * since this block will be outside of the truncated file system!
4645 */
4646 if (hfsmp->hfs_alt_id_sector) {
4647 error = buf_meta_bread(hfsmp->hfs_devvp,
4648 HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys),
4649 hfsmp->hfs_physical_block_size, NOCRED, &bp);
4650 if (error == 0) {
4651 bzero((void*)((char *)buf_dataptr(bp) + HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size)), kMDBSize);
4652 (void) VNOP_BWRITE(bp);
4653 } else {
4654 if (bp) {
4655 buf_brelse(bp);
4656 }
4657 }
4658 bp = NULL;
4659 }
4660
4661 /* Log successful shrinking. */
4662 printf("hfs_truncatefs: shrank \"%s\" to %d blocks (was %d blocks)\n",
4663 hfsmp->vcbVN, newblkcnt, hfsmp->totalBlocks);
4664
4665 /*
4666 * Adjust file system variables and flush them to disk.
4667 */
4668 hfsmp->totalBlocks = newblkcnt;
4669 hfsmp->hfs_logical_block_count = newsize / hfsmp->hfs_logical_block_size;
4670 hfsmp->hfs_logical_bytes = (uint64_t) hfsmp->hfs_logical_block_count * (uint64_t) hfsmp->hfs_logical_block_size;
4671
4672 /*
4673 * Note that although the logical block size is updated here, it is only done for
4674 * the benefit of the partition management software. The logical block count change
4675 * has not yet actually been propagated to the disk device yet.
4676 */
4677
4678 hfsmp->hfs_alt_id_sector = HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, hfsmp->hfs_logical_block_count);
4679 MarkVCBDirty(hfsmp);
4680 error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
4681 if (error)
4682 panic("hfs_truncatefs: unexpected error flushing volume header (%d)\n", error);
4683
4684 /*
4685 * Adjust the size of hfsmp->hfs_attrdata_vp
4686 */
4687 if (hfsmp->hfs_attrdata_vp) {
4688 struct cnode *cp;
4689 struct filefork *fp;
4690
4691 if (vnode_get(hfsmp->hfs_attrdata_vp) == 0) {
4692 cp = VTOC(hfsmp->hfs_attrdata_vp);
4693 fp = VTOF(hfsmp->hfs_attrdata_vp);
4694
4695 cp->c_blocks = newblkcnt;
4696 fp->ff_blocks = newblkcnt;
4697 fp->ff_extents[0].blockCount = newblkcnt;
4698 fp->ff_size = (off_t) newblkcnt * hfsmp->blockSize;
4699 ubc_setsize(hfsmp->hfs_attrdata_vp, fp->ff_size);
4700 vnode_put(hfsmp->hfs_attrdata_vp);
4701 }
4702 }
4703
4704 out:
4705 /*
4706 * Update the allocLimit to acknowledge the last one or two blocks now.
4707 * Add it to the tree as well if necessary.
4708 */
4709 UpdateAllocLimit (hfsmp, hfsmp->totalBlocks);
4710
4711 HFS_MOUNT_LOCK(hfsmp, TRUE);
4712 if (disable_sparse == true) {
4713 /* Now that resize is completed, set the volume to be sparse
4714 * device again so that all further allocations will be first
4715 * fit instead of best fit. Reset free extent cache so that
4716 * it is rebuilt.
4717 */
4718 hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
4719 ResetVCBFreeExtCache(hfsmp);
4720 }
4721
4722 if (error && (updateFreeBlocks == true)) {
4723 hfsmp->freeBlocks += reclaimblks;
4724 }
4725
4726 if (hfsmp->nextAllocation >= hfsmp->allocLimit) {
4727 hfsmp->nextAllocation = hfsmp->hfs_metazone_end + 1;
4728 }
4729 hfsmp->hfs_flags &= ~HFS_RESIZE_IN_PROGRESS;
4730 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
4731
4732 /* On error, reset the metadata zone for original volume size */
4733 if (error && (updateFreeBlocks == true)) {
4734 hfs_metadatazone_init(hfsmp, false);
4735 }
4736
4737 if (lockflags) {
4738 hfs_systemfile_unlock(hfsmp, lockflags);
4739 }
4740 if (transaction_begun) {
4741 hfs_end_transaction(hfsmp);
4742 hfs_journal_flush(hfsmp, FALSE);
4743 /* Just to be sure, sync all data to the disk */
4744 (void) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
4745 }
4746
4747 return MacToVFSError(error);
4748 }
4749
4750
4751 /*
4752 * Invalidate the physical block numbers associated with buffer cache blocks
4753 * in the given extent of the given vnode.
4754 */
4755 struct hfs_inval_blk_no {
4756 daddr64_t sectorStart;
4757 daddr64_t sectorCount;
4758 };
4759 static int
4760 hfs_invalidate_block_numbers_callback(buf_t bp, void *args_in)
4761 {
4762 daddr64_t blkno;
4763 struct hfs_inval_blk_no *args;
4764
4765 blkno = buf_blkno(bp);
4766 args = args_in;
4767
4768 if (blkno >= args->sectorStart && blkno < args->sectorStart+args->sectorCount)
4769 buf_setblkno(bp, buf_lblkno(bp));
4770
4771 return BUF_RETURNED;
4772 }
4773 static void
4774 hfs_invalidate_sectors(struct vnode *vp, daddr64_t sectorStart, daddr64_t sectorCount)
4775 {
4776 struct hfs_inval_blk_no args;
4777 args.sectorStart = sectorStart;
4778 args.sectorCount = sectorCount;
4779
4780 buf_iterate(vp, hfs_invalidate_block_numbers_callback, BUF_SCAN_DIRTY|BUF_SCAN_CLEAN, &args);
4781 }
4782
4783
4784 /*
4785 * Copy the contents of an extent to a new location. Also invalidates the
4786 * physical block number of any buffer cache block in the copied extent
4787 * (so that if the block is written, it will go through VNOP_BLOCKMAP to
4788 * determine the new physical block number).
4789 *
4790 * At this point, for regular files, we hold the truncate lock exclusive
4791 * and the cnode lock exclusive.
4792 */
4793 static int
4794 hfs_copy_extent(
4795 struct hfsmount *hfsmp,
4796 struct vnode *vp, /* The file whose extent is being copied. */
4797 u_int32_t oldStart, /* The start of the source extent. */
4798 u_int32_t newStart, /* The start of the destination extent. */
4799 u_int32_t blockCount, /* The number of allocation blocks to copy. */
4800 vfs_context_t context)
4801 {
4802 int err = 0;
4803 size_t bufferSize;
4804 void *buffer = NULL;
4805 struct vfsioattr ioattr;
4806 buf_t bp = NULL;
4807 off_t resid;
4808 size_t ioSize;
4809 u_int32_t ioSizeSectors; /* Device sectors in this I/O */
4810 daddr64_t srcSector, destSector;
4811 u_int32_t sectorsPerBlock = hfsmp->blockSize / hfsmp->hfs_logical_block_size;
4812 #if CONFIG_PROTECT
4813 int cpenabled = 0;
4814 #endif
4815
4816 /*
4817 * Sanity check that we have locked the vnode of the file we're copying.
4818 *
4819 * But since hfs_systemfile_lock() doesn't actually take the lock on
4820 * the allocation file if a journal is active, ignore the check if the
4821 * file being copied is the allocation file.
4822 */
4823 struct cnode *cp = VTOC(vp);
4824 if (cp != hfsmp->hfs_allocation_cp && cp->c_lockowner != current_thread())
4825 panic("hfs_copy_extent: vp=%p (cp=%p) not owned?\n", vp, cp);
4826
4827 #if CONFIG_PROTECT
4828 /*
4829 * Prepare the CP blob and get it ready for use, if necessary.
4830 *
4831 * Note that we specifically *exclude* system vnodes (catalog, bitmap, extents, EAs),
4832 * because they are implicitly protected via the media key on iOS. As such, they
4833 * must not be relocated except with the media key. So it is OK to not pass down
4834 * a special cpentry to the IOMedia/LwVM code for handling.
4835 */
4836 if (!vnode_issystem (vp) && vnode_isreg(vp) && cp_fs_protected (hfsmp->hfs_mp)) {
4837 int cp_err = 0;
4838 /*
4839 * Ideally, the file whose extents we are about to manipulate is using the
4840 * newer offset-based IVs so that we can manipulate it regardless of the
4841 * current lock state. However, we must maintain support for older-style
4842 * EAs.
4843 *
4844 * For the older EA case, the IV was tied to the device LBA for file content.
4845 * This means that encrypted data cannot be moved from one location to another
4846 * in the filesystem without garbling the IV data. As a result, we need to
4847 * access the file's plaintext because we cannot do our AES-symmetry trick
4848 * here. This requires that we attempt a key-unwrap here (via cp_handle_relocate)
4849 * to make forward progress. If the keys are unavailable then we will
4850 * simply stop the resize in its tracks here since we cannot move
4851 * this extent at this time.
4852 */
4853 if ((cp->c_cpentry->cp_flags & CP_OFF_IV_ENABLED) == 0) {
4854 cp_err = cp_handle_relocate(cp, hfsmp);
4855 }
4856
4857 if (cp_err) {
4858 printf ("hfs_copy_extent: cp_handle_relocate failed (%d) \n", cp_err);
4859 return cp_err;
4860 }
4861
4862 cpenabled = 1;
4863 }
4864 #endif
4865
4866
4867 /*
4868 * Determine the I/O size to use
4869 *
4870 * NOTE: Many external drives will result in an ioSize of 128KB.
4871 * TODO: Should we use a larger buffer, doing several consecutive
4872 * reads, then several consecutive writes?
4873 */
4874 vfs_ioattr(hfsmp->hfs_mp, &ioattr);
4875 bufferSize = MIN(ioattr.io_maxreadcnt, ioattr.io_maxwritecnt);
4876 if (kmem_alloc(kernel_map, (vm_offset_t*) &buffer, bufferSize))
4877 return ENOMEM;
4878
4879 /* Get a buffer for doing the I/O */
4880 bp = buf_alloc(hfsmp->hfs_devvp);
4881 buf_setdataptr(bp, (uintptr_t)buffer);
4882
4883 resid = (off_t) blockCount * (off_t) hfsmp->blockSize;
4884 srcSector = (daddr64_t) oldStart * hfsmp->blockSize / hfsmp->hfs_logical_block_size;
4885 destSector = (daddr64_t) newStart * hfsmp->blockSize / hfsmp->hfs_logical_block_size;
4886 while (resid > 0) {
4887 ioSize = MIN(bufferSize, (size_t) resid);
4888 ioSizeSectors = ioSize / hfsmp->hfs_logical_block_size;
4889
4890 /* Prepare the buffer for reading */
4891 buf_reset(bp, B_READ);
4892 buf_setsize(bp, ioSize);
4893 buf_setcount(bp, ioSize);
4894 buf_setblkno(bp, srcSector);
4895 buf_setlblkno(bp, srcSector);
4896
4897 /*
4898 * Note that because this is an I/O to the device vp
4899 * it is correct to have lblkno and blkno both point to the
4900 * start sector being read from. If it were being issued against the
4901 * underlying file then that would be different.
4902 */
4903
4904 /* Attach the new CP blob to the buffer if needed */
4905 #if CONFIG_PROTECT
4906 if (cpenabled) {
4907 if (cp->c_cpentry->cp_flags & CP_OFF_IV_ENABLED) {
4908 /* attach the RELOCATION_INFLIGHT flag for the underlying call to VNOP_STRATEGY */
4909 cp->c_cpentry->cp_flags |= CP_RELOCATION_INFLIGHT;
4910 buf_setcpaddr(bp, hfsmp->hfs_resize_cpentry);
4911 }
4912 else {
4913 /*
4914 * Use the cnode's cp key. This file is tied to the
4915 * LBAs of the physical blocks that it occupies.
4916 */
4917 buf_setcpaddr (bp, cp->c_cpentry);
4918 }
4919
4920 /* Initialize the content protection file offset to start at 0 */
4921 buf_setcpoff (bp, 0);
4922 }
4923 #endif
4924
4925 /* Do the read */
4926 err = VNOP_STRATEGY(bp);
4927 if (!err)
4928 err = buf_biowait(bp);
4929 if (err) {
4930 #if CONFIG_PROTECT
4931 /* Turn the flag off in error cases. */
4932 if (cpenabled) {
4933 cp->c_cpentry->cp_flags &= ~CP_RELOCATION_INFLIGHT;
4934 }
4935 #endif
4936 printf("hfs_copy_extent: Error %d from VNOP_STRATEGY (read)\n", err);
4937 break;
4938 }
4939
4940 /* Prepare the buffer for writing */
4941 buf_reset(bp, B_WRITE);
4942 buf_setsize(bp, ioSize);
4943 buf_setcount(bp, ioSize);
4944 buf_setblkno(bp, destSector);
4945 buf_setlblkno(bp, destSector);
4946 if (vnode_issystem(vp) && journal_uses_fua(hfsmp->jnl))
4947 buf_markfua(bp);
4948
4949 #if CONFIG_PROTECT
4950 /* Attach the CP to the buffer if needed */
4951 if (cpenabled) {
4952 if (cp->c_cpentry->cp_flags & CP_OFF_IV_ENABLED) {
4953 buf_setcpaddr(bp, hfsmp->hfs_resize_cpentry);
4954 }
4955 else {
4956 /*
4957 * Use the cnode's CP key. This file is still tied
4958 * to the LBAs of the physical blocks that it occupies.
4959 */
4960 buf_setcpaddr (bp, cp->c_cpentry);
4961 }
4962 /*
4963 * The last STRATEGY call may have updated the cp file offset behind our
4964 * back, so we cannot trust it. Re-initialize the content protection
4965 * file offset back to 0 before initiating the write portion of this I/O.
4966 */
4967 buf_setcpoff (bp, 0);
4968 }
4969 #endif
4970
4971 /* Do the write */
4972 vnode_startwrite(hfsmp->hfs_devvp);
4973 err = VNOP_STRATEGY(bp);
4974 if (!err) {
4975 err = buf_biowait(bp);
4976 }
4977 #if CONFIG_PROTECT
4978 /* Turn the flag off regardless once the strategy call finishes. */
4979 if (cpenabled) {
4980 cp->c_cpentry->cp_flags &= ~CP_RELOCATION_INFLIGHT;
4981 }
4982 #endif
4983 if (err) {
4984 printf("hfs_copy_extent: Error %d from VNOP_STRATEGY (write)\n", err);
4985 break;
4986 }
4987
4988 resid -= ioSize;
4989 srcSector += ioSizeSectors;
4990 destSector += ioSizeSectors;
4991 }
4992 if (bp)
4993 buf_free(bp);
4994 if (buffer)
4995 kmem_free(kernel_map, (vm_offset_t)buffer, bufferSize);
4996
4997 /* Make sure all writes have been flushed to disk. */
4998 if (vnode_issystem(vp) && !journal_uses_fua(hfsmp->jnl)) {
4999 err = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
5000 if (err) {
5001 printf("hfs_copy_extent: DKIOCSYNCHRONIZECACHE failed (%d)\n", err);
5002 err = 0; /* Don't fail the copy. */
5003 }
5004 }
5005
5006 if (!err)
5007 hfs_invalidate_sectors(vp, (daddr64_t)oldStart*sectorsPerBlock, (daddr64_t)blockCount*sectorsPerBlock);
5008
5009 return err;
5010 }
5011
5012
5013 /* Structure to store state of reclaiming extents from a
5014 * given file. hfs_reclaim_file()/hfs_reclaim_xattr()
5015 * initializes the values in this structure which are then
5016 * used by code that reclaims and splits the extents.
5017 */
5018 struct hfs_reclaim_extent_info {
5019 struct vnode *vp;
5020 u_int32_t fileID;
5021 u_int8_t forkType;
5022 u_int8_t is_dirlink; /* Extent belongs to directory hard link */
5023 u_int8_t is_sysfile; /* Extent belongs to system file */
5024 u_int8_t is_xattr; /* Extent belongs to extent-based xattr */
5025 u_int8_t extent_index;
5026 int lockflags; /* Locks that reclaim and split code should grab before modifying the extent record */
5027 u_int32_t blocks_relocated; /* Total blocks relocated for this file till now */
5028 u_int32_t recStartBlock; /* File allocation block number (FABN) for current extent record */
5029 u_int32_t cur_blockCount; /* Number of allocation blocks that have been checked for reclaim */
5030 struct filefork *catalog_fp; /* If non-NULL, extent is from catalog record */
5031 union record {
5032 HFSPlusExtentRecord overflow;/* Extent record from overflow extents btree */
5033 HFSPlusAttrRecord xattr; /* Attribute record for large EAs */
5034 } record;
5035 HFSPlusExtentDescriptor *extents; /* Pointer to current extent record being processed.
5036 * For catalog extent record, points to the correct
5037 * extent information in filefork. For overflow extent
5038 * record, or xattr record, points to extent record
5039 * in the structure above
5040 */
5041 struct cat_desc *dirlink_desc;
5042 struct cat_attr *dirlink_attr;
5043 struct filefork *dirlink_fork; /* For directory hard links, fp points actually to this */
5044 struct BTreeIterator *iterator; /* Shared read/write iterator, hfs_reclaim_file/xattr()
5045 * use it for reading and hfs_reclaim_extent()/hfs_split_extent()
5046 * use it for writing updated extent record
5047 */
5048 struct FSBufferDescriptor btdata; /* Shared btdata for reading/writing extent record, same as iterator above */
5049 u_int16_t recordlen;
5050 int overflow_count; /* For debugging, counter for overflow extent record */
5051 FCB *fcb; /* Pointer to the current btree being traversed */
5052 };
5053
5054 /*
5055 * Split the current extent into two extents, with first extent
5056 * to contain given number of allocation blocks. Splitting of
5057 * extent creates one new extent entry which can result in
5058 * shifting of many entries through all the extent records of a
5059 * file, and/or creating a new extent record in the overflow
5060 * extent btree.
5061 *
5062 * Example:
5063 * The diagram below represents two consecutive extent records,
5064 * for simplicity, lets call them record X and X+1 respectively.
5065 * Interesting extent entries have been denoted by letters.
5066 * If the letter is unchanged before and after split, it means
5067 * that the extent entry was not modified during the split.
5068 * A '.' means that the entry remains unchanged after the split
5069 * and is not relevant for our example. A '0' means that the
5070 * extent entry is empty.
5071 *
5072 * If there isn't sufficient contiguous free space to relocate
5073 * an extent (extent "C" below), we will have to break the one
5074 * extent into multiple smaller extents, and relocate each of
5075 * the smaller extents individually. The way we do this is by
5076 * finding the largest contiguous free space that is currently
5077 * available (N allocation blocks), and then convert extent "C"
5078 * into two extents, C1 and C2, that occupy exactly the same
5079 * allocation blocks as extent C. Extent C1 is the first
5080 * N allocation blocks of extent C, and extent C2 is the remainder
5081 * of extent C. Then we can relocate extent C1 since we know
5082 * we have enough contiguous free space to relocate it in its
5083 * entirety. We then repeat the process starting with extent C2.
5084 *
5085 * In record X, only the entries following entry C are shifted, and
5086 * the original entry C is replaced with two entries C1 and C2 which
5087 * are actually two extent entries for contiguous allocation blocks.
5088 *
5089 * Note that the entry E from record X is shifted into record X+1 as
5090 * the new first entry. Since the first entry of record X+1 is updated,
5091 * the FABN will also get updated with the blockCount of entry E.
5092 * This also results in shifting of all extent entries in record X+1.
5093 * Note that the number of empty entries after the split has been
5094 * changed from 3 to 2.
5095 *
5096 * Before:
5097 * record X record X+1
5098 * ---------------------===--------- ---------------------------------
5099 * | A | . | . | . | B | C | D | E | | F | . | . | . | G | 0 | 0 | 0 |
5100 * ---------------------===--------- ---------------------------------
5101 *
5102 * After:
5103 * ---------------------=======----- ---------------------------------
5104 * | A | . | . | . | B | C1| C2| D | | E | F | . | . | . | G | 0 | 0 |
5105 * ---------------------=======----- ---------------------------------
5106 *
5107 * C1.startBlock = C.startBlock
5108 * C1.blockCount = N
5109 *
5110 * C2.startBlock = C.startBlock + N
5111 * C2.blockCount = C.blockCount - N
5112 *
5113 * FABN = old FABN - E.blockCount
5114 *
5115 * Inputs:
5116 * extent_info - This is the structure that contains state about
5117 * the current file, extent, and extent record that
5118 * is being relocated. This structure is shared
5119 * among code that traverses through all the extents
5120 * of the file, code that relocates extents, and
5121 * code that splits the extent.
5122 * newBlockCount - The blockCount of the extent to be split after
5123 * successfully split operation.
5124 * Output:
5125 * Zero on success, non-zero on failure.
5126 */
5127 static int
5128 hfs_split_extent(struct hfs_reclaim_extent_info *extent_info, uint32_t newBlockCount)
5129 {
5130 int error = 0;
5131 int index = extent_info->extent_index;
5132 int i;
5133 HFSPlusExtentDescriptor shift_extent; /* Extent entry that should be shifted into next extent record */
5134 HFSPlusExtentDescriptor last_extent;
5135 HFSPlusExtentDescriptor *extents; /* Pointer to current extent record being manipulated */
5136 HFSPlusExtentRecord *extents_rec = NULL;
5137 HFSPlusExtentKey *extents_key = NULL;
5138 HFSPlusAttrRecord *xattr_rec = NULL;
5139 HFSPlusAttrKey *xattr_key = NULL;
5140 struct BTreeIterator iterator;
5141 struct FSBufferDescriptor btdata;
5142 uint16_t reclen;
5143 uint32_t read_recStartBlock; /* Starting allocation block number to read old extent record */
5144 uint32_t write_recStartBlock; /* Starting allocation block number to insert newly updated extent record */
5145 Boolean create_record = false;
5146 Boolean is_xattr;
5147 struct cnode *cp;
5148
5149 is_xattr = extent_info->is_xattr;
5150 extents = extent_info->extents;
5151 cp = VTOC(extent_info->vp);
5152
5153 if (newBlockCount == 0) {
5154 if (hfs_resize_debug) {
5155 printf ("hfs_split_extent: No splitting required for newBlockCount=0\n");
5156 }
5157 return error;
5158 }
5159
5160 if (hfs_resize_debug) {
5161 printf ("hfs_split_extent: Split record:%u recStartBlock=%u %u:(%u,%u) for %u blocks\n", extent_info->overflow_count, extent_info->recStartBlock, index, extents[index].startBlock, extents[index].blockCount, newBlockCount);
5162 }
5163
5164 /* Extents overflow btree can not have more than 8 extents.
5165 * No split allowed if the 8th extent is already used.
5166 */
5167 if ((extent_info->fileID == kHFSExtentsFileID) && (extents[kHFSPlusExtentDensity - 1].blockCount != 0)) {
5168 printf ("hfs_split_extent: Maximum 8 extents allowed for extents overflow btree, cannot split further.\n");
5169 error = ENOSPC;
5170 goto out;
5171 }
5172
5173 /* Determine the starting allocation block number for the following
5174 * overflow extent record, if any, before the current record
5175 * gets modified.
5176 */
5177 read_recStartBlock = extent_info->recStartBlock;
5178 for (i = 0; i < kHFSPlusExtentDensity; i++) {
5179 if (extents[i].blockCount == 0) {
5180 break;
5181 }
5182 read_recStartBlock += extents[i].blockCount;
5183 }
5184
5185 /* Shift and split */
5186 if (index == kHFSPlusExtentDensity-1) {
5187 /* The new extent created after split will go into following overflow extent record */
5188 shift_extent.startBlock = extents[index].startBlock + newBlockCount;
5189 shift_extent.blockCount = extents[index].blockCount - newBlockCount;
5190
5191 /* Last extent in the record will be split, so nothing to shift */
5192 } else {
5193 /* Splitting of extents can result in at most of one
5194 * extent entry to be shifted into following overflow extent
5195 * record. So, store the last extent entry for later.
5196 */
5197 shift_extent = extents[kHFSPlusExtentDensity-1];
5198 if ((hfs_resize_debug) && (shift_extent.blockCount != 0)) {
5199 printf ("hfs_split_extent: Save 7:(%u,%u) to shift into overflow record\n", shift_extent.startBlock, shift_extent.blockCount);
5200 }
5201
5202 /* Start shifting extent information from the end of the extent
5203 * record to the index where we want to insert the new extent.
5204 * Note that kHFSPlusExtentDensity-1 is already saved above, and
5205 * does not need to be shifted. The extent entry that is being
5206 * split does not get shifted.
5207 */
5208 for (i = kHFSPlusExtentDensity-2; i > index; i--) {
5209 if (hfs_resize_debug) {
5210 if (extents[i].blockCount) {
5211 printf ("hfs_split_extent: Shift %u:(%u,%u) to %u:(%u,%u)\n", i, extents[i].startBlock, extents[i].blockCount, i+1, extents[i].startBlock, extents[i].blockCount);
5212 }
5213 }
5214 extents[i+1] = extents[i];
5215 }
5216 }
5217
5218 if (index == kHFSPlusExtentDensity-1) {
5219 /* The second half of the extent being split will be the overflow
5220 * entry that will go into following overflow extent record. The
5221 * value has been stored in 'shift_extent' above, so there is
5222 * nothing to be done here.
5223 */
5224 } else {
5225 /* Update the values in the second half of the extent being split
5226 * before updating the first half of the split. Note that the
5227 * extent to split or first half of the split is at index 'index'
5228 * and a new extent or second half of the split will be inserted at
5229 * 'index+1' or into following overflow extent record.
5230 */
5231 extents[index+1].startBlock = extents[index].startBlock + newBlockCount;
5232 extents[index+1].blockCount = extents[index].blockCount - newBlockCount;
5233 }
5234 /* Update the extent being split, only the block count will change */
5235 extents[index].blockCount = newBlockCount;
5236
5237 if (hfs_resize_debug) {
5238 printf ("hfs_split_extent: Split %u:(%u,%u) and ", index, extents[index].startBlock, extents[index].blockCount);
5239 if (index != kHFSPlusExtentDensity-1) {
5240 printf ("%u:(%u,%u)\n", index+1, extents[index+1].startBlock, extents[index+1].blockCount);
5241 } else {
5242 printf ("overflow:(%u,%u)\n", shift_extent.startBlock, shift_extent.blockCount);
5243 }
5244 }
5245
5246 /* Write out information about the newly split extent to the disk */
5247 if (extent_info->catalog_fp) {
5248 /* (extent_info->catalog_fp != NULL) means the newly split
5249 * extent exists in the catalog record. This means that
5250 * the cnode was updated. Therefore, to write out the changes,
5251 * mark the cnode as modified. We cannot call hfs_update()
5252 * in this function because the caller hfs_reclaim_extent()
5253 * is holding the catalog lock currently.
5254 */
5255 cp->c_flag |= C_MODIFIED;
5256 } else {
5257 /* The newly split extent is for large EAs or is in overflow
5258 * extent record, so update it directly in the btree using the
5259 * iterator information from the shared extent_info structure
5260 */
5261 error = BTReplaceRecord(extent_info->fcb, extent_info->iterator,
5262 &(extent_info->btdata), extent_info->recordlen);
5263 if (error) {
5264 printf ("hfs_split_extent: fileID=%u BTReplaceRecord returned error=%d\n", extent_info->fileID, error);
5265 goto out;
5266 }
5267 }
5268
5269 /* No extent entry to be shifted into another extent overflow record */
5270 if (shift_extent.blockCount == 0) {
5271 if (hfs_resize_debug) {
5272 printf ("hfs_split_extent: No extent entry to be shifted into overflow records\n");
5273 }
5274 error = 0;
5275 goto out;
5276 }
5277
5278 /* The overflow extent entry has to be shifted into an extent
5279 * overflow record. This means that we might have to shift
5280 * extent entries from all subsequent overflow records by one.
5281 * We start iteration from the first record to the last record,
5282 * and shift the extent entry from one record to another.
5283 * We might have to create a new extent record for the last
5284 * extent entry for the file.
5285 */
5286
5287 /* Initialize iterator to search the next record */
5288 bzero(&iterator, sizeof(iterator));
5289 if (is_xattr) {
5290 /* Copy the key from the iterator that was used to update the modified attribute record. */
5291 xattr_key = (HFSPlusAttrKey *)&(iterator.key);
5292 bcopy((HFSPlusAttrKey *)&(extent_info->iterator->key), xattr_key, sizeof(HFSPlusAttrKey));
5293 /* Note: xattr_key->startBlock will be initialized later in the iteration loop */
5294
5295 MALLOC(xattr_rec, HFSPlusAttrRecord *,
5296 sizeof(HFSPlusAttrRecord), M_TEMP, M_WAITOK);
5297 if (xattr_rec == NULL) {
5298 error = ENOMEM;
5299 goto out;
5300 }
5301 btdata.bufferAddress = xattr_rec;
5302 btdata.itemSize = sizeof(HFSPlusAttrRecord);
5303 btdata.itemCount = 1;
5304 extents = xattr_rec->overflowExtents.extents;
5305 } else {
5306 /* Initialize the extent key for the current file */
5307 extents_key = (HFSPlusExtentKey *) &(iterator.key);
5308 extents_key->keyLength = kHFSPlusExtentKeyMaximumLength;
5309 extents_key->forkType = extent_info->forkType;
5310 extents_key->fileID = extent_info->fileID;
5311 /* Note: extents_key->startBlock will be initialized later in the iteration loop */
5312
5313 MALLOC(extents_rec, HFSPlusExtentRecord *,
5314 sizeof(HFSPlusExtentRecord), M_TEMP, M_WAITOK);
5315 if (extents_rec == NULL) {
5316 error = ENOMEM;
5317 goto out;
5318 }
5319 btdata.bufferAddress = extents_rec;
5320 btdata.itemSize = sizeof(HFSPlusExtentRecord);
5321 btdata.itemCount = 1;
5322 extents = extents_rec[0];
5323 }
5324
5325 /* The overflow extent entry has to be shifted into an extent
5326 * overflow record. This means that we might have to shift
5327 * extent entries from all subsequent overflow records by one.
5328 * We start iteration from the first record to the last record,
5329 * examine one extent record in each iteration and shift one
5330 * extent entry from one record to another. We might have to
5331 * create a new extent record for the last extent entry for the
5332 * file.
5333 *
5334 * If shift_extent.blockCount is non-zero, it means that there is
5335 * an extent entry that needs to be shifted into the next
5336 * overflow extent record. We keep on going till there are no such
5337 * entries left to be shifted. This will also change the starting
5338 * allocation block number of the extent record which is part of
5339 * the key for the extent record in each iteration. Note that
5340 * because the extent record key is changing while we are searching,
5341 * the record can not be updated directly, instead it has to be
5342 * deleted and inserted again.
5343 */
5344 while (shift_extent.blockCount) {
5345 if (hfs_resize_debug) {
5346 printf ("hfs_split_extent: Will shift (%u,%u) into overflow record with startBlock=%u\n", shift_extent.startBlock, shift_extent.blockCount, read_recStartBlock);
5347 }
5348
5349 /* Search if there is any existing overflow extent record
5350 * that matches the current file and the logical start block
5351 * number.
5352 *
5353 * For this, the logical start block number in the key is
5354 * the value calculated based on the logical start block
5355 * number of the current extent record and the total number
5356 * of blocks existing in the current extent record.
5357 */
5358 if (is_xattr) {
5359 xattr_key->startBlock = read_recStartBlock;
5360 } else {
5361 extents_key->startBlock = read_recStartBlock;
5362 }
5363 error = BTSearchRecord(extent_info->fcb, &iterator, &btdata, &reclen, &iterator);
5364 if (error) {
5365 if (error != btNotFound) {
5366 printf ("hfs_split_extent: fileID=%u startBlock=%u BTSearchRecord error=%d\n", extent_info->fileID, read_recStartBlock, error);
5367 goto out;
5368 }
5369 /* No matching record was found, so create a new extent record.
5370 * Note: Since no record was found, we can't rely on the
5371 * btree key in the iterator any longer. This will be initialized
5372 * later before we insert the record.
5373 */
5374 create_record = true;
5375 }
5376
5377 /* The extra extent entry from the previous record is being inserted
5378 * as the first entry in the current extent record. This will change
5379 * the file allocation block number (FABN) of the current extent
5380 * record, which is the startBlock value from the extent record key.
5381 * Since one extra entry is being inserted in the record, the new
5382 * FABN for the record will less than old FABN by the number of blocks
5383 * in the new extent entry being inserted at the start. We have to
5384 * do this before we update read_recStartBlock to point at the
5385 * startBlock of the following record.
5386 */
5387 write_recStartBlock = read_recStartBlock - shift_extent.blockCount;
5388 if (hfs_resize_debug) {
5389 if (create_record) {
5390 printf ("hfs_split_extent: No records found for startBlock=%u, will create new with startBlock=%u\n", read_recStartBlock, write_recStartBlock);
5391 }
5392 }
5393
5394 /* Now update the read_recStartBlock to account for total number
5395 * of blocks in this extent record. It will now point to the
5396 * starting allocation block number for the next extent record.
5397 */
5398 for (i = 0; i < kHFSPlusExtentDensity; i++) {
5399 if (extents[i].blockCount == 0) {
5400 break;
5401 }
5402 read_recStartBlock += extents[i].blockCount;
5403 }
5404
5405 if (create_record == true) {
5406 /* Initialize new record content with only one extent entry */
5407 bzero(extents, sizeof(HFSPlusExtentRecord));
5408 /* The new record will contain only one extent entry */
5409 extents[0] = shift_extent;
5410 /* There are no more overflow extents to be shifted */
5411 shift_extent.startBlock = shift_extent.blockCount = 0;
5412
5413 if (is_xattr) {
5414 /* BTSearchRecord above returned btNotFound,
5415 * but since the attribute btree is never empty
5416 * if we are trying to insert new overflow
5417 * record for the xattrs, the extents_key will
5418 * contain correct data. So we don't need to
5419 * re-initialize it again like below.
5420 */
5421
5422 /* Initialize the new xattr record */
5423 xattr_rec->recordType = kHFSPlusAttrExtents;
5424 xattr_rec->overflowExtents.reserved = 0;
5425 reclen = sizeof(HFSPlusAttrExtents);
5426 } else {
5427 /* BTSearchRecord above returned btNotFound,
5428 * which means that extents_key content might
5429 * not correspond to the record that we are
5430 * trying to create, especially when the extents
5431 * overflow btree is empty. So we reinitialize
5432 * the extents_key again always.
5433 */
5434 extents_key->keyLength = kHFSPlusExtentKeyMaximumLength;
5435 extents_key->forkType = extent_info->forkType;
5436 extents_key->fileID = extent_info->fileID;
5437
5438 /* Initialize the new extent record */
5439 reclen = sizeof(HFSPlusExtentRecord);
5440 }
5441 } else {
5442 /* The overflow extent entry from previous record will be
5443 * the first entry in this extent record. If the last
5444 * extent entry in this record is valid, it will be shifted
5445 * into the following extent record as its first entry. So
5446 * save the last entry before shifting entries in current
5447 * record.
5448 */
5449 last_extent = extents[kHFSPlusExtentDensity-1];
5450
5451 /* Shift all entries by one index towards the end */
5452 for (i = kHFSPlusExtentDensity-2; i >= 0; i--) {
5453 extents[i+1] = extents[i];
5454 }
5455
5456 /* Overflow extent entry saved from previous record
5457 * is now the first entry in the current record.
5458 */
5459 extents[0] = shift_extent;
5460
5461 if (hfs_resize_debug) {
5462 printf ("hfs_split_extent: Shift overflow=(%u,%u) to record with updated startBlock=%u\n", shift_extent.startBlock, shift_extent.blockCount, write_recStartBlock);
5463 }
5464
5465 /* The last entry from current record will be the
5466 * overflow entry which will be the first entry for
5467 * the following extent record.
5468 */
5469 shift_extent = last_extent;
5470
5471 /* Since the key->startBlock is being changed for this record,
5472 * it should be deleted and inserted with the new key.
5473 */
5474 error = BTDeleteRecord(extent_info->fcb, &iterator);
5475 if (error) {
5476 printf ("hfs_split_extent: fileID=%u startBlock=%u BTDeleteRecord error=%d\n", extent_info->fileID, read_recStartBlock, error);
5477 goto out;
5478 }
5479 if (hfs_resize_debug) {
5480 printf ("hfs_split_extent: Deleted extent record with startBlock=%u\n", (is_xattr ? xattr_key->startBlock : extents_key->startBlock));
5481 }
5482 }
5483
5484 /* Insert the newly created or modified extent record */
5485 bzero(&iterator.hint, sizeof(iterator.hint));
5486 if (is_xattr) {
5487 xattr_key->startBlock = write_recStartBlock;
5488 } else {
5489 extents_key->startBlock = write_recStartBlock;
5490 }
5491 error = BTInsertRecord(extent_info->fcb, &iterator, &btdata, reclen);
5492 if (error) {
5493 printf ("hfs_split_extent: fileID=%u, startBlock=%u BTInsertRecord error=%d\n", extent_info->fileID, write_recStartBlock, error);
5494 goto out;
5495 }
5496 if (hfs_resize_debug) {
5497 printf ("hfs_split_extent: Inserted extent record with startBlock=%u\n", write_recStartBlock);
5498 }
5499 }
5500
5501 out:
5502 /*
5503 * Extents overflow btree or attributes btree headers might have
5504 * been modified during the split/shift operation, so flush the
5505 * changes to the disk while we are inside journal transaction.
5506 * We should only be able to generate I/O that modifies the B-Tree
5507 * header nodes while we're in the middle of a journal transaction.
5508 * Otherwise it might result in panic during unmount.
5509 */
5510 BTFlushPath(extent_info->fcb);
5511
5512 if (extents_rec) {
5513 FREE (extents_rec, M_TEMP);
5514 }
5515 if (xattr_rec) {
5516 FREE (xattr_rec, M_TEMP);
5517 }
5518 return error;
5519 }
5520
5521
5522 /*
5523 * Relocate an extent if it lies beyond the expected end of volume.
5524 *
5525 * This function is called for every extent of the file being relocated.
5526 * It allocates space for relocation, copies the data, deallocates
5527 * the old extent, and update corresponding on-disk extent. If the function
5528 * does not find contiguous space to relocate an extent, it splits the
5529 * extent in smaller size to be able to relocate it out of the area of
5530 * disk being reclaimed. As an optimization, if an extent lies partially
5531 * in the area of the disk being reclaimed, it is split so that we only
5532 * have to relocate the area that was overlapping with the area of disk
5533 * being reclaimed.
5534 *
5535 * Note that every extent is relocated in its own transaction so that
5536 * they do not overwhelm the journal. This function handles the extent
5537 * record that exists in the catalog record, extent record from overflow
5538 * extents btree, and extents for large EAs.
5539 *
5540 * Inputs:
5541 * extent_info - This is the structure that contains state about
5542 * the current file, extent, and extent record that
5543 * is being relocated. This structure is shared
5544 * among code that traverses through all the extents
5545 * of the file, code that relocates extents, and
5546 * code that splits the extent.
5547 */
5548 static int
5549 hfs_reclaim_extent(struct hfsmount *hfsmp, const u_long allocLimit, struct hfs_reclaim_extent_info *extent_info, vfs_context_t context)
5550 {
5551 int error = 0;
5552 int index;
5553 struct cnode *cp;
5554 u_int32_t oldStartBlock;
5555 u_int32_t oldBlockCount;
5556 u_int32_t newStartBlock;
5557 u_int32_t newBlockCount;
5558 u_int32_t roundedBlockCount;
5559 uint16_t node_size;
5560 uint32_t remainder_blocks;
5561 u_int32_t alloc_flags;
5562 int blocks_allocated = false;
5563
5564 index = extent_info->extent_index;
5565 cp = VTOC(extent_info->vp);
5566
5567 oldStartBlock = extent_info->extents[index].startBlock;
5568 oldBlockCount = extent_info->extents[index].blockCount;
5569
5570 if (0 && hfs_resize_debug) {
5571 printf ("hfs_reclaim_extent: Examine record:%u recStartBlock=%u, %u:(%u,%u)\n", extent_info->overflow_count, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount);
5572 }
5573
5574 /* If the current extent lies completely within allocLimit,
5575 * it does not require any relocation.
5576 */
5577 if ((oldStartBlock + oldBlockCount) <= allocLimit) {
5578 extent_info->cur_blockCount += oldBlockCount;
5579 return error;
5580 }
5581
5582 /* Every extent should be relocated in its own transaction
5583 * to make sure that we don't overflow the journal buffer.
5584 */
5585 error = hfs_start_transaction(hfsmp);
5586 if (error) {
5587 return error;
5588 }
5589 extent_info->lockflags = hfs_systemfile_lock(hfsmp, extent_info->lockflags, HFS_EXCLUSIVE_LOCK);
5590
5591 /* Check if the extent lies partially in the area to reclaim,
5592 * i.e. it starts before allocLimit and ends beyond allocLimit.
5593 * We have already skipped extents that lie completely within
5594 * allocLimit in the check above, so we only check for the
5595 * startBlock. If it lies partially, split it so that we
5596 * only relocate part of the extent.
5597 */
5598 if (oldStartBlock < allocLimit) {
5599 newBlockCount = allocLimit - oldStartBlock;
5600
5601 if (hfs_resize_debug) {
5602 int idx = extent_info->extent_index;
5603 printf ("hfs_reclaim_extent: Split straddling extent %u:(%u,%u) for %u blocks\n", idx, extent_info->extents[idx].startBlock, extent_info->extents[idx].blockCount, newBlockCount);
5604 }
5605
5606 /* If the extent belongs to a btree, check and trim
5607 * it to be multiple of the node size.
5608 */
5609 if (extent_info->is_sysfile) {
5610 node_size = get_btree_nodesize(extent_info->vp);
5611 /* If the btree node size is less than the block size,
5612 * splitting this extent will not split a node across
5613 * different extents. So we only check and trim if
5614 * node size is more than the allocation block size.
5615 */
5616 if (node_size > hfsmp->blockSize) {
5617 remainder_blocks = newBlockCount % (node_size / hfsmp->blockSize);
5618 if (remainder_blocks) {
5619 newBlockCount -= remainder_blocks;
5620 if (hfs_resize_debug) {
5621 printf ("hfs_reclaim_extent: Round-down newBlockCount to be multiple of nodeSize, node_allocblks=%u, old=%u, new=%u\n", node_size/hfsmp->blockSize, newBlockCount + remainder_blocks, newBlockCount);
5622 }
5623 }
5624 }
5625 /* The newBlockCount is zero because of rounding-down so that
5626 * btree nodes are not split across extents. Therefore this
5627 * straddling extent across resize-boundary does not require
5628 * splitting. Skip over to relocating of complete extent.
5629 */
5630 if (newBlockCount == 0) {
5631 if (hfs_resize_debug) {
5632 printf ("hfs_reclaim_extent: After round-down newBlockCount=0, skip split, relocate full extent\n");
5633 }
5634 goto relocate_full_extent;
5635 }
5636 }
5637
5638 /* Split the extents into two parts --- the first extent lies
5639 * completely within allocLimit and therefore does not require
5640 * relocation. The second extent will require relocation which
5641 * will be handled when the caller calls this function again
5642 * for the next extent.
5643 */
5644 error = hfs_split_extent(extent_info, newBlockCount);
5645 if (error == 0) {
5646 /* Split success, no relocation required */
5647 goto out;
5648 }
5649 /* Split failed, so try to relocate entire extent */
5650 if (hfs_resize_debug) {
5651 int idx = extent_info->extent_index;
5652 printf ("hfs_reclaim_extent: Split straddling extent %u:(%u,%u) for %u blocks failed, relocate full extent\n", idx, extent_info->extents[idx].startBlock, extent_info->extents[idx].blockCount, newBlockCount);
5653 }
5654 }
5655
5656 relocate_full_extent:
5657 /* At this point, the current extent requires relocation.
5658 * We will try to allocate space equal to the size of the extent
5659 * being relocated first to try to relocate it without splitting.
5660 * If the allocation fails, we will try to allocate contiguous
5661 * blocks out of metadata zone. If that allocation also fails,
5662 * then we will take a whatever contiguous block run is returned
5663 * by the allocation, split the extent into two parts, and then
5664 * relocate the first splitted extent.
5665 */
5666 alloc_flags = HFS_ALLOC_FORCECONTIG | HFS_ALLOC_SKIPFREEBLKS;
5667 if (extent_info->is_sysfile) {
5668 alloc_flags |= HFS_ALLOC_METAZONE;
5669 }
5670
5671 error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount, alloc_flags,
5672 &newStartBlock, &newBlockCount);
5673 if ((extent_info->is_sysfile == false) &&
5674 ((error == dskFulErr) || (error == ENOSPC))) {
5675 /* For non-system files, try reallocating space in metadata zone */
5676 alloc_flags |= HFS_ALLOC_METAZONE;
5677 error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount,
5678 alloc_flags, &newStartBlock, &newBlockCount);
5679 }
5680 if ((error == dskFulErr) || (error == ENOSPC)) {
5681 /* We did not find desired contiguous space for this extent.
5682 * So try to allocate the maximum contiguous space available.
5683 */
5684 alloc_flags &= ~HFS_ALLOC_FORCECONTIG;
5685
5686 error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount,
5687 alloc_flags, &newStartBlock, &newBlockCount);
5688 if (error) {
5689 printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) BlockAllocate error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error);
5690 goto out;
5691 }
5692 blocks_allocated = true;
5693
5694 /* The number of blocks allocated is less than the requested
5695 * number of blocks. For btree extents, check and trim the
5696 * extent to be multiple of the node size.
5697 */
5698 if (extent_info->is_sysfile) {
5699 node_size = get_btree_nodesize(extent_info->vp);
5700 if (node_size > hfsmp->blockSize) {
5701 remainder_blocks = newBlockCount % (node_size / hfsmp->blockSize);
5702 if (remainder_blocks) {
5703 roundedBlockCount = newBlockCount - remainder_blocks;
5704 /* Free tail-end blocks of the newly allocated extent */
5705 BlockDeallocate(hfsmp, newStartBlock + roundedBlockCount,
5706 newBlockCount - roundedBlockCount,
5707 HFS_ALLOC_SKIPFREEBLKS);
5708 newBlockCount = roundedBlockCount;
5709 if (hfs_resize_debug) {
5710 printf ("hfs_reclaim_extent: Fixing extent block count, node_blks=%u, old=%u, new=%u\n", node_size/hfsmp->blockSize, newBlockCount + remainder_blocks, newBlockCount);
5711 }
5712 if (newBlockCount == 0) {
5713 printf ("hfs_reclaim_extent: Not enough contiguous blocks available to relocate fileID=%d\n", extent_info->fileID);
5714 error = ENOSPC;
5715 goto out;
5716 }
5717 }
5718 }
5719 }
5720
5721 /* The number of blocks allocated is less than the number of
5722 * blocks requested, so split this extent --- the first extent
5723 * will be relocated as part of this function call and the caller
5724 * will handle relocating the second extent by calling this
5725 * function again for the second extent.
5726 */
5727 error = hfs_split_extent(extent_info, newBlockCount);
5728 if (error) {
5729 printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) split error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error);
5730 goto out;
5731 }
5732 oldBlockCount = newBlockCount;
5733 }
5734 if (error) {
5735 printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) contig BlockAllocate error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error);
5736 goto out;
5737 }
5738 blocks_allocated = true;
5739
5740 /* Copy data from old location to new location */
5741 error = hfs_copy_extent(hfsmp, extent_info->vp, oldStartBlock,
5742 newStartBlock, newBlockCount, context);
5743 if (error) {
5744 printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u)=>(%u,%u) hfs_copy_extent error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, newStartBlock, newBlockCount, error);
5745 goto out;
5746 }
5747
5748 /* Update the extent record with the new start block information */
5749 extent_info->extents[index].startBlock = newStartBlock;
5750
5751 /* Sync the content back to the disk */
5752 if (extent_info->catalog_fp) {
5753 /* Update the extents in catalog record */
5754 if (extent_info->is_dirlink) {
5755 error = cat_update_dirlink(hfsmp, extent_info->forkType,
5756 extent_info->dirlink_desc, extent_info->dirlink_attr,
5757 &(extent_info->dirlink_fork->ff_data));
5758 } else {
5759 cp->c_flag |= C_MODIFIED;
5760 /* If this is a system file, sync volume headers on disk */
5761 if (extent_info->is_sysfile) {
5762 error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
5763 }
5764 }
5765 } else {
5766 /* Replace record for extents overflow or extents-based xattrs */
5767 error = BTReplaceRecord(extent_info->fcb, extent_info->iterator,
5768 &(extent_info->btdata), extent_info->recordlen);
5769 }
5770 if (error) {
5771 printf ("hfs_reclaim_extent: fileID=%u, update record error=%u\n", extent_info->fileID, error);
5772 goto out;
5773 }
5774
5775 /* Deallocate the old extent */
5776 error = BlockDeallocate(hfsmp, oldStartBlock, oldBlockCount, HFS_ALLOC_SKIPFREEBLKS);
5777 if (error) {
5778 printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) BlockDeallocate error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error);
5779 goto out;
5780 }
5781 extent_info->blocks_relocated += newBlockCount;
5782
5783 if (hfs_resize_debug) {
5784 printf ("hfs_reclaim_extent: Relocated record:%u %u:(%u,%u) to (%u,%u)\n", extent_info->overflow_count, index, oldStartBlock, oldBlockCount, newStartBlock, newBlockCount);
5785 }
5786
5787 out:
5788 if (error != 0) {
5789 if (blocks_allocated == true) {
5790 BlockDeallocate(hfsmp, newStartBlock, newBlockCount, HFS_ALLOC_SKIPFREEBLKS);
5791 }
5792 } else {
5793 /* On success, increment the total allocation blocks processed */
5794 extent_info->cur_blockCount += newBlockCount;
5795 }
5796
5797 hfs_systemfile_unlock(hfsmp, extent_info->lockflags);
5798
5799 /* For a non-system file, if an extent entry from catalog record
5800 * was modified, sync the in-memory changes to the catalog record
5801 * on disk before ending the transaction.
5802 */
5803 if ((extent_info->catalog_fp) &&
5804 (extent_info->is_sysfile == false)) {
5805 (void) hfs_update(extent_info->vp, MNT_WAIT);
5806 }
5807
5808 hfs_end_transaction(hfsmp);
5809
5810 return error;
5811 }
5812
5813 /* Report intermediate progress during volume resize */
5814 static void
5815 hfs_truncatefs_progress(struct hfsmount *hfsmp)
5816 {
5817 u_int32_t cur_progress = 0;
5818
5819 hfs_resize_progress(hfsmp, &cur_progress);
5820 if (cur_progress > (hfsmp->hfs_resize_progress + 9)) {
5821 printf("hfs_truncatefs: %d%% done...\n", cur_progress);
5822 hfsmp->hfs_resize_progress = cur_progress;
5823 }
5824 return;
5825 }
5826
5827 /*
5828 * Reclaim space at the end of a volume for given file and forktype.
5829 *
5830 * This routine attempts to move any extent which contains allocation blocks
5831 * at or after "allocLimit." A separate transaction is used for every extent
5832 * that needs to be moved. If there is not contiguous space available for
5833 * moving an extent, it can be split into smaller extents. The contents of
5834 * any moved extents are read and written via the volume's device vnode --
5835 * NOT via "vp." During the move, moved blocks which are part of a transaction
5836 * have their physical block numbers invalidated so they will eventually be
5837 * written to their new locations.
5838 *
5839 * This function is also called for directory hard links. Directory hard links
5840 * are regular files with no data fork and resource fork that contains alias
5841 * information for backward compatibility with pre-Leopard systems. However
5842 * non-Mac OS X implementation can add/modify data fork or resource fork
5843 * information to directory hard links, so we check, and if required, relocate
5844 * both data fork and resource fork.
5845 *
5846 * Inputs:
5847 * hfsmp The volume being resized.
5848 * vp The vnode for the system file.
5849 * fileID ID of the catalog record that needs to be relocated
5850 * forktype The type of fork that needs relocated,
5851 * kHFSResourceForkType for resource fork,
5852 * kHFSDataForkType for data fork
5853 * allocLimit Allocation limit for the new volume size,
5854 * do not use this block or beyond. All extents
5855 * that use this block or any blocks beyond this limit
5856 * will be relocated.
5857 *
5858 * Side Effects:
5859 * hfsmp->hfs_resize_blocksmoved is incremented by the number of allocation
5860 * blocks that were relocated.
5861 */
5862 static int
5863 hfs_reclaim_file(struct hfsmount *hfsmp, struct vnode *vp, u_int32_t fileID,
5864 u_int8_t forktype, u_long allocLimit, vfs_context_t context)
5865 {
5866 int error = 0;
5867 struct hfs_reclaim_extent_info *extent_info;
5868 int i;
5869 int lockflags = 0;
5870 struct cnode *cp;
5871 struct filefork *fp;
5872 int took_truncate_lock = false;
5873 int release_desc = false;
5874 HFSPlusExtentKey *key;
5875
5876 /* If there is no vnode for this file, then there's nothing to do. */
5877 if (vp == NULL) {
5878 return 0;
5879 }
5880
5881 cp = VTOC(vp);
5882
5883 if (hfs_resize_debug) {
5884 const char *filename = (const char *) cp->c_desc.cd_nameptr;
5885 int namelen = cp->c_desc.cd_namelen;
5886
5887 if (filename == NULL) {
5888 filename = "";
5889 namelen = 0;
5890 }
5891 printf("hfs_reclaim_file: reclaiming '%.*s'\n", namelen, filename);
5892 }
5893
5894 MALLOC(extent_info, struct hfs_reclaim_extent_info *,
5895 sizeof(struct hfs_reclaim_extent_info), M_TEMP, M_WAITOK);
5896 if (extent_info == NULL) {
5897 return ENOMEM;
5898 }
5899 bzero(extent_info, sizeof(struct hfs_reclaim_extent_info));
5900 extent_info->vp = vp;
5901 extent_info->fileID = fileID;
5902 extent_info->forkType = forktype;
5903 extent_info->is_sysfile = vnode_issystem(vp);
5904 if (vnode_isdir(vp) && (cp->c_flag & C_HARDLINK)) {
5905 extent_info->is_dirlink = true;
5906 }
5907 /* We always need allocation bitmap and extent btree lock */
5908 lockflags = SFL_BITMAP | SFL_EXTENTS;
5909 if ((fileID == kHFSCatalogFileID) || (extent_info->is_dirlink == true)) {
5910 lockflags |= SFL_CATALOG;
5911 } else if (fileID == kHFSAttributesFileID) {
5912 lockflags |= SFL_ATTRIBUTE;
5913 } else if (fileID == kHFSStartupFileID) {
5914 lockflags |= SFL_STARTUP;
5915 }
5916 extent_info->lockflags = lockflags;
5917 extent_info->fcb = VTOF(hfsmp->hfs_extents_vp);
5918
5919 /* Flush data associated with current file on disk.
5920 *
5921 * If the current vnode is directory hard link, no flushing of
5922 * journal or vnode is required. The current kernel does not
5923 * modify data/resource fork of directory hard links, so nothing
5924 * will be in the cache. If a directory hard link is newly created,
5925 * the resource fork data is written directly using devvp and
5926 * the code that actually relocates data (hfs_copy_extent()) also
5927 * uses devvp for its I/O --- so they will see a consistent copy.
5928 */
5929 if (extent_info->is_sysfile) {
5930 /* If the current vnode is system vnode, flush journal
5931 * to make sure that all data is written to the disk.
5932 */
5933 error = hfs_journal_flush(hfsmp, TRUE);
5934 if (error) {
5935 printf ("hfs_reclaim_file: journal_flush returned %d\n", error);
5936 goto out;
5937 }
5938 } else if (extent_info->is_dirlink == false) {
5939 /* Flush all blocks associated with this regular file vnode.
5940 * Normally there should not be buffer cache blocks for regular
5941 * files, but for objects like symlinks, we can have buffer cache
5942 * blocks associated with the vnode. Therefore we call
5943 * buf_flushdirtyblks() also.
5944 */
5945 buf_flushdirtyblks(vp, 0, BUF_SKIP_LOCKED, "hfs_reclaim_file");
5946
5947 hfs_unlock(cp);
5948 hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK);
5949 took_truncate_lock = true;
5950 (void) cluster_push(vp, 0);
5951 error = hfs_lock(cp, HFS_FORCE_LOCK);
5952 if (error) {
5953 goto out;
5954 }
5955
5956 /* If the file no longer exists, nothing left to do */
5957 if (cp->c_flag & C_NOEXISTS) {
5958 error = 0;
5959 goto out;
5960 }
5961
5962 /* Wait for any in-progress writes to this vnode to complete, so that we'll
5963 * be copying consistent bits. (Otherwise, it's possible that an async
5964 * write will complete to the old extent after we read from it. That
5965 * could lead to corruption.)
5966 */
5967 error = vnode_waitforwrites(vp, 0, 0, 0, "hfs_reclaim_file");
5968 if (error) {
5969 goto out;
5970 }
5971 }
5972
5973 if (hfs_resize_debug) {
5974 printf("hfs_reclaim_file: === Start reclaiming %sfork for %sid=%u ===\n", (forktype ? "rsrc" : "data"), (extent_info->is_dirlink ? "dirlink" : "file"), fileID);
5975 }
5976
5977 if (extent_info->is_dirlink) {
5978 MALLOC(extent_info->dirlink_desc, struct cat_desc *,
5979 sizeof(struct cat_desc), M_TEMP, M_WAITOK);
5980 MALLOC(extent_info->dirlink_attr, struct cat_attr *,
5981 sizeof(struct cat_attr), M_TEMP, M_WAITOK);
5982 MALLOC(extent_info->dirlink_fork, struct filefork *,
5983 sizeof(struct filefork), M_TEMP, M_WAITOK);
5984 if ((extent_info->dirlink_desc == NULL) ||
5985 (extent_info->dirlink_attr == NULL) ||
5986 (extent_info->dirlink_fork == NULL)) {
5987 error = ENOMEM;
5988 goto out;
5989 }
5990
5991 /* Lookup catalog record for directory hard link and
5992 * create a fake filefork for the value looked up from
5993 * the disk.
5994 */
5995 fp = extent_info->dirlink_fork;
5996 bzero(extent_info->dirlink_fork, sizeof(struct filefork));
5997 extent_info->dirlink_fork->ff_cp = cp;
5998 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
5999 error = cat_lookup_dirlink(hfsmp, fileID, forktype,
6000 extent_info->dirlink_desc, extent_info->dirlink_attr,
6001 &(extent_info->dirlink_fork->ff_data));
6002 hfs_systemfile_unlock(hfsmp, lockflags);
6003 if (error) {
6004 printf ("hfs_reclaim_file: cat_lookup_dirlink for fileID=%u returned error=%u\n", fileID, error);
6005 goto out;
6006 }
6007 release_desc = true;
6008 } else {
6009 fp = VTOF(vp);
6010 }
6011
6012 extent_info->catalog_fp = fp;
6013 extent_info->recStartBlock = 0;
6014 extent_info->extents = extent_info->catalog_fp->ff_extents;
6015 /* Relocate extents from the catalog record */
6016 for (i = 0; i < kHFSPlusExtentDensity; ++i) {
6017 if (fp->ff_extents[i].blockCount == 0) {
6018 break;
6019 }
6020 extent_info->extent_index = i;
6021 error = hfs_reclaim_extent(hfsmp, allocLimit, extent_info, context);
6022 if (error) {
6023 printf ("hfs_reclaim_file: fileID=%u #%d %u:(%u,%u) hfs_reclaim_extent error=%d\n", fileID, extent_info->overflow_count, i, fp->ff_extents[i].startBlock, fp->ff_extents[i].blockCount, error);
6024 goto out;
6025 }
6026 }
6027
6028 /* If the number of allocation blocks processed for reclaiming
6029 * are less than total number of blocks for the file, continuing
6030 * working on overflow extents record.
6031 */
6032 if (fp->ff_blocks <= extent_info->cur_blockCount) {
6033 if (0 && hfs_resize_debug) {
6034 printf ("hfs_reclaim_file: Nothing more to relocate, offset=%d, ff_blocks=%u, cur_blockCount=%u\n", i, fp->ff_blocks, extent_info->cur_blockCount);
6035 }
6036 goto out;
6037 }
6038
6039 if (hfs_resize_debug) {
6040 printf ("hfs_reclaim_file: Will check overflow records, offset=%d, ff_blocks=%u, cur_blockCount=%u\n", i, fp->ff_blocks, extent_info->cur_blockCount);
6041 }
6042
6043 MALLOC(extent_info->iterator, struct BTreeIterator *, sizeof(struct BTreeIterator), M_TEMP, M_WAITOK);
6044 if (extent_info->iterator == NULL) {
6045 error = ENOMEM;
6046 goto out;
6047 }
6048 bzero(extent_info->iterator, sizeof(struct BTreeIterator));
6049 key = (HFSPlusExtentKey *) &(extent_info->iterator->key);
6050 key->keyLength = kHFSPlusExtentKeyMaximumLength;
6051 key->forkType = forktype;
6052 key->fileID = fileID;
6053 key->startBlock = extent_info->cur_blockCount;
6054
6055 extent_info->btdata.bufferAddress = extent_info->record.overflow;
6056 extent_info->btdata.itemSize = sizeof(HFSPlusExtentRecord);
6057 extent_info->btdata.itemCount = 1;
6058
6059 extent_info->catalog_fp = NULL;
6060
6061 /* Search the first overflow extent with expected startBlock as 'cur_blockCount' */
6062 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
6063 error = BTSearchRecord(extent_info->fcb, extent_info->iterator,
6064 &(extent_info->btdata), &(extent_info->recordlen),
6065 extent_info->iterator);
6066 hfs_systemfile_unlock(hfsmp, lockflags);
6067 while (error == 0) {
6068 extent_info->overflow_count++;
6069 extent_info->recStartBlock = key->startBlock;
6070 extent_info->extents = extent_info->record.overflow;
6071 for (i = 0; i < kHFSPlusExtentDensity; i++) {
6072 if (extent_info->record.overflow[i].blockCount == 0) {
6073 goto out;
6074 }
6075 extent_info->extent_index = i;
6076 error = hfs_reclaim_extent(hfsmp, allocLimit, extent_info, context);
6077 if (error) {
6078 printf ("hfs_reclaim_file: fileID=%u #%d %u:(%u,%u) hfs_reclaim_extent error=%d\n", fileID, extent_info->overflow_count, i, extent_info->record.overflow[i].startBlock, extent_info->record.overflow[i].blockCount, error);
6079 goto out;
6080 }
6081 }
6082
6083 /* Look for more overflow records */
6084 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
6085 error = BTIterateRecord(extent_info->fcb, kBTreeNextRecord,
6086 extent_info->iterator, &(extent_info->btdata),
6087 &(extent_info->recordlen));
6088 hfs_systemfile_unlock(hfsmp, lockflags);
6089 if (error) {
6090 break;
6091 }
6092 /* Stop when we encounter a different file or fork. */
6093 if ((key->fileID != fileID) || (key->forkType != forktype)) {
6094 break;
6095 }
6096 }
6097 if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) {
6098 error = 0;
6099 }
6100
6101 out:
6102 /* If any blocks were relocated, account them and report progress */
6103 if (extent_info->blocks_relocated) {
6104 hfsmp->hfs_resize_blocksmoved += extent_info->blocks_relocated;
6105 hfs_truncatefs_progress(hfsmp);
6106 if (fileID < kHFSFirstUserCatalogNodeID) {
6107 printf ("hfs_reclaim_file: Relocated %u blocks from fileID=%u on \"%s\"\n",
6108 extent_info->blocks_relocated, fileID, hfsmp->vcbVN);
6109 }
6110 }
6111 if (extent_info->iterator) {
6112 FREE(extent_info->iterator, M_TEMP);
6113 }
6114 if (release_desc == true) {
6115 cat_releasedesc(extent_info->dirlink_desc);
6116 }
6117 if (extent_info->dirlink_desc) {
6118 FREE(extent_info->dirlink_desc, M_TEMP);
6119 }
6120 if (extent_info->dirlink_attr) {
6121 FREE(extent_info->dirlink_attr, M_TEMP);
6122 }
6123 if (extent_info->dirlink_fork) {
6124 FREE(extent_info->dirlink_fork, M_TEMP);
6125 }
6126 if ((extent_info->blocks_relocated != 0) && (extent_info->is_sysfile == false)) {
6127 (void) hfs_update(vp, MNT_WAIT);
6128 }
6129 if (took_truncate_lock) {
6130 hfs_unlock_truncate(cp, 0);
6131 }
6132 if (extent_info) {
6133 FREE(extent_info, M_TEMP);
6134 }
6135 if (hfs_resize_debug) {
6136 printf("hfs_reclaim_file: === Finished relocating %sfork for fileid=%u (error=%d) ===\n", (forktype ? "rsrc" : "data"), fileID, error);
6137 }
6138
6139 return error;
6140 }
6141
6142
6143 /*
6144 * This journal_relocate callback updates the journal info block to point
6145 * at the new journal location. This write must NOT be done using the
6146 * transaction. We must write the block immediately. We must also force
6147 * it to get to the media so that the new journal location will be seen by
6148 * the replay code before we can safely let journaled blocks be written
6149 * to their normal locations.
6150 *
6151 * The tests for journal_uses_fua below are mildly hacky. Since the journal
6152 * and the file system are both on the same device, I'm leveraging what
6153 * the journal has decided about FUA.
6154 */
6155 struct hfs_journal_relocate_args {
6156 struct hfsmount *hfsmp;
6157 vfs_context_t context;
6158 u_int32_t newStartBlock;
6159 u_int32_t newBlockCount;
6160 };
6161
6162 static errno_t
6163 hfs_journal_relocate_callback(void *_args)
6164 {
6165 int error;
6166 struct hfs_journal_relocate_args *args = _args;
6167 struct hfsmount *hfsmp = args->hfsmp;
6168 buf_t bp;
6169 JournalInfoBlock *jibp;
6170
6171 error = buf_meta_bread(hfsmp->hfs_devvp,
6172 hfsmp->vcbJinfoBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size),
6173 hfsmp->blockSize, vfs_context_ucred(args->context), &bp);
6174 if (error) {
6175 printf("hfs_journal_relocate_callback: failed to read JIB (%d)\n", error);
6176 if (bp) {
6177 buf_brelse(bp);
6178 }
6179 return error;
6180 }
6181 jibp = (JournalInfoBlock*) buf_dataptr(bp);
6182 jibp->offset = SWAP_BE64((u_int64_t)args->newStartBlock * hfsmp->blockSize);
6183 jibp->size = SWAP_BE64((u_int64_t)args->newBlockCount * hfsmp->blockSize);
6184 if (journal_uses_fua(hfsmp->jnl))
6185 buf_markfua(bp);
6186 error = buf_bwrite(bp);
6187 if (error) {
6188 printf("hfs_journal_relocate_callback: failed to write JIB (%d)\n", error);
6189 return error;
6190 }
6191 if (!journal_uses_fua(hfsmp->jnl)) {
6192 error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, args->context);
6193 if (error) {
6194 printf("hfs_journal_relocate_callback: DKIOCSYNCHRONIZECACHE failed (%d)\n", error);
6195 error = 0; /* Don't fail the operation. */
6196 }
6197 }
6198
6199 return error;
6200 }
6201
6202
6203 /* Type of resize operation in progress */
6204 #define HFS_RESIZE_TRUNCATE 1
6205 #define HFS_RESIZE_EXTEND 2
6206
6207 /*
6208 * Core function to relocate the journal file. This function takes the
6209 * journal size of the newly relocated journal --- the caller can
6210 * provide a new journal size if they want to change the size of
6211 * the journal. The function takes care of updating the journal info
6212 * block and all other data structures correctly.
6213 *
6214 * Note: This function starts a transaction and grabs the btree locks.
6215 */
6216 static int
6217 hfs_relocate_journal_file(struct hfsmount *hfsmp, u_int32_t jnl_size, int resize_type, vfs_context_t context)
6218 {
6219 int error;
6220 int journal_err;
6221 int lockflags;
6222 u_int32_t oldStartBlock;
6223 u_int32_t newStartBlock;
6224 u_int32_t oldBlockCount;
6225 u_int32_t newBlockCount;
6226 u_int32_t jnlBlockCount;
6227 u_int32_t alloc_skipfreeblks;
6228 struct cat_desc journal_desc;
6229 struct cat_attr journal_attr;
6230 struct cat_fork journal_fork;
6231 struct hfs_journal_relocate_args callback_args;
6232
6233 /* Calculate the number of allocation blocks required for the journal */
6234 jnlBlockCount = howmany(jnl_size, hfsmp->blockSize);
6235
6236 /*
6237 * During truncatefs(), the volume free block count is updated
6238 * before relocating data and reflects the total number of free
6239 * blocks that will exist on volume after the resize is successful.
6240 * This means that the allocation blocks required for relocation
6241 * have already been reserved and accounted for in the free block
6242 * count. Therefore, block allocation and deallocation routines
6243 * can skip the free block check by passing HFS_ALLOC_SKIPFREEBLKS
6244 * flag.
6245 *
6246 * This special handling is not required when the file system
6247 * is being extended as we want all the allocated and deallocated
6248 * blocks to be accounted for correctly.
6249 */
6250 if (resize_type == HFS_RESIZE_TRUNCATE) {
6251 alloc_skipfreeblks = HFS_ALLOC_SKIPFREEBLKS;
6252 } else {
6253 alloc_skipfreeblks = 0;
6254 }
6255
6256 error = hfs_start_transaction(hfsmp);
6257 if (error) {
6258 printf("hfs_relocate_journal_file: hfs_start_transaction returned %d\n", error);
6259 return error;
6260 }
6261 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
6262
6263 error = BlockAllocate(hfsmp, 1, jnlBlockCount, jnlBlockCount,
6264 HFS_ALLOC_METAZONE | HFS_ALLOC_FORCECONTIG | alloc_skipfreeblks,
6265 &newStartBlock, &newBlockCount);
6266 if (error) {
6267 printf("hfs_relocate_journal_file: BlockAllocate returned %d\n", error);
6268 goto fail;
6269 }
6270 if (newBlockCount != jnlBlockCount) {
6271 printf("hfs_relocate_journal_file: newBlockCount != jnlBlockCount (%u, %u)\n", newBlockCount, jnlBlockCount);
6272 goto free_fail;
6273 }
6274
6275 error = cat_idlookup(hfsmp, hfsmp->hfs_jnlfileid, 1, 0, &journal_desc, &journal_attr, &journal_fork);
6276 if (error) {
6277 printf("hfs_relocate_journal_file: cat_idlookup returned %d\n", error);
6278 goto free_fail;
6279 }
6280
6281 oldStartBlock = journal_fork.cf_extents[0].startBlock;
6282 oldBlockCount = journal_fork.cf_extents[0].blockCount;
6283 error = BlockDeallocate(hfsmp, oldStartBlock, oldBlockCount, alloc_skipfreeblks);
6284 if (error) {
6285 printf("hfs_relocate_journal_file: BlockDeallocate returned %d\n", error);
6286 goto free_fail;
6287 }
6288
6289 /* Update the catalog record for .journal */
6290 journal_fork.cf_size = newBlockCount * hfsmp->blockSize;
6291 journal_fork.cf_extents[0].startBlock = newStartBlock;
6292 journal_fork.cf_extents[0].blockCount = newBlockCount;
6293 journal_fork.cf_blocks = newBlockCount;
6294 error = cat_update(hfsmp, &journal_desc, &journal_attr, &journal_fork, NULL);
6295 cat_releasedesc(&journal_desc); /* all done with cat descriptor */
6296 if (error) {
6297 printf("hfs_relocate_journal_file: cat_update returned %d\n", error);
6298 goto free_fail;
6299 }
6300
6301 /*
6302 * If the journal is part of the file system, then tell the journal
6303 * code about the new location. If the journal is on an external
6304 * device, then just keep using it as-is.
6305 */
6306 if (hfsmp->jvp == hfsmp->hfs_devvp) {
6307 callback_args.hfsmp = hfsmp;
6308 callback_args.context = context;
6309 callback_args.newStartBlock = newStartBlock;
6310 callback_args.newBlockCount = newBlockCount;
6311
6312 error = journal_relocate(hfsmp->jnl, (off_t)newStartBlock*hfsmp->blockSize,
6313 (off_t)newBlockCount*hfsmp->blockSize, 0,
6314 hfs_journal_relocate_callback, &callback_args);
6315 if (error) {
6316 /* NOTE: journal_relocate will mark the journal invalid. */
6317 printf("hfs_relocate_journal_file: journal_relocate returned %d\n", error);
6318 goto fail;
6319 }
6320 if (hfs_resize_debug) {
6321 printf ("hfs_relocate_journal_file: Successfully relocated journal from (%u,%u) to (%u,%u)\n", oldStartBlock, oldBlockCount, newStartBlock, newBlockCount);
6322 }
6323 hfsmp->jnl_start = newStartBlock;
6324 hfsmp->jnl_size = (off_t)newBlockCount * hfsmp->blockSize;
6325 }
6326
6327 hfs_systemfile_unlock(hfsmp, lockflags);
6328 error = hfs_end_transaction(hfsmp);
6329 if (error) {
6330 printf("hfs_relocate_journal_file: hfs_end_transaction returned %d\n", error);
6331 }
6332
6333 return error;
6334
6335 free_fail:
6336 journal_err = BlockDeallocate(hfsmp, newStartBlock, newBlockCount, HFS_ALLOC_SKIPFREEBLKS);
6337 if (journal_err) {
6338 printf("hfs_relocate_journal_file: BlockDeallocate returned %d\n", error);
6339 hfs_mark_volume_inconsistent(hfsmp);
6340 }
6341 fail:
6342 hfs_systemfile_unlock(hfsmp, lockflags);
6343 (void) hfs_end_transaction(hfsmp);
6344 if (hfs_resize_debug) {
6345 printf ("hfs_relocate_journal_file: Error relocating journal file (error=%d)\n", error);
6346 }
6347 return error;
6348 }
6349
6350
6351 /*
6352 * Relocate the journal file when the file system is being truncated.
6353 * We do not down-size the journal when the file system size is
6354 * reduced, so we always provide the current journal size to the
6355 * relocate code.
6356 */
6357 static int
6358 hfs_reclaim_journal_file(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context)
6359 {
6360 int error = 0;
6361 u_int32_t startBlock;
6362 u_int32_t blockCount = hfsmp->jnl_size / hfsmp->blockSize;
6363
6364 /*
6365 * Figure out the location of the .journal file. When the journal
6366 * is on an external device, we need to look up the .journal file.
6367 */
6368 if (hfsmp->jvp == hfsmp->hfs_devvp) {
6369 startBlock = hfsmp->jnl_start;
6370 blockCount = hfsmp->jnl_size / hfsmp->blockSize;
6371 } else {
6372 u_int32_t fileid;
6373 u_int32_t old_jnlfileid;
6374 struct cat_attr attr;
6375 struct cat_fork fork;
6376
6377 /*
6378 * The cat_lookup inside GetFileInfo will fail because hfs_jnlfileid
6379 * is set, and it is trying to hide the .journal file. So temporarily
6380 * unset the field while calling GetFileInfo.
6381 */
6382 old_jnlfileid = hfsmp->hfs_jnlfileid;
6383 hfsmp->hfs_jnlfileid = 0;
6384 fileid = GetFileInfo(hfsmp, kHFSRootFolderID, ".journal", &attr, &fork);
6385 hfsmp->hfs_jnlfileid = old_jnlfileid;
6386 if (fileid != old_jnlfileid) {
6387 printf("hfs_reclaim_journal_file: cannot find .journal file!\n");
6388 return EIO;
6389 }
6390
6391 startBlock = fork.cf_extents[0].startBlock;
6392 blockCount = fork.cf_extents[0].blockCount;
6393 }
6394
6395 if (startBlock + blockCount <= allocLimit) {
6396 /* The journal file does not require relocation */
6397 return 0;
6398 }
6399
6400 error = hfs_relocate_journal_file(hfsmp, blockCount * hfsmp->blockSize, HFS_RESIZE_TRUNCATE, context);
6401 if (error == 0) {
6402 hfsmp->hfs_resize_blocksmoved += blockCount;
6403 hfs_truncatefs_progress(hfsmp);
6404 printf ("hfs_reclaim_journal_file: Relocated %u blocks from journal on \"%s\"\n",
6405 blockCount, hfsmp->vcbVN);
6406 }
6407
6408 return error;
6409 }
6410
6411
6412 /*
6413 * Move the journal info block to a new location. We have to make sure the
6414 * new copy of the journal info block gets to the media first, then change
6415 * the field in the volume header and the catalog record.
6416 */
6417 static int
6418 hfs_reclaim_journal_info_block(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context)
6419 {
6420 int error;
6421 int journal_err;
6422 int lockflags;
6423 u_int32_t oldBlock;
6424 u_int32_t newBlock;
6425 u_int32_t blockCount;
6426 struct cat_desc jib_desc;
6427 struct cat_attr jib_attr;
6428 struct cat_fork jib_fork;
6429 buf_t old_bp, new_bp;
6430
6431 if (hfsmp->vcbJinfoBlock <= allocLimit) {
6432 /* The journal info block does not require relocation */
6433 return 0;
6434 }
6435
6436 error = hfs_start_transaction(hfsmp);
6437 if (error) {
6438 printf("hfs_reclaim_journal_info_block: hfs_start_transaction returned %d\n", error);
6439 return error;
6440 }
6441 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
6442
6443 error = BlockAllocate(hfsmp, 1, 1, 1,
6444 HFS_ALLOC_METAZONE | HFS_ALLOC_FORCECONTIG | HFS_ALLOC_SKIPFREEBLKS,
6445 &newBlock, &blockCount);
6446 if (error) {
6447 printf("hfs_reclaim_journal_info_block: BlockAllocate returned %d\n", error);
6448 goto fail;
6449 }
6450 if (blockCount != 1) {
6451 printf("hfs_reclaim_journal_info_block: blockCount != 1 (%u)\n", blockCount);
6452 goto free_fail;
6453 }
6454 error = BlockDeallocate(hfsmp, hfsmp->vcbJinfoBlock, 1, HFS_ALLOC_SKIPFREEBLKS);
6455 if (error) {
6456 printf("hfs_reclaim_journal_info_block: BlockDeallocate returned %d\n", error);
6457 goto free_fail;
6458 }
6459
6460 /* Copy the old journal info block content to the new location */
6461 error = buf_meta_bread(hfsmp->hfs_devvp,
6462 hfsmp->vcbJinfoBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size),
6463 hfsmp->blockSize, vfs_context_ucred(context), &old_bp);
6464 if (error) {
6465 printf("hfs_reclaim_journal_info_block: failed to read JIB (%d)\n", error);
6466 if (old_bp) {
6467 buf_brelse(old_bp);
6468 }
6469 goto free_fail;
6470 }
6471 new_bp = buf_getblk(hfsmp->hfs_devvp,
6472 newBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size),
6473 hfsmp->blockSize, 0, 0, BLK_META);
6474 bcopy((char*)buf_dataptr(old_bp), (char*)buf_dataptr(new_bp), hfsmp->blockSize);
6475 buf_brelse(old_bp);
6476 if (journal_uses_fua(hfsmp->jnl))
6477 buf_markfua(new_bp);
6478 error = buf_bwrite(new_bp);
6479 if (error) {
6480 printf("hfs_reclaim_journal_info_block: failed to write new JIB (%d)\n", error);
6481 goto free_fail;
6482 }
6483 if (!journal_uses_fua(hfsmp->jnl)) {
6484 error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
6485 if (error) {
6486 printf("hfs_reclaim_journal_info_block: DKIOCSYNCHRONIZECACHE failed (%d)\n", error);
6487 /* Don't fail the operation. */
6488 }
6489 }
6490
6491 /* Update the catalog record for .journal_info_block */
6492 error = cat_idlookup(hfsmp, hfsmp->hfs_jnlinfoblkid, 1, 0, &jib_desc, &jib_attr, &jib_fork);
6493 if (error) {
6494 printf("hfs_reclaim_journal_info_block: cat_idlookup returned %d\n", error);
6495 goto fail;
6496 }
6497 oldBlock = jib_fork.cf_extents[0].startBlock;
6498 jib_fork.cf_size = hfsmp->blockSize;
6499 jib_fork.cf_extents[0].startBlock = newBlock;
6500 jib_fork.cf_extents[0].blockCount = 1;
6501 jib_fork.cf_blocks = 1;
6502 error = cat_update(hfsmp, &jib_desc, &jib_attr, &jib_fork, NULL);
6503 cat_releasedesc(&jib_desc); /* all done with cat descriptor */
6504 if (error) {
6505 printf("hfs_reclaim_journal_info_block: cat_update returned %d\n", error);
6506 goto fail;
6507 }
6508
6509 /* Update the pointer to the journal info block in the volume header. */
6510 hfsmp->vcbJinfoBlock = newBlock;
6511 error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
6512 if (error) {
6513 printf("hfs_reclaim_journal_info_block: hfs_flushvolumeheader returned %d\n", error);
6514 goto fail;
6515 }
6516 hfs_systemfile_unlock(hfsmp, lockflags);
6517 error = hfs_end_transaction(hfsmp);
6518 if (error) {
6519 printf("hfs_reclaim_journal_info_block: hfs_end_transaction returned %d\n", error);
6520 }
6521 error = hfs_journal_flush(hfsmp, FALSE);
6522 if (error) {
6523 printf("hfs_reclaim_journal_info_block: journal_flush returned %d\n", error);
6524 }
6525
6526 /* Account for the block relocated and print progress */
6527 hfsmp->hfs_resize_blocksmoved += 1;
6528 hfs_truncatefs_progress(hfsmp);
6529 if (!error) {
6530 printf ("hfs_reclaim_journal_info: Relocated 1 block from journal info on \"%s\"\n",
6531 hfsmp->vcbVN);
6532 if (hfs_resize_debug) {
6533 printf ("hfs_reclaim_journal_info_block: Successfully relocated journal info block from (%u,%u) to (%u,%u)\n", oldBlock, blockCount, newBlock, blockCount);
6534 }
6535 }
6536 return error;
6537
6538 free_fail:
6539 journal_err = BlockDeallocate(hfsmp, newBlock, blockCount, HFS_ALLOC_SKIPFREEBLKS);
6540 if (journal_err) {
6541 printf("hfs_reclaim_journal_info_block: BlockDeallocate returned %d\n", error);
6542 hfs_mark_volume_inconsistent(hfsmp);
6543 }
6544
6545 fail:
6546 hfs_systemfile_unlock(hfsmp, lockflags);
6547 (void) hfs_end_transaction(hfsmp);
6548 if (hfs_resize_debug) {
6549 printf ("hfs_reclaim_journal_info_block: Error relocating journal info block (error=%d)\n", error);
6550 }
6551 return error;
6552 }
6553
6554
6555 static u_int64_t
6556 calculate_journal_size(struct hfsmount *hfsmp, u_int32_t sector_size, u_int64_t sector_count)
6557 {
6558 u_int64_t journal_size;
6559 u_int32_t journal_scale;
6560
6561 #define DEFAULT_JOURNAL_SIZE (8*1024*1024)
6562 #define MAX_JOURNAL_SIZE (512*1024*1024)
6563
6564 /* Calculate the journal size for this volume. We want
6565 * at least 8 MB of journal for each 100 GB of disk space.
6566 * We cap the size at 512 MB, unless the allocation block
6567 * size is larger, in which case, we use one allocation
6568 * block.
6569 */
6570 journal_scale = (sector_size * sector_count) / ((u_int64_t)100 * 1024 * 1024 * 1024);
6571 journal_size = DEFAULT_JOURNAL_SIZE * (journal_scale + 1);
6572 if (journal_size > MAX_JOURNAL_SIZE) {
6573 journal_size = MAX_JOURNAL_SIZE;
6574 }
6575 if (journal_size < hfsmp->blockSize) {
6576 journal_size = hfsmp->blockSize;
6577 }
6578 return journal_size;
6579 }
6580
6581
6582 /*
6583 * Calculate the expected journal size based on current partition size.
6584 * If the size of the current journal is less than the calculated size,
6585 * force journal relocation with the new journal size.
6586 */
6587 static int
6588 hfs_extend_journal(struct hfsmount *hfsmp, u_int32_t sector_size, u_int64_t sector_count, vfs_context_t context)
6589 {
6590 int error = 0;
6591 u_int64_t calc_journal_size;
6592
6593 if (hfsmp->jvp != hfsmp->hfs_devvp) {
6594 if (hfs_resize_debug) {
6595 printf("hfs_extend_journal: not resizing the journal because it is on an external device.\n");
6596 }
6597 return 0;
6598 }
6599
6600 calc_journal_size = calculate_journal_size(hfsmp, sector_size, sector_count);
6601 if (calc_journal_size <= hfsmp->jnl_size) {
6602 /* The journal size requires no modification */
6603 goto out;
6604 }
6605
6606 if (hfs_resize_debug) {
6607 printf ("hfs_extend_journal: journal old=%u, new=%qd\n", hfsmp->jnl_size, calc_journal_size);
6608 }
6609
6610 /* Extend the journal to the new calculated size */
6611 error = hfs_relocate_journal_file(hfsmp, calc_journal_size, HFS_RESIZE_EXTEND, context);
6612 if (error == 0) {
6613 printf ("hfs_extend_journal: Extended journal size to %u bytes on \"%s\"\n",
6614 hfsmp->jnl_size, hfsmp->vcbVN);
6615 }
6616 out:
6617 return error;
6618 }
6619
6620
6621 /*
6622 * This function traverses through all extended attribute records for a given
6623 * fileID, and calls function that reclaims data blocks that exist in the
6624 * area of the disk being reclaimed which in turn is responsible for allocating
6625 * new space, copying extent data, deallocating new space, and if required,
6626 * splitting the extent.
6627 *
6628 * Note: The caller has already acquired the cnode lock on the file. Therefore
6629 * we are assured that no other thread would be creating/deleting/modifying
6630 * extended attributes for this file.
6631 *
6632 * Side Effects:
6633 * hfsmp->hfs_resize_blocksmoved is incremented by the number of allocation
6634 * blocks that were relocated.
6635 *
6636 * Returns:
6637 * 0 on success, non-zero on failure.
6638 */
6639 static int
6640 hfs_reclaim_xattr(struct hfsmount *hfsmp, struct vnode *vp, u_int32_t fileID, u_int32_t allocLimit, vfs_context_t context)
6641 {
6642 int error = 0;
6643 struct hfs_reclaim_extent_info *extent_info;
6644 int i;
6645 HFSPlusAttrKey *key;
6646 int *lockflags;
6647
6648 if (hfs_resize_debug) {
6649 printf("hfs_reclaim_xattr: === Start reclaiming xattr for id=%u ===\n", fileID);
6650 }
6651
6652 MALLOC(extent_info, struct hfs_reclaim_extent_info *,
6653 sizeof(struct hfs_reclaim_extent_info), M_TEMP, M_WAITOK);
6654 if (extent_info == NULL) {
6655 return ENOMEM;
6656 }
6657 bzero(extent_info, sizeof(struct hfs_reclaim_extent_info));
6658 extent_info->vp = vp;
6659 extent_info->fileID = fileID;
6660 extent_info->is_xattr = true;
6661 extent_info->is_sysfile = vnode_issystem(vp);
6662 extent_info->fcb = VTOF(hfsmp->hfs_attribute_vp);
6663 lockflags = &(extent_info->lockflags);
6664 *lockflags = SFL_ATTRIBUTE | SFL_BITMAP;
6665
6666 /* Initialize iterator from the extent_info structure */
6667 MALLOC(extent_info->iterator, struct BTreeIterator *,
6668 sizeof(struct BTreeIterator), M_TEMP, M_WAITOK);
6669 if (extent_info->iterator == NULL) {
6670 error = ENOMEM;
6671 goto out;
6672 }
6673 bzero(extent_info->iterator, sizeof(struct BTreeIterator));
6674
6675 /* Build attribute key */
6676 key = (HFSPlusAttrKey *)&(extent_info->iterator->key);
6677 error = hfs_buildattrkey(fileID, NULL, key);
6678 if (error) {
6679 goto out;
6680 }
6681
6682 /* Initialize btdata from extent_info structure. Note that the
6683 * buffer pointer actually points to the xattr record from the
6684 * extent_info structure itself.
6685 */
6686 extent_info->btdata.bufferAddress = &(extent_info->record.xattr);
6687 extent_info->btdata.itemSize = sizeof(HFSPlusAttrRecord);
6688 extent_info->btdata.itemCount = 1;
6689
6690 /*
6691 * Sync all extent-based attribute data to the disk.
6692 *
6693 * All extent-based attribute data I/O is performed via cluster
6694 * I/O using a virtual file that spans across entire file system
6695 * space.
6696 */
6697 hfs_lock_truncate(VTOC(hfsmp->hfs_attrdata_vp), HFS_EXCLUSIVE_LOCK);
6698 (void)cluster_push(hfsmp->hfs_attrdata_vp, 0);
6699 error = vnode_waitforwrites(hfsmp->hfs_attrdata_vp, 0, 0, 0, "hfs_reclaim_xattr");
6700 hfs_unlock_truncate(VTOC(hfsmp->hfs_attrdata_vp), 0);
6701 if (error) {
6702 goto out;
6703 }
6704
6705 /* Search for extended attribute for current file. This
6706 * will place the iterator before the first matching record.
6707 */
6708 *lockflags = hfs_systemfile_lock(hfsmp, *lockflags, HFS_EXCLUSIVE_LOCK);
6709 error = BTSearchRecord(extent_info->fcb, extent_info->iterator,
6710 &(extent_info->btdata), &(extent_info->recordlen),
6711 extent_info->iterator);
6712 hfs_systemfile_unlock(hfsmp, *lockflags);
6713 if (error) {
6714 if (error != btNotFound) {
6715 goto out;
6716 }
6717 /* btNotFound is expected here, so just mask it */
6718 error = 0;
6719 }
6720
6721 while (1) {
6722 /* Iterate to the next record */
6723 *lockflags = hfs_systemfile_lock(hfsmp, *lockflags, HFS_EXCLUSIVE_LOCK);
6724 error = BTIterateRecord(extent_info->fcb, kBTreeNextRecord,
6725 extent_info->iterator, &(extent_info->btdata),
6726 &(extent_info->recordlen));
6727 hfs_systemfile_unlock(hfsmp, *lockflags);
6728
6729 /* Stop the iteration if we encounter end of btree or xattr with different fileID */
6730 if (error || key->fileID != fileID) {
6731 if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) {
6732 error = 0;
6733 }
6734 break;
6735 }
6736
6737 /* We only care about extent-based EAs */
6738 if ((extent_info->record.xattr.recordType != kHFSPlusAttrForkData) &&
6739 (extent_info->record.xattr.recordType != kHFSPlusAttrExtents)) {
6740 continue;
6741 }
6742
6743 if (extent_info->record.xattr.recordType == kHFSPlusAttrForkData) {
6744 extent_info->overflow_count = 0;
6745 extent_info->extents = extent_info->record.xattr.forkData.theFork.extents;
6746 } else if (extent_info->record.xattr.recordType == kHFSPlusAttrExtents) {
6747 extent_info->overflow_count++;
6748 extent_info->extents = extent_info->record.xattr.overflowExtents.extents;
6749 }
6750
6751 extent_info->recStartBlock = key->startBlock;
6752 for (i = 0; i < kHFSPlusExtentDensity; i++) {
6753 if (extent_info->extents[i].blockCount == 0) {
6754 break;
6755 }
6756 extent_info->extent_index = i;
6757 error = hfs_reclaim_extent(hfsmp, allocLimit, extent_info, context);
6758 if (error) {
6759 printf ("hfs_reclaim_xattr: fileID=%u hfs_reclaim_extent error=%d\n", fileID, error);
6760 goto out;
6761 }
6762 }
6763 }
6764
6765 out:
6766 /* If any blocks were relocated, account them and report progress */
6767 if (extent_info->blocks_relocated) {
6768 hfsmp->hfs_resize_blocksmoved += extent_info->blocks_relocated;
6769 hfs_truncatefs_progress(hfsmp);
6770 }
6771 if (extent_info->iterator) {
6772 FREE(extent_info->iterator, M_TEMP);
6773 }
6774 if (extent_info) {
6775 FREE(extent_info, M_TEMP);
6776 }
6777 if (hfs_resize_debug) {
6778 printf("hfs_reclaim_xattr: === Finished relocating xattr for fileid=%u (error=%d) ===\n", fileID, error);
6779 }
6780 return error;
6781 }
6782
6783 /*
6784 * Reclaim any extent-based extended attributes allocation blocks from
6785 * the area of the disk that is being truncated.
6786 *
6787 * The function traverses the attribute btree to find out the fileIDs
6788 * of the extended attributes that need to be relocated. For every
6789 * file whose large EA requires relocation, it looks up the cnode and
6790 * calls hfs_reclaim_xattr() to do all the work for allocating
6791 * new space, copying data, deallocating old space, and if required,
6792 * splitting the extents.
6793 *
6794 * Inputs:
6795 * allocLimit - starting block of the area being reclaimed
6796 *
6797 * Returns:
6798 * returns 0 on success, non-zero on failure.
6799 */
6800 static int
6801 hfs_reclaim_xattrspace(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context)
6802 {
6803 int error = 0;
6804 FCB *fcb;
6805 struct BTreeIterator *iterator = NULL;
6806 struct FSBufferDescriptor btdata;
6807 HFSPlusAttrKey *key;
6808 HFSPlusAttrRecord rec;
6809 int lockflags = 0;
6810 cnid_t prev_fileid = 0;
6811 struct vnode *vp;
6812 int need_relocate;
6813 int btree_operation;
6814 u_int32_t files_moved = 0;
6815 u_int32_t prev_blocksmoved;
6816 int i;
6817
6818 fcb = VTOF(hfsmp->hfs_attribute_vp);
6819 /* Store the value to print total blocks moved by this function in end */
6820 prev_blocksmoved = hfsmp->hfs_resize_blocksmoved;
6821
6822 if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) {
6823 return ENOMEM;
6824 }
6825 bzero(iterator, sizeof(*iterator));
6826 key = (HFSPlusAttrKey *)&iterator->key;
6827 btdata.bufferAddress = &rec;
6828 btdata.itemSize = sizeof(rec);
6829 btdata.itemCount = 1;
6830
6831 need_relocate = false;
6832 btree_operation = kBTreeFirstRecord;
6833 /* Traverse the attribute btree to find extent-based EAs to reclaim */
6834 while (1) {
6835 lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE, HFS_SHARED_LOCK);
6836 error = BTIterateRecord(fcb, btree_operation, iterator, &btdata, NULL);
6837 hfs_systemfile_unlock(hfsmp, lockflags);
6838 if (error) {
6839 if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) {
6840 error = 0;
6841 }
6842 break;
6843 }
6844 btree_operation = kBTreeNextRecord;
6845
6846 /* If the extents of current fileID were already relocated, skip it */
6847 if (prev_fileid == key->fileID) {
6848 continue;
6849 }
6850
6851 /* Check if any of the extents in the current record need to be relocated */
6852 need_relocate = false;
6853 switch(rec.recordType) {
6854 case kHFSPlusAttrForkData:
6855 for (i = 0; i < kHFSPlusExtentDensity; i++) {
6856 if (rec.forkData.theFork.extents[i].blockCount == 0) {
6857 break;
6858 }
6859 if ((rec.forkData.theFork.extents[i].startBlock +
6860 rec.forkData.theFork.extents[i].blockCount) > allocLimit) {
6861 need_relocate = true;
6862 break;
6863 }
6864 }
6865 break;
6866
6867 case kHFSPlusAttrExtents:
6868 for (i = 0; i < kHFSPlusExtentDensity; i++) {
6869 if (rec.overflowExtents.extents[i].blockCount == 0) {
6870 break;
6871 }
6872 if ((rec.overflowExtents.extents[i].startBlock +
6873 rec.overflowExtents.extents[i].blockCount) > allocLimit) {
6874 need_relocate = true;
6875 break;
6876 }
6877 }
6878 break;
6879 };
6880
6881 /* Continue iterating to next attribute record */
6882 if (need_relocate == false) {
6883 continue;
6884 }
6885
6886 /* Look up the vnode for corresponding file. The cnode
6887 * will be locked which will ensure that no one modifies
6888 * the xattrs when we are relocating them.
6889 *
6890 * We want to allow open-unlinked files to be moved,
6891 * so provide allow_deleted == 1 for hfs_vget().
6892 */
6893 if (hfs_vget(hfsmp, key->fileID, &vp, 0, 1) != 0) {
6894 continue;
6895 }
6896
6897 error = hfs_reclaim_xattr(hfsmp, vp, key->fileID, allocLimit, context);
6898 hfs_unlock(VTOC(vp));
6899 vnode_put(vp);
6900 if (error) {
6901 printf ("hfs_reclaim_xattrspace: Error relocating xattrs for fileid=%u (error=%d)\n", key->fileID, error);
6902 break;
6903 }
6904 prev_fileid = key->fileID;
6905 files_moved++;
6906 }
6907
6908 if (files_moved) {
6909 printf("hfs_reclaim_xattrspace: Relocated %u xattr blocks from %u files on \"%s\"\n",
6910 (hfsmp->hfs_resize_blocksmoved - prev_blocksmoved),
6911 files_moved, hfsmp->vcbVN);
6912 }
6913
6914 kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator));
6915 return error;
6916 }
6917
6918 /*
6919 * Reclaim blocks from regular files.
6920 *
6921 * This function iterates over all the record in catalog btree looking
6922 * for files with extents that overlap into the space we're trying to
6923 * free up. If a file extent requires relocation, it looks up the vnode
6924 * and calls function to relocate the data.
6925 *
6926 * Returns:
6927 * Zero on success, non-zero on failure.
6928 */
6929 static int
6930 hfs_reclaim_filespace(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context)
6931 {
6932 int error;
6933 FCB *fcb;
6934 struct BTreeIterator *iterator = NULL;
6935 struct FSBufferDescriptor btdata;
6936 int btree_operation;
6937 int lockflags;
6938 struct HFSPlusCatalogFile filerec;
6939 struct vnode *vp;
6940 struct vnode *rvp;
6941 struct filefork *datafork;
6942 u_int32_t files_moved = 0;
6943 u_int32_t prev_blocksmoved;
6944
6945 fcb = VTOF(hfsmp->hfs_catalog_vp);
6946 /* Store the value to print total blocks moved by this function at the end */
6947 prev_blocksmoved = hfsmp->hfs_resize_blocksmoved;
6948
6949 if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) {
6950 error = ENOMEM;
6951 goto reclaim_filespace_done;
6952 }
6953
6954 #if CONFIG_PROTECT
6955 int keys_generated = 0;
6956 /*
6957 * For content-protected filesystems, we may need to relocate files that
6958 * are encrypted. If they use the new-style offset-based IVs, then
6959 * we can move them regardless of the lock state. We create a temporary
6960 * key here that we use to read/write the data, then we discard it at the
6961 * end of the function.
6962 */
6963 if (cp_fs_protected (hfsmp->hfs_mp)) {
6964 error = cp_entry_gentempkeys(&hfsmp->hfs_resize_cpentry, hfsmp);
6965 if (error) {
6966 printf("hfs_reclaimspace: Error generating temporary keys for resize (%d)\n", error);
6967 goto reclaim_filespace_done;
6968 }
6969 }
6970 #endif
6971
6972 bzero(iterator, sizeof(*iterator));
6973
6974 btdata.bufferAddress = &filerec;
6975 btdata.itemSize = sizeof(filerec);
6976 btdata.itemCount = 1;
6977
6978 btree_operation = kBTreeFirstRecord;
6979 while (1) {
6980 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
6981 error = BTIterateRecord(fcb, btree_operation, iterator, &btdata, NULL);
6982 hfs_systemfile_unlock(hfsmp, lockflags);
6983 if (error) {
6984 if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) {
6985 error = 0;
6986 }
6987 break;
6988 }
6989 btree_operation = kBTreeNextRecord;
6990
6991 if (filerec.recordType != kHFSPlusFileRecord) {
6992 continue;
6993 }
6994
6995 /* Check if any of the extents require relocation */
6996 if (hfs_file_extent_overlaps(hfsmp, allocLimit, &filerec) == false) {
6997 continue;
6998 }
6999
7000 /* We want to allow open-unlinked files to be moved, so allow_deleted == 1 */
7001 if (hfs_vget(hfsmp, filerec.fileID, &vp, 0, 1) != 0) {
7002 if (hfs_resize_debug) {
7003 printf("hfs_reclaim_filespace: hfs_vget(%u) failed.\n", filerec.fileID);
7004 }
7005 continue;
7006 }
7007
7008 /* If data fork exists or item is a directory hard link, relocate blocks */
7009 datafork = VTOF(vp);
7010 if ((datafork && datafork->ff_blocks > 0) || vnode_isdir(vp)) {
7011 error = hfs_reclaim_file(hfsmp, vp, filerec.fileID,
7012 kHFSDataForkType, allocLimit, context);
7013 if (error) {
7014 printf ("hfs_reclaimspace: Error reclaiming datafork blocks of fileid=%u (error=%d)\n", filerec.fileID, error);
7015 hfs_unlock(VTOC(vp));
7016 vnode_put(vp);
7017 break;
7018 }
7019 }
7020
7021 /* If resource fork exists or item is a directory hard link, relocate blocks */
7022 if (((VTOC(vp)->c_blocks - (datafork ? datafork->ff_blocks : 0)) > 0) || vnode_isdir(vp)) {
7023 if (vnode_isdir(vp)) {
7024 /* Resource fork vnode lookup is invalid for directory hard link.
7025 * So we fake data fork vnode as resource fork vnode.
7026 */
7027 rvp = vp;
7028 } else {
7029 error = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE, FALSE);
7030 if (error) {
7031 printf ("hfs_reclaimspace: Error looking up rvp for fileid=%u (error=%d)\n", filerec.fileID, error);
7032 hfs_unlock(VTOC(vp));
7033 vnode_put(vp);
7034 break;
7035 }
7036 VTOC(rvp)->c_flag |= C_NEED_RVNODE_PUT;
7037 }
7038
7039 error = hfs_reclaim_file(hfsmp, rvp, filerec.fileID,
7040 kHFSResourceForkType, allocLimit, context);
7041 if (error) {
7042 printf ("hfs_reclaimspace: Error reclaiming rsrcfork blocks of fileid=%u (error=%d)\n", filerec.fileID, error);
7043 hfs_unlock(VTOC(vp));
7044 vnode_put(vp);
7045 break;
7046 }
7047 }
7048
7049 /* The file forks were relocated successfully, now drop the
7050 * cnode lock and vnode reference, and continue iterating to
7051 * next catalog record.
7052 */
7053 hfs_unlock(VTOC(vp));
7054 vnode_put(vp);
7055 files_moved++;
7056 }
7057
7058 if (files_moved) {
7059 printf("hfs_reclaim_filespace: Relocated %u blocks from %u files on \"%s\"\n",
7060 (hfsmp->hfs_resize_blocksmoved - prev_blocksmoved),
7061 files_moved, hfsmp->vcbVN);
7062 }
7063
7064 reclaim_filespace_done:
7065 if (iterator) {
7066 kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator));
7067 }
7068
7069 #if CONFIG_PROTECT
7070 if (keys_generated) {
7071 cp_entry_destroy(&hfsmp->hfs_resize_cpentry);
7072 }
7073 #endif
7074 return error;
7075 }
7076
7077 /*
7078 * Reclaim space at the end of a file system.
7079 *
7080 * Inputs -
7081 * allocLimit - start block of the space being reclaimed
7082 * reclaimblks - number of allocation blocks to reclaim
7083 */
7084 static int
7085 hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t allocLimit, u_int32_t reclaimblks, vfs_context_t context)
7086 {
7087 int error = 0;
7088
7089 /*
7090 * Preflight the bitmap to find out total number of blocks that need
7091 * relocation.
7092 *
7093 * Note: Since allocLimit is set to the location of new alternate volume
7094 * header, the check below does not account for blocks allocated for old
7095 * alternate volume header.
7096 */
7097 error = hfs_count_allocated(hfsmp, allocLimit, reclaimblks, &(hfsmp->hfs_resize_totalblocks));
7098 if (error) {
7099 printf ("hfs_reclaimspace: Unable to determine total blocks to reclaim error=%d\n", error);
7100 return error;
7101 }
7102 if (hfs_resize_debug) {
7103 printf ("hfs_reclaimspace: Total number of blocks to reclaim = %u\n", hfsmp->hfs_resize_totalblocks);
7104 }
7105
7106 /* Just to be safe, sync the content of the journal to the disk before we proceed */
7107 hfs_journal_flush(hfsmp, TRUE);
7108
7109 /* First, relocate journal file blocks if they're in the way.
7110 * Doing this first will make sure that journal relocate code
7111 * gets access to contiguous blocks on disk first. The journal
7112 * file has to be contiguous on the disk, otherwise resize will
7113 * fail.
7114 */
7115 error = hfs_reclaim_journal_file(hfsmp, allocLimit, context);
7116 if (error) {
7117 printf("hfs_reclaimspace: hfs_reclaim_journal_file failed (%d)\n", error);
7118 return error;
7119 }
7120
7121 /* Relocate journal info block blocks if they're in the way. */
7122 error = hfs_reclaim_journal_info_block(hfsmp, allocLimit, context);
7123 if (error) {
7124 printf("hfs_reclaimspace: hfs_reclaim_journal_info_block failed (%d)\n", error);
7125 return error;
7126 }
7127
7128 /* Relocate extents of the Extents B-tree if they're in the way.
7129 * Relocating extents btree before other btrees is important as
7130 * this will provide access to largest contiguous block range on
7131 * the disk for relocating extents btree. Note that extents btree
7132 * can only have maximum of 8 extents.
7133 */
7134 error = hfs_reclaim_file(hfsmp, hfsmp->hfs_extents_vp, kHFSExtentsFileID,
7135 kHFSDataForkType, allocLimit, context);
7136 if (error) {
7137 printf("hfs_reclaimspace: reclaim extents b-tree returned %d\n", error);
7138 return error;
7139 }
7140
7141 /* Relocate extents of the Allocation file if they're in the way. */
7142 error = hfs_reclaim_file(hfsmp, hfsmp->hfs_allocation_vp, kHFSAllocationFileID,
7143 kHFSDataForkType, allocLimit, context);
7144 if (error) {
7145 printf("hfs_reclaimspace: reclaim allocation file returned %d\n", error);
7146 return error;
7147 }
7148
7149 /* Relocate extents of the Catalog B-tree if they're in the way. */
7150 error = hfs_reclaim_file(hfsmp, hfsmp->hfs_catalog_vp, kHFSCatalogFileID,
7151 kHFSDataForkType, allocLimit, context);
7152 if (error) {
7153 printf("hfs_reclaimspace: reclaim catalog b-tree returned %d\n", error);
7154 return error;
7155 }
7156
7157 /* Relocate extents of the Attributes B-tree if they're in the way. */
7158 error = hfs_reclaim_file(hfsmp, hfsmp->hfs_attribute_vp, kHFSAttributesFileID,
7159 kHFSDataForkType, allocLimit, context);
7160 if (error) {
7161 printf("hfs_reclaimspace: reclaim attribute b-tree returned %d\n", error);
7162 return error;
7163 }
7164
7165 /* Relocate extents of the Startup File if there is one and they're in the way. */
7166 error = hfs_reclaim_file(hfsmp, hfsmp->hfs_startup_vp, kHFSStartupFileID,
7167 kHFSDataForkType, allocLimit, context);
7168 if (error) {
7169 printf("hfs_reclaimspace: reclaim startup file returned %d\n", error);
7170 return error;
7171 }
7172
7173 /*
7174 * We need to make sure the alternate volume header gets flushed if we moved
7175 * any extents in the volume header. But we need to do that before
7176 * shrinking the size of the volume, or else the journal code will panic
7177 * with an invalid (too large) block number.
7178 *
7179 * Note that blks_moved will be set if ANY extent was moved, even
7180 * if it was just an overflow extent. In this case, the journal_flush isn't
7181 * strictly required, but shouldn't hurt.
7182 */
7183 if (hfsmp->hfs_resize_blocksmoved) {
7184 hfs_journal_flush(hfsmp, TRUE);
7185 }
7186
7187 /* Reclaim extents from catalog file records */
7188 error = hfs_reclaim_filespace(hfsmp, allocLimit, context);
7189 if (error) {
7190 printf ("hfs_reclaimspace: hfs_reclaim_filespace returned error=%d\n", error);
7191 return error;
7192 }
7193
7194 /* Reclaim extents from extent-based extended attributes, if any */
7195 error = hfs_reclaim_xattrspace(hfsmp, allocLimit, context);
7196 if (error) {
7197 printf ("hfs_reclaimspace: hfs_reclaim_xattrspace returned error=%d\n", error);
7198 return error;
7199 }
7200
7201 return error;
7202 }
7203
7204
7205 /*
7206 * Check if there are any extents (including overflow extents) that overlap
7207 * into the disk space that is being reclaimed.
7208 *
7209 * Output -
7210 * true - One of the extents need to be relocated
7211 * false - No overflow extents need to be relocated, or there was an error
7212 */
7213 static int
7214 hfs_file_extent_overlaps(struct hfsmount *hfsmp, u_int32_t allocLimit, struct HFSPlusCatalogFile *filerec)
7215 {
7216 struct BTreeIterator * iterator = NULL;
7217 struct FSBufferDescriptor btdata;
7218 HFSPlusExtentRecord extrec;
7219 HFSPlusExtentKey *extkeyptr;
7220 FCB *fcb;
7221 int overlapped = false;
7222 int i, j;
7223 int error;
7224 int lockflags = 0;
7225 u_int32_t endblock;
7226
7227 /* Check if data fork overlaps the target space */
7228 for (i = 0; i < kHFSPlusExtentDensity; ++i) {
7229 if (filerec->dataFork.extents[i].blockCount == 0) {
7230 break;
7231 }
7232 endblock = filerec->dataFork.extents[i].startBlock +
7233 filerec->dataFork.extents[i].blockCount;
7234 if (endblock > allocLimit) {
7235 overlapped = true;
7236 goto out;
7237 }
7238 }
7239
7240 /* Check if resource fork overlaps the target space */
7241 for (j = 0; j < kHFSPlusExtentDensity; ++j) {
7242 if (filerec->resourceFork.extents[j].blockCount == 0) {
7243 break;
7244 }
7245 endblock = filerec->resourceFork.extents[j].startBlock +
7246 filerec->resourceFork.extents[j].blockCount;
7247 if (endblock > allocLimit) {
7248 overlapped = true;
7249 goto out;
7250 }
7251 }
7252
7253 /* Return back if there are no overflow extents for this file */
7254 if ((i < kHFSPlusExtentDensity) && (j < kHFSPlusExtentDensity)) {
7255 goto out;
7256 }
7257
7258 if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) {
7259 return 0;
7260 }
7261 bzero(iterator, sizeof(*iterator));
7262 extkeyptr = (HFSPlusExtentKey *)&iterator->key;
7263 extkeyptr->keyLength = kHFSPlusExtentKeyMaximumLength;
7264 extkeyptr->forkType = 0;
7265 extkeyptr->fileID = filerec->fileID;
7266 extkeyptr->startBlock = 0;
7267
7268 btdata.bufferAddress = &extrec;
7269 btdata.itemSize = sizeof(extrec);
7270 btdata.itemCount = 1;
7271
7272 fcb = VTOF(hfsmp->hfs_extents_vp);
7273
7274 lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_SHARED_LOCK);
7275
7276 /* This will position the iterator just before the first overflow
7277 * extent record for given fileID. It will always return btNotFound,
7278 * so we special case the error code.
7279 */
7280 error = BTSearchRecord(fcb, iterator, &btdata, NULL, iterator);
7281 if (error && (error != btNotFound)) {
7282 goto out;
7283 }
7284
7285 /* BTIterateRecord() might return error if the btree is empty, and
7286 * therefore we return that the extent does not overflow to the caller
7287 */
7288 error = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL);
7289 while (error == 0) {
7290 /* Stop when we encounter a different file. */
7291 if (extkeyptr->fileID != filerec->fileID) {
7292 break;
7293 }
7294 /* Check if any of the forks exist in the target space. */
7295 for (i = 0; i < kHFSPlusExtentDensity; ++i) {
7296 if (extrec[i].blockCount == 0) {
7297 break;
7298 }
7299 endblock = extrec[i].startBlock + extrec[i].blockCount;
7300 if (endblock > allocLimit) {
7301 overlapped = true;
7302 goto out;
7303 }
7304 }
7305 /* Look for more records. */
7306 error = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL);
7307 }
7308
7309 out:
7310 if (lockflags) {
7311 hfs_systemfile_unlock(hfsmp, lockflags);
7312 }
7313 if (iterator) {
7314 kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator));
7315 }
7316 return overlapped;
7317 }
7318
7319
7320 /*
7321 * Calculate the progress of a file system resize operation.
7322 */
7323 __private_extern__
7324 int
7325 hfs_resize_progress(struct hfsmount *hfsmp, u_int32_t *progress)
7326 {
7327 if ((hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) == 0) {
7328 return (ENXIO);
7329 }
7330
7331 if (hfsmp->hfs_resize_totalblocks > 0) {
7332 *progress = (u_int32_t)((hfsmp->hfs_resize_blocksmoved * 100ULL) / hfsmp->hfs_resize_totalblocks);
7333 } else {
7334 *progress = 0;
7335 }
7336
7337 return (0);
7338 }
7339
7340
7341 /*
7342 * Creates a UUID from a unique "name" in the HFS UUID Name space.
7343 * See version 3 UUID.
7344 */
7345 static void
7346 hfs_getvoluuid(struct hfsmount *hfsmp, uuid_t result)
7347 {
7348 MD5_CTX md5c;
7349 uint8_t rawUUID[8];
7350
7351 ((uint32_t *)rawUUID)[0] = hfsmp->vcbFndrInfo[6];
7352 ((uint32_t *)rawUUID)[1] = hfsmp->vcbFndrInfo[7];
7353
7354 MD5Init( &md5c );
7355 MD5Update( &md5c, HFS_UUID_NAMESPACE_ID, sizeof( uuid_t ) );
7356 MD5Update( &md5c, rawUUID, sizeof (rawUUID) );
7357 MD5Final( result, &md5c );
7358
7359 result[6] = 0x30 | ( result[6] & 0x0F );
7360 result[8] = 0x80 | ( result[8] & 0x3F );
7361 }
7362
7363 /*
7364 * Get file system attributes.
7365 */
7366 static int
7367 hfs_vfs_getattr(struct mount *mp, struct vfs_attr *fsap, __unused vfs_context_t context)
7368 {
7369 #define HFS_ATTR_CMN_VALIDMASK (ATTR_CMN_VALIDMASK & ~(ATTR_CMN_NAMEDATTRCOUNT | ATTR_CMN_NAMEDATTRLIST))
7370 #define HFS_ATTR_FILE_VALIDMASK (ATTR_FILE_VALIDMASK & ~(ATTR_FILE_FILETYPE | ATTR_FILE_FORKCOUNT | ATTR_FILE_FORKLIST))
7371 #define HFS_ATTR_CMN_VOL_VALIDMASK (ATTR_CMN_VALIDMASK & ~(ATTR_CMN_NAMEDATTRCOUNT | ATTR_CMN_NAMEDATTRLIST | ATTR_CMN_ACCTIME))
7372
7373 ExtendedVCB *vcb = VFSTOVCB(mp);
7374 struct hfsmount *hfsmp = VFSTOHFS(mp);
7375 u_int32_t freeCNIDs;
7376
7377 freeCNIDs = (u_int32_t)0xFFFFFFFF - (u_int32_t)hfsmp->vcbNxtCNID;
7378
7379 VFSATTR_RETURN(fsap, f_objcount, (u_int64_t)hfsmp->vcbFilCnt + (u_int64_t)hfsmp->vcbDirCnt);
7380 VFSATTR_RETURN(fsap, f_filecount, (u_int64_t)hfsmp->vcbFilCnt);
7381 VFSATTR_RETURN(fsap, f_dircount, (u_int64_t)hfsmp->vcbDirCnt);
7382 VFSATTR_RETURN(fsap, f_maxobjcount, (u_int64_t)0xFFFFFFFF);
7383 VFSATTR_RETURN(fsap, f_iosize, (size_t)cluster_max_io_size(mp, 0));
7384 VFSATTR_RETURN(fsap, f_blocks, (u_int64_t)hfsmp->totalBlocks);
7385 VFSATTR_RETURN(fsap, f_bfree, (u_int64_t)hfs_freeblks(hfsmp, 0));
7386 VFSATTR_RETURN(fsap, f_bavail, (u_int64_t)hfs_freeblks(hfsmp, 1));
7387 VFSATTR_RETURN(fsap, f_bsize, (u_int32_t)vcb->blockSize);
7388 /* XXX needs clarification */
7389 VFSATTR_RETURN(fsap, f_bused, hfsmp->totalBlocks - hfs_freeblks(hfsmp, 1));
7390 /* Maximum files is constrained by total blocks. */
7391 VFSATTR_RETURN(fsap, f_files, (u_int64_t)(hfsmp->totalBlocks - 2));
7392 VFSATTR_RETURN(fsap, f_ffree, MIN((u_int64_t)freeCNIDs, (u_int64_t)hfs_freeblks(hfsmp, 1)));
7393
7394 fsap->f_fsid.val[0] = hfsmp->hfs_raw_dev;
7395 fsap->f_fsid.val[1] = vfs_typenum(mp);
7396 VFSATTR_SET_SUPPORTED(fsap, f_fsid);
7397
7398 VFSATTR_RETURN(fsap, f_signature, vcb->vcbSigWord);
7399 VFSATTR_RETURN(fsap, f_carbon_fsid, 0);
7400
7401 if (VFSATTR_IS_ACTIVE(fsap, f_capabilities)) {
7402 vol_capabilities_attr_t *cap;
7403
7404 cap = &fsap->f_capabilities;
7405
7406 if (hfsmp->hfs_flags & HFS_STANDARD) {
7407 cap->capabilities[VOL_CAPABILITIES_FORMAT] =
7408 VOL_CAP_FMT_PERSISTENTOBJECTIDS |
7409 VOL_CAP_FMT_CASE_PRESERVING |
7410 VOL_CAP_FMT_FAST_STATFS |
7411 VOL_CAP_FMT_HIDDEN_FILES |
7412 VOL_CAP_FMT_PATH_FROM_ID;
7413 } else {
7414 cap->capabilities[VOL_CAPABILITIES_FORMAT] =
7415 VOL_CAP_FMT_PERSISTENTOBJECTIDS |
7416 VOL_CAP_FMT_SYMBOLICLINKS |
7417 VOL_CAP_FMT_HARDLINKS |
7418 VOL_CAP_FMT_JOURNAL |
7419 VOL_CAP_FMT_ZERO_RUNS |
7420 (hfsmp->jnl ? VOL_CAP_FMT_JOURNAL_ACTIVE : 0) |
7421 (hfsmp->hfs_flags & HFS_CASE_SENSITIVE ? VOL_CAP_FMT_CASE_SENSITIVE : 0) |
7422 VOL_CAP_FMT_CASE_PRESERVING |
7423 VOL_CAP_FMT_FAST_STATFS |
7424 VOL_CAP_FMT_2TB_FILESIZE |
7425 VOL_CAP_FMT_HIDDEN_FILES |
7426 #if HFS_COMPRESSION
7427 VOL_CAP_FMT_PATH_FROM_ID |
7428 VOL_CAP_FMT_DECMPFS_COMPRESSION;
7429 #else
7430 VOL_CAP_FMT_PATH_FROM_ID;
7431 #endif
7432 }
7433 cap->capabilities[VOL_CAPABILITIES_INTERFACES] =
7434 VOL_CAP_INT_SEARCHFS |
7435 VOL_CAP_INT_ATTRLIST |
7436 VOL_CAP_INT_NFSEXPORT |
7437 VOL_CAP_INT_READDIRATTR |
7438 VOL_CAP_INT_EXCHANGEDATA |
7439 VOL_CAP_INT_ALLOCATE |
7440 VOL_CAP_INT_VOL_RENAME |
7441 VOL_CAP_INT_ADVLOCK |
7442 VOL_CAP_INT_FLOCK |
7443 #if NAMEDSTREAMS
7444 VOL_CAP_INT_EXTENDED_ATTR |
7445 VOL_CAP_INT_NAMEDSTREAMS;
7446 #else
7447 VOL_CAP_INT_EXTENDED_ATTR;
7448 #endif
7449 cap->capabilities[VOL_CAPABILITIES_RESERVED1] = 0;
7450 cap->capabilities[VOL_CAPABILITIES_RESERVED2] = 0;
7451
7452 cap->valid[VOL_CAPABILITIES_FORMAT] =
7453 VOL_CAP_FMT_PERSISTENTOBJECTIDS |
7454 VOL_CAP_FMT_SYMBOLICLINKS |
7455 VOL_CAP_FMT_HARDLINKS |
7456 VOL_CAP_FMT_JOURNAL |
7457 VOL_CAP_FMT_JOURNAL_ACTIVE |
7458 VOL_CAP_FMT_NO_ROOT_TIMES |
7459 VOL_CAP_FMT_SPARSE_FILES |
7460 VOL_CAP_FMT_ZERO_RUNS |
7461 VOL_CAP_FMT_CASE_SENSITIVE |
7462 VOL_CAP_FMT_CASE_PRESERVING |
7463 VOL_CAP_FMT_FAST_STATFS |
7464 VOL_CAP_FMT_2TB_FILESIZE |
7465 VOL_CAP_FMT_OPENDENYMODES |
7466 VOL_CAP_FMT_HIDDEN_FILES |
7467 #if HFS_COMPRESSION
7468 VOL_CAP_FMT_PATH_FROM_ID |
7469 VOL_CAP_FMT_DECMPFS_COMPRESSION;
7470 #else
7471 VOL_CAP_FMT_PATH_FROM_ID;
7472 #endif
7473 cap->valid[VOL_CAPABILITIES_INTERFACES] =
7474 VOL_CAP_INT_SEARCHFS |
7475 VOL_CAP_INT_ATTRLIST |
7476 VOL_CAP_INT_NFSEXPORT |
7477 VOL_CAP_INT_READDIRATTR |
7478 VOL_CAP_INT_EXCHANGEDATA |
7479 VOL_CAP_INT_COPYFILE |
7480 VOL_CAP_INT_ALLOCATE |
7481 VOL_CAP_INT_VOL_RENAME |
7482 VOL_CAP_INT_ADVLOCK |
7483 VOL_CAP_INT_FLOCK |
7484 VOL_CAP_INT_MANLOCK |
7485 #if NAMEDSTREAMS
7486 VOL_CAP_INT_EXTENDED_ATTR |
7487 VOL_CAP_INT_NAMEDSTREAMS;
7488 #else
7489 VOL_CAP_INT_EXTENDED_ATTR;
7490 #endif
7491 cap->valid[VOL_CAPABILITIES_RESERVED1] = 0;
7492 cap->valid[VOL_CAPABILITIES_RESERVED2] = 0;
7493 VFSATTR_SET_SUPPORTED(fsap, f_capabilities);
7494 }
7495 if (VFSATTR_IS_ACTIVE(fsap, f_attributes)) {
7496 vol_attributes_attr_t *attrp = &fsap->f_attributes;
7497
7498 attrp->validattr.commonattr = HFS_ATTR_CMN_VOL_VALIDMASK;
7499 attrp->validattr.volattr = ATTR_VOL_VALIDMASK & ~ATTR_VOL_INFO;
7500 attrp->validattr.dirattr = ATTR_DIR_VALIDMASK;
7501 attrp->validattr.fileattr = HFS_ATTR_FILE_VALIDMASK;
7502 attrp->validattr.forkattr = 0;
7503
7504 attrp->nativeattr.commonattr = HFS_ATTR_CMN_VOL_VALIDMASK;
7505 attrp->nativeattr.volattr = ATTR_VOL_VALIDMASK & ~ATTR_VOL_INFO;
7506 attrp->nativeattr.dirattr = ATTR_DIR_VALIDMASK;
7507 attrp->nativeattr.fileattr = HFS_ATTR_FILE_VALIDMASK;
7508 attrp->nativeattr.forkattr = 0;
7509 VFSATTR_SET_SUPPORTED(fsap, f_attributes);
7510 }
7511 fsap->f_create_time.tv_sec = hfsmp->hfs_itime;
7512 fsap->f_create_time.tv_nsec = 0;
7513 VFSATTR_SET_SUPPORTED(fsap, f_create_time);
7514 fsap->f_modify_time.tv_sec = hfsmp->vcbLsMod;
7515 fsap->f_modify_time.tv_nsec = 0;
7516 VFSATTR_SET_SUPPORTED(fsap, f_modify_time);
7517
7518 fsap->f_backup_time.tv_sec = hfsmp->vcbVolBkUp;
7519 fsap->f_backup_time.tv_nsec = 0;
7520 VFSATTR_SET_SUPPORTED(fsap, f_backup_time);
7521 if (VFSATTR_IS_ACTIVE(fsap, f_fssubtype)) {
7522 u_int16_t subtype = 0;
7523
7524 /*
7525 * Subtypes (flavors) for HFS
7526 * 0: Mac OS Extended
7527 * 1: Mac OS Extended (Journaled)
7528 * 2: Mac OS Extended (Case Sensitive)
7529 * 3: Mac OS Extended (Case Sensitive, Journaled)
7530 * 4 - 127: Reserved
7531 * 128: Mac OS Standard
7532 *
7533 */
7534 if (hfsmp->hfs_flags & HFS_STANDARD) {
7535 subtype = HFS_SUBTYPE_STANDARDHFS;
7536 } else /* HFS Plus */ {
7537 if (hfsmp->jnl)
7538 subtype |= HFS_SUBTYPE_JOURNALED;
7539 if (hfsmp->hfs_flags & HFS_CASE_SENSITIVE)
7540 subtype |= HFS_SUBTYPE_CASESENSITIVE;
7541 }
7542 fsap->f_fssubtype = subtype;
7543 VFSATTR_SET_SUPPORTED(fsap, f_fssubtype);
7544 }
7545
7546 if (VFSATTR_IS_ACTIVE(fsap, f_vol_name)) {
7547 strlcpy(fsap->f_vol_name, (char *) hfsmp->vcbVN, MAXPATHLEN);
7548 VFSATTR_SET_SUPPORTED(fsap, f_vol_name);
7549 }
7550 if (VFSATTR_IS_ACTIVE(fsap, f_uuid)) {
7551 hfs_getvoluuid(hfsmp, fsap->f_uuid);
7552 VFSATTR_SET_SUPPORTED(fsap, f_uuid);
7553 }
7554 return (0);
7555 }
7556
7557 /*
7558 * Perform a volume rename. Requires the FS' root vp.
7559 */
7560 static int
7561 hfs_rename_volume(struct vnode *vp, const char *name, proc_t p)
7562 {
7563 ExtendedVCB *vcb = VTOVCB(vp);
7564 struct cnode *cp = VTOC(vp);
7565 struct hfsmount *hfsmp = VTOHFS(vp);
7566 struct cat_desc to_desc;
7567 struct cat_desc todir_desc;
7568 struct cat_desc new_desc;
7569 cat_cookie_t cookie;
7570 int lockflags;
7571 int error = 0;
7572 char converted_volname[256];
7573 size_t volname_length = 0;
7574 size_t conv_volname_length = 0;
7575
7576
7577 /*
7578 * Ignore attempts to rename a volume to a zero-length name.
7579 */
7580 if (name[0] == 0)
7581 return(0);
7582
7583 bzero(&to_desc, sizeof(to_desc));
7584 bzero(&todir_desc, sizeof(todir_desc));
7585 bzero(&new_desc, sizeof(new_desc));
7586 bzero(&cookie, sizeof(cookie));
7587
7588 todir_desc.cd_parentcnid = kHFSRootParentID;
7589 todir_desc.cd_cnid = kHFSRootFolderID;
7590 todir_desc.cd_flags = CD_ISDIR;
7591
7592 to_desc.cd_nameptr = (const u_int8_t *)name;
7593 to_desc.cd_namelen = strlen(name);
7594 to_desc.cd_parentcnid = kHFSRootParentID;
7595 to_desc.cd_cnid = cp->c_cnid;
7596 to_desc.cd_flags = CD_ISDIR;
7597
7598 if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK)) == 0) {
7599 if ((error = hfs_start_transaction(hfsmp)) == 0) {
7600 if ((error = cat_preflight(hfsmp, CAT_RENAME, &cookie, p)) == 0) {
7601 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
7602
7603 error = cat_rename(hfsmp, &cp->c_desc, &todir_desc, &to_desc, &new_desc);
7604
7605 /*
7606 * If successful, update the name in the VCB, ensure it's terminated.
7607 */
7608 if (!error) {
7609 strlcpy((char *)vcb->vcbVN, name, sizeof(vcb->vcbVN));
7610 volname_length = strlen ((const char*)vcb->vcbVN);
7611 #define DKIOCCSSETLVNAME _IOW('d', 198, char[256])
7612 /* Send the volume name down to CoreStorage if necessary */
7613 error = utf8_normalizestr(vcb->vcbVN, volname_length, (u_int8_t*)converted_volname, &conv_volname_length, 256, UTF_PRECOMPOSED);
7614 if (error == 0) {
7615 (void) VNOP_IOCTL (hfsmp->hfs_devvp, DKIOCCSSETLVNAME, converted_volname, 0, vfs_context_current());
7616 }
7617 error = 0;
7618 }
7619
7620 hfs_systemfile_unlock(hfsmp, lockflags);
7621 cat_postflight(hfsmp, &cookie, p);
7622
7623 if (error)
7624 MarkVCBDirty(vcb);
7625 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
7626 }
7627 hfs_end_transaction(hfsmp);
7628 }
7629 if (!error) {
7630 /* Release old allocated name buffer */
7631 if (cp->c_desc.cd_flags & CD_HASBUF) {
7632 const char *tmp_name = (const char *)cp->c_desc.cd_nameptr;
7633
7634 cp->c_desc.cd_nameptr = 0;
7635 cp->c_desc.cd_namelen = 0;
7636 cp->c_desc.cd_flags &= ~CD_HASBUF;
7637 vfs_removename(tmp_name);
7638 }
7639 /* Update cnode's catalog descriptor */
7640 replace_desc(cp, &new_desc);
7641 vcb->volumeNameEncodingHint = new_desc.cd_encoding;
7642 cp->c_touch_chgtime = TRUE;
7643 }
7644
7645 hfs_unlock(cp);
7646 }
7647
7648 return(error);
7649 }
7650
7651 /*
7652 * Get file system attributes.
7653 */
7654 static int
7655 hfs_vfs_setattr(struct mount *mp, struct vfs_attr *fsap, __unused vfs_context_t context)
7656 {
7657 kauth_cred_t cred = vfs_context_ucred(context);
7658 int error = 0;
7659
7660 /*
7661 * Must be superuser or owner of filesystem to change volume attributes
7662 */
7663 if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(mp)->f_owner))
7664 return(EACCES);
7665
7666 if (VFSATTR_IS_ACTIVE(fsap, f_vol_name)) {
7667 vnode_t root_vp;
7668
7669 error = hfs_vfs_root(mp, &root_vp, context);
7670 if (error)
7671 goto out;
7672
7673 error = hfs_rename_volume(root_vp, fsap->f_vol_name, vfs_context_proc(context));
7674 (void) vnode_put(root_vp);
7675 if (error)
7676 goto out;
7677
7678 VFSATTR_SET_SUPPORTED(fsap, f_vol_name);
7679 }
7680
7681 out:
7682 return error;
7683 }
7684
7685 /* If a runtime corruption is detected, set the volume inconsistent
7686 * bit in the volume attributes. The volume inconsistent bit is a persistent
7687 * bit which represents that the volume is corrupt and needs repair.
7688 * The volume inconsistent bit can be set from the kernel when it detects
7689 * runtime corruption or from file system repair utilities like fsck_hfs when
7690 * a repair operation fails. The bit should be cleared only from file system
7691 * verify/repair utility like fsck_hfs when a verify/repair succeeds.
7692 */
7693 void hfs_mark_volume_inconsistent(struct hfsmount *hfsmp)
7694 {
7695 HFS_MOUNT_LOCK(hfsmp, TRUE);
7696 if ((hfsmp->vcbAtrb & kHFSVolumeInconsistentMask) == 0) {
7697 hfsmp->vcbAtrb |= kHFSVolumeInconsistentMask;
7698 MarkVCBDirty(hfsmp);
7699 }
7700 if ((hfsmp->hfs_flags & HFS_READ_ONLY)==0) {
7701 /* Log information to ASL log */
7702 fslog_fs_corrupt(hfsmp->hfs_mp);
7703 printf("hfs: Runtime corruption detected on %s, fsck will be forced on next mount.\n", hfsmp->vcbVN);
7704 }
7705 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
7706 }
7707
7708 /* Replay the journal on the device node provided. Returns zero if
7709 * journal replay succeeded or no journal was supposed to be replayed.
7710 */
7711 static int hfs_journal_replay(vnode_t devvp, vfs_context_t context)
7712 {
7713 int retval = 0;
7714 int error = 0;
7715 struct mount *mp = NULL;
7716 struct hfs_mount_args *args = NULL;
7717
7718 /* Replay allowed only on raw devices */
7719 if (!vnode_ischr(devvp) && !vnode_isblk(devvp)) {
7720 retval = EINVAL;
7721 goto out;
7722 }
7723
7724 /* Create dummy mount structures */
7725 MALLOC(mp, struct mount *, sizeof(struct mount), M_TEMP, M_WAITOK);
7726 if (mp == NULL) {
7727 retval = ENOMEM;
7728 goto out;
7729 }
7730 bzero(mp, sizeof(struct mount));
7731 mount_lock_init(mp);
7732
7733 MALLOC(args, struct hfs_mount_args *, sizeof(struct hfs_mount_args), M_TEMP, M_WAITOK);
7734 if (args == NULL) {
7735 retval = ENOMEM;
7736 goto out;
7737 }
7738 bzero(args, sizeof(struct hfs_mount_args));
7739
7740 retval = hfs_mountfs(devvp, mp, args, 1, context);
7741 buf_flushdirtyblks(devvp, TRUE, 0, "hfs_journal_replay");
7742
7743 /* FSYNC the devnode to be sure all data has been flushed */
7744 error = VNOP_FSYNC(devvp, MNT_WAIT, context);
7745 if (error) {
7746 retval = error;
7747 }
7748
7749 out:
7750 if (mp) {
7751 mount_lock_destroy(mp);
7752 FREE(mp, M_TEMP);
7753 }
7754 if (args) {
7755 FREE(args, M_TEMP);
7756 }
7757 return retval;
7758 }
7759
7760 /*
7761 * hfs vfs operations.
7762 */
7763 struct vfsops hfs_vfsops = {
7764 hfs_mount,
7765 hfs_start,
7766 hfs_unmount,
7767 hfs_vfs_root,
7768 hfs_quotactl,
7769 hfs_vfs_getattr, /* was hfs_statfs */
7770 hfs_sync,
7771 hfs_vfs_vget,
7772 hfs_fhtovp,
7773 hfs_vptofh,
7774 hfs_init,
7775 hfs_sysctl,
7776 hfs_vfs_setattr,
7777 {NULL}
7778 };