]> git.saurik.com Git - apple/xnu.git/blob - bsd/hfs/hfs_vfsops.c
xnu-1699.24.8.tar.gz
[apple/xnu.git] / bsd / hfs / hfs_vfsops.c
1 /*
2 * Copyright (c) 1999-2010 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1991, 1993, 1994
30 * The Regents of the University of California. All rights reserved.
31 * (c) UNIX System Laboratories, Inc.
32 * All or some portions of this file are derived from material licensed
33 * to the University of California by American Telephone and Telegraph
34 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35 * the permission of UNIX System Laboratories, Inc.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * hfs_vfsops.c
66 * derived from @(#)ufs_vfsops.c 8.8 (Berkeley) 5/20/95
67 *
68 * (c) Copyright 1997-2002 Apple Computer, Inc. All rights reserved.
69 *
70 * hfs_vfsops.c -- VFS layer for loadable HFS file system.
71 *
72 */
73 #include <sys/param.h>
74 #include <sys/systm.h>
75 #include <sys/kauth.h>
76
77 #include <sys/ubc.h>
78 #include <sys/ubc_internal.h>
79 #include <sys/vnode_internal.h>
80 #include <sys/mount_internal.h>
81 #include <sys/sysctl.h>
82 #include <sys/malloc.h>
83 #include <sys/stat.h>
84 #include <sys/quota.h>
85 #include <sys/disk.h>
86 #include <sys/paths.h>
87 #include <sys/utfconv.h>
88 #include <sys/kdebug.h>
89 #include <sys/fslog.h>
90 #include <sys/ubc.h>
91
92 #include <kern/locks.h>
93
94 #include <vfs/vfs_journal.h>
95
96 #include <miscfs/specfs/specdev.h>
97 #include <hfs/hfs_mount.h>
98
99 #include <libkern/crypto/md5.h>
100 #include <uuid/uuid.h>
101
102 #include "hfs.h"
103 #include "hfs_catalog.h"
104 #include "hfs_cnode.h"
105 #include "hfs_dbg.h"
106 #include "hfs_endian.h"
107 #include "hfs_hotfiles.h"
108 #include "hfs_quota.h"
109
110 #include "hfscommon/headers/FileMgrInternal.h"
111 #include "hfscommon/headers/BTreesInternal.h"
112
113 #if CONFIG_PROTECT
114 #include <sys/cprotect.h>
115 #endif
116
117 #if CONFIG_HFS_ALLOC_RBTREE
118 #include "hfscommon/headers/HybridAllocator.h"
119 #endif
120
/* Non-zero enables verbose printf diagnostics throughout the mount paths. */
#define HFS_MOUNT_DEBUG 1

#if HFS_DIAGNOSTIC
/* Runtime-tunable diagnostic switches (only built under HFS_DIAGNOSTIC). */
int hfs_dbg_all = 0;
int hfs_dbg_err = 0;
#endif

/* Enable/disable debugging code for live volume resizing */
int hfs_resize_debug = 0;

/* Lock attributes and groups shared by every HFS mount (initialized once). */
lck_grp_attr_t * hfs_group_attr;
lck_attr_t * hfs_lock_attr;
lck_grp_t * hfs_mutex_group;
lck_grp_t * hfs_rwlock_group;
lck_grp_t * hfs_spinlock_group;

/* Vnode operation vector descriptors, defined in the vnops source files. */
extern struct vnodeopv_desc hfs_vnodeop_opv_desc;
extern struct vnodeopv_desc hfs_std_vnodeop_opv_desc;

/* not static so we can re-use in hfs_readwrite.c for build_path calls */
int hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context);

/* Forward declarations for file-local helpers. */
static int hfs_changefs(struct mount *mp, struct hfs_mount_args *args);
static int hfs_fhtovp(struct mount *mp, int fhlen, unsigned char *fhp, struct vnode **vpp, vfs_context_t context);
static int hfs_flushfiles(struct mount *, int, struct proc *);
static int hfs_flushMDB(struct hfsmount *hfsmp, int waitfor, int altflush);
static int hfs_getmountpoint(struct vnode *vp, struct hfsmount **hfsmpp);
static int hfs_init(struct vfsconf *vfsp);
static int hfs_vfs_root(struct mount *mp, struct vnode **vpp, vfs_context_t context);
static int hfs_quotactl(struct mount *, int, uid_t, caddr_t, vfs_context_t context);
static int hfs_start(struct mount *mp, int flags, vfs_context_t context);
static int hfs_vptofh(struct vnode *vp, int *fhlenp, unsigned char *fhp, vfs_context_t context);
static int hfs_file_extent_overlaps(struct hfsmount *hfsmp, u_int32_t allocLimit, struct HFSPlusCatalogFile *filerec);
static int hfs_journal_replay(vnode_t devvp, vfs_context_t context);
static int hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t allocLimit, u_int32_t reclaimblks, vfs_context_t context);

/* Red-black allocator tree setup/teardown (see CONFIG_HFS_ALLOC_RBTREE paths). */
void hfs_initialize_allocator (struct hfsmount *hfsmp);
int hfs_teardown_allocator (struct hfsmount *hfsmp);

/* VFS entry points implemented in this file; non-static for external callers. */
int hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t context);
int hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, int journal_replay_only, vfs_context_t context);
int hfs_reload(struct mount *mp);
int hfs_statfs(struct mount *mp, register struct vfsstatfs *sbp, vfs_context_t context);
int hfs_sync(struct mount *mp, int waitfor, vfs_context_t context);
int hfs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp,
               user_addr_t newp, size_t newlen, vfs_context_t context);
int hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context);
169 /*
170 * Called by vfs_mountroot when mounting HFS Plus as root.
171 */
172
173 int
174 hfs_mountroot(mount_t mp, vnode_t rvp, vfs_context_t context)
175 {
176 struct hfsmount *hfsmp;
177 ExtendedVCB *vcb;
178 struct vfsstatfs *vfsp;
179 int error;
180
181 if ((error = hfs_mountfs(rvp, mp, NULL, 0, context))) {
182 if (HFS_MOUNT_DEBUG) {
183 printf("hfs_mountroot: hfs_mountfs returned %d, rvp (%p) name (%s) \n",
184 error, rvp, (rvp->v_name ? rvp->v_name : "unknown device"));
185 }
186 return (error);
187 }
188
189 /* Init hfsmp */
190 hfsmp = VFSTOHFS(mp);
191
192 hfsmp->hfs_uid = UNKNOWNUID;
193 hfsmp->hfs_gid = UNKNOWNGID;
194 hfsmp->hfs_dir_mask = (S_IRWXU | S_IRGRP|S_IXGRP | S_IROTH|S_IXOTH); /* 0755 */
195 hfsmp->hfs_file_mask = (S_IRWXU | S_IRGRP|S_IXGRP | S_IROTH|S_IXOTH); /* 0755 */
196
197 /* Establish the free block reserve. */
198 vcb = HFSTOVCB(hfsmp);
199 vcb->reserveBlocks = ((u_int64_t)vcb->totalBlocks * HFS_MINFREE) / 100;
200 vcb->reserveBlocks = MIN(vcb->reserveBlocks, HFS_MAXRESERVE / vcb->blockSize);
201
202 vfsp = vfs_statfs(mp);
203 (void)hfs_statfs(mp, vfsp, NULL);
204
205 return (0);
206 }
207
208
209 /*
210 * VFS Operations.
211 *
212 * mount system call
213 */
214
/*
 * hfs_mount: VFS mount entry point for HFS/HFS+.
 *
 * Handles both fresh mounts and MNT_UPDATE requests.  Update requests
 * cover four distinct operations:
 *   - MNT_RELOAD: re-read in-core metadata after an fsck (read-only only);
 *   - downgrade from read-write to read-only (flush everything, close
 *     the journal, mark the volume cleanly unmounted);
 *   - upgrade from read-only to read-write (reopen the journal, mark the
 *     volume dirty, re-establish hardlink directories and hot files);
 *   - plain parameter changes, delegated to hfs_changefs().
 *
 * mp      - the mount point being (re)mounted
 * devvp   - device vnode (used only for fresh mounts)
 * data    - user-space pointer to a struct hfs_mount_args
 * context - caller's VFS context
 *
 * Returns 0 on success or an errno value.
 */
int
hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t context)
{
	struct proc *p = vfs_context_proc(context);
	struct hfsmount *hfsmp = NULL;
	struct hfs_mount_args args;
	int retval = E_NONE;
	u_int32_t cmdflags;

	/* Copy the mount arguments in from user space up front. */
	if ((retval = copyin(data, (caddr_t)&args, sizeof(args)))) {
		if (HFS_MOUNT_DEBUG) {
			printf("hfs_mount: copyin returned %d for fs\n", retval);
		}
		return (retval);
	}
	cmdflags = (u_int32_t)vfs_flags(mp) & MNT_CMDFLAGS;
	if (cmdflags & MNT_UPDATE) {
		hfsmp = VFSTOHFS(mp);

		/* Reload incore data after an fsck. */
		if (cmdflags & MNT_RELOAD) {
			if (vfs_isrdonly(mp)) {
				int error = hfs_reload(mp);
				if (error && HFS_MOUNT_DEBUG) {
					printf("hfs_mount: hfs_reload returned %d on %s \n", error, hfsmp->vcbVN);
				}
				return error;
			}
			else {
				/* Reloading a live read-write volume would race writers. */
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: MNT_RELOAD not supported on rdwr filesystem %s\n", hfsmp->vcbVN);
				}
				return (EINVAL);
			}
		}

		/* Change to a read-only file system. */
		if (((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) &&
		    vfs_isrdonly(mp)) {
			int flags;

			/* Set flag to indicate that a downgrade to read-only
			 * is in progress and therefore block any further
			 * modifications to the file system.
			 */
			hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);
			hfsmp->hfs_flags |= HFS_RDONLY_DOWNGRADE;
			hfsmp->hfs_downgrading_proc = current_thread();
			hfs_unlock_global (hfsmp);

			/* use VFS_SYNC to push out System (btree) files */
			retval = VFS_SYNC(mp, MNT_WAIT, context);
			if (retval && ((cmdflags & MNT_FORCE) == 0)) {
				/* Sync failed and the caller didn't force: undo the downgrade. */
				hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE;
				hfsmp->hfs_downgrading_proc = NULL;
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: VFS_SYNC returned %d during b-tree sync of %s \n", retval, hfsmp->vcbVN);
				}
				goto out;
			}

			flags = WRITECLOSE;
			if (cmdflags & MNT_FORCE)
				flags |= FORCECLOSE;

			if ((retval = hfs_flushfiles(mp, flags, p))) {
				/* Could not close out all writable files: undo the downgrade. */
				hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE;
				hfsmp->hfs_downgrading_proc = NULL;
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: hfs_flushfiles returned %d on %s \n", retval, hfsmp->vcbVN);
				}
				goto out;
			}

			/* mark the volume cleanly unmounted */
			hfsmp->vcbAtrb |= kHFSVolumeUnmountedMask;
			retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
			hfsmp->hfs_flags |= HFS_READ_ONLY;

			/* also get the volume bitmap blocks */
			if (!retval) {
				if (vnode_mount(hfsmp->hfs_devvp) == mp) {
					/* Device vnode belongs to us: use our own fsync path. */
					retval = hfs_fsync(hfsmp->hfs_devvp, MNT_WAIT, 0, p);
				} else {
					/* Device vnode belongs to another mount: take an iocount
					 * around the generic VNOP_FSYNC. */
					vnode_get(hfsmp->hfs_devvp);
					retval = VNOP_FSYNC(hfsmp->hfs_devvp, MNT_WAIT, context);
					vnode_put(hfsmp->hfs_devvp);
				}
			}
			if (retval) {
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: FSYNC on devvp returned %d for fs %s\n", retval, hfsmp->vcbVN);
				}
				/* Flush failed: back out both the downgrade markers and
				 * the HFS_READ_ONLY flag set above. */
				hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE;
				hfsmp->hfs_downgrading_proc = NULL;
				hfsmp->hfs_flags &= ~HFS_READ_ONLY;
				goto out;
			}
			if (hfsmp->jnl) {
				hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);

				journal_close(hfsmp->jnl);
				hfsmp->jnl = NULL;

				// Note: we explicitly don't want to shutdown
				//       access to the jvp because we may need
				//       it later if we go back to being read-write.

				hfs_unlock_global (hfsmp);
			}

#if CONFIG_HFS_ALLOC_RBTREE
			(void) hfs_teardown_allocator(hfsmp);
#endif
			hfsmp->hfs_downgrading_proc = NULL;
		}

		/* Change to a writable file system. */
		if (vfs_iswriteupgrade(mp)) {
#if CONFIG_HFS_ALLOC_RBTREE
			thread_t allocator_thread;
#endif

			/*
			 * On inconsistent disks, do not allow read-write mount
			 * unless it is the boot volume being mounted.
			 */
			if (!(vfs_flags(mp) & MNT_ROOTFS) &&
					(hfsmp->vcbAtrb & kHFSVolumeInconsistentMask)) {
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: attempting to mount inconsistent non-root volume %s\n", (hfsmp->vcbVN));
				}
				retval = EINVAL;
				goto out;
			}

			// If the journal was shut-down previously because we were
			// asked to be read-only, let's start it back up again now

			if (   (HFSTOVCB(hfsmp)->vcbAtrb & kHFSVolumeJournaledMask)
			    && hfsmp->jnl == NULL
			    && hfsmp->jvp != NULL) {
				int jflags;

				if (hfsmp->hfs_flags & HFS_NEED_JNL_RESET) {
					jflags = JOURNAL_RESET;
				} else {
					jflags = 0;
				}

				hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);

				/* Journal offset is relative to the embedded HFS+ volume start. */
				hfsmp->jnl = journal_open(hfsmp->jvp,
						(hfsmp->jnl_start * HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset,
						hfsmp->jnl_size,
						hfsmp->hfs_devvp,
						hfsmp->hfs_logical_block_size,
						jflags,
						0,
						hfs_sync_metadata, hfsmp->hfs_mp);

				/*
				 * Set up the trim callback function so that we can add
				 * recently freed extents to the free extent cache once
				 * the transaction that freed them is written to the
				 * journal on disk.
				 */
				if (hfsmp->jnl)
					journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);

				hfs_unlock_global (hfsmp);

				if (hfsmp->jnl == NULL) {
					if (HFS_MOUNT_DEBUG) {
						printf("hfs_mount: journal_open == NULL; couldn't be opened on %s \n", (hfsmp->vcbVN));
					}
					retval = EINVAL;
					goto out;
				} else {
					hfsmp->hfs_flags &= ~HFS_NEED_JNL_RESET;
				}

			}

			/* See if we need to erase unused Catalog nodes due to <rdar://problem/6947811>. */
			retval = hfs_erase_unused_nodes(hfsmp);
			if (retval != E_NONE) {
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: hfs_erase_unused_nodes returned %d for fs %s\n", retval, hfsmp->vcbVN);
				}
				goto out;
			}

			/* If this mount point was downgraded from read-write
			 * to read-only, clear that information as we are now
			 * moving back to read-write.
			 */
			hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE;
			hfsmp->hfs_downgrading_proc = NULL;

			/* mark the volume dirty (clear clean unmount bit) */
			hfsmp->vcbAtrb &= ~kHFSVolumeUnmountedMask;

			retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
			if (retval != E_NONE) {
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: hfs_flushvolumeheader returned %d for fs %s\n", retval, hfsmp->vcbVN);
				}
				goto out;
			}

			/* Only clear HFS_READ_ONLY after a successful write */
			hfsmp->hfs_flags &= ~HFS_READ_ONLY;


			if (!(hfsmp->hfs_flags & (HFS_READ_ONLY | HFS_STANDARD))) {
				/* Setup private/hidden directories for hardlinks. */
				hfs_privatedir_init(hfsmp, FILE_HARDLINKS);
				hfs_privatedir_init(hfsmp, DIR_HARDLINKS);

				/* Remove any leftover unlinked-but-open files from a crash. */
				hfs_remove_orphans(hfsmp);

				/*
				 * Allow hot file clustering if conditions allow.
				 */
				if ((hfsmp->hfs_flags & HFS_METADATA_ZONE) &&
				    ((hfsmp->hfs_flags & HFS_SSD) == 0)) {
					(void) hfs_recording_init(hfsmp);
				}
				/* Force ACLs on HFS+ file systems. */
				if (vfs_extendedsecurity(HFSTOVFS(hfsmp)) == 0) {
					vfs_setextendedsecurity(HFSTOVFS(hfsmp));
				}
			}

#if CONFIG_HFS_ALLOC_RBTREE
			/*
			 * Like the normal mount case, we need to handle creation of the allocation red-black tree
			 * if we're upgrading from read-only to read-write.
			 *
			 * We spawn a thread to create the pair of red-black trees for this volume.
			 * However, in so doing, we must be careful to ensure that if this thread is still
			 * running after mount has finished, it doesn't interfere with an unmount. Specifically,
			 * we'll need to set a bit that indicates we're in progress building the trees here.
			 * Unmount will check for this bit, and then if it's set, mark a corresponding bit that
			 * notifies the tree generation code that an unmount is waiting.  Also, mark the extent
			 * tree flags that the allocator is enabled for use before we spawn the thread that will start
			 * scanning the RB tree.
			 *
			 * Only do this if we're operating on a read-write mount (we wouldn't care for read-only),
			 * which has not previously encountered a bad error on the red-black tree code.  Also, don't
			 * try to re-build a tree that already exists.
			 */

			if (hfsmp->extent_tree_flags == 0) {
				hfsmp->extent_tree_flags |= (HFS_ALLOC_TREEBUILD_INFLIGHT | HFS_ALLOC_RB_ENABLED);
				/* Initialize EOF counter so that the thread can assume it started at initial values */
				hfsmp->offset_block_end = 0;

				InitTree(hfsmp);

				kernel_thread_start ((thread_continue_t) hfs_initialize_allocator , hfsmp, &allocator_thread);
				thread_deallocate(allocator_thread);
			}

#endif
		}

		/* Update file system parameters. */
		retval = hfs_changefs(mp, &args);
		if (retval && HFS_MOUNT_DEBUG) {
			printf("hfs_mount: hfs_changefs returned %d for %s\n", retval, hfsmp->vcbVN);
		}

	} else /* not an update request */ {

		/* Set the mount flag to indicate that we support volfs */
		vfs_setflags(mp, (u_int64_t)((unsigned int)MNT_DOVOLFS));

		retval = hfs_mountfs(devvp, mp, &args, 0, context);
		if (retval && HFS_MOUNT_DEBUG) {
			printf("hfs_mount: hfs_mountfs returned %d\n", retval);
		}
#if CONFIG_PROTECT
		/*
		 * If above mount call was successful, and this mount is content protection
		 * enabled, then verify the on-disk EA on the root to ensure that the filesystem
		 * is of a suitable vintage to allow the mount to proceed.
		 */
		if ((retval == 0) && (cp_fs_protected (mp))) {
			int err = 0;
			struct cp_root_xattr xattr;
			bzero (&xattr, sizeof(struct cp_root_xattr));
			hfsmp = vfs_fsprivate(mp);

			/* go get the EA to get the version information */
			err = cp_getrootxattr (hfsmp, &xattr);
			/* If there was no EA there, then write one out. */
			if (err == ENOATTR) {
				bzero(&xattr, sizeof(struct cp_root_xattr));
				xattr.major_version = CP_CURRENT_MAJOR_VERS;
				xattr.minor_version = CP_CURRENT_MINOR_VERS;
				xattr.flags = 0;

				err = cp_setrootxattr (hfsmp, &xattr);
			}
			/*
			 * For any other error, including having an out of date CP version in the
			 * EA, or for an error out of cp_setrootxattr, deny the mount
			 * and do not proceed further.
			 */
			if (err || xattr.major_version != CP_CURRENT_MAJOR_VERS) {
				/* Deny the mount and tear down. */
				retval = EPERM;
				(void) hfs_unmount (mp, MNT_FORCE, context);
			}
		}
#endif
	}
out:
	/* On success refresh the cached statfs data before returning. */
	if (retval == 0) {
		(void)hfs_statfs(mp, vfs_statfs(mp), context);
	}
	return (retval);
}
540
541
/*
 * Context handed from hfs_changefs() to hfs_changefs_callback()
 * through vnode_iterate(); tells the callback which fixups to apply.
 */
struct hfs_changefs_cargs {
	struct hfsmount *hfsmp;		/* mount whose parameters changed */
	int		namefix;	/* non-zero: text encoding changed, fix names */
	int		permfix;	/* non-zero: default uid/gid/mask changed */
	int		permswitch;	/* non-zero: MNT_UNKNOWNPERMISSIONS toggled */
};
548
/*
 * Per-vnode fixup applied after mount parameters change; invoked for
 * each vnode on the mount via vnode_iterate() from hfs_changefs().
 *
 * Re-reads the cnode's catalog record and, depending on the flags in
 * the hfs_changefs_cargs context, refreshes on-disk ownership/mode
 * and/or re-derives the name under the new text encoding.
 *
 * Always returns VNODE_RETURNED so iteration continues.
 */
static int
hfs_changefs_callback(struct vnode *vp, void *cargs)
{
	ExtendedVCB *vcb;
	struct cnode *cp;
	struct cat_desc cndesc;
	struct cat_attr cnattr;
	struct hfs_changefs_cargs *args;
	int lockflags;
	int error;

	args = (struct hfs_changefs_cargs *)cargs;

	cp = VTOC(vp);
	vcb = HFSTOVCB(args->hfsmp);

	/* Catalog lookup requires the catalog B-tree lock (shared is enough). */
	lockflags = hfs_systemfile_lock(args->hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
	error = cat_lookup(args->hfsmp, &cp->c_desc, 0, &cndesc, &cnattr, NULL, NULL);
	hfs_systemfile_unlock(args->hfsmp, lockflags);
	if (error) {
		/*
		 * If we couldn't find this guy skip to the next one
		 */
		if (args->namefix)
			cache_purge(vp);

		return (VNODE_RETURNED);
	}
	/*
	 * Get the real uid/gid and perm mask from disk.
	 */
	if (args->permswitch || args->permfix) {
		cp->c_uid = cnattr.ca_uid;
		cp->c_gid = cnattr.ca_gid;
		cp->c_mode = cnattr.ca_mode;
	}
	/*
	 * If we're switching name converters then...
	 *   Remove the existing entry from the namei cache.
	 *   Update name to one based on new encoder.
	 */
	if (args->namefix) {
		cache_purge(vp);
		/* replace_desc() takes ownership of cndesc; no release needed here. */
		replace_desc(cp, &cndesc);

		if (cndesc.cd_cnid == kHFSRootFolderID) {
			/* Root folder's name doubles as the volume name. */
			strlcpy((char *)vcb->vcbVN, (const char *)cp->c_desc.cd_nameptr, NAME_MAX+1);
			cp->c_desc.cd_encoding = args->hfsmp->hfs_encoding;
		}
	} else {
		/* Descriptor not consumed; release the lookup's allocation. */
		cat_releasedesc(&cndesc);
	}
	return (VNODE_RETURNED);
}
603
/*
 * Change fs mount parameters (MNT_UPDATE path of hfs_mount).
 *
 * Applies changes from the user-supplied hfs_mount_args: timezone,
 * default uid/gid/umask, unknown-permissions mode, and (HFS standard
 * only) the text encoding.  If anything that affects per-vnode state
 * changed, every active vnode is visited via hfs_changefs_callback().
 *
 * Returns 0 on success or an errno value (EINVAL if the root filesystem
 * is asked to use unknown permissions, or a converter lookup failure).
 */
static int
hfs_changefs(struct mount *mp, struct hfs_mount_args *args)
{
	int retval = 0;
	int namefix, permfix, permswitch;
	struct hfsmount *hfsmp;
	ExtendedVCB *vcb;
	hfs_to_unicode_func_t get_unicode_func;
	unicode_to_hfs_func_t get_hfsname_func;
	u_int32_t old_encoding = 0;
	struct hfs_changefs_cargs cargs;
	u_int32_t mount_flags;

	hfsmp = VFSTOHFS(mp);
	vcb = HFSTOVCB(hfsmp);
	mount_flags = (unsigned int)vfs_flags(mp);

	/* Advertise that a changefs is in progress (cleared at exit). */
	hfsmp->hfs_flags |= HFS_IN_CHANGEFS;

	/* permswitch is set when the HFS_UNKNOWN_PERMS state is toggling. */
	permswitch = (((hfsmp->hfs_flags & HFS_UNKNOWN_PERMS) &&
	               ((mount_flags & MNT_UNKNOWNPERMISSIONS) == 0)) ||
	              (((hfsmp->hfs_flags & HFS_UNKNOWN_PERMS) == 0) &&
	               (mount_flags & MNT_UNKNOWNPERMISSIONS)));

	/* The root filesystem must operate with actual permissions: */
	if (permswitch && (mount_flags & MNT_ROOTFS) && (mount_flags & MNT_UNKNOWNPERMISSIONS)) {
		vfs_clearflags(mp, (u_int64_t)((unsigned int)MNT_UNKNOWNPERMISSIONS));	/* Just say "No". */
		retval = EINVAL;
		goto exit;
	}
	if (mount_flags & MNT_UNKNOWNPERMISSIONS)
		hfsmp->hfs_flags |= HFS_UNKNOWN_PERMS;
	else
		hfsmp->hfs_flags &= ~HFS_UNKNOWN_PERMS;

	namefix = permfix = 0;

	/*
	 * Tracking of hot files requires up-to-date access times.  So if
	 * access time updates are disabled, we must also disable hot files.
	 */
	if (mount_flags & MNT_NOATIME) {
		(void) hfs_recording_suspend(hfsmp);
	}

	/* Change the timezone (Note: this affects all hfs volumes and hfs+ volume create dates) */
	if (args->hfs_timezone.tz_minuteswest != VNOVAL) {
		gTimeZone = args->hfs_timezone;
	}

	/* Change the default uid, gid and/or mask */
	if ((args->hfs_uid != (uid_t)VNOVAL) && (hfsmp->hfs_uid != args->hfs_uid)) {
		hfsmp->hfs_uid = args->hfs_uid;
		if (vcb->vcbSigWord == kHFSPlusSigWord)
			++permfix;
	}
	if ((args->hfs_gid != (gid_t)VNOVAL) && (hfsmp->hfs_gid != args->hfs_gid)) {
		hfsmp->hfs_gid = args->hfs_gid;
		if (vcb->vcbSigWord == kHFSPlusSigWord)
			++permfix;
	}
	if (args->hfs_mask != (mode_t)VNOVAL) {
		if (hfsmp->hfs_dir_mask != (args->hfs_mask & ALLPERMS)) {
			hfsmp->hfs_dir_mask = args->hfs_mask & ALLPERMS;
			hfsmp->hfs_file_mask = args->hfs_mask & ALLPERMS;
			/* NOXONFILES strips execute bits from files (not dirs). */
			if ((args->flags != VNOVAL) && (args->flags & HFSFSMNT_NOXONFILES))
				hfsmp->hfs_file_mask = (args->hfs_mask & DEFFILEMODE);
			if (vcb->vcbSigWord == kHFSPlusSigWord)
				++permfix;
		}
	}

	/* Change the hfs encoding value (hfs only) */
	if ((vcb->vcbSigWord == kHFSSigWord) &&
	    (args->hfs_encoding != (u_int32_t)VNOVAL) &&
	    (hfsmp->hfs_encoding != args->hfs_encoding)) {

		retval = hfs_getconverter(args->hfs_encoding, &get_unicode_func, &get_hfsname_func);
		if (retval)
			goto exit;

		/*
		 * Connect the new hfs_get_unicode converter but leave
		 * the old hfs_get_hfsname converter in place so that
		 * we can lookup existing vnodes to get their correctly
		 * encoded names.
		 *
		 * When we're all finished, we can then connect the new
		 * hfs_get_hfsname converter and release our interest
		 * in the old converters.
		 */
		hfsmp->hfs_get_unicode = get_unicode_func;
		old_encoding = hfsmp->hfs_encoding;
		hfsmp->hfs_encoding = args->hfs_encoding;
		++namefix;
	}

	/* Nothing that affects per-vnode state changed; we're done. */
	if (!(namefix || permfix || permswitch))
		goto exit;

	/* XXX 3762912 hack to support HFS filesystem 'owner' */
	if (permfix)
		vfs_setowner(mp,
		    hfsmp->hfs_uid == UNKNOWNUID ? KAUTH_UID_NONE : hfsmp->hfs_uid,
		    hfsmp->hfs_gid == UNKNOWNGID ? KAUTH_GID_NONE : hfsmp->hfs_gid);

	/*
	 * For each active vnode fix things that changed
	 *
	 * Note that we can visit a vnode more than once
	 * and we can race with fsync.
	 *
	 * hfs_changefs_callback will be called for each vnode
	 * hung off of this mount point
	 *
	 * The vnode will be properly referenced and unreferenced
	 * around the callback
	 */
	cargs.hfsmp = hfsmp;
	cargs.namefix = namefix;
	cargs.permfix = permfix;
	cargs.permswitch = permswitch;

	vnode_iterate(mp, 0, hfs_changefs_callback, (void *)&cargs);

	/*
	 * If we're switching name converters we can now
	 * connect the new hfs_get_hfsname converter and
	 * release our interest in the old converters.
	 */
	if (namefix) {
		hfsmp->hfs_get_hfsname = get_hfsname_func;
		vcb->volumeNameEncodingHint = args->hfs_encoding;
		(void) hfs_relconverter(old_encoding);
	}
exit:
	hfsmp->hfs_flags &= ~HFS_IN_CHANGEFS;
	return (retval);
}
744
745
/*
 * Context handed from hfs_reload() to hfs_reload_callback()
 * through vnode_iterate().
 */
struct hfs_reload_cargs {
	struct hfsmount *hfsmp;		/* mount being reloaded */
	int		error;		/* first error hit by the callback, if any */
};
750
751 static int
752 hfs_reload_callback(struct vnode *vp, void *cargs)
753 {
754 struct cnode *cp;
755 struct hfs_reload_cargs *args;
756 int lockflags;
757
758 args = (struct hfs_reload_cargs *)cargs;
759 /*
760 * flush all the buffers associated with this node
761 */
762 (void) buf_invalidateblks(vp, 0, 0, 0);
763
764 cp = VTOC(vp);
765 /*
766 * Remove any directory hints
767 */
768 if (vnode_isdir(vp))
769 hfs_reldirhints(cp, 0);
770
771 /*
772 * Re-read cnode data for all active vnodes (non-metadata files).
773 */
774 if (!vnode_issystem(vp) && !VNODE_IS_RSRC(vp) && (cp->c_fileid >= kHFSFirstUserCatalogNodeID)) {
775 struct cat_fork *datafork;
776 struct cat_desc desc;
777
778 datafork = cp->c_datafork ? &cp->c_datafork->ff_data : NULL;
779
780 /* lookup by fileID since name could have changed */
781 lockflags = hfs_systemfile_lock(args->hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
782 args->error = cat_idlookup(args->hfsmp, cp->c_fileid, 0, &desc, &cp->c_attr, datafork);
783 hfs_systemfile_unlock(args->hfsmp, lockflags);
784 if (args->error) {
785 return (VNODE_RETURNED_DONE);
786 }
787
788 /* update cnode's catalog descriptor */
789 (void) replace_desc(cp, &desc);
790 }
791 return (VNODE_RETURNED);
792 }
793
794 /*
795 * Reload all incore data for a filesystem (used after running fsck on
796 * the root filesystem and finding things to fix). The filesystem must
797 * be mounted read-only.
798 *
799 * Things to do to update the mount:
800 * invalidate all cached meta-data.
801 * invalidate all inactive vnodes.
802 * invalidate all cached file data.
803 * re-read volume header from disk.
804 * re-load meta-file info (extents, file size).
805 * re-load B-tree header data.
806 * re-read cnode data for all active vnodes.
807 */
/*
 * Reload all in-core metadata for an HFS+ mount (see block comment above).
 * The mount must be read-only; HFS standard volumes are rejected.
 *
 * Returns 0 on success or an errno (EINVAL for HFS standard, EIO if the
 * re-read volume header fails sanity checks, or an error from the
 * per-vnode iteration / B-tree reload / catalog lookup).
 */
int
hfs_reload(struct mount *mountp)
{
	register struct vnode *devvp;
	struct buf *bp;
	int error, i;
	struct hfsmount *hfsmp;
	struct HFSPlusVolumeHeader *vhp;
	ExtendedVCB *vcb;
	struct filefork *forkp;
	struct cat_desc cndesc;
	struct hfs_reload_cargs args;
	daddr64_t priIDSector;

	hfsmp = VFSTOHFS(mountp);
	vcb = HFSTOVCB(hfsmp);

	if (vcb->vcbSigWord == kHFSSigWord)
		return (EINVAL);	/* rooting from HFS is not supported! */

	/*
	 * Invalidate all cached meta-data.
	 */
	devvp = hfsmp->hfs_devvp;
	if (buf_invalidateblks(devvp, 0, 0, 0))
		panic("hfs_reload: dirty1");

	args.hfsmp = hfsmp;
	args.error = 0;
	/*
	 * hfs_reload_callback will be called for each vnode
	 * hung off of this mount point that can't be recycled...
	 * vnode_iterate will recycle those that it can (the VNODE_RELOAD option)
	 * the vnode will be in an 'unbusy' state (VNODE_WAIT) and
	 * properly referenced and unreferenced around the callback
	 */
	vnode_iterate(mountp, VNODE_RELOAD | VNODE_WAIT, hfs_reload_callback, (void *)&args);

	if (args.error)
		return (args.error);

	/*
	 * Re-read VolumeHeader from disk.
	 */
	priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
			HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size));

	error = (int)buf_meta_bread(hfsmp->hfs_devvp,
			HFS_PHYSBLK_ROUNDDOWN(priIDSector, hfsmp->hfs_log_per_phys),
			hfsmp->hfs_physical_block_size, NOCRED, &bp);
	if (error) {
		if (bp != NULL)
			buf_brelse(bp);
		return (error);
	}

	vhp = (HFSPlusVolumeHeader *) (buf_dataptr(bp) + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));

	/* Do a quick sanity check */
	if ((SWAP_BE16(vhp->signature) != kHFSPlusSigWord &&
	     SWAP_BE16(vhp->signature) != kHFSXSigWord) ||
	    (SWAP_BE16(vhp->version) != kHFSPlusVersion &&
	     SWAP_BE16(vhp->version) != kHFSXVersion) ||
	    SWAP_BE32(vhp->blockSize) != vcb->blockSize) {
		buf_brelse(bp);
		return (EIO);
	}

	/* Copy the (big-endian on-disk) header fields into the in-core VCB. */
	vcb->vcbLsMod = to_bsd_time(SWAP_BE32(vhp->modifyDate));
	vcb->vcbAtrb = SWAP_BE32 (vhp->attributes);
	vcb->vcbJinfoBlock = SWAP_BE32(vhp->journalInfoBlock);
	vcb->vcbClpSiz = SWAP_BE32 (vhp->rsrcClumpSize);
	vcb->vcbNxtCNID = SWAP_BE32 (vhp->nextCatalogID);
	vcb->vcbVolBkUp = to_bsd_time(SWAP_BE32(vhp->backupDate));
	vcb->vcbWrCnt = SWAP_BE32 (vhp->writeCount);
	vcb->vcbFilCnt = SWAP_BE32 (vhp->fileCount);
	vcb->vcbDirCnt = SWAP_BE32 (vhp->folderCount);
	HFS_UPDATE_NEXT_ALLOCATION(vcb, SWAP_BE32 (vhp->nextAllocation));
	vcb->totalBlocks = SWAP_BE32 (vhp->totalBlocks);
	vcb->freeBlocks = SWAP_BE32 (vhp->freeBlocks);
	vcb->encodingsBitmap = SWAP_BE64 (vhp->encodingsBitmap);
	bcopy(vhp->finderInfo, vcb->vcbFndrInfo, sizeof(vhp->finderInfo));
	vcb->localCreateDate = SWAP_BE32 (vhp->createDate); /* hfs+ create date is in local time */

	/*
	 * Re-load meta-file vnode data (extent info, file size, etc).
	 */
	forkp = VTOF((struct vnode *)vcb->extentsRefNum);
	for (i = 0; i < kHFSPlusExtentDensity; i++) {
		forkp->ff_extents[i].startBlock =
			SWAP_BE32 (vhp->extentsFile.extents[i].startBlock);
		forkp->ff_extents[i].blockCount =
			SWAP_BE32 (vhp->extentsFile.extents[i].blockCount);
	}
	forkp->ff_size = SWAP_BE64 (vhp->extentsFile.logicalSize);
	forkp->ff_blocks = SWAP_BE32 (vhp->extentsFile.totalBlocks);
	forkp->ff_clumpsize = SWAP_BE32 (vhp->extentsFile.clumpSize);


	forkp = VTOF((struct vnode *)vcb->catalogRefNum);
	for (i = 0; i < kHFSPlusExtentDensity; i++) {
		forkp->ff_extents[i].startBlock =
			SWAP_BE32 (vhp->catalogFile.extents[i].startBlock);
		forkp->ff_extents[i].blockCount =
			SWAP_BE32 (vhp->catalogFile.extents[i].blockCount);
	}
	forkp->ff_size = SWAP_BE64 (vhp->catalogFile.logicalSize);
	forkp->ff_blocks = SWAP_BE32 (vhp->catalogFile.totalBlocks);
	forkp->ff_clumpsize = SWAP_BE32 (vhp->catalogFile.clumpSize);

	/* Attributes file is optional (hfs_attribute_vp may be NULL). */
	if (hfsmp->hfs_attribute_vp) {
		forkp = VTOF(hfsmp->hfs_attribute_vp);
		for (i = 0; i < kHFSPlusExtentDensity; i++) {
			forkp->ff_extents[i].startBlock =
				SWAP_BE32 (vhp->attributesFile.extents[i].startBlock);
			forkp->ff_extents[i].blockCount =
				SWAP_BE32 (vhp->attributesFile.extents[i].blockCount);
		}
		forkp->ff_size = SWAP_BE64 (vhp->attributesFile.logicalSize);
		forkp->ff_blocks = SWAP_BE32 (vhp->attributesFile.totalBlocks);
		forkp->ff_clumpsize = SWAP_BE32 (vhp->attributesFile.clumpSize);
	}

	forkp = VTOF((struct vnode *)vcb->allocationsRefNum);
	for (i = 0; i < kHFSPlusExtentDensity; i++) {
		forkp->ff_extents[i].startBlock =
			SWAP_BE32 (vhp->allocationFile.extents[i].startBlock);
		forkp->ff_extents[i].blockCount =
			SWAP_BE32 (vhp->allocationFile.extents[i].blockCount);
	}
	forkp->ff_size = SWAP_BE64 (vhp->allocationFile.logicalSize);
	forkp->ff_blocks = SWAP_BE32 (vhp->allocationFile.totalBlocks);
	forkp->ff_clumpsize = SWAP_BE32 (vhp->allocationFile.clumpSize);

	buf_brelse(bp);
	vhp = NULL;

	/*
	 * Re-load B-tree header data
	 */
	forkp = VTOF((struct vnode *)vcb->extentsRefNum);
	if ( (error = MacToVFSError( BTReloadData((FCB*)forkp) )) )
		return (error);

	forkp = VTOF((struct vnode *)vcb->catalogRefNum);
	if ( (error = MacToVFSError( BTReloadData((FCB*)forkp) )) )
		return (error);

	if (hfsmp->hfs_attribute_vp) {
		forkp = VTOF(hfsmp->hfs_attribute_vp);
		if ( (error = MacToVFSError( BTReloadData((FCB*)forkp) )) )
			return (error);
	}

	/* Reload the volume name */
	if ((error = cat_idlookup(hfsmp, kHFSRootFolderID, 0, &cndesc, NULL, NULL)))
		return (error);
	vcb->volumeNameEncodingHint = cndesc.cd_encoding;
	bcopy(cndesc.cd_nameptr, vcb->vcbVN, min(255, cndesc.cd_namelen));
	cat_releasedesc(&cndesc);

	/* Re-establish private/hidden directories. */
	hfs_privatedir_init(hfsmp, FILE_HARDLINKS);
	hfs_privatedir_init(hfsmp, DIR_HARDLINKS);

	/* In case any volume information changed to trigger a notification */
	hfs_generate_volume_notifications(hfsmp);

	return (0);
}
978
979
980
/*
 * hfs_syncer - deferred metadata-sync callback for a mounted HFS volume.
 *
 * arg0 is the volume's struct hfsmount; the second parameter is unused.
 * This runs from a thread_call (see the thread_call_enter_delayed() call
 * below, which reschedules onto hfsmp->hfs_syncer).  It either:
 *   1) throttles and flushes when too much write I/O is pending,
 *   2) flushes the journal (or does a full hfs_sync for non-journaled
 *      volumes) when enough idle/elapsed time has passed, or
 *   3) reschedules itself for later and returns early.
 * On completion (cases 1 and 2) it decrements hfs_sync_scheduled and
 * hfs_sync_incomplete and wakes anyone sleeping on hfs_sync_incomplete;
 * case 3 intentionally skips those decrements because the timer is
 * still armed.
 */
static void
hfs_syncer(void *arg0, void *unused)
{
#pragma unused(unused)

    struct hfsmount *hfsmp = arg0;
    clock_sec_t secs;
    clock_usec_t usecs;
    uint32_t delay = HFS_META_DELAY;    /* reschedule interval (milliseconds scale) */
    uint64_t now;                       /* current calendar time in microseconds */
    /* NOTE(review): no_max is written below but never read in this
     * function; presumably consumed elsewhere or vestigial -- confirm. */
    static int no_max=1;

    clock_get_calendar_microtime(&secs, &usecs);
    now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs;

    //
    // If the amount of pending writes is more than our limit, wait
    // for 2/3 of it to drain and then flush the journal.
    //
    if (hfsmp->hfs_mp->mnt_pending_write_size > hfsmp->hfs_max_pending_io) {
        int counter=0;
        uint64_t pending_io, start, rate = 0;

        no_max = 0;

        hfs_start_transaction(hfsmp);   // so we hold off any new i/o's

        pending_io = hfsmp->hfs_mp->mnt_pending_write_size;

        clock_get_calendar_microtime(&secs, &usecs);
        start = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs;

        /* Sleep in 10-tick slices until pending I/O drops below 1/3 of
         * what it was, bounded at 500 iterations so we can't hang here. */
        while(hfsmp->hfs_mp->mnt_pending_write_size > (pending_io/3) && counter++ < 500) {
            tsleep((caddr_t)hfsmp, PRIBIO, "hfs-wait-for-io-to-drain", 10);
        }

        if (counter >= 500) {
            printf("hfs: timed out waiting for io to drain (%lld)\n", (int64_t)hfsmp->hfs_mp->mnt_pending_write_size);
        }

        /* Journaled volumes flush the journal; otherwise fall back to a
         * full synchronous hfs_sync of the mount. */
        if (hfsmp->jnl) {
            journal_flush(hfsmp->jnl, FALSE);
        } else {
            hfs_sync(hfsmp->hfs_mp, MNT_WAIT, vfs_context_kernel());
        }

        clock_get_calendar_microtime(&secs, &usecs);
        now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs;
        hfsmp->hfs_last_sync_time = now;
        if (now != start) {
            /* Guard against division by zero when no time elapsed. */
            rate = ((pending_io * 1000000ULL) / (now - start));     // yields bytes per second
        }

        hfs_end_transaction(hfsmp);

        //
        // If a reasonable amount of time elapsed then check the
        // i/o rate.  If it's taking less than 1 second or more
        // than 2 seconds, adjust hfs_max_pending_io so that we
        // will allow about 1.5 seconds of i/o to queue up.
        //
        if (((now - start) >= 300000) && (rate != 0)) {
            /* scale = drain time in hundredths of a second */
            uint64_t scale = (pending_io * 100) / rate;

            if (scale < 100 || scale > 200) {
                // set it so that it should take about 1.5 seconds to drain
                hfsmp->hfs_max_pending_io = (rate * 150ULL) / 100ULL;
            }
        }

    } else if ( ((now - hfsmp->hfs_last_sync_time) >= 5000000ULL)      // 5 seconds
                || (((now - hfsmp->hfs_last_sync_time) >= 100000LL)    // 100 milliseconds
                    && ((now - hfsmp->hfs_last_sync_request_time) >= 100000LL)
                    && (hfsmp->hfs_active_threads == 0)
                    && (hfsmp->hfs_global_lock_nesting == 0))) {

        //
        // Flush the journal if more than 5 seconds elapsed since
        // the last sync OR we have not sync'ed recently and the
        // last sync request time was more than 100 milliseconds
        // ago and no one is in the middle of a transaction right
        // now.  Else we defer the sync and reschedule it.
        //
        if (hfsmp->jnl) {
            hfs_lock_global (hfsmp, HFS_SHARED_LOCK);

            journal_flush(hfsmp->jnl, FALSE);

            hfs_unlock_global (hfsmp);
        } else {
            hfs_sync(hfsmp->hfs_mp, MNT_WAIT, vfs_context_kernel());
        }

        clock_get_calendar_microtime(&secs, &usecs);
        now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs;
        hfsmp->hfs_last_sync_time = now;

    } else if (hfsmp->hfs_active_threads == 0) {
        /* Volume is quiet but not yet due for a flush: re-arm the timer
         * and bail out without touching the completion counters. */
        uint64_t deadline;

        clock_interval_to_deadline(delay, HFS_MILLISEC_SCALE, &deadline);
        thread_call_enter_delayed(hfsmp->hfs_syncer, deadline);

        // note: we intentionally return early here and do not
        // decrement the sync_scheduled and sync_incomplete
        // variables because we rescheduled the timer.

        return;
    }

    //
    // NOTE: we decrement these *after* we're done the journal_flush() since
    // it can take a significant amount of time and so we don't want more
    // callbacks scheduled until we're done this one.
    //
    OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_scheduled);
    OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_incomplete);
    wakeup((caddr_t)&hfsmp->hfs_sync_incomplete);
}
1100
1101
/* Forward declaration of an IOKit/BSD bridge routine; presumably reports
 * whether the named character device is ejectable media -- defined outside
 * this file. */
extern int IOBSDIsMediaEjectable( const char *cdev_name );
1103
1104 /*
1105 * Initialization code for Red-Black Tree Allocator
1106 *
1107 * This function will build the two red-black trees necessary for allocating space
1108 * from the metadata zone as well as normal allocations. Currently, we use
1109 * an advisory read to get most of the data into the buffer cache.
1110 * This function is intended to be run in a separate thread so as not to slow down mount.
1111 *
1112 */
1113
/*
 * hfs_initialize_allocator - build the red-black extent tree(s) for the
 * volume's allocation bitmap.
 *
 * Takes the allocation-file (bitmap) lock exclusively, runs GenerateTree
 * over the whole bitmap (GenerateTree may drop and re-take the lock
 * internally, but always returns with it held), and on success marks the
 * tree live via HFS_ALLOC_RB_ACTIVE.  Whether or not the build succeeded,
 * the TREEBUILD_INFLIGHT bit is cleared; on failure, any threads parked
 * on extent_tree_flags are woken so they can observe the failure.
 * Compiled out entirely unless CONFIG_HFS_ALLOC_RBTREE is enabled.
 */
void
hfs_initialize_allocator (struct hfsmount *hfsmp) {

#if CONFIG_HFS_ALLOC_RBTREE
	/*
	 * Grab the bitmap lock exclusively; journal transactions will block
	 * until we release it.
	 */
	int lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);

	/*
	 * GenerateTree expects the bitmap lock held on entry and returns with
	 * it held.  Only one tree is maintained, so the start block is
	 * implicitly 0.
	 */
	u_int32_t build_err = GenerateTree(hfsmp, hfsmp->totalBlocks, &lockflags, 1);
	if (build_err == 0) {
		/* Build succeeded: the offset tree is now usable. */
		hfsmp->extent_tree_flags |= HFS_ALLOC_RB_ACTIVE;
	}

	/*
	 * The lock is still held here (see above), so it is safe to update
	 * the in-flight bit without re-acquiring anything.
	 */
	hfsmp->extent_tree_flags &= ~HFS_ALLOC_TREEBUILD_INFLIGHT;
	if (build_err != 0) {
		/* Build failed: wake anyone waiting on the allocation bitmap lock. */
		wakeup((caddr_t)&hfsmp->extent_tree_flags);
	}

	hfs_systemfile_unlock(hfsmp, lockflags);
#else
#pragma unused (hfsmp)
#endif
}
1156
1157
1158 /*
1159 * Teardown code for the Red-Black Tree allocator.
1160 * This function consolidates the code which serializes with respect
1161 * to a thread that may be potentially still building the tree when we need to begin
1162 * tearing it down. Since the red-black tree may not be live when we enter this function
1163 * we return:
1164 * 1 -> Tree was live.
1165 * 0 -> Tree was not active at time of call.
1166 */
1167
/*
 * hfs_teardown_allocator - serialize with a possibly in-flight tree build
 * and then destroy the red-black allocation trees.
 *
 * Returns 1 if the tree was live (and has now been destroyed), 0 if no
 * tree was active at the time of the call.  When CONFIG_HFS_ALLOC_RBTREE
 * is disabled this is a no-op that returns 0.
 */
int
hfs_teardown_allocator (struct hfsmount *hfsmp) {
	int tree_was_live = 0;

#if CONFIG_HFS_ALLOC_RBTREE
	/*
	 * Take the bitmap lock exclusively, then wait out any thread that is
	 * still generating the tree: advertise our intent with
	 * TEARDOWN_INFLIGHT and sleep on extent_tree_flags until the builder
	 * clears its in-flight bit.
	 */
	int lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);

	while (hfsmp->extent_tree_flags & HFS_ALLOC_TREEBUILD_INFLIGHT) {
		hfsmp->extent_tree_flags |= HFS_ALLOC_TEARDOWN_INFLIGHT;

		lck_rw_sleep(&(VTOC(hfsmp->hfs_allocation_vp))->c_rwlock, LCK_SLEEP_EXCLUSIVE,
		             &hfsmp->extent_tree_flags, THREAD_UNINT);
	}

	if (hfs_isrbtree_active (hfsmp)) {
		tree_was_live = 1;

		/* Destroy the RB trees while the bitmap is still locked. */
		DestroyTrees(hfsmp);
	}

	hfs_systemfile_unlock(hfsmp, lockflags);
#else
#pragma unused (hfsmp)
#endif
	return tree_was_live;

}
1206
1207
/*
 * Set during hfs_mountfs() when mounting the root volume: nonzero if the
 * volume header's kHFSVolumeUnmountedMask bit indicated a clean unmount.
 * Exported read-only to userspace as vfs.generic.root_unmounted_cleanly.
 */
static int hfs_root_unmounted_cleanly = 0;

SYSCTL_DECL(_vfs_generic);
SYSCTL_INT(_vfs_generic, OID_AUTO, root_unmounted_cleanly, CTLFLAG_RD, &hfs_root_unmounted_cleanly, 0, "Root filesystem was unmounted cleanly");
1212
1213 /*
1214 * Common code for mount and mountroot
1215 */
1216 int
1217 hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
1218 int journal_replay_only, vfs_context_t context)
1219 {
1220 struct proc *p = vfs_context_proc(context);
1221 int retval = E_NONE;
1222 struct hfsmount *hfsmp = NULL;
1223 struct buf *bp;
1224 dev_t dev;
1225 HFSMasterDirectoryBlock *mdbp = NULL;
1226 int ronly;
1227 #if QUOTA
1228 int i;
1229 #endif
1230 int mntwrapper;
1231 kauth_cred_t cred;
1232 u_int64_t disksize;
1233 daddr64_t log_blkcnt;
1234 u_int32_t log_blksize;
1235 u_int32_t phys_blksize;
1236 u_int32_t minblksize;
1237 u_int32_t iswritable;
1238 daddr64_t mdb_offset;
1239 int isvirtual = 0;
1240 int isroot = 0;
1241 int isssd;
1242 #if CONFIG_HFS_ALLOC_RBTREE
1243 thread_t allocator_thread;
1244 #endif
1245
1246 if (args == NULL) {
1247 /* only hfs_mountroot passes us NULL as the 'args' argument */
1248 isroot = 1;
1249 }
1250
1251 ronly = vfs_isrdonly(mp);
1252 dev = vnode_specrdev(devvp);
1253 cred = p ? vfs_context_ucred(context) : NOCRED;
1254 mntwrapper = 0;
1255
1256 bp = NULL;
1257 hfsmp = NULL;
1258 mdbp = NULL;
1259 minblksize = kHFSBlockSize;
1260
1261 /* Advisory locking should be handled at the VFS layer */
1262 vfs_setlocklocal(mp);
1263
1264 /* Get the logical block size (treated as physical block size everywhere) */
1265 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&log_blksize, 0, context)) {
1266 if (HFS_MOUNT_DEBUG) {
1267 printf("hfs_mountfs: DKIOCGETBLOCKSIZE failed\n");
1268 }
1269 retval = ENXIO;
1270 goto error_exit;
1271 }
1272 if (log_blksize == 0 || log_blksize > 1024*1024*1024) {
1273 printf("hfs: logical block size 0x%x looks bad. Not mounting.\n", log_blksize);
1274 retval = ENXIO;
1275 goto error_exit;
1276 }
1277
1278 /* Get the physical block size. */
1279 retval = VNOP_IOCTL(devvp, DKIOCGETPHYSICALBLOCKSIZE, (caddr_t)&phys_blksize, 0, context);
1280 if (retval) {
1281 if ((retval != ENOTSUP) && (retval != ENOTTY)) {
1282 if (HFS_MOUNT_DEBUG) {
1283 printf("hfs_mountfs: DKIOCGETPHYSICALBLOCKSIZE failed\n");
1284 }
1285 retval = ENXIO;
1286 goto error_exit;
1287 }
1288 /* If device does not support this ioctl, assume that physical
1289 * block size is same as logical block size
1290 */
1291 phys_blksize = log_blksize;
1292 }
1293 if (phys_blksize == 0 || phys_blksize > 1024*1024*1024) {
1294 printf("hfs: physical block size 0x%x looks bad. Not mounting.\n", phys_blksize);
1295 retval = ENXIO;
1296 goto error_exit;
1297 }
1298
1299 /* Switch to 512 byte sectors (temporarily) */
1300 if (log_blksize > 512) {
1301 u_int32_t size512 = 512;
1302
1303 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&size512, FWRITE, context)) {
1304 if (HFS_MOUNT_DEBUG) {
1305 printf("hfs_mountfs: DKIOCSETBLOCKSIZE failed \n");
1306 }
1307 retval = ENXIO;
1308 goto error_exit;
1309 }
1310 }
1311 /* Get the number of 512 byte physical blocks. */
1312 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) {
1313 /* resetting block size may fail if getting block count did */
1314 (void)VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context);
1315 if (HFS_MOUNT_DEBUG) {
1316 printf("hfs_mountfs: DKIOCGETBLOCKCOUNT failed\n");
1317 }
1318 retval = ENXIO;
1319 goto error_exit;
1320 }
1321 /* Compute an accurate disk size (i.e. within 512 bytes) */
1322 disksize = (u_int64_t)log_blkcnt * (u_int64_t)512;
1323
1324 /*
1325 * On Tiger it is not necessary to switch the device
1326 * block size to be 4k if there are more than 31-bits
1327 * worth of blocks but to insure compatibility with
1328 * pre-Tiger systems we have to do it.
1329 *
1330 * If the device size is not a multiple of 4K (8 * 512), then
1331 * switching the logical block size isn't going to help because
1332 * we will be unable to write the alternate volume header.
1333 * In this case, just leave the logical block size unchanged.
1334 */
1335 if (log_blkcnt > 0x000000007fffffff && (log_blkcnt & 7) == 0) {
1336 minblksize = log_blksize = 4096;
1337 if (phys_blksize < log_blksize)
1338 phys_blksize = log_blksize;
1339 }
1340
1341 /*
1342 * The cluster layer is not currently prepared to deal with a logical
1343 * block size larger than the system's page size. (It can handle
1344 * blocks per page, but not multiple pages per block.) So limit the
1345 * logical block size to the page size.
1346 */
1347 if (log_blksize > PAGE_SIZE)
1348 log_blksize = PAGE_SIZE;
1349
1350 /* Now switch to our preferred physical block size. */
1351 if (log_blksize > 512) {
1352 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context)) {
1353 if (HFS_MOUNT_DEBUG) {
1354 printf("hfs_mountfs: DKIOCSETBLOCKSIZE (2) failed\n");
1355 }
1356 retval = ENXIO;
1357 goto error_exit;
1358 }
1359 /* Get the count of physical blocks. */
1360 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) {
1361 if (HFS_MOUNT_DEBUG) {
1362 printf("hfs_mountfs: DKIOCGETBLOCKCOUNT (2) failed\n");
1363 }
1364 retval = ENXIO;
1365 goto error_exit;
1366 }
1367 }
1368 /*
1369 * At this point:
1370 * minblksize is the minimum physical block size
1371 * log_blksize has our preferred physical block size
1372 * log_blkcnt has the total number of physical blocks
1373 */
1374
1375 mdb_offset = (daddr64_t)HFS_PRI_SECTOR(log_blksize);
1376 if ((retval = (int)buf_meta_bread(devvp,
1377 HFS_PHYSBLK_ROUNDDOWN(mdb_offset, (phys_blksize/log_blksize)),
1378 phys_blksize, cred, &bp))) {
1379 if (HFS_MOUNT_DEBUG) {
1380 printf("hfs_mountfs: buf_meta_bread failed with %d\n", retval);
1381 }
1382 goto error_exit;
1383 }
1384 MALLOC(mdbp, HFSMasterDirectoryBlock *, kMDBSize, M_TEMP, M_WAITOK);
1385 if (mdbp == NULL) {
1386 retval = ENOMEM;
1387 if (HFS_MOUNT_DEBUG) {
1388 printf("hfs_mountfs: MALLOC failed\n");
1389 }
1390 goto error_exit;
1391 }
1392 bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize), mdbp, kMDBSize);
1393 buf_brelse(bp);
1394 bp = NULL;
1395
1396 MALLOC(hfsmp, struct hfsmount *, sizeof(struct hfsmount), M_HFSMNT, M_WAITOK);
1397 if (hfsmp == NULL) {
1398 if (HFS_MOUNT_DEBUG) {
1399 printf("hfs_mountfs: MALLOC (2) failed\n");
1400 }
1401 retval = ENOMEM;
1402 goto error_exit;
1403 }
1404 bzero(hfsmp, sizeof(struct hfsmount));
1405
1406 hfs_chashinit_finish(hfsmp);
1407
1408 /*
1409 * See if the disk is a solid state device. We need this to decide what to do about
1410 * hotfiles.
1411 */
1412 if (VNOP_IOCTL(devvp, DKIOCISSOLIDSTATE, (caddr_t)&isssd, 0, context) == 0) {
1413 if (isssd) {
1414 hfsmp->hfs_flags |= HFS_SSD;
1415 }
1416 }
1417
1418
1419 /*
1420 * Init the volume information structure
1421 */
1422
1423 lck_mtx_init(&hfsmp->hfs_mutex, hfs_mutex_group, hfs_lock_attr);
1424 lck_mtx_init(&hfsmp->hfc_mutex, hfs_mutex_group, hfs_lock_attr);
1425 lck_rw_init(&hfsmp->hfs_global_lock, hfs_rwlock_group, hfs_lock_attr);
1426 lck_rw_init(&hfsmp->hfs_insync, hfs_rwlock_group, hfs_lock_attr);
1427 lck_spin_init(&hfsmp->vcbFreeExtLock, hfs_spinlock_group, hfs_lock_attr);
1428
1429 vfs_setfsprivate(mp, hfsmp);
1430 hfsmp->hfs_mp = mp; /* Make VFSTOHFS work */
1431 hfsmp->hfs_raw_dev = vnode_specrdev(devvp);
1432 hfsmp->hfs_devvp = devvp;
1433 vnode_ref(devvp); /* Hold a ref on the device, dropped when hfsmp is freed. */
1434 hfsmp->hfs_logical_block_size = log_blksize;
1435 hfsmp->hfs_logical_block_count = log_blkcnt;
1436 hfsmp->hfs_physical_block_size = phys_blksize;
1437 hfsmp->hfs_log_per_phys = (phys_blksize / log_blksize);
1438 hfsmp->hfs_flags |= HFS_WRITEABLE_MEDIA;
1439 if (ronly)
1440 hfsmp->hfs_flags |= HFS_READ_ONLY;
1441 if (((unsigned int)vfs_flags(mp)) & MNT_UNKNOWNPERMISSIONS)
1442 hfsmp->hfs_flags |= HFS_UNKNOWN_PERMS;
1443
1444 #if QUOTA
1445 for (i = 0; i < MAXQUOTAS; i++)
1446 dqfileinit(&hfsmp->hfs_qfiles[i]);
1447 #endif
1448
1449 if (args) {
1450 hfsmp->hfs_uid = (args->hfs_uid == (uid_t)VNOVAL) ? UNKNOWNUID : args->hfs_uid;
1451 if (hfsmp->hfs_uid == 0xfffffffd) hfsmp->hfs_uid = UNKNOWNUID;
1452 hfsmp->hfs_gid = (args->hfs_gid == (gid_t)VNOVAL) ? UNKNOWNGID : args->hfs_gid;
1453 if (hfsmp->hfs_gid == 0xfffffffd) hfsmp->hfs_gid = UNKNOWNGID;
1454 vfs_setowner(mp, hfsmp->hfs_uid, hfsmp->hfs_gid); /* tell the VFS */
1455 if (args->hfs_mask != (mode_t)VNOVAL) {
1456 hfsmp->hfs_dir_mask = args->hfs_mask & ALLPERMS;
1457 if (args->flags & HFSFSMNT_NOXONFILES) {
1458 hfsmp->hfs_file_mask = (args->hfs_mask & DEFFILEMODE);
1459 } else {
1460 hfsmp->hfs_file_mask = args->hfs_mask & ALLPERMS;
1461 }
1462 } else {
1463 hfsmp->hfs_dir_mask = UNKNOWNPERMISSIONS & ALLPERMS; /* 0777: rwx---rwx */
1464 hfsmp->hfs_file_mask = UNKNOWNPERMISSIONS & DEFFILEMODE; /* 0666: no --x by default? */
1465 }
1466 if ((args->flags != (int)VNOVAL) && (args->flags & HFSFSMNT_WRAPPER))
1467 mntwrapper = 1;
1468 } else {
1469 /* Even w/o explicit mount arguments, MNT_UNKNOWNPERMISSIONS requires setting up uid, gid, and mask: */
1470 if (((unsigned int)vfs_flags(mp)) & MNT_UNKNOWNPERMISSIONS) {
1471 hfsmp->hfs_uid = UNKNOWNUID;
1472 hfsmp->hfs_gid = UNKNOWNGID;
1473 vfs_setowner(mp, hfsmp->hfs_uid, hfsmp->hfs_gid); /* tell the VFS */
1474 hfsmp->hfs_dir_mask = UNKNOWNPERMISSIONS & ALLPERMS; /* 0777: rwx---rwx */
1475 hfsmp->hfs_file_mask = UNKNOWNPERMISSIONS & DEFFILEMODE; /* 0666: no --x by default? */
1476 }
1477 }
1478
1479 /* Find out if disk media is writable. */
1480 if (VNOP_IOCTL(devvp, DKIOCISWRITABLE, (caddr_t)&iswritable, 0, context) == 0) {
1481 if (iswritable)
1482 hfsmp->hfs_flags |= HFS_WRITEABLE_MEDIA;
1483 else
1484 hfsmp->hfs_flags &= ~HFS_WRITEABLE_MEDIA;
1485 }
1486
1487 // record the current time at which we're mounting this volume
1488 struct timeval tv;
1489 microtime(&tv);
1490 hfsmp->hfs_mount_time = tv.tv_sec;
1491
1492 /* Mount a standard HFS disk */
1493 if ((SWAP_BE16(mdbp->drSigWord) == kHFSSigWord) &&
1494 (mntwrapper || (SWAP_BE16(mdbp->drEmbedSigWord) != kHFSPlusSigWord))) {
1495
1496 /* On 10.6 and beyond, non read-only mounts for HFS standard vols get rejected */
1497 if (vfs_isrdwr(mp)) {
1498 retval = EROFS;
1499 goto error_exit;
1500 }
1501
1502 printf("hfs_mountfs: Mounting HFS Standard volumes was deprecated in Mac OS 10.7 \n");
1503
1504 /* Treat it as if it's read-only and not writeable */
1505 hfsmp->hfs_flags |= HFS_READ_ONLY;
1506 hfsmp->hfs_flags &= ~HFS_WRITEABLE_MEDIA;
1507
1508 /* If only journal replay is requested, exit immediately */
1509 if (journal_replay_only) {
1510 retval = 0;
1511 goto error_exit;
1512 }
1513
1514 if ((vfs_flags(mp) & MNT_ROOTFS)) {
1515 retval = EINVAL; /* Cannot root from HFS standard disks */
1516 goto error_exit;
1517 }
1518 /* HFS disks can only use 512 byte physical blocks */
1519 if (log_blksize > kHFSBlockSize) {
1520 log_blksize = kHFSBlockSize;
1521 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context)) {
1522 retval = ENXIO;
1523 goto error_exit;
1524 }
1525 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) {
1526 retval = ENXIO;
1527 goto error_exit;
1528 }
1529 hfsmp->hfs_logical_block_size = log_blksize;
1530 hfsmp->hfs_logical_block_count = log_blkcnt;
1531 hfsmp->hfs_physical_block_size = log_blksize;
1532 hfsmp->hfs_log_per_phys = 1;
1533 }
1534 if (args) {
1535 hfsmp->hfs_encoding = args->hfs_encoding;
1536 HFSTOVCB(hfsmp)->volumeNameEncodingHint = args->hfs_encoding;
1537
1538 /* establish the timezone */
1539 gTimeZone = args->hfs_timezone;
1540 }
1541
1542 retval = hfs_getconverter(hfsmp->hfs_encoding, &hfsmp->hfs_get_unicode,
1543 &hfsmp->hfs_get_hfsname);
1544 if (retval)
1545 goto error_exit;
1546
1547 retval = hfs_MountHFSVolume(hfsmp, mdbp, p);
1548 if (retval)
1549 (void) hfs_relconverter(hfsmp->hfs_encoding);
1550
1551 } else /* Mount an HFS Plus disk */ {
1552 HFSPlusVolumeHeader *vhp;
1553 off_t embeddedOffset;
1554 int jnl_disable = 0;
1555
1556 /* Get the embedded Volume Header */
1557 if (SWAP_BE16(mdbp->drEmbedSigWord) == kHFSPlusSigWord) {
1558 embeddedOffset = SWAP_BE16(mdbp->drAlBlSt) * kHFSBlockSize;
1559 embeddedOffset += (u_int64_t)SWAP_BE16(mdbp->drEmbedExtent.startBlock) *
1560 (u_int64_t)SWAP_BE32(mdbp->drAlBlkSiz);
1561
1562 /*
1563 * If the embedded volume doesn't start on a block
1564 * boundary, then switch the device to a 512-byte
1565 * block size so everything will line up on a block
1566 * boundary.
1567 */
1568 if ((embeddedOffset % log_blksize) != 0) {
1569 printf("hfs_mountfs: embedded volume offset not"
1570 " a multiple of physical block size (%d);"
1571 " switching to 512\n", log_blksize);
1572 log_blksize = 512;
1573 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE,
1574 (caddr_t)&log_blksize, FWRITE, context)) {
1575
1576 if (HFS_MOUNT_DEBUG) {
1577 printf("hfs_mountfs: DKIOCSETBLOCKSIZE (3) failed\n");
1578 }
1579 retval = ENXIO;
1580 goto error_exit;
1581 }
1582 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT,
1583 (caddr_t)&log_blkcnt, 0, context)) {
1584 if (HFS_MOUNT_DEBUG) {
1585 printf("hfs_mountfs: DKIOCGETBLOCKCOUNT (3) failed\n");
1586 }
1587 retval = ENXIO;
1588 goto error_exit;
1589 }
1590 /* Note: relative block count adjustment */
1591 hfsmp->hfs_logical_block_count *=
1592 hfsmp->hfs_logical_block_size / log_blksize;
1593
1594 /* Update logical /physical block size */
1595 hfsmp->hfs_logical_block_size = log_blksize;
1596 hfsmp->hfs_physical_block_size = log_blksize;
1597 phys_blksize = log_blksize;
1598 hfsmp->hfs_log_per_phys = 1;
1599 }
1600
1601 disksize = (u_int64_t)SWAP_BE16(mdbp->drEmbedExtent.blockCount) *
1602 (u_int64_t)SWAP_BE32(mdbp->drAlBlkSiz);
1603
1604 hfsmp->hfs_logical_block_count = disksize / log_blksize;
1605
1606 mdb_offset = (daddr64_t)((embeddedOffset / log_blksize) + HFS_PRI_SECTOR(log_blksize));
1607 retval = (int)buf_meta_bread(devvp, HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
1608 phys_blksize, cred, &bp);
1609 if (retval) {
1610 if (HFS_MOUNT_DEBUG) {
1611 printf("hfs_mountfs: buf_meta_bread (2) failed with %d\n", retval);
1612 }
1613 goto error_exit;
1614 }
1615 bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize), mdbp, 512);
1616 buf_brelse(bp);
1617 bp = NULL;
1618 vhp = (HFSPlusVolumeHeader*) mdbp;
1619
1620 } else /* pure HFS+ */ {
1621 embeddedOffset = 0;
1622 vhp = (HFSPlusVolumeHeader*) mdbp;
1623 }
1624
1625 if (isroot) {
1626 hfs_root_unmounted_cleanly = (SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) != 0;
1627 }
1628
1629 /*
1630 * On inconsistent disks, do not allow read-write mount
1631 * unless it is the boot volume being mounted. We also
1632 * always want to replay the journal if the journal_replay_only
1633 * flag is set because that will (most likely) get the
1634 * disk into a consistent state before fsck_hfs starts
1635 * looking at it.
1636 */
1637 if ( !(vfs_flags(mp) & MNT_ROOTFS)
1638 && (SWAP_BE32(vhp->attributes) & kHFSVolumeInconsistentMask)
1639 && !journal_replay_only
1640 && !(hfsmp->hfs_flags & HFS_READ_ONLY)) {
1641
1642 if (HFS_MOUNT_DEBUG) {
1643 printf("hfs_mountfs: failed to mount non-root inconsistent disk\n");
1644 }
1645 retval = EINVAL;
1646 goto error_exit;
1647 }
1648
1649
1650 // XXXdbg
1651 //
1652 hfsmp->jnl = NULL;
1653 hfsmp->jvp = NULL;
1654 if (args != NULL && (args->flags & HFSFSMNT_EXTENDED_ARGS) &&
1655 args->journal_disable) {
1656 jnl_disable = 1;
1657 }
1658
1659 //
1660 // We only initialize the journal here if the last person
1661 // to mount this volume was journaling aware. Otherwise
1662 // we delay journal initialization until later at the end
1663 // of hfs_MountHFSPlusVolume() because the last person who
1664 // mounted it could have messed things up behind our back
1665 // (so we need to go find the .journal file, make sure it's
1666 // the right size, re-sync up if it was moved, etc).
1667 //
1668 if ( (SWAP_BE32(vhp->lastMountedVersion) == kHFSJMountVersion)
1669 && (SWAP_BE32(vhp->attributes) & kHFSVolumeJournaledMask)
1670 && !jnl_disable) {
1671
1672 // if we're able to init the journal, mark the mount
1673 // point as journaled.
1674 //
1675 if ((retval = hfs_early_journal_init(hfsmp, vhp, args, embeddedOffset, mdb_offset, mdbp, cred)) == 0) {
1676 vfs_setflags(mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
1677 } else {
1678 if (retval == EROFS) {
1679 // EROFS is a special error code that means the volume has an external
1680 // journal which we couldn't find. in that case we do not want to
1681 // rewrite the volume header - we'll just refuse to mount the volume.
1682 if (HFS_MOUNT_DEBUG) {
1683 printf("hfs_mountfs: hfs_early_journal_init indicated external jnl \n");
1684 }
1685 retval = EINVAL;
1686 goto error_exit;
1687 }
1688
1689 // if the journal failed to open, then set the lastMountedVersion
1690 // to be "FSK!" which fsck_hfs will see and force the fsck instead
1691 // of just bailing out because the volume is journaled.
1692 if (!ronly) {
1693 if (HFS_MOUNT_DEBUG) {
1694 printf("hfs_mountfs: hfs_early_journal_init failed, setting to FSK \n");
1695 }
1696
1697 HFSPlusVolumeHeader *jvhp;
1698
1699 hfsmp->hfs_flags |= HFS_NEED_JNL_RESET;
1700
1701 if (mdb_offset == 0) {
1702 mdb_offset = (daddr64_t)((embeddedOffset / log_blksize) + HFS_PRI_SECTOR(log_blksize));
1703 }
1704
1705 bp = NULL;
1706 retval = (int)buf_meta_bread(devvp,
1707 HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
1708 phys_blksize, cred, &bp);
1709 if (retval == 0) {
1710 jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize));
1711
1712 if (SWAP_BE16(jvhp->signature) == kHFSPlusSigWord || SWAP_BE16(jvhp->signature) == kHFSXSigWord) {
1713 printf ("hfs(1): Journal replay fail. Writing lastMountVersion as FSK!\n");
1714 jvhp->lastMountedVersion = SWAP_BE32(kFSKMountVersion);
1715 buf_bwrite(bp);
1716 } else {
1717 buf_brelse(bp);
1718 }
1719 bp = NULL;
1720 } else if (bp) {
1721 buf_brelse(bp);
1722 // clear this so the error exit path won't try to use it
1723 bp = NULL;
1724 }
1725 }
1726
1727 // if this isn't the root device just bail out.
1728 // If it is the root device we just continue on
1729 // in the hopes that fsck_hfs will be able to
1730 // fix any damage that exists on the volume.
1731 if ( !(vfs_flags(mp) & MNT_ROOTFS)) {
1732 if (HFS_MOUNT_DEBUG) {
1733 printf("hfs_mountfs: hfs_early_journal_init failed, erroring out \n");
1734 }
1735 retval = EINVAL;
1736 goto error_exit;
1737 }
1738 }
1739 }
1740 // XXXdbg
1741
1742 /* Either the journal is replayed successfully, or there
1743 * was nothing to replay, or no journal exists. In any case,
1744 * return success.
1745 */
1746 if (journal_replay_only) {
1747 retval = 0;
1748 goto error_exit;
1749 }
1750
1751 (void) hfs_getconverter(0, &hfsmp->hfs_get_unicode, &hfsmp->hfs_get_hfsname);
1752
1753 retval = hfs_MountHFSPlusVolume(hfsmp, vhp, embeddedOffset, disksize, p, args, cred);
1754 /*
1755 * If the backend didn't like our physical blocksize
1756 * then retry with physical blocksize of 512.
1757 */
1758 if ((retval == ENXIO) && (log_blksize > 512) && (log_blksize != minblksize)) {
1759 printf("hfs_mountfs: could not use physical block size "
1760 "(%d) switching to 512\n", log_blksize);
1761 log_blksize = 512;
1762 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context)) {
1763 if (HFS_MOUNT_DEBUG) {
1764 printf("hfs_mountfs: DKIOCSETBLOCKSIZE (4) failed \n");
1765 }
1766 retval = ENXIO;
1767 goto error_exit;
1768 }
1769 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) {
1770 if (HFS_MOUNT_DEBUG) {
1771 printf("hfs_mountfs: DKIOCGETBLOCKCOUNT (4) failed \n");
1772 }
1773 retval = ENXIO;
1774 goto error_exit;
1775 }
1776 devvp->v_specsize = log_blksize;
1777 /* Note: relative block count adjustment (in case this is an embedded volume). */
1778 hfsmp->hfs_logical_block_count *= hfsmp->hfs_logical_block_size / log_blksize;
1779 hfsmp->hfs_logical_block_size = log_blksize;
1780 hfsmp->hfs_log_per_phys = hfsmp->hfs_physical_block_size / log_blksize;
1781
1782 if (hfsmp->jnl && hfsmp->jvp == devvp) {
1783 // close and re-open this with the new block size
1784 journal_close(hfsmp->jnl);
1785 hfsmp->jnl = NULL;
1786 if (hfs_early_journal_init(hfsmp, vhp, args, embeddedOffset, mdb_offset, mdbp, cred) == 0) {
1787 vfs_setflags(mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
1788 } else {
1789 // if the journal failed to open, then set the lastMountedVersion
1790 // to be "FSK!" which fsck_hfs will see and force the fsck instead
1791 // of just bailing out because the volume is journaled.
1792 if (!ronly) {
1793 if (HFS_MOUNT_DEBUG) {
1794 printf("hfs_mountfs: hfs_early_journal_init (2) resetting.. \n");
1795 }
1796 HFSPlusVolumeHeader *jvhp;
1797
1798 hfsmp->hfs_flags |= HFS_NEED_JNL_RESET;
1799
1800 if (mdb_offset == 0) {
1801 mdb_offset = (daddr64_t)((embeddedOffset / log_blksize) + HFS_PRI_SECTOR(log_blksize));
1802 }
1803
1804 bp = NULL;
1805 retval = (int)buf_meta_bread(devvp, HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
1806 phys_blksize, cred, &bp);
1807 if (retval == 0) {
1808 jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize));
1809
1810 if (SWAP_BE16(jvhp->signature) == kHFSPlusSigWord || SWAP_BE16(jvhp->signature) == kHFSXSigWord) {
1811 printf ("hfs(2): Journal replay fail. Writing lastMountVersion as FSK!\n");
1812 jvhp->lastMountedVersion = SWAP_BE32(kFSKMountVersion);
1813 buf_bwrite(bp);
1814 } else {
1815 buf_brelse(bp);
1816 }
1817 bp = NULL;
1818 } else if (bp) {
1819 buf_brelse(bp);
1820 // clear this so the error exit path won't try to use it
1821 bp = NULL;
1822 }
1823 }
1824
1825 // if this isn't the root device just bail out.
1826 // If it is the root device we just continue on
1827 // in the hopes that fsck_hfs will be able to
1828 // fix any damage that exists on the volume.
1829 if ( !(vfs_flags(mp) & MNT_ROOTFS)) {
1830 if (HFS_MOUNT_DEBUG) {
1831 printf("hfs_mountfs: hfs_early_journal_init (2) failed \n");
1832 }
1833 retval = EINVAL;
1834 goto error_exit;
1835 }
1836 }
1837 }
1838
1839 /* Try again with a smaller block size... */
1840 retval = hfs_MountHFSPlusVolume(hfsmp, vhp, embeddedOffset, disksize, p, args, cred);
1841 if (retval && HFS_MOUNT_DEBUG) {
1842 printf("hfs_MountHFSPlusVolume (late) returned %d\n",retval);
1843 }
1844 }
1845 if (retval)
1846 (void) hfs_relconverter(0);
1847 }
1848
1849 // save off a snapshot of the mtime from the previous mount
1850 // (for matador).
1851 hfsmp->hfs_last_mounted_mtime = hfsmp->hfs_mtime;
1852
1853 if ( retval ) {
1854 if (HFS_MOUNT_DEBUG) {
1855 printf("hfs_mountfs: encountered failure %d \n", retval);
1856 }
1857 goto error_exit;
1858 }
1859
1860 mp->mnt_vfsstat.f_fsid.val[0] = (long)dev;
1861 mp->mnt_vfsstat.f_fsid.val[1] = vfs_typenum(mp);
1862 vfs_setmaxsymlen(mp, 0);
1863
1864 mp->mnt_vtable->vfc_vfsflags |= VFC_VFSNATIVEXATTR;
1865 #if NAMEDSTREAMS
1866 mp->mnt_kern_flag |= MNTK_NAMED_STREAMS;
1867 #endif
1868 if (!(hfsmp->hfs_flags & HFS_STANDARD)) {
1869 /* Tell VFS that we support directory hard links. */
1870 mp->mnt_vtable->vfc_vfsflags |= VFC_VFSDIRLINKS;
1871 } else {
1872 /* HFS standard doesn't support extended readdir! */
1873 mount_set_noreaddirext (mp);
1874 }
1875
1876 if (args) {
1877 /*
1878 * Set the free space warning levels for a non-root volume:
1879 *
1880 * Set the "danger" limit to 1% of the volume size or 100MB, whichever
1881 * is less. Set the "warning" limit to 2% of the volume size or 150MB,
1882 * whichever is less. And last, set the "desired" freespace level to
1883 * to 3% of the volume size or 200MB, whichever is less.
1884 */
1885 hfsmp->hfs_freespace_notify_dangerlimit =
1886 MIN(HFS_VERYLOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize,
1887 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_VERYLOWDISKTRIGGERFRACTION);
1888 hfsmp->hfs_freespace_notify_warninglimit =
1889 MIN(HFS_LOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize,
1890 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_LOWDISKTRIGGERFRACTION);
1891 hfsmp->hfs_freespace_notify_desiredlevel =
1892 MIN(HFS_LOWDISKSHUTOFFLEVEL / HFSTOVCB(hfsmp)->blockSize,
1893 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_LOWDISKSHUTOFFFRACTION);
1894 } else {
1895 /*
1896 * Set the free space warning levels for the root volume:
1897 *
1898 * Set the "danger" limit to 5% of the volume size or 512MB, whichever
1899 * is less. Set the "warning" limit to 10% of the volume size or 1GB,
1900 * whichever is less. And last, set the "desired" freespace level to
1901 * to 11% of the volume size or 1.25GB, whichever is less.
1902 */
1903 hfsmp->hfs_freespace_notify_dangerlimit =
1904 MIN(HFS_ROOTVERYLOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize,
1905 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_ROOTVERYLOWDISKTRIGGERFRACTION);
1906 hfsmp->hfs_freespace_notify_warninglimit =
1907 MIN(HFS_ROOTLOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize,
1908 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_ROOTLOWDISKTRIGGERFRACTION);
1909 hfsmp->hfs_freespace_notify_desiredlevel =
1910 MIN(HFS_ROOTLOWDISKSHUTOFFLEVEL / HFSTOVCB(hfsmp)->blockSize,
1911 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_ROOTLOWDISKSHUTOFFFRACTION);
1912 };
1913
1914 /* Check if the file system exists on virtual device, like disk image */
1915 if (VNOP_IOCTL(devvp, DKIOCISVIRTUAL, (caddr_t)&isvirtual, 0, context) == 0) {
1916 if (isvirtual) {
1917 hfsmp->hfs_flags |= HFS_VIRTUAL_DEVICE;
1918 }
1919 }
1920
1921 /* do not allow ejectability checks on the root device */
1922 if (isroot == 0) {
1923 if ((hfsmp->hfs_flags & HFS_VIRTUAL_DEVICE) == 0 &&
1924 IOBSDIsMediaEjectable(mp->mnt_vfsstat.f_mntfromname)) {
1925 hfsmp->hfs_max_pending_io = 4096*1024; // a reasonable value to start with.
1926 hfsmp->hfs_syncer = thread_call_allocate(hfs_syncer, hfsmp);
1927 if (hfsmp->hfs_syncer == NULL) {
1928 printf("hfs: failed to allocate syncer thread callback for %s (%s)\n",
1929 mp->mnt_vfsstat.f_mntfromname, mp->mnt_vfsstat.f_mntonname);
1930 }
1931 }
1932 }
1933
1934 #if CONFIG_HFS_ALLOC_RBTREE
1935 /*
1936 * We spawn a thread to create the pair of red-black trees for this volume.
1937 * However, in so doing, we must be careful to ensure that if this thread is still
1938 * running after mount has finished, it doesn't interfere with an unmount. Specifically,
1939 * we'll need to set a bit that indicates we're in progress building the trees here.
1940 * Unmount will check for this bit, and then if it's set, mark a corresponding bit that
1941 * notifies the tree generation code that an unmount is waiting. Also mark the bit that
1942 * indicates the tree is live and operating.
1943 *
1944 * Only do this if we're operating on a read-write mount (we wouldn't care for read-only).
1945 */
1946
1947 if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) {
1948 hfsmp->extent_tree_flags |= (HFS_ALLOC_TREEBUILD_INFLIGHT | HFS_ALLOC_RB_ENABLED);
1949
1950 /* Initialize EOF counter so that the thread can assume it started at initial values */
1951 hfsmp->offset_block_end = 0;
1952 InitTree(hfsmp);
1953
1954 kernel_thread_start ((thread_continue_t) hfs_initialize_allocator , hfsmp, &allocator_thread);
1955 thread_deallocate(allocator_thread);
1956 }
1957
1958 #endif
1959
1960 /*
1961 * Start looking for free space to drop below this level and generate a
1962 * warning immediately if needed:
1963 */
1964 hfsmp->hfs_notification_conditions = 0;
1965 hfs_generate_volume_notifications(hfsmp);
1966
1967 if (ronly == 0) {
1968 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
1969 }
1970 FREE(mdbp, M_TEMP);
1971 return (0);
1972
1973 error_exit:
1974 if (bp)
1975 buf_brelse(bp);
1976 if (mdbp)
1977 FREE(mdbp, M_TEMP);
1978
1979 if (hfsmp && hfsmp->jvp && hfsmp->jvp != hfsmp->hfs_devvp) {
1980 vnode_clearmountedon(hfsmp->jvp);
1981 (void)VNOP_CLOSE(hfsmp->jvp, ronly ? FREAD : FREAD|FWRITE, vfs_context_kernel());
1982 hfsmp->jvp = NULL;
1983 }
1984 if (hfsmp) {
1985 if (hfsmp->hfs_devvp) {
1986 vnode_rele(hfsmp->hfs_devvp);
1987 }
1988 hfs_delete_chash(hfsmp);
1989
1990 FREE(hfsmp, M_HFSMNT);
1991 vfs_setfsprivate(mp, NULL);
1992 }
1993 return (retval);
1994 }
1995
1996
1997 /*
1998 * Make a filesystem operational.
1999 * Nothing to do at the moment.
2000 */
2001 /* ARGSUSED */
2002 static int
2003 hfs_start(__unused struct mount *mp, __unused int flags, __unused vfs_context_t context)
2004 {
2005 return (0);
2006 }
2007
2008
2009 /*
2010 * unmount system call
2011 */
/*
 * hfs_unmount
 *
 * VFS unmount entry point for HFS.  Flushes all open files, stops the
 * background syncer, tears down allocator state, writes back the B-trees,
 * volume bitmap and volume header, closes the journal, and frees the
 * hfsmount.
 *
 * With MNT_FORCE set, flush failures are ignored and teardown proceeds
 * anyway; otherwise the first flush failure aborts the unmount and the
 * error is returned to the caller.
 */
int
hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context)
{
	struct proc *p = vfs_context_proc(context);
	struct hfsmount *hfsmp = VFSTOHFS(mp);
	int retval = E_NONE;
	int flags;		/* flags handed to hfs_flushfiles() */
	int force;		/* non-zero for a forced (MNT_FORCE) unmount */
	int started_tr = 0;	/* non-zero once a journal transaction is open */
	int rb_used = 0;	/* non-zero if the red-black allocator tree was live */

	flags = 0;
	force = 0;
	if (mntflags & MNT_FORCE) {
		flags |= FORCECLOSE;
		force = 1;
	}

	if ((retval = hfs_flushfiles(mp, flags, p)) && !force)
		return (retval);

	if (hfsmp->hfs_flags & HFS_METADATA_ZONE)
		(void) hfs_recording_suspend(hfsmp);

	/*
	 * Cancel any pending timers for this volume.  Then wait for any timers
	 * which have fired, but whose callbacks have not yet completed.
	 */
	if (hfsmp->hfs_syncer)
	{
		struct timespec ts = {0, 100000000};	/* 0.1 seconds */

		/*
		 * Cancel any timers that have been scheduled, but have not
		 * fired yet.  NOTE: The kernel considers a timer complete as
		 * soon as it starts your callback, so the kernel does not
		 * keep track of the number of callbacks in progress.
		 */
		if (thread_call_cancel(hfsmp->hfs_syncer))
			OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_incomplete);
		thread_call_free(hfsmp->hfs_syncer);
		hfsmp->hfs_syncer = NULL;

		/*
		 * This waits for all of the callbacks that were entered before
		 * we did thread_call_cancel above, but have not completed yet.
		 */
		while(hfsmp->hfs_sync_incomplete > 0)
		{
			/* No mutex is passed; sleep on the counter's address with a
			 * 0.1s timeout so completion is re-checked periodically. */
			msleep((caddr_t)&hfsmp->hfs_sync_incomplete, NULL, PWAIT, "hfs_unmount", &ts);
		}

		if (hfsmp->hfs_sync_incomplete < 0)
			panic("hfs_unmount: pm_sync_incomplete underflow!\n");
	}

#if CONFIG_HFS_ALLOC_RBTREE
	rb_used = hfs_teardown_allocator(hfsmp);
#endif

	/*
	 * Flush out the b-trees, volume bitmap and Volume Header
	 */
	if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) {
		retval = hfs_start_transaction(hfsmp);
		if (retval == 0) {
			started_tr = 1;
		} else if (!force) {
			goto err_exit;
		}

		if (hfsmp->hfs_startup_vp) {
			(void) hfs_lock(VTOC(hfsmp->hfs_startup_vp), HFS_EXCLUSIVE_LOCK);
			retval = hfs_fsync(hfsmp->hfs_startup_vp, MNT_WAIT, 0, p);
			hfs_unlock(VTOC(hfsmp->hfs_startup_vp));
			if (retval && !force)
				goto err_exit;
		}

		if (hfsmp->hfs_attribute_vp) {
			(void) hfs_lock(VTOC(hfsmp->hfs_attribute_vp), HFS_EXCLUSIVE_LOCK);
			retval = hfs_fsync(hfsmp->hfs_attribute_vp, MNT_WAIT, 0, p);
			hfs_unlock(VTOC(hfsmp->hfs_attribute_vp));
			if (retval && !force)
				goto err_exit;
		}

		/* Catalog and extents B-trees always exist; flush unconditionally. */
		(void) hfs_lock(VTOC(hfsmp->hfs_catalog_vp), HFS_EXCLUSIVE_LOCK);
		retval = hfs_fsync(hfsmp->hfs_catalog_vp, MNT_WAIT, 0, p);
		hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
		if (retval && !force)
			goto err_exit;

		(void) hfs_lock(VTOC(hfsmp->hfs_extents_vp), HFS_EXCLUSIVE_LOCK);
		retval = hfs_fsync(hfsmp->hfs_extents_vp, MNT_WAIT, 0, p);
		hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
		if (retval && !force)
			goto err_exit;

		if (hfsmp->hfs_allocation_vp) {
			(void) hfs_lock(VTOC(hfsmp->hfs_allocation_vp), HFS_EXCLUSIVE_LOCK);
			retval = hfs_fsync(hfsmp->hfs_allocation_vp, MNT_WAIT, 0, p);
			hfs_unlock(VTOC(hfsmp->hfs_allocation_vp));
			if (retval && !force)
				goto err_exit;
		}

		if (hfsmp->hfc_filevp && vnode_issystem(hfsmp->hfc_filevp)) {
			retval = hfs_fsync(hfsmp->hfc_filevp, MNT_WAIT, 0, p);
			if (retval && !force)
				goto err_exit;
		}

		/* If runtime corruption was detected, indicate that the volume
		 * was not unmounted cleanly.
		 */
		if (hfsmp->vcbAtrb & kHFSVolumeInconsistentMask) {
			HFSTOVCB(hfsmp)->vcbAtrb &= ~kHFSVolumeUnmountedMask;
		} else {
			HFSTOVCB(hfsmp)->vcbAtrb |= kHFSVolumeUnmountedMask;
		}


		if (rb_used) {
			/* If the rb-tree was live, just set min_start to 0 */
			hfsmp->nextAllocation = 0;
		}
		else {
			if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
				int i;
				u_int32_t min_start = hfsmp->totalBlocks;

				// set the nextAllocation pointer to the smallest free block number
				// we've seen so on the next mount we won't rescan unnecessarily
				lck_spin_lock(&hfsmp->vcbFreeExtLock);
				for(i=0; i < (int)hfsmp->vcbFreeExtCnt; i++) {
					if (hfsmp->vcbFreeExt[i].startBlock < min_start) {
						min_start = hfsmp->vcbFreeExt[i].startBlock;
					}
				}
				lck_spin_unlock(&hfsmp->vcbFreeExtLock);
				if (min_start < hfsmp->nextAllocation) {
					hfsmp->nextAllocation = min_start;
				}
			}
		}


		retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
		if (retval) {
			/* Header write failed: make sure the "cleanly unmounted"
			 * bit is not left set on disk. */
			HFSTOVCB(hfsmp)->vcbAtrb &= ~kHFSVolumeUnmountedMask;
			if (!force)
				goto err_exit;	/* could not flush everything */
		}

		if (started_tr) {
			hfs_end_transaction(hfsmp);
			started_tr = 0;
		}
	}

	if (hfsmp->jnl) {
		hfs_journal_flush(hfsmp, FALSE);
	}

	/*
	 * Invalidate our caches and release metadata vnodes
	 */
	(void) hfsUnmount(hfsmp, p);

	if (HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord)
		(void) hfs_relconverter(hfsmp->hfs_encoding);

	// XXXdbg
	if (hfsmp->jnl) {
		journal_close(hfsmp->jnl);
		hfsmp->jnl = NULL;
	}

	VNOP_FSYNC(hfsmp->hfs_devvp, MNT_WAIT, context);

	/* Release the separate journal device, if one was in use. */
	if (hfsmp->jvp && hfsmp->jvp != hfsmp->hfs_devvp) {
		vnode_clearmountedon(hfsmp->jvp);
		retval = VNOP_CLOSE(hfsmp->jvp,
		                    hfsmp->hfs_flags & HFS_READ_ONLY ? FREAD : FREAD|FWRITE,
		                    vfs_context_kernel());
		vnode_put(hfsmp->jvp);
		hfsmp->jvp = NULL;
	}
	// XXXdbg

	/*
	 * Last chance to dump unreferenced system files.
	 */
	(void) vflush(mp, NULLVP, FORCECLOSE);

#if HFS_SPARSE_DEV
	/* Drop our reference on the backing fs (if any). */
	if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) && hfsmp->hfs_backingfs_rootvp) {
		struct vnode * tmpvp;

		hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
		tmpvp = hfsmp->hfs_backingfs_rootvp;
		hfsmp->hfs_backingfs_rootvp = NULLVP;
		vnode_rele(tmpvp);
	}
#endif /* HFS_SPARSE_DEV */
	lck_mtx_destroy(&hfsmp->hfc_mutex, hfs_mutex_group);
	lck_spin_destroy(&hfsmp->vcbFreeExtLock, hfs_spinlock_group);
	vnode_rele(hfsmp->hfs_devvp);

	hfs_delete_chash(hfsmp);
	FREE(hfsmp, M_HFSMNT);

	return (0);

  err_exit:
	if (started_tr) {
		hfs_end_transaction(hfsmp);
	}
	return retval;
}
2234
2235
2236 /*
2237 * Return the root of a filesystem.
2238 */
2239 static int
2240 hfs_vfs_root(struct mount *mp, struct vnode **vpp, __unused vfs_context_t context)
2241 {
2242 return hfs_vget(VFSTOHFS(mp), (cnid_t)kHFSRootFolderID, vpp, 1, 0);
2243 }
2244
2245
2246 /*
2247 * Do operations associated with quotas
2248 */
#if !QUOTA
/* Quota support compiled out: reject every quota operation. */
static int
hfs_quotactl(__unused struct mount *mp, __unused int cmds, __unused uid_t uid, __unused caddr_t datap, __unused vfs_context_t context)
{
	return (ENOTSUP);
}
#else
/*
 * hfs_quotactl
 *
 * Dispatch a quota control request to the matching hfs_quota routine.
 * 'cmds' packs both the command (upper bits, extracted via SUBCMDSHIFT)
 * and the quota type (lower bits, masked with SUBCMDMASK).  Superuser
 * privilege is required except for Q_SYNC, Q_QUOTASTAT, and Q_GETQUOTA
 * on the caller's own uid.
 */
static int
hfs_quotactl(struct mount *mp, int cmds, uid_t uid, caddr_t datap, vfs_context_t context)
{
	struct proc *p = vfs_context_proc(context);
	int cmd, type, error;

	/* uid of ~0 means "the calling user". */
	if (uid == ~0U)
		uid = kauth_cred_getuid(vfs_context_ucred(context));
	cmd = cmds >> SUBCMDSHIFT;

	/* Privilege check: unprivileged callers may only sync, query
	 * quota status, or read their own quota. */
	switch (cmd) {
	case Q_SYNC:
	case Q_QUOTASTAT:
		break;
	case Q_GETQUOTA:
		if (uid == kauth_cred_getuid(vfs_context_ucred(context)))
			break;
		/* fall through */
	default:
		if ( (error = vfs_context_suser(context)) )
			return (error);
	}

	type = cmds & SUBCMDMASK;
	if ((u_int)type >= MAXQUOTAS)
		return (EINVAL);
	/* If the mount is already busy, return success without doing anything. */
	if (vfs_busy(mp, LK_NOWAIT))
		return (0);

	switch (cmd) {

	case Q_QUOTAON:
		error = hfs_quotaon(p, mp, type, datap);
		break;

	case Q_QUOTAOFF:
		error = hfs_quotaoff(p, mp, type);
		break;

	case Q_SETQUOTA:
		error = hfs_setquota(mp, uid, type, datap);
		break;

	case Q_SETUSE:
		error = hfs_setuse(mp, uid, type, datap);
		break;

	case Q_GETQUOTA:
		error = hfs_getquota(mp, uid, type, datap);
		break;

	case Q_SYNC:
		error = hfs_qsync(mp);
		break;

	case Q_QUOTASTAT:
		error = hfs_quotastat(mp, type, datap);
		break;

	default:
		error = EINVAL;
		break;
	}
	vfs_unbusy(mp);

	return (error);
}
#endif /* QUOTA */
2324
2325 /* Subtype is composite of bits */
2326 #define HFS_SUBTYPE_JOURNALED 0x01
2327 #define HFS_SUBTYPE_CASESENSITIVE 0x02
2328 /* bits 2 - 6 reserved */
2329 #define HFS_SUBTYPE_STANDARDHFS 0x80
2330
2331 /*
2332 * Get file system statistics.
2333 */
2334 int
2335 hfs_statfs(struct mount *mp, register struct vfsstatfs *sbp, __unused vfs_context_t context)
2336 {
2337 ExtendedVCB *vcb = VFSTOVCB(mp);
2338 struct hfsmount *hfsmp = VFSTOHFS(mp);
2339 u_int32_t freeCNIDs;
2340 u_int16_t subtype = 0;
2341
2342 freeCNIDs = (u_int32_t)0xFFFFFFFF - (u_int32_t)vcb->vcbNxtCNID;
2343
2344 sbp->f_bsize = (u_int32_t)vcb->blockSize;
2345 sbp->f_iosize = (size_t)cluster_max_io_size(mp, 0);
2346 sbp->f_blocks = (u_int64_t)((u_int32_t)vcb->totalBlocks);
2347 sbp->f_bfree = (u_int64_t)((u_int32_t )hfs_freeblks(hfsmp, 0));
2348 sbp->f_bavail = (u_int64_t)((u_int32_t )hfs_freeblks(hfsmp, 1));
2349 sbp->f_files = (u_int64_t)((u_int32_t )(vcb->totalBlocks - 2)); /* max files is constrained by total blocks */
2350 sbp->f_ffree = (u_int64_t)((u_int32_t )(MIN(freeCNIDs, sbp->f_bavail)));
2351
2352 /*
2353 * Subtypes (flavors) for HFS
2354 * 0: Mac OS Extended
2355 * 1: Mac OS Extended (Journaled)
2356 * 2: Mac OS Extended (Case Sensitive)
2357 * 3: Mac OS Extended (Case Sensitive, Journaled)
2358 * 4 - 127: Reserved
2359 * 128: Mac OS Standard
2360 *
2361 */
2362 if (hfsmp->hfs_flags & HFS_STANDARD) {
2363 subtype = HFS_SUBTYPE_STANDARDHFS;
2364 } else /* HFS Plus */ {
2365 if (hfsmp->jnl)
2366 subtype |= HFS_SUBTYPE_JOURNALED;
2367 if (hfsmp->hfs_flags & HFS_CASE_SENSITIVE)
2368 subtype |= HFS_SUBTYPE_CASESENSITIVE;
2369 }
2370 sbp->f_fssubtype = subtype;
2371
2372 return (0);
2373 }
2374
2375
2376 //
2377 // XXXdbg -- this is a callback to be used by the journal to
2378 // get meta data blocks flushed out to disk.
2379 //
2380 // XXXdbg -- be smarter and don't flush *every* block on each
2381 // call. try to only flush some so we don't wind up
2382 // being too synchronous.
2383 //
__private_extern__
void
hfs_sync_metadata(void *arg)
{
	/* Journal callback (see comment above): flush the primary and
	 * alternate volume headers to disk if their buffers are dirty. */
	struct mount *mp = (struct mount *)arg;
	struct hfsmount *hfsmp;
	ExtendedVCB *vcb;
	buf_t bp;
	int retval;
	daddr64_t priIDSector;
	hfsmp = VFSTOHFS(mp);
	vcb = HFSTOVCB(hfsmp);

	// now make sure the super block is flushed
	priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
				  HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size));

	retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
			HFS_PHYSBLK_ROUNDDOWN(priIDSector, hfsmp->hfs_log_per_phys),
			hfsmp->hfs_physical_block_size, NOCRED, &bp);
	if ((retval != 0 ) && (retval != ENXIO)) {
		/* ENXIO is deliberately not logged; anything else is unexpected. */
		printf("hfs_sync_metadata: can't read volume header at %d! (retval 0x%x)\n",
		       (int)priIDSector, retval);
	}

	/* Write the buffer only when it is dirty (B_DELWRI) and not locked;
	 * otherwise release it back to the cache untouched. */
	if (retval == 0 && ((buf_flags(bp) & (B_DELWRI | B_LOCKED)) == B_DELWRI)) {
		buf_bwrite(bp);
	} else if (bp) {
		buf_brelse(bp);
	}

	// the alternate super block...
	// XXXdbg - we probably don't need to do this each and every time.
	//          hfs_btreeio.c:FlushAlternate() should flag when it was
	//          written...
	if (hfsmp->hfs_alt_id_sector) {
		retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
				HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys),
				hfsmp->hfs_physical_block_size, NOCRED, &bp);
		/* Same dirty-and-unlocked test as the primary header above. */
		if (retval == 0 && ((buf_flags(bp) & (B_DELWRI | B_LOCKED)) == B_DELWRI)) {
			buf_bwrite(bp);
		} else if (bp) {
			buf_brelse(bp);
		}
	}
}
2430
2431
/*
 * Argument bundle passed from hfs_sync() to hfs_sync_callback()
 * through vnode_iterate().
 */
struct hfs_sync_cargs {
	kauth_cred_t cred;	/* caller's credential (not read by the callback) */
	struct proc *p;		/* process on whose behalf we sync; forwarded to hfs_fsync() */
	int waitfor;		/* MNT_WAIT / MNT_NOWAIT, forwarded to hfs_fsync() */
	int error;		/* out: last error returned by hfs_fsync(), if any */
};
2438
2439
2440 static int
2441 hfs_sync_callback(struct vnode *vp, void *cargs)
2442 {
2443 struct cnode *cp;
2444 struct hfs_sync_cargs *args;
2445 int error;
2446
2447 args = (struct hfs_sync_cargs *)cargs;
2448
2449 if (hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK) != 0) {
2450 return (VNODE_RETURNED);
2451 }
2452 cp = VTOC(vp);
2453
2454 if ((cp->c_flag & C_MODIFIED) ||
2455 (cp->c_touch_acctime | cp->c_touch_chgtime | cp->c_touch_modtime) ||
2456 vnode_hasdirtyblks(vp)) {
2457 error = hfs_fsync(vp, args->waitfor, 0, args->p);
2458
2459 if (error)
2460 args->error = error;
2461 }
2462 hfs_unlock(cp);
2463 return (VNODE_RETURNED);
2464 }
2465
2466
2467
2468 /*
2469 * Go through the disk queues to initiate sandbagged IO;
2470 * go through the inodes to write those that have been modified;
2471 * initiate the writing of the super block if it has been modified.
2472 *
2473 * Note: we are always called with the filesystem marked `MPBUSY'.
2474 */
2475 int
2476 hfs_sync(struct mount *mp, int waitfor, vfs_context_t context)
2477 {
2478 struct proc *p = vfs_context_proc(context);
2479 struct cnode *cp;
2480 struct hfsmount *hfsmp;
2481 ExtendedVCB *vcb;
2482 struct vnode *meta_vp[4];
2483 int i;
2484 int error, allerror = 0;
2485 struct hfs_sync_cargs args;
2486
2487 hfsmp = VFSTOHFS(mp);
2488
2489 /*
2490 * hfs_changefs might be manipulating vnodes so back off
2491 */
2492 if (hfsmp->hfs_flags & HFS_IN_CHANGEFS)
2493 return (0);
2494
2495 if (hfsmp->hfs_flags & HFS_READ_ONLY)
2496 return (EROFS);
2497
2498 /* skip over frozen volumes */
2499 if (!lck_rw_try_lock_shared(&hfsmp->hfs_insync))
2500 return 0;
2501
2502 args.cred = kauth_cred_get();
2503 args.waitfor = waitfor;
2504 args.p = p;
2505 args.error = 0;
2506 /*
2507 * hfs_sync_callback will be called for each vnode
2508 * hung off of this mount point... the vnode will be
2509 * properly referenced and unreferenced around the callback
2510 */
2511 vnode_iterate(mp, 0, hfs_sync_callback, (void *)&args);
2512
2513 if (args.error)
2514 allerror = args.error;
2515
2516 vcb = HFSTOVCB(hfsmp);
2517
2518 meta_vp[0] = vcb->extentsRefNum;
2519 meta_vp[1] = vcb->catalogRefNum;
2520 meta_vp[2] = vcb->allocationsRefNum; /* This is NULL for standard HFS */
2521 meta_vp[3] = hfsmp->hfs_attribute_vp; /* Optional file */
2522
2523 /* Now sync our three metadata files */
2524 for (i = 0; i < 4; ++i) {
2525 struct vnode *btvp;
2526
2527 btvp = meta_vp[i];;
2528 if ((btvp==0) || (vnode_mount(btvp) != mp))
2529 continue;
2530
2531 /* XXX use hfs_systemfile_lock instead ? */
2532 (void) hfs_lock(VTOC(btvp), HFS_EXCLUSIVE_LOCK);
2533 cp = VTOC(btvp);
2534
2535 if (((cp->c_flag & C_MODIFIED) == 0) &&
2536 (cp->c_touch_acctime == 0) &&
2537 (cp->c_touch_chgtime == 0) &&
2538 (cp->c_touch_modtime == 0) &&
2539 vnode_hasdirtyblks(btvp) == 0) {
2540 hfs_unlock(VTOC(btvp));
2541 continue;
2542 }
2543 error = vnode_get(btvp);
2544 if (error) {
2545 hfs_unlock(VTOC(btvp));
2546 continue;
2547 }
2548 if ((error = hfs_fsync(btvp, waitfor, 0, p)))
2549 allerror = error;
2550
2551 hfs_unlock(cp);
2552 vnode_put(btvp);
2553 };
2554
2555 /*
2556 * Force stale file system control information to be flushed.
2557 */
2558 if (vcb->vcbSigWord == kHFSSigWord) {
2559 if ((error = VNOP_FSYNC(hfsmp->hfs_devvp, waitfor, context))) {
2560 allerror = error;
2561 }
2562 }
2563 #if QUOTA
2564 hfs_qsync(mp);
2565 #endif /* QUOTA */
2566
2567 hfs_hotfilesync(hfsmp, vfs_context_kernel());
2568
2569 /*
2570 * Write back modified superblock.
2571 */
2572 if (IsVCBDirty(vcb)) {
2573 error = hfs_flushvolumeheader(hfsmp, waitfor, 0);
2574 if (error)
2575 allerror = error;
2576 }
2577
2578 if (hfsmp->jnl) {
2579 hfs_journal_flush(hfsmp, FALSE);
2580 }
2581
2582 {
2583 clock_sec_t secs;
2584 clock_usec_t usecs;
2585 uint64_t now;
2586
2587 clock_get_calendar_microtime(&secs, &usecs);
2588 now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs;
2589 hfsmp->hfs_last_sync_time = now;
2590 }
2591
2592 lck_rw_unlock_shared(&hfsmp->hfs_insync);
2593 return (allerror);
2594 }
2595
2596
2597 /*
2598 * File handle to vnode
2599 *
2600 * Have to be really careful about stale file handles:
2601 * - check that the cnode id is valid
2602 * - call hfs_vget() to get the locked cnode
2603 * - check for an unallocated cnode (i_mode == 0)
2604 * - check that the given client host has export rights and return
2605 * those rights via. exflagsp and credanonp
2606 */
2607 static int
2608 hfs_fhtovp(struct mount *mp, int fhlen, unsigned char *fhp, struct vnode **vpp, __unused vfs_context_t context)
2609 {
2610 struct hfsfid *hfsfhp;
2611 struct vnode *nvp;
2612 int result;
2613
2614 *vpp = NULL;
2615 hfsfhp = (struct hfsfid *)fhp;
2616
2617 if (fhlen < (int)sizeof(struct hfsfid))
2618 return (EINVAL);
2619
2620 result = hfs_vget(VFSTOHFS(mp), ntohl(hfsfhp->hfsfid_cnid), &nvp, 0, 0);
2621 if (result) {
2622 if (result == ENOENT)
2623 result = ESTALE;
2624 return result;
2625 }
2626
2627 /*
2628 * We used to use the create time as the gen id of the file handle,
2629 * but it is not static enough because it can change at any point
2630 * via system calls. We still don't have another volume ID or other
2631 * unique identifier to use for a generation ID across reboots that
2632 * persists until the file is removed. Using only the CNID exposes
2633 * us to the potential wrap-around case, but as of 2/2008, it would take
2634 * over 2 months to wrap around if the machine did nothing but allocate
2635 * CNIDs. Using some kind of wrap counter would only be effective if
2636 * each file had the wrap counter associated with it. For now,
2637 * we use only the CNID to identify the file as it's good enough.
2638 */
2639
2640 *vpp = nvp;
2641
2642 hfs_unlock(VTOC(nvp));
2643 return (0);
2644 }
2645
2646
2647 /*
2648 * Vnode pointer to File handle
2649 */
2650 /* ARGSUSED */
2651 static int
2652 hfs_vptofh(struct vnode *vp, int *fhlenp, unsigned char *fhp, __unused vfs_context_t context)
2653 {
2654 struct cnode *cp;
2655 struct hfsfid *hfsfhp;
2656
2657 if (ISHFS(VTOVCB(vp)))
2658 return (ENOTSUP); /* hfs standard is not exportable */
2659
2660 if (*fhlenp < (int)sizeof(struct hfsfid))
2661 return (EOVERFLOW);
2662
2663 cp = VTOC(vp);
2664 hfsfhp = (struct hfsfid *)fhp;
2665 /* only the CNID is used to identify the file now */
2666 hfsfhp->hfsfid_cnid = htonl(cp->c_fileid);
2667 hfsfhp->hfsfid_gen = htonl(cp->c_fileid);
2668 *fhlenp = sizeof(struct hfsfid);
2669
2670 return (0);
2671 }
2672
2673
2674 /*
2675 * Initial HFS filesystems, done only once.
2676 */
2677 static int
2678 hfs_init(__unused struct vfsconf *vfsp)
2679 {
2680 static int done = 0;
2681
2682 if (done)
2683 return (0);
2684 done = 1;
2685 hfs_chashinit();
2686 hfs_converterinit();
2687
2688 BTReserveSetup();
2689
2690
2691 hfs_lock_attr = lck_attr_alloc_init();
2692 hfs_group_attr = lck_grp_attr_alloc_init();
2693 hfs_mutex_group = lck_grp_alloc_init("hfs-mutex", hfs_group_attr);
2694 hfs_rwlock_group = lck_grp_alloc_init("hfs-rwlock", hfs_group_attr);
2695 hfs_spinlock_group = lck_grp_alloc_init("hfs-spinlock", hfs_group_attr);
2696
2697 #if HFS_COMPRESSION
2698 decmpfs_init();
2699 #endif
2700
2701 return (0);
2702 }
2703
2704 static int
2705 hfs_getmountpoint(struct vnode *vp, struct hfsmount **hfsmpp)
2706 {
2707 struct hfsmount * hfsmp;
2708 char fstypename[MFSNAMELEN];
2709
2710 if (vp == NULL)
2711 return (EINVAL);
2712
2713 if (!vnode_isvroot(vp))
2714 return (EINVAL);
2715
2716 vnode_vfsname(vp, fstypename);
2717 if (strncmp(fstypename, "hfs", sizeof(fstypename)) != 0)
2718 return (EINVAL);
2719
2720 hfsmp = VTOHFS(vp);
2721
2722 if (HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord)
2723 return (EINVAL);
2724
2725 *hfsmpp = hfsmp;
2726
2727 return (0);
2728 }
2729
2730 // XXXdbg
2731 #include <sys/filedesc.h>
2732
2733 /*
2734 * HFS filesystem related variables.
2735 */
2736 int
2737 hfs_sysctl(int *name, __unused u_int namelen, user_addr_t oldp, size_t *oldlenp,
2738 user_addr_t newp, size_t newlen, vfs_context_t context)
2739 {
2740 struct proc *p = vfs_context_proc(context);
2741 int error;
2742 struct hfsmount *hfsmp;
2743
2744 /* all sysctl names at this level are terminal */
2745
2746 if (name[0] == HFS_ENCODINGBIAS) {
2747 int bias;
2748
2749 bias = hfs_getencodingbias();
2750 error = sysctl_int(oldp, oldlenp, newp, newlen, &bias);
2751 if (error == 0 && newp)
2752 hfs_setencodingbias(bias);
2753 return (error);
2754
2755 } else if (name[0] == HFS_EXTEND_FS) {
2756 u_int64_t newsize;
2757 vnode_t vp = vfs_context_cwd(context);
2758
2759 if (newp == USER_ADDR_NULL || vp == NULLVP)
2760 return (EINVAL);
2761 if ((error = hfs_getmountpoint(vp, &hfsmp)))
2762 return (error);
2763 error = sysctl_quad(oldp, oldlenp, newp, newlen, (quad_t *)&newsize);
2764 if (error)
2765 return (error);
2766
2767 error = hfs_extendfs(hfsmp, newsize, context);
2768 return (error);
2769
2770 } else if (name[0] == HFS_ENCODINGHINT) {
2771 size_t bufsize;
2772 size_t bytes;
2773 u_int32_t hint;
2774 u_int16_t *unicode_name = NULL;
2775 char *filename = NULL;
2776
2777 if ((newlen <= 0) || (newlen > MAXPATHLEN))
2778 return (EINVAL);
2779
2780 bufsize = MAX(newlen * 3, MAXPATHLEN);
2781 MALLOC(filename, char *, newlen, M_TEMP, M_WAITOK);
2782 if (filename == NULL) {
2783 error = ENOMEM;
2784 goto encodinghint_exit;
2785 }
2786 MALLOC(unicode_name, u_int16_t *, bufsize, M_TEMP, M_WAITOK);
2787 if (filename == NULL) {
2788 error = ENOMEM;
2789 goto encodinghint_exit;
2790 }
2791
2792 error = copyin(newp, (caddr_t)filename, newlen);
2793 if (error == 0) {
2794 error = utf8_decodestr((u_int8_t *)filename, newlen - 1, unicode_name,
2795 &bytes, bufsize, 0, UTF_DECOMPOSED);
2796 if (error == 0) {
2797 hint = hfs_pickencoding(unicode_name, bytes / 2);
2798 error = sysctl_int(oldp, oldlenp, USER_ADDR_NULL, 0, (int32_t *)&hint);
2799 }
2800 }
2801
2802 encodinghint_exit:
2803 if (unicode_name)
2804 FREE(unicode_name, M_TEMP);
2805 if (filename)
2806 FREE(filename, M_TEMP);
2807 return (error);
2808
2809 } else if (name[0] == HFS_ENABLE_JOURNALING) {
2810 // make the file system journaled...
2811 vnode_t vp = vfs_context_cwd(context);
2812 vnode_t jvp;
2813 ExtendedVCB *vcb;
2814 struct cat_attr jnl_attr, jinfo_attr;
2815 struct cat_fork jnl_fork, jinfo_fork;
2816 void *jnl = NULL;
2817 int lockflags;
2818
2819 /* Only root can enable journaling */
2820 if (!is_suser()) {
2821 return (EPERM);
2822 }
2823 if (vp == NULLVP)
2824 return EINVAL;
2825
2826 hfsmp = VTOHFS(vp);
2827 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2828 return EROFS;
2829 }
2830 if (HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord) {
2831 printf("hfs: can't make a plain hfs volume journaled.\n");
2832 return EINVAL;
2833 }
2834
2835 if (hfsmp->jnl) {
2836 printf("hfs: volume @ mp %p is already journaled!\n", vnode_mount(vp));
2837 return EAGAIN;
2838 }
2839
2840 vcb = HFSTOVCB(hfsmp);
2841 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_EXTENTS, HFS_EXCLUSIVE_LOCK);
2842 if (BTHasContiguousNodes(VTOF(vcb->catalogRefNum)) == 0 ||
2843 BTHasContiguousNodes(VTOF(vcb->extentsRefNum)) == 0) {
2844
2845 printf("hfs: volume has a btree w/non-contiguous nodes. can not enable journaling.\n");
2846 hfs_systemfile_unlock(hfsmp, lockflags);
2847 return EINVAL;
2848 }
2849 hfs_systemfile_unlock(hfsmp, lockflags);
2850
2851 // make sure these both exist!
2852 if ( GetFileInfo(vcb, kHFSRootFolderID, ".journal_info_block", &jinfo_attr, &jinfo_fork) == 0
2853 || GetFileInfo(vcb, kHFSRootFolderID, ".journal", &jnl_attr, &jnl_fork) == 0) {
2854
2855 return EINVAL;
2856 }
2857
2858 hfs_sync(hfsmp->hfs_mp, MNT_WAIT, context);
2859
2860 printf("hfs: Initializing the journal (joffset 0x%llx sz 0x%llx)...\n",
2861 (off_t)name[2], (off_t)name[3]);
2862
2863 //
2864 // XXXdbg - note that currently (Sept, 08) hfs_util does not support
2865 // enabling the journal on a separate device so it is safe
2866 // to just copy hfs_devvp here. If hfs_util gets the ability
2867 // to dynamically enable the journal on a separate device then
2868 // we will have to do the same thing as hfs_early_journal_init()
2869 // to locate and open the journal device.
2870 //
2871 jvp = hfsmp->hfs_devvp;
2872 jnl = journal_create(jvp,
2873 (off_t)name[2] * (off_t)HFSTOVCB(hfsmp)->blockSize
2874 + HFSTOVCB(hfsmp)->hfsPlusIOPosOffset,
2875 (off_t)((unsigned)name[3]),
2876 hfsmp->hfs_devvp,
2877 hfsmp->hfs_logical_block_size,
2878 0,
2879 0,
2880 hfs_sync_metadata, hfsmp->hfs_mp);
2881
2882 /*
2883 * Set up the trim callback function so that we can add
2884 * recently freed extents to the free extent cache once
2885 * the transaction that freed them is written to the
2886 * journal on disk.
2887 */
2888 if (jnl)
2889 journal_trim_set_callback(jnl, hfs_trim_callback, hfsmp);
2890
2891 if (jnl == NULL) {
2892 printf("hfs: FAILED to create the journal!\n");
2893 if (jvp && jvp != hfsmp->hfs_devvp) {
2894 vnode_clearmountedon(jvp);
2895 VNOP_CLOSE(jvp, hfsmp->hfs_flags & HFS_READ_ONLY ? FREAD : FREAD|FWRITE, vfs_context_kernel());
2896 }
2897 jvp = NULL;
2898
2899 return EINVAL;
2900 }
2901
2902 hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);
2903
2904 /*
2905 * Flush all dirty metadata buffers.
2906 */
2907 buf_flushdirtyblks(hfsmp->hfs_devvp, TRUE, 0, "hfs_sysctl");
2908 buf_flushdirtyblks(hfsmp->hfs_extents_vp, TRUE, 0, "hfs_sysctl");
2909 buf_flushdirtyblks(hfsmp->hfs_catalog_vp, TRUE, 0, "hfs_sysctl");
2910 buf_flushdirtyblks(hfsmp->hfs_allocation_vp, TRUE, 0, "hfs_sysctl");
2911 if (hfsmp->hfs_attribute_vp)
2912 buf_flushdirtyblks(hfsmp->hfs_attribute_vp, TRUE, 0, "hfs_sysctl");
2913
2914 HFSTOVCB(hfsmp)->vcbJinfoBlock = name[1];
2915 HFSTOVCB(hfsmp)->vcbAtrb |= kHFSVolumeJournaledMask;
2916 hfsmp->jvp = jvp;
2917 hfsmp->jnl = jnl;
2918
2919 // save this off for the hack-y check in hfs_remove()
2920 hfsmp->jnl_start = (u_int32_t)name[2];
2921 hfsmp->jnl_size = (off_t)((unsigned)name[3]);
2922 hfsmp->hfs_jnlinfoblkid = jinfo_attr.ca_fileid;
2923 hfsmp->hfs_jnlfileid = jnl_attr.ca_fileid;
2924
2925 vfs_setflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
2926
2927 hfs_unlock_global (hfsmp);
2928 hfs_flushvolumeheader(hfsmp, MNT_WAIT, 1);
2929
2930 {
2931 fsid_t fsid;
2932
2933 fsid.val[0] = (int32_t)hfsmp->hfs_raw_dev;
2934 fsid.val[1] = (int32_t)vfs_typenum(HFSTOVFS(hfsmp));
2935 vfs_event_signal(&fsid, VQ_UPDATE, (intptr_t)NULL);
2936 }
2937 return 0;
2938 } else if (name[0] == HFS_DISABLE_JOURNALING) {
2939 // clear the journaling bit
2940 vnode_t vp = vfs_context_cwd(context);
2941
2942 /* Only root can disable journaling */
2943 if (!is_suser()) {
2944 return (EPERM);
2945 }
2946 if (vp == NULLVP)
2947 return EINVAL;
2948
2949 hfsmp = VTOHFS(vp);
2950
2951 /*
2952 * Disabling journaling is disallowed on volumes with directory hard links
2953 * because we have not tested the relevant code path.
2954 */
2955 if (hfsmp->hfs_private_attr[DIR_HARDLINKS].ca_entries != 0){
2956 printf("hfs: cannot disable journaling on volumes with directory hardlinks\n");
2957 return EPERM;
2958 }
2959
2960 printf("hfs: disabling journaling for mount @ %p\n", vnode_mount(vp));
2961
2962 hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);
2963
2964 // Lights out for you buddy!
2965 journal_close(hfsmp->jnl);
2966 hfsmp->jnl = NULL;
2967
2968 if (hfsmp->jvp && hfsmp->jvp != hfsmp->hfs_devvp) {
2969 vnode_clearmountedon(hfsmp->jvp);
2970 VNOP_CLOSE(hfsmp->jvp, hfsmp->hfs_flags & HFS_READ_ONLY ? FREAD : FREAD|FWRITE, vfs_context_kernel());
2971 vnode_put(hfsmp->jvp);
2972 }
2973 hfsmp->jvp = NULL;
2974 vfs_clearflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
2975 hfsmp->jnl_start = 0;
2976 hfsmp->hfs_jnlinfoblkid = 0;
2977 hfsmp->hfs_jnlfileid = 0;
2978
2979 HFSTOVCB(hfsmp)->vcbAtrb &= ~kHFSVolumeJournaledMask;
2980
2981 hfs_unlock_global (hfsmp);
2982
2983 hfs_flushvolumeheader(hfsmp, MNT_WAIT, 1);
2984
2985 {
2986 fsid_t fsid;
2987
2988 fsid.val[0] = (int32_t)hfsmp->hfs_raw_dev;
2989 fsid.val[1] = (int32_t)vfs_typenum(HFSTOVFS(hfsmp));
2990 vfs_event_signal(&fsid, VQ_UPDATE, (intptr_t)NULL);
2991 }
2992 return 0;
2993 } else if (name[0] == HFS_GET_JOURNAL_INFO) {
2994 vnode_t vp = vfs_context_cwd(context);
2995 off_t jnl_start, jnl_size;
2996
2997 if (vp == NULLVP)
2998 return EINVAL;
2999
3000 /* 64-bit processes won't work with this sysctl -- can't fit a pointer into an int! */
3001 if (proc_is64bit(current_proc()))
3002 return EINVAL;
3003
3004 hfsmp = VTOHFS(vp);
3005 if (hfsmp->jnl == NULL) {
3006 jnl_start = 0;
3007 jnl_size = 0;
3008 } else {
3009 jnl_start = (off_t)(hfsmp->jnl_start * HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset;
3010 jnl_size = (off_t)hfsmp->jnl_size;
3011 }
3012
3013 if ((error = copyout((caddr_t)&jnl_start, CAST_USER_ADDR_T(name[1]), sizeof(off_t))) != 0) {
3014 return error;
3015 }
3016 if ((error = copyout((caddr_t)&jnl_size, CAST_USER_ADDR_T(name[2]), sizeof(off_t))) != 0) {
3017 return error;
3018 }
3019
3020 return 0;
3021 } else if (name[0] == HFS_SET_PKG_EXTENSIONS) {
3022
3023 return set_package_extensions_table((user_addr_t)((unsigned)name[1]), name[2], name[3]);
3024
3025 } else if (name[0] == VFS_CTL_QUERY) {
3026 struct sysctl_req *req;
3027 union union_vfsidctl vc;
3028 struct mount *mp;
3029 struct vfsquery vq;
3030
3031 req = CAST_DOWN(struct sysctl_req *, oldp); /* we're new style vfs sysctl. */
3032
3033 error = SYSCTL_IN(req, &vc, proc_is64bit(p)? sizeof(vc.vc64):sizeof(vc.vc32));
3034 if (error) return (error);
3035
3036 mp = vfs_getvfs(&vc.vc32.vc_fsid); /* works for 32 and 64 */
3037 if (mp == NULL) return (ENOENT);
3038
3039 hfsmp = VFSTOHFS(mp);
3040 bzero(&vq, sizeof(vq));
3041 vq.vq_flags = hfsmp->hfs_notification_conditions;
3042 return SYSCTL_OUT(req, &vq, sizeof(vq));;
3043 } else if (name[0] == HFS_REPLAY_JOURNAL) {
3044 vnode_t devvp = NULL;
3045 int device_fd;
3046 if (namelen != 2) {
3047 return (EINVAL);
3048 }
3049 device_fd = name[1];
3050 error = file_vnode(device_fd, &devvp);
3051 if (error) {
3052 return error;
3053 }
3054 error = vnode_getwithref(devvp);
3055 if (error) {
3056 file_drop(device_fd);
3057 return error;
3058 }
3059 error = hfs_journal_replay(devvp, context);
3060 file_drop(device_fd);
3061 vnode_put(devvp);
3062 return error;
3063 } else if (name[0] == HFS_ENABLE_RESIZE_DEBUG) {
3064 hfs_resize_debug = 1;
3065 printf ("hfs_sysctl: Enabled volume resize debugging.\n");
3066 return 0;
3067 }
3068
3069 return (ENOTSUP);
3070 }
3071
3072 /*
3073 * hfs_vfs_vget is not static since it is used in hfs_readwrite.c to support
3074 * the build_path ioctl. We use it to leverage the code below that updates
3075 * the origin list cache if necessary
3076 */
3077
int
hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, __unused vfs_context_t context)
{
	int error;
	int lockflags;
	struct hfsmount *hfsmp;

	hfsmp = VFSTOHFS(mp);

	/*
	 * Look the object up by catalog node ID.  skiplock=1 so the
	 * cnode comes back unlocked; we re-lock it below only if we
	 * need to refresh hardlink origin state.
	 */
	error = hfs_vget(hfsmp, (cnid_t)ino, vpp, 1, 0);
	if (error)
		return (error);

	/*
	 * ADLs may need to have their origin state updated
	 * since build_path needs a valid parent.  The same is true
	 * for hardlinked files as well.  There isn't a race window here
	 * in re-acquiring the cnode lock since we aren't pulling any data
	 * out of the cnode; instead, we're going to the catalog.
	 */
	if ((VTOC(*vpp)->c_flag & C_HARDLINK) &&
	    (hfs_lock(VTOC(*vpp), HFS_EXCLUSIVE_LOCK) == 0)) {
		cnode_t *cp = VTOC(*vpp);
		struct cat_desc cdesc;

		if (!hfs_haslinkorigin(cp)) {
			lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
			error = cat_findname(hfsmp, (cnid_t)ino, &cdesc);
			hfs_systemfile_unlock(hfsmp, lockflags);
			if (error == 0) {
				/*
				 * Only record the origin if the found parent is a
				 * real directory, not one of the private hardlink
				 * metadata directories.
				 */
				if ((cdesc.cd_parentcnid != hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) &&
					(cdesc.cd_parentcnid != hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid)) {
					hfs_savelinkorigin(cp, cdesc.cd_parentcnid);
				}
				cat_releasedesc(&cdesc);
			}
		}
		hfs_unlock(cp);
	}
	/*
	 * Note: a cat_findname() failure above is deliberately not
	 * propagated -- the vnode was obtained successfully, and the
	 * origin update is best-effort.
	 */
	return (0);
}
3119
3120
3121 /*
3122 * Look up an HFS object by ID.
3123 *
3124 * The object is returned with an iocount reference and the cnode locked.
3125 *
3126 * If the object is a file then it will represent the data fork.
3127 */
int
hfs_vget(struct hfsmount *hfsmp, cnid_t cnid, struct vnode **vpp, int skiplock, int allow_deleted)
{
	struct vnode *vp = NULLVP;
	struct cat_desc cndesc;
	struct cat_attr cnattr;
	struct cat_fork cnfork;
	u_int32_t linkref = 0;	/* non-zero => cnid named a raw hardlink inode */
	int error;

	/* Check for cnids that should't be exported. */
	if ((cnid < kHFSFirstUserCatalogNodeID) &&
	    (cnid != kHFSRootFolderID && cnid != kHFSRootParentID)) {
		return (ENOENT);
	}
	/* Don't export our private directories. */
	if (cnid == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid ||
	    cnid == hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) {
		return (ENOENT);
	}
	/*
	 * Check the hash first: if the cnode is already in memory we can
	 * return it directly without touching the catalog.
	 */
	vp = hfs_chash_getvnode(hfsmp, cnid, 0, skiplock, allow_deleted);
	if (vp) {
		*vpp = vp;
		return(0);
	}

	bzero(&cndesc, sizeof(cndesc));
	bzero(&cnattr, sizeof(cnattr));
	bzero(&cnfork, sizeof(cnfork));

	/*
	 * Not in hash, lookup in catalog
	 */
	if (cnid == kHFSRootParentID) {
		/*
		 * kHFSRootParentID has no catalog record; synthesize a
		 * descriptor/attributes for the root directory instead.
		 */
		static char hfs_rootname[] = "/";

		cndesc.cd_nameptr = (const u_int8_t *)&hfs_rootname[0];
		cndesc.cd_namelen = 1;
		cndesc.cd_parentcnid = kHFSRootParentID;
		cndesc.cd_cnid = kHFSRootFolderID;
		cndesc.cd_flags = CD_ISDIR;

		cnattr.ca_fileid = kHFSRootFolderID;
		cnattr.ca_linkcount = 1;
		cnattr.ca_entries = 1;
		cnattr.ca_dircount = 1;
		cnattr.ca_mode = (S_IFDIR | S_IRWXU | S_IRWXG | S_IRWXO);
	} else {
		int lockflags;
		cnid_t pid;
		const char *nameptr;

		lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
		error = cat_idlookup(hfsmp, cnid, 0, &cndesc, &cnattr, &cnfork);
		hfs_systemfile_unlock(hfsmp, lockflags);

		if (error) {
			*vpp = NULL;
			return (error);
		}

		/*
		 * Check for a raw hardlink inode and save its linkref.
		 * Raw inodes live in the private metadata directories and
		 * are named "<prefix><linkref>"; parse the number out of
		 * the name.
		 */
		pid = cndesc.cd_parentcnid;
		nameptr = (const char *)cndesc.cd_nameptr;

		if ((pid == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid) &&
		    (bcmp(nameptr, HFS_INODE_PREFIX, HFS_INODE_PREFIX_LEN) == 0)) {
			linkref = strtoul(&nameptr[HFS_INODE_PREFIX_LEN], NULL, 10);

		} else if ((pid == hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) &&
		           (bcmp(nameptr, HFS_DIRINODE_PREFIX, HFS_DIRINODE_PREFIX_LEN) == 0)) {
			linkref = strtoul(&nameptr[HFS_DIRINODE_PREFIX_LEN], NULL, 10);

		} else if ((pid == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid) &&
		           (bcmp(nameptr, HFS_DELETE_PREFIX, HFS_DELETE_PREFIX_LEN) == 0)) {
			*vpp = NULL;
			cat_releasedesc(&cndesc);
			return (ENOENT);	/* open unlinked file */
		}
	}

	/*
	 * Finish initializing cnode descriptor for hardlinks.
	 *
	 * We need a valid name and parent for reverse lookups.
	 */
	if (linkref) {
		cnid_t nextlinkid;
		cnid_t prevlinkid;
		struct cat_desc linkdesc;
		int lockflags;

		cnattr.ca_linkref = linkref;

		/*
		 * Pick up the first link in the chain and get a descriptor for it.
		 * This allows blind volfs paths to work for hardlinks.
		 */
		if ((hfs_lookup_siblinglinks(hfsmp, linkref, &prevlinkid, &nextlinkid) == 0) &&
		    (nextlinkid != 0)) {
			lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
			error = cat_findname(hfsmp, nextlinkid, &linkdesc);
			hfs_systemfile_unlock(hfsmp, lockflags);
			if (error == 0) {
				/* Replace the raw-inode descriptor with the link's. */
				cat_releasedesc(&cndesc);
				bcopy(&linkdesc, &cndesc, sizeof(linkdesc));
			}
		}
	}

	if (linkref) {
		int newvnode_flags = 0;

		error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr,
								&cnfork, &vp, &newvnode_flags);
		if (error == 0) {
			VTOC(vp)->c_flag |= C_HARDLINK;
			vnode_setmultipath(vp);
		}
	} else {
		struct componentname cn;
		int newvnode_flags = 0;

		/* Supply hfs_getnewvnode with a component name. */
		MALLOC_ZONE(cn.cn_pnbuf, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK);
		cn.cn_nameiop = LOOKUP;
		cn.cn_flags = ISLASTCN | HASBUF;
		cn.cn_context = NULL;
		cn.cn_pnlen = MAXPATHLEN;
		cn.cn_nameptr = cn.cn_pnbuf;
		cn.cn_namelen = cndesc.cd_namelen;
		cn.cn_hash = 0;
		cn.cn_consume = 0;
		/* +1 to pick up the NUL terminator along with the name. */
		bcopy(cndesc.cd_nameptr, cn.cn_nameptr, cndesc.cd_namelen + 1);

		error = hfs_getnewvnode(hfsmp, NULLVP, &cn, &cndesc, 0, &cnattr,
								&cnfork, &vp, &newvnode_flags);

		if (error == 0 && (VTOC(vp)->c_flag & C_HARDLINK)) {
			hfs_savelinkorigin(VTOC(vp), cndesc.cd_parentcnid);
		}
		FREE_ZONE(cn.cn_pnbuf, cn.cn_pnlen, M_NAMEI);
	}
	cat_releasedesc(&cndesc);

	*vpp = vp;
	/* Caller asked for an unlocked cnode (vnode still has an iocount). */
	if (vp && skiplock) {
		hfs_unlock(VTOC(vp));
	}
	return (error);
}
3284
3285
3286 /*
3287 * Flush out all the files in a filesystem.
3288 */
static int
#if QUOTA
hfs_flushfiles(struct mount *mp, int flags, struct proc *p)
#else
hfs_flushfiles(struct mount *mp, int flags, __unused struct proc *p)
#endif /* QUOTA */
{
	struct hfsmount *hfsmp;
	struct vnode *skipvp = NULLVP;
	int error;
#if QUOTA
	int quotafilecnt;
	int i;
#endif

	hfsmp = VFSTOHFS(mp);

#if QUOTA
	/*
	 * The open quota files have an indirect reference on
	 * the root directory vnode.  We must account for this
	 * extra reference when doing the intial vflush.
	 */
	quotafilecnt = 0;
	if (((unsigned int)vfs_flags(mp)) & MNT_QUOTA) {

		/* Find out how many quota files we have open. */
		for (i = 0; i < MAXQUOTAS; i++) {
			if (hfsmp->hfs_qfiles[i].qf_vp != NULLVP)
				++quotafilecnt;
		}

		/* Obtain the root vnode so we can skip over it. */
		skipvp = hfs_chash_getvnode(hfsmp, kHFSRootFolderID, 0, 0, 0);
	}
#endif /* QUOTA */

	/* First pass: also skip swap files; second pass flushes them too. */
	error = vflush(mp, skipvp, SKIPSYSTEM | SKIPSWAP | flags);
	if (error != 0)
		return(error);

	error = vflush(mp, skipvp, SKIPSYSTEM | flags);

#if QUOTA
	if (((unsigned int)vfs_flags(mp)) & MNT_QUOTA) {
		if (skipvp) {
			/*
			 * See if there are additional references on the
			 * root vp besides the ones obtained from the open
			 * quota files and the hfs_chash_getvnode call above.
			 */
			if ((error == 0) &&
			    (vnode_isinuse(skipvp, quotafilecnt))) {
				error = EBUSY;  /* root directory is still open */
			}
			hfs_unlock(VTOC(skipvp));
			vnode_put(skipvp);
		}
		if (error && (flags & FORCECLOSE) == 0)
			return (error);

		/* Close the quota files, then flush whatever remains. */
		for (i = 0; i < MAXQUOTAS; i++) {
			if (hfsmp->hfs_qfiles[i].qf_vp == NULLVP)
				continue;
			hfs_quotaoff(p, mp, i);
		}
		error = vflush(mp, NULLVP, SKIPSYSTEM | flags);
	}
#endif /* QUOTA */

	return (error);
}
3361
3362 /*
3363 * Update volume encoding bitmap (HFS Plus only)
3364 */
3365 __private_extern__
3366 void
3367 hfs_setencodingbits(struct hfsmount *hfsmp, u_int32_t encoding)
3368 {
3369 #define kIndexMacUkrainian 48 /* MacUkrainian encoding is 152 */
3370 #define kIndexMacFarsi 49 /* MacFarsi encoding is 140 */
3371
3372 u_int32_t index;
3373
3374 switch (encoding) {
3375 case kTextEncodingMacUkrainian:
3376 index = kIndexMacUkrainian;
3377 break;
3378 case kTextEncodingMacFarsi:
3379 index = kIndexMacFarsi;
3380 break;
3381 default:
3382 index = encoding;
3383 break;
3384 }
3385
3386 if (index < 64 && (hfsmp->encodingsBitmap & (u_int64_t)(1ULL << index)) == 0) {
3387 HFS_MOUNT_LOCK(hfsmp, TRUE)
3388 hfsmp->encodingsBitmap |= (u_int64_t)(1ULL << index);
3389 MarkVCBDirty(hfsmp);
3390 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
3391 }
3392 }
3393
3394 /*
3395 * Update volume stats
3396 *
3397 * On journal volumes this will cause a volume header flush
3398 */
3399 int
3400 hfs_volupdate(struct hfsmount *hfsmp, enum volop op, int inroot)
3401 {
3402 struct timeval tv;
3403
3404 microtime(&tv);
3405
3406 lck_mtx_lock(&hfsmp->hfs_mutex);
3407
3408 MarkVCBDirty(hfsmp);
3409 hfsmp->hfs_mtime = tv.tv_sec;
3410
3411 switch (op) {
3412 case VOL_UPDATE:
3413 break;
3414 case VOL_MKDIR:
3415 if (hfsmp->hfs_dircount != 0xFFFFFFFF)
3416 ++hfsmp->hfs_dircount;
3417 if (inroot && hfsmp->vcbNmRtDirs != 0xFFFF)
3418 ++hfsmp->vcbNmRtDirs;
3419 break;
3420 case VOL_RMDIR:
3421 if (hfsmp->hfs_dircount != 0)
3422 --hfsmp->hfs_dircount;
3423 if (inroot && hfsmp->vcbNmRtDirs != 0xFFFF)
3424 --hfsmp->vcbNmRtDirs;
3425 break;
3426 case VOL_MKFILE:
3427 if (hfsmp->hfs_filecount != 0xFFFFFFFF)
3428 ++hfsmp->hfs_filecount;
3429 if (inroot && hfsmp->vcbNmFls != 0xFFFF)
3430 ++hfsmp->vcbNmFls;
3431 break;
3432 case VOL_RMFILE:
3433 if (hfsmp->hfs_filecount != 0)
3434 --hfsmp->hfs_filecount;
3435 if (inroot && hfsmp->vcbNmFls != 0xFFFF)
3436 --hfsmp->vcbNmFls;
3437 break;
3438 }
3439
3440 lck_mtx_unlock(&hfsmp->hfs_mutex);
3441
3442 if (hfsmp->jnl) {
3443 hfs_flushvolumeheader(hfsmp, 0, 0);
3444 }
3445
3446 return (0);
3447 }
3448
3449
/*
 * Flush the in-memory VCB state of a plain (non-HFS Plus) HFS volume
 * out to the on-disk Master Directory Block, and optionally to the
 * alternate MDB.  Dates are converted from UTC to the local-time
 * format HFS stores on disk, and all multi-byte fields are written
 * big-endian.
 */
static int
hfs_flushMDB(struct hfsmount *hfsmp, int waitfor, int altflush)
{
	ExtendedVCB *vcb = HFSTOVCB(hfsmp);
	struct filefork *fp;
	HFSMasterDirectoryBlock	*mdb;
	struct buf *bp = NULL;
	int retval;
	int sectorsize;
	ByteCount namelen;

	/* Read the sector containing the MDB so unrelated bytes survive. */
	sectorsize = hfsmp->hfs_logical_block_size;
	retval = (int)buf_bread(hfsmp->hfs_devvp, (daddr64_t)HFS_PRI_SECTOR(sectorsize), sectorsize, NOCRED, &bp);
	if (retval) {
		if (bp)
			buf_brelse(bp);
		return retval;
	}

	lck_mtx_lock(&hfsmp->hfs_mutex);

	mdb = (HFSMasterDirectoryBlock *)(buf_dataptr(bp) + HFS_PRI_OFFSET(sectorsize));

	/* HFS stores dates in local time on disk; convert from UTC. */
	mdb->drCrDate	= SWAP_BE32 (UTCToLocal(to_hfs_time(vcb->hfs_itime)));
	mdb->drLsMod	= SWAP_BE32 (UTCToLocal(to_hfs_time(vcb->vcbLsMod)));
	mdb->drAtrb	= SWAP_BE16 (vcb->vcbAtrb);
	mdb->drNmFls	= SWAP_BE16 (vcb->vcbNmFls);
	mdb->drAllocPtr	= SWAP_BE16 (vcb->nextAllocation);
	mdb->drClpSiz	= SWAP_BE32 (vcb->vcbClpSiz);
	mdb->drNxtCNID	= SWAP_BE32 (vcb->vcbNxtCNID);
	mdb->drFreeBks	= SWAP_BE16 (vcb->freeBlocks);

	/* Convert the UTF-8 volume name back to the on-disk encoding. */
	namelen = strlen((char *)vcb->vcbVN);
	retval = utf8_to_hfs(vcb, namelen, vcb->vcbVN, mdb->drVN);
	/* Retry with MacRoman in case that's how it was exported. */
	if (retval)
		retval = utf8_to_mac_roman(namelen, vcb->vcbVN, mdb->drVN);

	mdb->drVolBkUp	= SWAP_BE32 (UTCToLocal(to_hfs_time(vcb->vcbVolBkUp)));
	mdb->drWrCnt	= SWAP_BE32 (vcb->vcbWrCnt);
	mdb->drNmRtDirs	= SWAP_BE16 (vcb->vcbNmRtDirs);
	mdb->drFilCnt	= SWAP_BE32 (vcb->vcbFilCnt);
	mdb->drDirCnt	= SWAP_BE32 (vcb->vcbDirCnt);

	bcopy(vcb->vcbFndrInfo, mdb->drFndrInfo, sizeof(mdb->drFndrInfo));

	/* Sync the extents overflow file's first three extents and sizes. */
	fp = VTOF(vcb->extentsRefNum);
	mdb->drXTExtRec[0].startBlock = SWAP_BE16 (fp->ff_extents[0].startBlock);
	mdb->drXTExtRec[0].blockCount = SWAP_BE16 (fp->ff_extents[0].blockCount);
	mdb->drXTExtRec[1].startBlock = SWAP_BE16 (fp->ff_extents[1].startBlock);
	mdb->drXTExtRec[1].blockCount = SWAP_BE16 (fp->ff_extents[1].blockCount);
	mdb->drXTExtRec[2].startBlock = SWAP_BE16 (fp->ff_extents[2].startBlock);
	mdb->drXTExtRec[2].blockCount = SWAP_BE16 (fp->ff_extents[2].blockCount);
	mdb->drXTFlSize	= SWAP_BE32 (fp->ff_blocks * vcb->blockSize);
	mdb->drXTClpSiz	= SWAP_BE32 (fp->ff_clumpsize);
	FTOC(fp)->c_flag &= ~C_MODIFIED;

	/* Sync the catalog file's first three extents and sizes. */
	fp = VTOF(vcb->catalogRefNum);
	mdb->drCTExtRec[0].startBlock = SWAP_BE16 (fp->ff_extents[0].startBlock);
	mdb->drCTExtRec[0].blockCount = SWAP_BE16 (fp->ff_extents[0].blockCount);
	mdb->drCTExtRec[1].startBlock = SWAP_BE16 (fp->ff_extents[1].startBlock);
	mdb->drCTExtRec[1].blockCount = SWAP_BE16 (fp->ff_extents[1].blockCount);
	mdb->drCTExtRec[2].startBlock = SWAP_BE16 (fp->ff_extents[2].startBlock);
	mdb->drCTExtRec[2].blockCount = SWAP_BE16 (fp->ff_extents[2].blockCount);
	mdb->drCTFlSize	= SWAP_BE32 (fp->ff_blocks * vcb->blockSize);
	mdb->drCTClpSiz	= SWAP_BE32 (fp->ff_clumpsize);
	FTOC(fp)->c_flag &= ~C_MODIFIED;

	MarkVCBClean( vcb );

	lck_mtx_unlock(&hfsmp->hfs_mutex);

	/* If requested, flush out the alternate MDB */
	if (altflush) {
		struct buf *alt_bp = NULL;

		if (buf_meta_bread(hfsmp->hfs_devvp, hfsmp->hfs_alt_id_sector, sectorsize, NOCRED, &alt_bp) == 0) {
			bcopy(mdb, (char *)buf_dataptr(alt_bp) + HFS_ALT_OFFSET(sectorsize), kMDBSize);

			(void) VNOP_BWRITE(alt_bp);
		} else if (alt_bp)
			buf_brelse(alt_bp);
	}

	/* MNT_WAIT => synchronous write; otherwise fire-and-forget. */
	if (waitfor != MNT_WAIT)
		buf_bawrite(bp);
	else
		retval = VNOP_BWRITE(bp);

	return (retval);
}
3541
3542 /*
3543 * Flush any dirty in-memory mount data to the on-disk
3544 * volume header.
3545 *
3546 * Note: the on-disk volume signature is intentionally
3547 * not flushed since the on-disk "H+" and "HX" signatures
3548 * are always stored in-memory as "H+".
3549 */
3550 int
3551 hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush)
3552 {
3553 ExtendedVCB *vcb = HFSTOVCB(hfsmp);
3554 struct filefork *fp;
3555 HFSPlusVolumeHeader *volumeHeader, *altVH;
3556 int retval;
3557 struct buf *bp, *alt_bp;
3558 int i;
3559 daddr64_t priIDSector;
3560 int critical;
3561 u_int16_t signature;
3562 u_int16_t hfsversion;
3563
3564 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
3565 return(0);
3566 }
3567 if (hfsmp->hfs_flags & HFS_STANDARD) {
3568 return hfs_flushMDB(hfsmp, waitfor, altflush);
3569 }
3570 critical = altflush;
3571 priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
3572 HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size));
3573
3574 if (hfs_start_transaction(hfsmp) != 0) {
3575 return EINVAL;
3576 }
3577
3578 bp = NULL;
3579 alt_bp = NULL;
3580
3581 retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
3582 HFS_PHYSBLK_ROUNDDOWN(priIDSector, hfsmp->hfs_log_per_phys),
3583 hfsmp->hfs_physical_block_size, NOCRED, &bp);
3584 if (retval) {
3585 printf("hfs: err %d reading VH blk (%s)\n", retval, vcb->vcbVN);
3586 goto err_exit;
3587 }
3588
3589 volumeHeader = (HFSPlusVolumeHeader *)((char *)buf_dataptr(bp) +
3590 HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));
3591
3592 /*
3593 * Sanity check what we just read. If it's bad, try the alternate
3594 * instead.
3595 */
3596 signature = SWAP_BE16 (volumeHeader->signature);
3597 hfsversion = SWAP_BE16 (volumeHeader->version);
3598 if ((signature != kHFSPlusSigWord && signature != kHFSXSigWord) ||
3599 (hfsversion < kHFSPlusVersion) || (hfsversion > 100) ||
3600 (SWAP_BE32 (volumeHeader->blockSize) != vcb->blockSize)) {
3601 printf("hfs: corrupt VH on %s, sig 0x%04x, ver %d, blksize %d%s\n",
3602 vcb->vcbVN, signature, hfsversion,
3603 SWAP_BE32 (volumeHeader->blockSize),
3604 hfsmp->hfs_alt_id_sector ? "; trying alternate" : "");
3605 hfs_mark_volume_inconsistent(hfsmp);
3606
3607 if (hfsmp->hfs_alt_id_sector) {
3608 retval = buf_meta_bread(hfsmp->hfs_devvp,
3609 HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys),
3610 hfsmp->hfs_physical_block_size, NOCRED, &alt_bp);
3611 if (retval) {
3612 printf("hfs: err %d reading alternate VH (%s)\n", retval, vcb->vcbVN);
3613 goto err_exit;
3614 }
3615
3616 altVH = (HFSPlusVolumeHeader *)((char *)buf_dataptr(alt_bp) +
3617 HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size));
3618 signature = SWAP_BE16(altVH->signature);
3619 hfsversion = SWAP_BE16(altVH->version);
3620
3621 if ((signature != kHFSPlusSigWord && signature != kHFSXSigWord) ||
3622 (hfsversion < kHFSPlusVersion) || (kHFSPlusVersion > 100) ||
3623 (SWAP_BE32(altVH->blockSize) != vcb->blockSize)) {
3624 printf("hfs: corrupt alternate VH on %s, sig 0x%04x, ver %d, blksize %d\n",
3625 vcb->vcbVN, signature, hfsversion,
3626 SWAP_BE32(altVH->blockSize));
3627 retval = EIO;
3628 goto err_exit;
3629 }
3630
3631 /* The alternate is plausible, so use it. */
3632 bcopy(altVH, volumeHeader, kMDBSize);
3633 buf_brelse(alt_bp);
3634 alt_bp = NULL;
3635 } else {
3636 /* No alternate VH, nothing more we can do. */
3637 retval = EIO;
3638 goto err_exit;
3639 }
3640 }
3641
3642 if (hfsmp->jnl) {
3643 journal_modify_block_start(hfsmp->jnl, bp);
3644 }
3645
3646 /*
3647 * For embedded HFS+ volumes, update create date if it changed
3648 * (ie from a setattrlist call)
3649 */
3650 if ((vcb->hfsPlusIOPosOffset != 0) &&
3651 (SWAP_BE32 (volumeHeader->createDate) != vcb->localCreateDate)) {
3652 struct buf *bp2;
3653 HFSMasterDirectoryBlock *mdb;
3654
3655 retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
3656 HFS_PHYSBLK_ROUNDDOWN(HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size), hfsmp->hfs_log_per_phys),
3657 hfsmp->hfs_physical_block_size, NOCRED, &bp2);
3658 if (retval) {
3659 if (bp2)
3660 buf_brelse(bp2);
3661 retval = 0;
3662 } else {
3663 mdb = (HFSMasterDirectoryBlock *)(buf_dataptr(bp2) +
3664 HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));
3665
3666 if ( SWAP_BE32 (mdb->drCrDate) != vcb->localCreateDate )
3667 {
3668 if (hfsmp->jnl) {
3669 journal_modify_block_start(hfsmp->jnl, bp2);
3670 }
3671
3672 mdb->drCrDate = SWAP_BE32 (vcb->localCreateDate); /* pick up the new create date */
3673
3674 if (hfsmp->jnl) {
3675 journal_modify_block_end(hfsmp->jnl, bp2, NULL, NULL);
3676 } else {
3677 (void) VNOP_BWRITE(bp2); /* write out the changes */
3678 }
3679 }
3680 else
3681 {
3682 buf_brelse(bp2); /* just release it */
3683 }
3684 }
3685 }
3686
3687 lck_mtx_lock(&hfsmp->hfs_mutex);
3688
3689 /* Note: only update the lower 16 bits worth of attributes */
3690 volumeHeader->attributes = SWAP_BE32 (vcb->vcbAtrb);
3691 volumeHeader->journalInfoBlock = SWAP_BE32 (vcb->vcbJinfoBlock);
3692 if (hfsmp->jnl) {
3693 volumeHeader->lastMountedVersion = SWAP_BE32 (kHFSJMountVersion);
3694 } else {
3695 volumeHeader->lastMountedVersion = SWAP_BE32 (kHFSPlusMountVersion);
3696 }
3697 volumeHeader->createDate = SWAP_BE32 (vcb->localCreateDate); /* volume create date is in local time */
3698 volumeHeader->modifyDate = SWAP_BE32 (to_hfs_time(vcb->vcbLsMod));
3699 volumeHeader->backupDate = SWAP_BE32 (to_hfs_time(vcb->vcbVolBkUp));
3700 volumeHeader->fileCount = SWAP_BE32 (vcb->vcbFilCnt);
3701 volumeHeader->folderCount = SWAP_BE32 (vcb->vcbDirCnt);
3702 volumeHeader->totalBlocks = SWAP_BE32 (vcb->totalBlocks);
3703 volumeHeader->freeBlocks = SWAP_BE32 (vcb->freeBlocks);
3704 volumeHeader->nextAllocation = SWAP_BE32 (vcb->nextAllocation);
3705 volumeHeader->rsrcClumpSize = SWAP_BE32 (vcb->vcbClpSiz);
3706 volumeHeader->dataClumpSize = SWAP_BE32 (vcb->vcbClpSiz);
3707 volumeHeader->nextCatalogID = SWAP_BE32 (vcb->vcbNxtCNID);
3708 volumeHeader->writeCount = SWAP_BE32 (vcb->vcbWrCnt);
3709 volumeHeader->encodingsBitmap = SWAP_BE64 (vcb->encodingsBitmap);
3710
3711 if (bcmp(vcb->vcbFndrInfo, volumeHeader->finderInfo, sizeof(volumeHeader->finderInfo)) != 0) {
3712 bcopy(vcb->vcbFndrInfo, volumeHeader->finderInfo, sizeof(volumeHeader->finderInfo));
3713 critical = 1;
3714 }
3715
3716 /*
3717 * System files are only dirty when altflush is set.
3718 */
3719 if (altflush == 0) {
3720 goto done;
3721 }
3722
3723 /* Sync Extents over-flow file meta data */
3724 fp = VTOF(vcb->extentsRefNum);
3725 if (FTOC(fp)->c_flag & C_MODIFIED) {
3726 for (i = 0; i < kHFSPlusExtentDensity; i++) {
3727 volumeHeader->extentsFile.extents[i].startBlock =
3728 SWAP_BE32 (fp->ff_extents[i].startBlock);
3729 volumeHeader->extentsFile.extents[i].blockCount =
3730 SWAP_BE32 (fp->ff_extents[i].blockCount);
3731 }
3732 volumeHeader->extentsFile.logicalSize = SWAP_BE64 (fp->ff_size);
3733 volumeHeader->extentsFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
3734 volumeHeader->extentsFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
3735 FTOC(fp)->c_flag &= ~C_MODIFIED;
3736 }
3737
3738 /* Sync Catalog file meta data */
3739 fp = VTOF(vcb->catalogRefNum);
3740 if (FTOC(fp)->c_flag & C_MODIFIED) {
3741 for (i = 0; i < kHFSPlusExtentDensity; i++) {
3742 volumeHeader->catalogFile.extents[i].startBlock =
3743 SWAP_BE32 (fp->ff_extents[i].startBlock);
3744 volumeHeader->catalogFile.extents[i].blockCount =
3745 SWAP_BE32 (fp->ff_extents[i].blockCount);
3746 }
3747 volumeHeader->catalogFile.logicalSize = SWAP_BE64 (fp->ff_size);
3748 volumeHeader->catalogFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
3749 volumeHeader->catalogFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
3750 FTOC(fp)->c_flag &= ~C_MODIFIED;
3751 }
3752
3753 /* Sync Allocation file meta data */
3754 fp = VTOF(vcb->allocationsRefNum);
3755 if (FTOC(fp)->c_flag & C_MODIFIED) {
3756 for (i = 0; i < kHFSPlusExtentDensity; i++) {
3757 volumeHeader->allocationFile.extents[i].startBlock =
3758 SWAP_BE32 (fp->ff_extents[i].startBlock);
3759 volumeHeader->allocationFile.extents[i].blockCount =
3760 SWAP_BE32 (fp->ff_extents[i].blockCount);
3761 }
3762 volumeHeader->allocationFile.logicalSize = SWAP_BE64 (fp->ff_size);
3763 volumeHeader->allocationFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
3764 volumeHeader->allocationFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
3765 FTOC(fp)->c_flag &= ~C_MODIFIED;
3766 }
3767
3768 /* Sync Attribute file meta data */
3769 if (hfsmp->hfs_attribute_vp) {
3770 fp = VTOF(hfsmp->hfs_attribute_vp);
3771 for (i = 0; i < kHFSPlusExtentDensity; i++) {
3772 volumeHeader->attributesFile.extents[i].startBlock =
3773 SWAP_BE32 (fp->ff_extents[i].startBlock);
3774 volumeHeader->attributesFile.extents[i].blockCount =
3775 SWAP_BE32 (fp->ff_extents[i].blockCount);
3776 }
3777 FTOC(fp)->c_flag &= ~C_MODIFIED;
3778 volumeHeader->attributesFile.logicalSize = SWAP_BE64 (fp->ff_size);
3779 volumeHeader->attributesFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
3780 volumeHeader->attributesFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
3781 }
3782
3783 /* Sync Startup file meta data */
3784 if (hfsmp->hfs_startup_vp) {
3785 fp = VTOF(hfsmp->hfs_startup_vp);
3786 if (FTOC(fp)->c_flag & C_MODIFIED) {
3787 for (i = 0; i < kHFSPlusExtentDensity; i++) {
3788 volumeHeader->startupFile.extents[i].startBlock =
3789 SWAP_BE32 (fp->ff_extents[i].startBlock);
3790 volumeHeader->startupFile.extents[i].blockCount =
3791 SWAP_BE32 (fp->ff_extents[i].blockCount);
3792 }
3793 volumeHeader->startupFile.logicalSize = SWAP_BE64 (fp->ff_size);
3794 volumeHeader->startupFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
3795 volumeHeader->startupFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
3796 FTOC(fp)->c_flag &= ~C_MODIFIED;
3797 }
3798 }
3799
3800 done:
3801 MarkVCBClean(hfsmp);
3802 lck_mtx_unlock(&hfsmp->hfs_mutex);
3803
3804 /* If requested, flush out the alternate volume header */
3805 if (altflush && hfsmp->hfs_alt_id_sector) {
3806 if (buf_meta_bread(hfsmp->hfs_devvp,
3807 HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys),
3808 hfsmp->hfs_physical_block_size, NOCRED, &alt_bp) == 0) {
3809 if (hfsmp->jnl) {
3810 journal_modify_block_start(hfsmp->jnl, alt_bp);
3811 }
3812
3813 bcopy(volumeHeader, (char *)buf_dataptr(alt_bp) +
3814 HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size),
3815 kMDBSize);
3816
3817 if (hfsmp->jnl) {
3818 journal_modify_block_end(hfsmp->jnl, alt_bp, NULL, NULL);
3819 } else {
3820 (void) VNOP_BWRITE(alt_bp);
3821 }
3822 } else if (alt_bp)
3823 buf_brelse(alt_bp);
3824 }
3825
3826 if (hfsmp->jnl) {
3827 journal_modify_block_end(hfsmp->jnl, bp, NULL, NULL);
3828 } else {
3829 if (waitfor != MNT_WAIT)
3830 buf_bawrite(bp);
3831 else {
3832 retval = VNOP_BWRITE(bp);
3833 /* When critical data changes, flush the device cache */
3834 if (critical && (retval == 0)) {
3835 (void) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE,
3836 NULL, FWRITE, NULL);
3837 }
3838 }
3839 }
3840 hfs_end_transaction(hfsmp);
3841
3842 return (retval);
3843
3844 err_exit:
3845 if (alt_bp)
3846 buf_brelse(alt_bp);
3847 if (bp)
3848 buf_brelse(bp);
3849 hfs_end_transaction(hfsmp);
3850 return retval;
3851 }
3852
3853
3854 /*
3855 * Extend a file system.
3856 */
3857 int
hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
{
	struct proc *p = vfs_context_proc(context);
	kauth_cred_t cred = vfs_context_ucred(context);
	struct vnode *vp;
	struct vnode *devvp;
	struct buf *bp;
	struct filefork *fp = NULL;		/* allocation (bitmap) file fork, set under systemfile lock */
	ExtendedVCB *vcb;
	struct cat_fork forkdata;		/* saved copy of bitmap fork data, restored at "out:" on error */
	u_int64_t oldsize;
	u_int64_t newblkcnt;
	u_int64_t prev_phys_block_count;
	u_int32_t addblks;			/* allocation blocks being added to the volume */
	u_int64_t sectorcnt;
	u_int32_t sectorsize;
	u_int32_t phys_sectorsize;
	daddr64_t prev_alt_sector;		/* old alternate volume header location, for invalidation/rollback */
	daddr_t bitmapblks;			/* *additional* allocation blocks needed by the bitmap file */
	int lockflags = 0;
	int error;
	int64_t oldBitmapSize;
	Boolean usedExtendFileC = false;	/* true: bitmap grown via ExtendFileC; false: via AddFileExtent */
	int transaction_begun = 0;

	devvp = hfsmp->hfs_devvp;
	vcb = HFSTOVCB(hfsmp);

	/*
	 * - HFS Plus file systems only.
	 * - Journaling must be enabled.
	 * - No embedded volumes.
	 */
	if ((vcb->vcbSigWord == kHFSSigWord) ||
	     (hfsmp->jnl == NULL) ||
	     (vcb->hfsPlusIOPosOffset != 0)) {
		return (EPERM);
	}
	/*
	 * If extending file system by non-root, then verify
	 * ownership and check permissions.
	 */
	if (suser(cred, NULL)) {
		error = hfs_vget(hfsmp, kHFSRootFolderID, &vp, 0, 0);

		if (error)
			return (error);
		/* Caller must own the root folder and have write access to it... */
		error = hfs_owner_rights(hfsmp, VTOC(vp)->c_uid, cred, p, 0);
		if (error == 0) {
			error = hfs_write_access(vp, cred, p, false);
		}
		hfs_unlock(VTOC(vp));
		vnode_put(vp);
		if (error)
			return (error);

		/* ...and be allowed to read/write the raw device. */
		error = vnode_authorize(devvp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, context);
		if (error)
			return (error);
	}
	if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&sectorsize, 0, context)) {
		return (ENXIO);
	}
	/* The device's logical block size must not have changed since mount. */
	if (sectorsize != hfsmp->hfs_logical_block_size) {
		return (ENXIO);
	}
	if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&sectorcnt, 0, context)) {
		return (ENXIO);
	}
	/* The underlying device must actually be large enough for the new size. */
	if ((sectorsize * sectorcnt) < newsize) {
		printf("hfs_extendfs: not enough space on device\n");
		return (ENOSPC);
	}
	error = VNOP_IOCTL(devvp, DKIOCGETPHYSICALBLOCKSIZE, (caddr_t)&phys_sectorsize, 0, context);
	if (error) {
		if ((error != ENOTSUP) && (error != ENOTTY)) {
			return (ENXIO);
		}
		/* If ioctl is not supported, force physical and logical sector size to be same */
		phys_sectorsize = sectorsize;
	}
	oldsize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;

	/*
	 * Validate new size: must grow the volume, and must be a multiple of
	 * both the logical and physical sector sizes.
	 */
	if ((newsize <= oldsize) || (newsize % sectorsize) || (newsize % phys_sectorsize)) {
		printf("hfs_extendfs: invalid size\n");
		return (EINVAL);
	}
	/* Allocation block counts are 32-bit on-disk; reject sizes that overflow. */
	newblkcnt = newsize / vcb->blockSize;
	if (newblkcnt > (u_int64_t)0xFFFFFFFF)
		return (EOVERFLOW);

	addblks = newblkcnt - vcb->totalBlocks;

	if (hfs_resize_debug) {
		printf ("hfs_extendfs: old: size=%qu, blkcnt=%u\n", oldsize, hfsmp->totalBlocks);
		printf ("hfs_extendfs: new: size=%qu, blkcnt=%u, addblks=%u\n", newsize, (u_int32_t)newblkcnt, addblks);
	}
	printf("hfs_extendfs: will extend \"%s\" by %d blocks\n", vcb->vcbVN, addblks);

	/* Only one resize (grow or shrink) may be in flight per volume. */
	HFS_MOUNT_LOCK(hfsmp, TRUE);
	if (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) {
		HFS_MOUNT_UNLOCK(hfsmp, TRUE);
		error = EALREADY;
		goto out;
	}
	hfsmp->hfs_flags |= HFS_RESIZE_IN_PROGRESS;
	HFS_MOUNT_UNLOCK(hfsmp, TRUE);

	/*
	 * Enclose changes inside a transaction.
	 */
	if (hfs_start_transaction(hfsmp) != 0) {
		error = EINVAL;
		goto out;
	}
	transaction_begun = 1;

	/*
	 * Note: we take the attributes lock in case we have an attribute data vnode
	 * which needs to change size.
	 */
	lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
	vp = vcb->allocationsRefNum;
	fp = VTOF(vp);
	/* Snapshot the bitmap file's fork so it can be restored if we fail. */
	bcopy(&fp->ff_data, &forkdata, sizeof(forkdata));

	/*
	 * Calculate additional space required (if any) by allocation bitmap.
	 * The bitmap needs one bit per allocation block, rounded up to the
	 * bitmap I/O size; bitmapblks ends up as the number of extra blocks
	 * to add to the bitmap file (0 if the current file is already big enough).
	 */
	oldBitmapSize = fp->ff_size;
	bitmapblks = roundup((newblkcnt+7) / 8, vcb->vcbVBMIOSize) / vcb->blockSize;
	if (bitmapblks > (daddr_t)fp->ff_blocks)
		bitmapblks -= fp->ff_blocks;
	else
		bitmapblks = 0;

	/*
	 * The allocation bitmap can contain unused bits that are beyond end of
	 * current volume's allocation blocks.  Usually they are supposed to be
	 * zero'ed out but there can be cases where they might be marked as used.
	 * After extending the file system, those bits can represent valid
	 * allocation blocks, so we mark all the bits from the end of current
	 * volume to end of allocation bitmap as "free".
	 */
	BlockMarkFreeUnused(vcb, vcb->totalBlocks,
			(fp->ff_blocks * vcb->blockSize * 8) - vcb->totalBlocks);

	if (bitmapblks > 0) {
		daddr64_t blkno;
		daddr_t blkcnt;
		off_t bytesAdded;

		/*
		 * Get the bitmap's current size (in allocation blocks) so we know
		 * where to start zero filling once the new space is added.  We've
		 * got to do this before the bitmap is grown.
		 */
		blkno  = (daddr64_t)fp->ff_blocks;

		/*
		 * Try to grow the allocation file in the normal way, using allocation
		 * blocks already existing in the file system.  This way, we might be
		 * able to grow the bitmap contiguously, or at least in the metadata
		 * zone.
		 */
		error = ExtendFileC(vcb, fp, bitmapblks * vcb->blockSize, 0,
				kEFAllMask | kEFNoClumpMask | kEFReserveMask
				| kEFMetadataMask | kEFContigMask, &bytesAdded);

		if (error == 0) {
			usedExtendFileC = true;
		} else {
			/*
			 * If the above allocation failed, fall back to allocating the new
			 * extent of the bitmap from the space we're going to add.  Since those
			 * blocks don't yet belong to the file system, we have to update the
			 * extent list directly, and manually adjust the file size.
			 */
			bytesAdded = 0;
			error = AddFileExtent(vcb, fp, vcb->totalBlocks, bitmapblks);
			if (error) {
				printf("hfs_extendfs: error %d adding extents\n", error);
				goto out;
			}
			fp->ff_blocks += bitmapblks;
			VTOC(vp)->c_blocks = fp->ff_blocks;
			VTOC(vp)->c_flag |= C_MODIFIED;
		}

		/*
		 * Update the allocation file's size to include the newly allocated
		 * blocks.  Note that ExtendFileC doesn't do this, which is why this
		 * statement is outside the above "if" statement.
		 */
		fp->ff_size += (u_int64_t)bitmapblks * (u_int64_t)vcb->blockSize;

		/*
		 * Zero out the new bitmap blocks, starting at the old end-of-file
		 * (blkno was captured before the bitmap file was grown above).
		 */
		{

			bp = NULL;
			blkcnt = bitmapblks;
			while (blkcnt > 0) {
				error = (int)buf_meta_bread(vp, blkno, vcb->blockSize, NOCRED, &bp);
				if (error) {
					if (bp) {
						buf_brelse(bp);
					}
					break;
				}
				bzero((char *)buf_dataptr(bp), vcb->blockSize);
				/* Aged buffers are recycled quickly; we won't re-read these. */
				buf_markaged(bp);
				error = (int)buf_bwrite(bp);
				if (error)
					break;
				--blkcnt;
				++blkno;
			}
		}
		if (error) {
			printf("hfs_extendfs: error %d clearing blocks\n", error);
			goto out;
		}
		/*
		 * Mark the new bitmap space as allocated.
		 *
		 * Note that ExtendFileC will have marked any blocks it allocated, so
		 * this is only needed if we used AddFileExtent.  Also note that this
		 * has to come *after* the zero filling of new blocks in the case where
		 * we used AddFileExtent (since the part of the bitmap we're touching
		 * is in those newly allocated blocks).
		 */
		if (!usedExtendFileC) {
			error = BlockMarkAllocated(vcb, vcb->totalBlocks, bitmapblks);
			if (error) {
				printf("hfs_extendfs: error %d setting bitmap\n", error);
				goto out;
			}
			vcb->freeBlocks -= bitmapblks;
		}
	}
	/*
	 * Mark the new alternate VH as allocated.  The alternate volume header
	 * occupies the last 1KB of the volume (2 blocks at 512-byte block size,
	 * otherwise 1 block).
	 */
	if (vcb->blockSize == 512)
		error = BlockMarkAllocated(vcb, vcb->totalBlocks + addblks - 2, 2);
	else
		error = BlockMarkAllocated(vcb, vcb->totalBlocks + addblks - 1, 1);
	if (error) {
		printf("hfs_extendfs: error %d setting bitmap (VH)\n", error);
		goto out;
	}
	/*
	 * Mark the old alternate VH as free.
	 */
	if (vcb->blockSize == 512)
		(void) BlockMarkFree(vcb, vcb->totalBlocks - 2, 2);
	else
		(void) BlockMarkFree(vcb, vcb->totalBlocks - 1, 1);
	/*
	 * Adjust file system variables for new space.  Save the old values
	 * first so the on-error path below can roll them back.
	 */
	prev_phys_block_count = hfsmp->hfs_logical_block_count;
	prev_alt_sector = hfsmp->hfs_alt_id_sector;

	vcb->totalBlocks += addblks;
	vcb->freeBlocks += addblks;
	hfsmp->hfs_logical_block_count = newsize / sectorsize;
	hfsmp->hfs_alt_id_sector = (hfsmp->hfsPlusIOPosOffset / sectorsize) +
	                          HFS_ALT_SECTOR(sectorsize, hfsmp->hfs_logical_block_count);
	MarkVCBDirty(vcb);
	error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
	if (error) {
		printf("hfs_extendfs: couldn't flush volume headers (%d)", error);
		/*
		 * Restore to old state: shrink the bitmap file back (by the same
		 * mechanism used to grow it), undo the free/total block accounting,
		 * and re-mark the (still current) alternate VH as allocated.
		 */
		if (usedExtendFileC) {
			(void) TruncateFileC(vcb, fp, oldBitmapSize, 0, FORK_IS_RSRC(fp),
								 FTOC(fp)->c_fileid, false);
		} else {
			fp->ff_blocks -= bitmapblks;
			fp->ff_size -= (u_int64_t)bitmapblks * (u_int64_t)vcb->blockSize;
			/*
			 * No need to mark the excess blocks free since those bitmap blocks
			 * are no longer part of the bitmap.  But we do need to undo the
			 * effect of the "vcb->freeBlocks -= bitmapblks" above.
			 */
			vcb->freeBlocks += bitmapblks;
		}
		vcb->totalBlocks -= addblks;
		vcb->freeBlocks -= addblks;
		hfsmp->hfs_logical_block_count = prev_phys_block_count;
		hfsmp->hfs_alt_id_sector = prev_alt_sector;
		MarkVCBDirty(vcb);
		if (vcb->blockSize == 512) {
			if (BlockMarkAllocated(vcb, vcb->totalBlocks - 2, 2)) {
				hfs_mark_volume_inconsistent(hfsmp);
			}
		} else {
			if (BlockMarkAllocated(vcb, vcb->totalBlocks - 1, 1)) {
				hfs_mark_volume_inconsistent(hfsmp);
			}
		}
		goto out;
	}
	/*
	 * Invalidate the old alternate volume header by zeroing its signature
	 * area on disk (journaled, since it's still inside the old volume).
	 */
	bp = NULL;
	if (prev_alt_sector) {
		if (buf_meta_bread(hfsmp->hfs_devvp,
				HFS_PHYSBLK_ROUNDDOWN(prev_alt_sector, hfsmp->hfs_log_per_phys),
				hfsmp->hfs_physical_block_size, NOCRED, &bp) == 0) {
			journal_modify_block_start(hfsmp->jnl, bp);

			bzero((char *)buf_dataptr(bp) + HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size), kMDBSize);

			journal_modify_block_end(hfsmp->jnl, bp, NULL, NULL);
		} else if (bp) {
			buf_brelse(bp);
		}
	}

	/*
	 * Update the metadata zone size based on current volume size
	 */
	hfs_metadatazone_init(hfsmp, false);

	/*
	 * Adjust the size of hfsmp->hfs_attrdata_vp
	 */
	if (hfsmp->hfs_attrdata_vp) {
		struct cnode *attr_cp;
		struct filefork *attr_fp;

		if (vnode_get(hfsmp->hfs_attrdata_vp) == 0) {
			attr_cp = VTOC(hfsmp->hfs_attrdata_vp);
			attr_fp = VTOF(hfsmp->hfs_attrdata_vp);

			attr_cp->c_blocks = newblkcnt;
			attr_fp->ff_blocks = newblkcnt;
			attr_fp->ff_extents[0].blockCount = newblkcnt;
			attr_fp->ff_size = (off_t) newblkcnt * hfsmp->blockSize;
			ubc_setsize(hfsmp->hfs_attrdata_vp, attr_fp->ff_size);
			vnode_put(hfsmp->hfs_attrdata_vp);
		}
	}

	/*
	 * Update the R/B Tree if necessary.  Since we don't have to drop the systemfile
	 * locks in the middle of these operations like we do in the truncate case
	 * where we have to relocate files, we can only update the red-black tree
	 * if there were actual changes made to the bitmap.  Also, we can't really scan the
	 * new portion of the bitmap before it has been allocated. The BlockMarkAllocated
	 * routines are smart enough to avoid the r/b tree if the portion they are manipulating is
	 * not currently controlled by the tree.
	 *
	 * We only update hfsmp->allocLimit if totalBlocks actually increased.
	 */

	if (error == 0) {
		UpdateAllocLimit(hfsmp, hfsmp->totalBlocks);
	}

	/* Log successful extending */
	printf("hfs_extendfs: extended \"%s\" to %d blocks (was %d blocks)\n",
	       hfsmp->vcbVN, hfsmp->totalBlocks, (u_int32_t)(oldsize/hfsmp->blockSize));

out:
	if (error && fp) {
		/* Restore allocation fork. */
		bcopy(&forkdata, &fp->ff_data, sizeof(forkdata));
		VTOC(vp)->c_blocks = fp->ff_blocks;

	}

	HFS_MOUNT_LOCK(hfsmp, TRUE);
	hfsmp->hfs_flags &= ~HFS_RESIZE_IN_PROGRESS;
	HFS_MOUNT_UNLOCK(hfsmp, TRUE);
	if (lockflags) {
		hfs_systemfile_unlock(hfsmp, lockflags);
	}
	if (transaction_begun) {
		hfs_end_transaction(hfsmp);
	}

	return MacToVFSError(error);
}
4251
4252 #define HFS_MIN_SIZE (32LL * 1024LL * 1024LL)
4253
4254 /*
4255 * Truncate a file system (while still mounted).
4256 */
int
hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
{
	struct buf *bp = NULL;
	u_int64_t oldsize;
	u_int32_t newblkcnt;
	u_int32_t reclaimblks = 0;	/* allocation blocks beyond the new end of volume */
	int lockflags = 0;
	int transaction_begun = 0;
	Boolean updateFreeBlocks = false;	/* true once freeBlocks has been decremented (for rollback) */
	Boolean disable_sparse = false;		/* true if we temporarily cleared HFS_HAS_SPARSE_DEVICE */
	int error = 0;

	/* Only one resize (grow or shrink) may be in flight per volume. */
	lck_mtx_lock(&hfsmp->hfs_mutex);
	if (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) {
		lck_mtx_unlock(&hfsmp->hfs_mutex);
		return (EALREADY);
	}
	hfsmp->hfs_flags |= HFS_RESIZE_IN_PROGRESS;
	/* Reset the progress counters reported to userspace for this resize. */
	hfsmp->hfs_resize_blocksmoved = 0;
	hfsmp->hfs_resize_totalblocks = 0;
	hfsmp->hfs_resize_progress = 0;
	lck_mtx_unlock(&hfsmp->hfs_mutex);

	/*
	 * - Journaled HFS Plus volumes only.
	 * - No embedded volumes.
	 */
	if ((hfsmp->jnl == NULL) ||
	    (hfsmp->hfsPlusIOPosOffset != 0)) {
		error = EPERM;
		goto out;
	}
	oldsize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
	newblkcnt = newsize / hfsmp->blockSize;
	reclaimblks = hfsmp->totalBlocks - newblkcnt;

	if (hfs_resize_debug) {
		printf ("hfs_truncatefs: old: size=%qu, blkcnt=%u, freeblks=%u\n", oldsize, hfsmp->totalBlocks, hfs_freeblks(hfsmp, 1));
		printf ("hfs_truncatefs: new: size=%qu, blkcnt=%u, reclaimblks=%u\n", newsize, newblkcnt, reclaimblks);
	}

	/* Make sure new size is valid. */
	if ((newsize < HFS_MIN_SIZE) ||
	    (newsize >= oldsize) ||
	    (newsize % hfsmp->hfs_logical_block_size) ||
	    (newsize % hfsmp->hfs_physical_block_size)) {
		printf ("hfs_truncatefs: invalid size (newsize=%qu, oldsize=%qu)\n", newsize, oldsize);
		error = EINVAL;
		goto out;
	}

	/*
	 * Make sure that the file system has enough free blocks reclaim.
	 *
	 * Before resize, the disk is divided into four zones -
	 * 	A. Allocated_Stationary - These are allocated blocks that exist
	 * 	   before the new end of disk.  These blocks will not be
	 * 	   relocated or modified during resize.
	 * 	B. Free_Stationary - These are free blocks that exist before the
	 * 	   new end of disk.  These blocks can be used for any new
	 * 	   allocations during resize, including allocation for relocating
	 * 	   data from the area of disk being reclaimed.
	 * 	C. Allocated_To-Reclaim - These are allocated blocks that exist
	 *	   beyond the new end of disk.  These blocks need to be reclaimed
	 * 	   during resize by allocating equal number of blocks in Free
	 * 	   Stationary zone and copying the data.
	 * 	D. Free_To-Reclaim - These are free blocks that exist beyond the
	 * 	   new end of disk.  Nothing special needs to be done to reclaim
	 * 	   them.
	 *
	 * Total number of blocks on the disk before resize:
	 * ------------------------------------------------
	 * 	Total Blocks = Allocated_Stationary + Free_Stationary +
	 * 	               Allocated_To-Reclaim + Free_To-Reclaim
	 *
	 * Total number of blocks that need to be reclaimed:
	 * ------------------------------------------------
	 *	Blocks to Reclaim = Allocated_To-Reclaim + Free_To-Reclaim
	 *
	 * Note that the check below also makes sure that we have enough space
	 * to relocate data from Allocated_To-Reclaim to Free_Stationary.
	 * Therefore we do not need to check total number of blocks to relocate
	 * later in the code.
	 *
	 * The condition below gets converted to:
	 *
	 * Allocated To-Reclaim + Free To-Reclaim >= Free Stationary + Free To-Reclaim
	 *
	 * which is equivalent to:
	 *
	 *              Allocated To-Reclaim >= Free Stationary
	 */
	if (reclaimblks >= hfs_freeblks(hfsmp, 1)) {
		printf("hfs_truncatefs: insufficient space (need %u blocks; have %u free blocks)\n", reclaimblks, hfs_freeblks(hfsmp, 1));
		error = ENOSPC;
		goto out;
	}

	/* Start with a clean journal. */
	hfs_journal_flush(hfsmp, TRUE);

	if (hfs_start_transaction(hfsmp) != 0) {
		error = EINVAL;
		goto out;
	}
	transaction_begun = 1;

	/* Take the bitmap lock to update the alloc limit field */
	lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);

	/*
	 * Prevent new allocations from using the part we're trying to truncate.
	 *
	 * NOTE: allocLimit is set to the allocation block number where the new
	 * alternate volume header will be.  That way there will be no files to
	 * interfere with allocating the new alternate volume header, and no files
	 * in the allocation blocks beyond (i.e. the blocks we're trying to
	 * truncate away).
	 *
	 * Also shrink the red-black tree if needed.
	 */
	if (hfsmp->blockSize == 512) {
		error = UpdateAllocLimit (hfsmp, newblkcnt - 2);
	}
	else {
		error = UpdateAllocLimit (hfsmp, newblkcnt - 1);
	}

	/* Sparse devices use first fit allocation which is not ideal
	 * for volume resize which requires best fit allocation.  If a
	 * sparse device is being truncated, disable the sparse device
	 * property temporarily for the duration of resize.  Also reset
	 * the free extent cache so that it is rebuilt as sorted by
	 * totalBlocks instead of startBlock.
	 *
	 * Note that this will affect all allocations on the volume and
	 * ideal fix would be just to modify resize-related allocations,
	 * but it will result in complexity like handling of two free
	 * extent caches sorted differently, etc.  So we stick to this
	 * solution for now.
	 */
	HFS_MOUNT_LOCK(hfsmp, TRUE);
	if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
		hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
		ResetVCBFreeExtCache(hfsmp);
		disable_sparse = true;
	}

	/*
	 * Update the volume free block count to reflect the total number
	 * of free blocks that will exist after a successful resize.
	 * Relocation of extents will result in no net change in the total
	 * free space on the disk.  Therefore the code that allocates
	 * space for new extent and deallocates the old extent explicitly
	 * prevents updating the volume free block count.  It will also
	 * prevent false disk full error when the number of blocks in
	 * an extent being relocated is more than the free blocks that
	 * will exist after the volume is resized.
	 */
	hfsmp->freeBlocks -= reclaimblks;
	updateFreeBlocks = true;
	HFS_MOUNT_UNLOCK(hfsmp, TRUE);

	if (lockflags) {
		hfs_systemfile_unlock(hfsmp, lockflags);
		lockflags = 0;
	}

	/*
	 * Update the metadata zone size to match the new volume size,
	 * and if the new size is too small, the metadata zone might be
	 * disabled entirely.
	 */
	hfs_metadatazone_init(hfsmp, false);

	/*
	 * If some files have blocks at or beyond the location of the
	 * new alternate volume header, recalculate free blocks and
	 * reclaim blocks.  Otherwise just update free blocks count.
	 *
	 * The current allocLimit is set to the location of new alternate
	 * volume header, and reclaimblks are the total number of blocks
	 * that need to be reclaimed.  So the check below is really
	 * ignoring the blocks allocated for old alternate volume header.
	 */
	if (hfs_isallocated(hfsmp, hfsmp->allocLimit, reclaimblks)) {
		/*
		 * hfs_reclaimspace will use separate transactions when
		 * relocating files (so we don't overwhelm the journal).
		 */
		hfs_end_transaction(hfsmp);
		transaction_begun = 0;

		/* Attempt to reclaim some space. */
		error = hfs_reclaimspace(hfsmp, hfsmp->allocLimit, reclaimblks, context);
		if (error != 0) {
			printf("hfs_truncatefs: couldn't reclaim space on %s (error=%d)\n", hfsmp->vcbVN, error);
			error = ENOSPC;
			goto out;
		}
		if (hfs_start_transaction(hfsmp) != 0) {
			error = EINVAL;
			goto out;
		}
		transaction_begun = 1;

		/* Check if we're clear now. */
		error = hfs_isallocated(hfsmp, hfsmp->allocLimit, reclaimblks);
		if (error != 0) {
			printf("hfs_truncatefs: didn't reclaim enough space on %s (error=%d)\n", hfsmp->vcbVN, error);
			error = EAGAIN;  /* tell client to try again */
			goto out;
		}
	}

	/*
	 * Note: we take the attributes lock in case we have an attribute data vnode
	 * which needs to change size.
	 */
	lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);

	/*
	 * Allocate last 1KB for alternate volume header.
	 */
	error = BlockMarkAllocated(hfsmp, hfsmp->allocLimit, (hfsmp->blockSize == 512) ? 2 : 1);
	if (error) {
		printf("hfs_truncatefs: Error %d allocating new alternate volume header\n", error);
		goto out;
	}

	/*
	 * Mark the old alternate volume header as free.
	 * We don't bother shrinking allocation bitmap file.
	 */
	if (hfsmp->blockSize == 512)
		(void) BlockMarkFree(hfsmp, hfsmp->totalBlocks - 2, 2);
	else
		(void) BlockMarkFree(hfsmp, hfsmp->totalBlocks - 1, 1);

	/*
	 * Invalidate the existing alternate volume header.
	 *
	 * Don't include this in a transaction (don't call journal_modify_block)
	 * since this block will be outside of the truncated file system!
	 */
	if (hfsmp->hfs_alt_id_sector) {
		error = buf_meta_bread(hfsmp->hfs_devvp,
				HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys),
				hfsmp->hfs_physical_block_size, NOCRED, &bp);
		if (error == 0) {
			bzero((void*)((char *)buf_dataptr(bp) + HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size)), kMDBSize);
			(void) VNOP_BWRITE(bp);
		} else {
			if (bp) {
				buf_brelse(bp);
			}
		}
		bp = NULL;
	}

	/* Log successful shrinking. */
	printf("hfs_truncatefs: shrank \"%s\" to %d blocks (was %d blocks)\n",
	       hfsmp->vcbVN, newblkcnt, hfsmp->totalBlocks);

	/*
	 * Adjust file system variables and flush them to disk.
	 */
	hfsmp->totalBlocks = newblkcnt;
	hfsmp->hfs_logical_block_count = newsize / hfsmp->hfs_logical_block_size;
	hfsmp->hfs_alt_id_sector = HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, hfsmp->hfs_logical_block_count);
	MarkVCBDirty(hfsmp);
	error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
	if (error)
		panic("hfs_truncatefs: unexpected error flushing volume header (%d)\n", error);

	/*
	 * Adjust the size of hfsmp->hfs_attrdata_vp
	 */
	if (hfsmp->hfs_attrdata_vp) {
		struct cnode *cp;
		struct filefork *fp;

		if (vnode_get(hfsmp->hfs_attrdata_vp) == 0) {
			cp = VTOC(hfsmp->hfs_attrdata_vp);
			fp = VTOF(hfsmp->hfs_attrdata_vp);

			cp->c_blocks = newblkcnt;
			fp->ff_blocks = newblkcnt;
			fp->ff_extents[0].blockCount = newblkcnt;
			fp->ff_size = (off_t) newblkcnt * hfsmp->blockSize;
			ubc_setsize(hfsmp->hfs_attrdata_vp, fp->ff_size);
			vnode_put(hfsmp->hfs_attrdata_vp);
		}
	}

out:
	/*
	 * Update the allocLimit to acknowledge the last one or two blocks now.
	 * Add it to the tree as well if necessary.
	 */
	UpdateAllocLimit (hfsmp, hfsmp->totalBlocks);

	HFS_MOUNT_LOCK(hfsmp, TRUE);
	if (disable_sparse == true) {
		/* Now that resize is completed, set the volume to be sparse
		 * device again so that all further allocations will be first
		 * fit instead of best fit.  Reset free extent cache so that
		 * it is rebuilt.
		 */
		hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
		ResetVCBFreeExtCache(hfsmp);
	}

	/* Roll back the early freeBlocks decrement if the shrink failed. */
	if (error && (updateFreeBlocks == true)) {
		hfsmp->freeBlocks += reclaimblks;
	}

	/* Keep nextAllocation inside the (possibly smaller) volume. */
	if (hfsmp->nextAllocation >= hfsmp->allocLimit) {
		hfsmp->nextAllocation = hfsmp->hfs_metazone_end + 1;
	}
	hfsmp->hfs_flags &= ~HFS_RESIZE_IN_PROGRESS;
	HFS_MOUNT_UNLOCK(hfsmp, TRUE);

	/* On error, reset the metadata zone for original volume size */
	if (error && (updateFreeBlocks == true)) {
		hfs_metadatazone_init(hfsmp, false);
	}

	if (lockflags) {
		hfs_systemfile_unlock(hfsmp, lockflags);
	}
	if (transaction_begun) {
		hfs_end_transaction(hfsmp);
		hfs_journal_flush(hfsmp, FALSE);
		/* Just to be sure, sync all data to the disk */
		(void) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
	}

	return MacToVFSError(error);
}
4597
4598
4599 /*
4600 * Invalidate the physical block numbers associated with buffer cache blocks
4601 * in the given extent of the given vnode.
4602 */
/* Argument block for hfs_invalidate_block_numbers_callback. */
struct hfs_inval_blk_no {
	daddr64_t sectorStart;	/* first device sector of the range to invalidate */
	daddr64_t sectorCount;	/* number of device sectors in the range */
};
4607 static int
4608 hfs_invalidate_block_numbers_callback(buf_t bp, void *args_in)
4609 {
4610 daddr64_t blkno;
4611 struct hfs_inval_blk_no *args;
4612
4613 blkno = buf_blkno(bp);
4614 args = args_in;
4615
4616 if (blkno >= args->sectorStart && blkno < args->sectorStart+args->sectorCount)
4617 buf_setblkno(bp, buf_lblkno(bp));
4618
4619 return BUF_RETURNED;
4620 }
4621 static void
4622 hfs_invalidate_sectors(struct vnode *vp, daddr64_t sectorStart, daddr64_t sectorCount)
4623 {
4624 struct hfs_inval_blk_no args;
4625 args.sectorStart = sectorStart;
4626 args.sectorCount = sectorCount;
4627
4628 buf_iterate(vp, hfs_invalidate_block_numbers_callback, BUF_SCAN_DIRTY|BUF_SCAN_CLEAN, &args);
4629 }
4630
4631
4632 /*
4633 * Copy the contents of an extent to a new location. Also invalidates the
4634 * physical block number of any buffer cache block in the copied extent
4635 * (so that if the block is written, it will go through VNOP_BLOCKMAP to
4636 * determine the new physical block number).
4637 */
static int
hfs_copy_extent(
	struct hfsmount *hfsmp,
	struct vnode *vp,		/* The file whose extent is being copied. */
	u_int32_t oldStart,		/* The start of the source extent. */
	u_int32_t newStart,		/* The start of the destination extent. */
	u_int32_t blockCount,		/* The number of allocation blocks to copy. */
	vfs_context_t context)
{
	int err = 0;
	size_t bufferSize;		/* capacity of the bounce buffer, per I/O */
	void *buffer = NULL;
	struct vfsioattr ioattr;
	buf_t bp = NULL;
	off_t resid;			/* bytes remaining to copy */
	size_t ioSize;
	u_int32_t ioSizeSectors;	/* Device sectors in this I/O */
	daddr64_t srcSector, destSector;
	u_int32_t sectorsPerBlock = hfsmp->blockSize / hfsmp->hfs_logical_block_size;
#if CONFIG_PROTECT
	int cpenabled = 0;		/* nonzero when content-protection keys are attached to I/Os */
#endif

	/*
	 * Sanity check that we have locked the vnode of the file we're copying.
	 *
	 * But since hfs_systemfile_lock() doesn't actually take the lock on
	 * the allocation file if a journal is active, ignore the check if the
	 * file being copied is the allocation file.
	 */
	struct cnode *cp = VTOC(vp);
	if (cp != hfsmp->hfs_allocation_cp && cp->c_lockowner != current_thread())
		panic("hfs_copy_extent: vp=%p (cp=%p) not owned?\n", vp, cp);

#if CONFIG_PROTECT
	/* Prepare the CP blob and get it ready for use */
	if (!vnode_issystem (vp) && vnode_isreg(vp) &&
			cp_fs_protected (hfsmp->hfs_mp)) {
		int cp_err = 0;
		cp_err = cp_handle_relocate (cp);
		if (cp_err) {
			/*
			 * can't copy the file because we couldn't set up keys.
			 * bail out
			 */
			return cp_err;
		}
		else {
			cpenabled = 1;
		}
	}
#endif

	/*
	 * Determine the I/O size to use
	 *
	 * NOTE: Many external drives will result in an ioSize of 128KB.
	 * TODO: Should we use a larger buffer, doing several consecutive
	 * reads, then several consecutive writes?
	 */
	vfs_ioattr(hfsmp->hfs_mp, &ioattr);
	bufferSize = MIN(ioattr.io_maxreadcnt, ioattr.io_maxwritecnt);
	if (kmem_alloc(kernel_map, (vm_offset_t*) &buffer, bufferSize))
		return ENOMEM;

	/* Get a buffer for doing the I/O */
	bp = buf_alloc(hfsmp->hfs_devvp);
	buf_setdataptr(bp, (uintptr_t)buffer);

	/* Convert allocation-block coordinates to device-sector coordinates. */
	resid = (off_t) blockCount * (off_t) hfsmp->blockSize;
	srcSector = (daddr64_t) oldStart * hfsmp->blockSize / hfsmp->hfs_logical_block_size;
	destSector = (daddr64_t) newStart * hfsmp->blockSize / hfsmp->hfs_logical_block_size;
	while (resid > 0) {
		ioSize = MIN(bufferSize, (size_t) resid);
		ioSizeSectors = ioSize / hfsmp->hfs_logical_block_size;

		/* Prepare the buffer for reading */
		buf_reset(bp, B_READ);
		buf_setsize(bp, ioSize);
		buf_setcount(bp, ioSize);
		buf_setblkno(bp, srcSector);
		buf_setlblkno(bp, srcSector);

		/* Attach the CP to the buffer */
#if CONFIG_PROTECT
		if (cpenabled) {
			buf_setcpaddr (bp, cp->c_cpentry);
		}
#endif

		/* Do the read */
		err = VNOP_STRATEGY(bp);
		if (!err)
			err = buf_biowait(bp);
		if (err) {
			printf("hfs_copy_extent: Error %d from VNOP_STRATEGY (read)\n", err);
			break;
		}

		/* Prepare the buffer for writing */
		buf_reset(bp, B_WRITE);
		buf_setsize(bp, ioSize);
		buf_setcount(bp, ioSize);
		buf_setblkno(bp, destSector);
		buf_setlblkno(bp, destSector);
		/* Force unit access for system-file writes when the journal relies on FUA. */
		if (vnode_issystem(vp) && journal_uses_fua(hfsmp->jnl))
			buf_markfua(bp);

#if CONFIG_PROTECT
		/* Attach the CP to the buffer */
		if (cpenabled) {
			buf_setcpaddr (bp, cp->c_cpentry);
		}
#endif

		/* Do the write */
		vnode_startwrite(hfsmp->hfs_devvp);
		err = VNOP_STRATEGY(bp);
		if (!err)
			err = buf_biowait(bp);
		if (err) {
			printf("hfs_copy_extent: Error %d from VNOP_STRATEGY (write)\n", err);
			break;
		}

		resid -= ioSize;
		srcSector += ioSizeSectors;
		destSector += ioSizeSectors;
	}
	if (bp)
		buf_free(bp);
	if (buffer)
		kmem_free(kernel_map, (vm_offset_t)buffer, bufferSize);

	/* Make sure all writes have been flushed to disk. */
	if (vnode_issystem(vp) && !journal_uses_fua(hfsmp->jnl)) {
		err = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
		if (err) {
			printf("hfs_copy_extent: DKIOCSYNCHRONIZECACHE failed (%d)\n", err);
			err = 0;	/* Don't fail the copy. */
		}
	}

	/* Drop stale physical block numbers so future writes re-map via BLOCKMAP. */
	if (!err)
		hfs_invalidate_sectors(vp, (daddr64_t)oldStart*sectorsPerBlock, (daddr64_t)blockCount*sectorsPerBlock);

	return err;
}
4786
4787
4788 /* Structure to store state of reclaiming extents from a
4789 * given file. hfs_reclaim_file()/hfs_reclaim_xattr()
4790 * initializes the values in this structure which are then
4791 * used by code that reclaims and splits the extents.
4792 */
struct hfs_reclaim_extent_info {
	struct vnode *vp;		/* Vnode of the file whose extents are being reclaimed */
	u_int32_t fileID;		/* Catalog node ID of the file */
	u_int8_t forkType;		/* Fork (data/resource) the extents belong to */
	u_int8_t is_dirlink;		/* Extent belongs to directory hard link */
	u_int8_t is_sysfile;		/* Extent belongs to system file */
	u_int8_t is_xattr;		/* Extent belongs to extent-based xattr */
	u_int8_t extent_index;		/* Index of current extent within the extent record */
	int lockflags;			/* Locks that reclaim and split code should grab before modifying the extent record */
	u_int32_t blocks_relocated;	/* Total blocks relocated for this file till now */
	u_int32_t recStartBlock;	/* File allocation block number (FABN) for current extent record */
	u_int32_t cur_blockCount;	/* Number of allocation blocks that have been checked for reclaim */
	struct filefork *catalog_fp;	/* If non-NULL, extent is from catalog record */
	union record {
		HFSPlusExtentRecord overflow;/* Extent record from overflow extents btree */
		HFSPlusAttrRecord xattr;	/* Attribute record for large EAs */
	} record;
	HFSPlusExtentDescriptor *extents;	/* Pointer to current extent record being processed.
						 * For catalog extent record, points to the correct
						 * extent information in filefork.  For overflow extent
						 * record, or xattr record, points to extent record
						 * in the structure above
						 */
	struct cat_desc *dirlink_desc;	/* Catalog descriptor for a directory hard link target */
	struct cat_attr *dirlink_attr;	/* Catalog attributes for a directory hard link target */
	struct filefork *dirlink_fork;	/* For directory hard links, fp points actually to this */
	struct BTreeIterator *iterator;	/* Shared read/write iterator, hfs_reclaim_file/xattr()
					 * use it for reading and hfs_reclaim_extent()/hfs_split_extent()
					 * use it for writing updated extent record
					 */
	struct FSBufferDescriptor btdata;	/* Shared btdata for reading/writing extent record, same as iterator above */
	u_int16_t recordlen;		/* Length of the B-tree record currently in the iterator */
	int overflow_count;		/* For debugging, counter for overflow extent record */
	FCB *fcb;			/* Pointer to the current btree being traversed */
};
4828
4829 /*
4830 * Split the current extent into two extents, with first extent
4831 * to contain given number of allocation blocks. Splitting of
4832 * extent creates one new extent entry which can result in
4833 * shifting of many entries through all the extent records of a
4834 * file, and/or creating a new extent record in the overflow
4835 * extent btree.
4836 *
4837 * Example:
4838 * The diagram below represents two consecutive extent records,
4839 * for simplicity, lets call them record X and X+1 respectively.
4840 * Interesting extent entries have been denoted by letters.
4841 * If the letter is unchanged before and after split, it means
4842 * that the extent entry was not modified during the split.
4843 * A '.' means that the entry remains unchanged after the split
4844 * and is not relevant for our example. A '0' means that the
4845 * extent entry is empty.
4846 *
4847 * If there isn't sufficient contiguous free space to relocate
4848 * an extent (extent "C" below), we will have to break the one
4849 * extent into multiple smaller extents, and relocate each of
4850 * the smaller extents individually. The way we do this is by
4851 * finding the largest contiguous free space that is currently
4852 * available (N allocation blocks), and then convert extent "C"
4853 * into two extents, C1 and C2, that occupy exactly the same
4854 * allocation blocks as extent C. Extent C1 is the first
4855 * N allocation blocks of extent C, and extent C2 is the remainder
4856 * of extent C. Then we can relocate extent C1 since we know
4857 * we have enough contiguous free space to relocate it in its
4858 * entirety. We then repeat the process starting with extent C2.
4859 *
4860 * In record X, only the entries following entry C are shifted, and
4861 * the original entry C is replaced with two entries C1 and C2 which
4862 * are actually two extent entries for contiguous allocation blocks.
4863 *
4864 * Note that the entry E from record X is shifted into record X+1 as
4865 * the new first entry. Since the first entry of record X+1 is updated,
4866 * the FABN will also get updated with the blockCount of entry E.
4867 * This also results in shifting of all extent entries in record X+1.
4868 * Note that the number of empty entries after the split has been
4869 * changed from 3 to 2.
4870 *
4871 * Before:
4872 * record X record X+1
4873 * ---------------------===--------- ---------------------------------
4874 * | A | . | . | . | B | C | D | E | | F | . | . | . | G | 0 | 0 | 0 |
4875 * ---------------------===--------- ---------------------------------
4876 *
4877 * After:
4878 * ---------------------=======----- ---------------------------------
4879 * | A | . | . | . | B | C1| C2| D | | E | F | . | . | . | G | 0 | 0 |
4880 * ---------------------=======----- ---------------------------------
4881 *
4882 * C1.startBlock = C.startBlock
4883 * C1.blockCount = N
4884 *
4885 * C2.startBlock = C.startBlock + N
4886 * C2.blockCount = C.blockCount - N
4887 *
4888 * FABN = old FABN - E.blockCount
4889 *
4890 * Inputs:
4891 * extent_info - This is the structure that contains state about
4892 * the current file, extent, and extent record that
4893 * is being relocated. This structure is shared
4894 * among code that traverses through all the extents
4895 * of the file, code that relocates extents, and
4896 * code that splits the extent.
4897 * Output:
4898 * Zero on success, non-zero on failure.
4899 */
static int
hfs_split_extent(struct hfs_reclaim_extent_info *extent_info, uint32_t newBlockCount)
{
    int error = 0;
    int index = extent_info->extent_index;  /* Index of the extent entry being split */
    int i;
    HFSPlusExtentDescriptor shift_extent;   /* Entry that overflows into the following record */
    HFSPlusExtentDescriptor last_extent;
    HFSPlusExtentDescriptor *extents; /* Pointer to current extent record being manipulated */
    HFSPlusExtentRecord *extents_rec = NULL;
    HFSPlusExtentKey *extents_key = NULL;
    HFSPlusAttrRecord *xattr_rec = NULL;
    HFSPlusAttrKey *xattr_key = NULL;
    struct BTreeIterator iterator;
    struct FSBufferDescriptor btdata;
    uint16_t reclen;
    uint32_t read_recStartBlock;    /* Starting allocation block number to read old extent record */
    uint32_t write_recStartBlock;   /* Starting allocation block number to insert newly updated extent record */
    Boolean create_record = false;
    Boolean is_xattr;

    is_xattr = extent_info->is_xattr;
    extents = extent_info->extents;

    if (hfs_resize_debug) {
        printf ("hfs_split_extent: Split record:%u recStartBlock=%u %u:(%u,%u) for %u blocks\n", extent_info->overflow_count, extent_info->recStartBlock, index, extents[index].startBlock, extents[index].blockCount, newBlockCount);
    }

    /* Determine the starting allocation block number for the following
     * overflow extent record, if any, before the current record
     * gets modified.
     */
    read_recStartBlock = extent_info->recStartBlock;
    for (i = 0; i < kHFSPlusExtentDensity; i++) {
        if (extents[i].blockCount == 0) {
            break;
        }
        read_recStartBlock += extents[i].blockCount;
    }

    /* Shift and split */
    if (index == kHFSPlusExtentDensity-1) {
        /* The new extent created after split will go into following overflow extent record */
        shift_extent.startBlock = extents[index].startBlock + newBlockCount;
        shift_extent.blockCount = extents[index].blockCount - newBlockCount;

        /* Last extent in the record will be split, so nothing to shift */
    } else {
        /* Splitting of extents can result in at most one
         * extent entry to be shifted into following overflow extent
         * record.  So, store the last extent entry for later.
         */
        shift_extent = extents[kHFSPlusExtentDensity-1];

        /* Start shifting extent information from the end of the extent
         * record to the index where we want to insert the new extent.
         * Note that kHFSPlusExtentDensity-1 is already saved above, and
         * does not need to be shifted.  The extent entry that is being
         * split does not get shifted.
         */
        for (i = kHFSPlusExtentDensity-2; i > index; i--) {
            if (hfs_resize_debug) {
                if (extents[i].blockCount) {
                    printf ("hfs_split_extent: Shift %u:(%u,%u) to %u:(%u,%u)\n", i, extents[i].startBlock, extents[i].blockCount, i+1, extents[i].startBlock, extents[i].blockCount);
                }
            }
            extents[i+1] = extents[i];
        }
    }

    if (index == kHFSPlusExtentDensity-1) {
        /* The second half of the extent being split will be the overflow
         * entry that will go into following overflow extent record.  The
         * value has been stored in 'shift_extent' above, so there is
         * nothing to be done here.
         */
    } else {
        /* Update the values in the second half of the extent being split
         * before updating the first half of the split.  Note that the
         * extent to split or first half of the split is at index 'index'
         * and a new extent or second half of the split will be inserted at
         * 'index+1' or into following overflow extent record.
         */
        extents[index+1].startBlock = extents[index].startBlock + newBlockCount;
        extents[index+1].blockCount = extents[index].blockCount - newBlockCount;
    }
    /* Update the extent being split, only the block count will change */
    extents[index].blockCount = newBlockCount;

    if (hfs_resize_debug) {
        printf ("hfs_split_extent: Split %u:(%u,%u) and ", index, extents[index].startBlock, extents[index].blockCount);
        if (index != kHFSPlusExtentDensity-1) {
            printf ("%u:(%u,%u)\n", index+1, extents[index+1].startBlock, extents[index+1].blockCount);
        } else {
            printf ("overflow:(%u,%u)\n", shift_extent.startBlock, shift_extent.blockCount);
        }
    }

    /* If the newly split extent is for large EAs or is in an overflow extent
     * record, update it directly in the btree using the iterator
     * information from the shared extent_info structure.
     */
    if (extent_info->catalog_fp == NULL) {
        error = BTReplaceRecord(extent_info->fcb, extent_info->iterator,
                &(extent_info->btdata), extent_info->recordlen);
        if (error) {
            printf ("hfs_split_extent: fileID=%u BTReplaceRecord returned error=%d\n", extent_info->fileID, error);
            goto out;
        }
    }

    /* No extent entry to be shifted into another extent overflow record */
    if (shift_extent.blockCount == 0) {
        if (hfs_resize_debug) {
            printf ("hfs_split_extent: No extent entry to be shifted into overflow records\n");
        }
        error = 0;
        goto out;
    }

    /* The overflow extent entry has to be shifted into an extent
     * overflow record.  This would mean that we have to shift
     * extent entries from all overflow records by one.  We will
     * start iteration from the first record to the last record,
     * and shift the extent entry from one record to another.
     * We might have to create a new record for the last extent
     * entry for the file.
     */

    /* Initialize iterator to search the next record */
    bzero(&iterator, sizeof(iterator));
    if (is_xattr) {
        /* Copy the key from the iterator that was used to update the modified attribute record. */
        xattr_key = (HFSPlusAttrKey *)&(iterator.key);
        bcopy((HFSPlusAttrKey *)&(extent_info->iterator->key), xattr_key, sizeof(HFSPlusAttrKey));
        /* Note: xattr_key->startBlock will be initialized later in the iteration loop */

        MALLOC(xattr_rec, HFSPlusAttrRecord *,
                sizeof(HFSPlusAttrRecord), M_TEMP, M_WAITOK);
        if (xattr_rec == NULL) {
            error = ENOMEM;
            goto out;
        }
        btdata.bufferAddress = xattr_rec;
        btdata.itemSize = sizeof(HFSPlusAttrRecord);
        btdata.itemCount = 1;
        extents = xattr_rec->overflowExtents.extents;
    } else {
        extents_key = (HFSPlusExtentKey *) &(iterator.key);
        extents_key->keyLength = kHFSPlusExtentKeyMaximumLength;
        extents_key->forkType = extent_info->forkType;
        extents_key->fileID = extent_info->fileID;
        /* Note: extents_key->startBlock will be initialized later in the iteration loop */

        MALLOC(extents_rec, HFSPlusExtentRecord *,
                sizeof(HFSPlusExtentRecord), M_TEMP, M_WAITOK);
        if (extents_rec == NULL) {
            error = ENOMEM;
            goto out;
        }
        btdata.bufferAddress = extents_rec;
        btdata.itemSize = sizeof(HFSPlusExtentRecord);
        btdata.itemCount = 1;
        extents = extents_rec[0];
    }

    /* An extent entry still needs to be shifted into following overflow
     * extent record.  This will result in the starting allocation block
     * number of the extent record being changed which is part of the key
     * for the extent record.  Since the extent record key is changing,
     * the record can not be updated, instead has to be deleted and
     * inserted again.
     */
    while (shift_extent.blockCount) {
        if (hfs_resize_debug) {
            printf ("hfs_split_extent: Will shift (%u,%u) into record with startBlock=%u\n", shift_extent.startBlock, shift_extent.blockCount, read_recStartBlock);
        }

        /* Search if there is any existing overflow extent record.
         * For this, the logical start block number in the key is
         * the value calculated based on the logical start block
         * number of the current extent record and the total number
         * of blocks existing in the current extent record.
         */
        if (is_xattr) {
            xattr_key->startBlock = read_recStartBlock;
        } else {
            extents_key->startBlock = read_recStartBlock;
        }
        error = BTSearchRecord(extent_info->fcb, &iterator, &btdata, &reclen, &iterator);
        if (error) {
            if (error != btNotFound) {
                printf ("hfs_split_extent: fileID=%u startBlock=%u BTSearchRecord error=%d\n", extent_info->fileID, read_recStartBlock, error);
                goto out;
            }
            /* No record with this key exists; create a new one below.
             * NOTE(review): once set, create_record is never cleared, but a
             * created record always zeroes shift_extent, which terminates
             * the while loop — so it can never leak into a later iteration.
             */
            create_record = true;
        }

        /* The extra extent entry from the previous record is being inserted
         * as the first entry in the current extent record.  This will change
         * the file allocation block number (FABN) of the current extent
         * record, which is the startBlock value from the extent record key.
         * Since one extra entry is being inserted in the record, the new
         * FABN for the record will be less than old FABN by the number of blocks
         * in the new extent entry being inserted at the start.  We have to
         * do this before we update read_recStartBlock to point at the
         * startBlock of the following record.
         */
        write_recStartBlock = read_recStartBlock - shift_extent.blockCount;
        if (hfs_resize_debug) {
            if (create_record) {
                printf ("hfs_split_extent: No records found for startBlock=%u, will create new with startBlock=%u\n", read_recStartBlock, write_recStartBlock);
            }
        }

        /* Now update the read_recStartBlock to account for total number
         * of blocks in this extent record.  It will now point to the
         * starting allocation block number for the next extent record.
         *
         * NOTE(review): when the search above returned btNotFound, 'extents'
         * still holds the previous iteration's record (or the buffer is
         * uninitialized on the first pass), so this loop reads stale data.
         * The value is never used in that case because the create_record
         * branch below zeroes shift_extent and ends the loop — confirm.
         */
        for (i = 0; i < kHFSPlusExtentDensity; i++) {
            if (extents[i].blockCount == 0) {
                break;
            }
            read_recStartBlock += extents[i].blockCount;
        }

        if (create_record == true) {
            /* Initialize new record content with only one extent entry */
            bzero(extents, sizeof(HFSPlusExtentRecord));
            /* The new record will contain only one extent entry */
            extents[0] = shift_extent;
            /* There are no more overflow extents to be shifted */
            shift_extent.startBlock = shift_extent.blockCount = 0;

            if (is_xattr) {
                xattr_rec->recordType = kHFSPlusAttrExtents;
                xattr_rec->overflowExtents.reserved = 0;
                reclen = sizeof(HFSPlusAttrExtents);
            } else {
                reclen = sizeof(HFSPlusExtentRecord);
            }
        } else {
            /* The overflow extent entry from previous record will be
             * the first entry in this extent record.  If the last
             * extent entry in this record is valid, it will be shifted
             * into the following extent record as its first entry.  So
             * save the last entry before shifting entries in current
             * record.
             */
            last_extent = extents[kHFSPlusExtentDensity-1];

            /* Shift all entries by one index towards the end */
            for (i = kHFSPlusExtentDensity-2; i >= 0; i--) {
                extents[i+1] = extents[i];
            }

            /* Overflow extent entry saved from previous record
             * is now the first entry in the current record.
             */
            extents[0] = shift_extent;

            if (hfs_resize_debug) {
                printf ("hfs_split_extent: Shift overflow=(%u,%u) to record with updated startBlock=%u\n", shift_extent.startBlock, shift_extent.blockCount, write_recStartBlock);
            }

            /* The last entry from current record will be the
             * overflow entry which will be the first entry for
             * the following extent record.
             */
            shift_extent = last_extent;

            /* Since the key->startBlock is being changed for this record,
             * it should be deleted and inserted with the new key.
             */
            error = BTDeleteRecord(extent_info->fcb, &iterator);
            if (error) {
                printf ("hfs_split_extent: fileID=%u startBlock=%u BTDeleteRecord error=%d\n", extent_info->fileID, read_recStartBlock, error);
                goto out;
            }
            if (hfs_resize_debug) {
                printf ("hfs_split_extent: Deleted record with startBlock=%u\n", (is_xattr ? xattr_key->startBlock : extents_key->startBlock));
            }
        }

        /* Insert the newly created or modified extent record */
        bzero(&iterator.hint, sizeof(iterator.hint));
        if (is_xattr) {
            xattr_key->startBlock = write_recStartBlock;
        } else {
            extents_key->startBlock = write_recStartBlock;
        }
        error = BTInsertRecord(extent_info->fcb, &iterator, &btdata, reclen);
        if (error) {
            printf ("hfs_split_extent: fileID=%u, startBlock=%u BTInsertRecord error=%d\n", extent_info->fileID, write_recStartBlock, error);
            goto out;
        }
        if (hfs_resize_debug) {
            printf ("hfs_split_extent: Inserted extent record with startBlock=%u\n", write_recStartBlock);
        }
    }
    /* NOTE(review): BTFlushPath's return value is deliberately ignored here;
     * a flush failure does not invalidate the btree updates made above.
     */
    BTFlushPath(extent_info->fcb);
out:
    if (extents_rec) {
        FREE (extents_rec, M_TEMP);
    }
    if (xattr_rec) {
        FREE (xattr_rec, M_TEMP);
    }
    return error;
}
5210
5211
5212 /*
5213 * Relocate an extent if it lies beyond the expected end of volume.
5214 *
5215 * This function is called for every extent of the file being relocated.
5216 * It allocates space for relocation, copies the data, deallocates
5217 * the old extent, and updates the corresponding on-disk extent. If the function
5218 * does not find contiguous space to relocate an extent, it splits the
5219 * extent into smaller extents to be able to relocate it out of the area of
5220 * disk being reclaimed. As an optimization, if an extent lies partially
5221 * in the area of the disk being reclaimed, it is split so that we only
5222 * have to relocate the area that was overlapping with the area of disk
5223 * being reclaimed.
5224 *
5225 * Note that every extent is relocated in its own transaction so that
5226 * they do not overwhelm the journal. This function handles the extent
5227 * record that exists in the catalog record, extent record from overflow
5228 * extents btree, and extents for large EAs.
5229 *
5230 * Inputs:
5231 * extent_info - This is the structure that contains state about
5232 * the current file, extent, and extent record that
5233 * is being relocated. This structure is shared
5234 * among code that traverses through all the extents
5235 * of the file, code that relocates extents, and
5236 * code that splits the extent.
5237 */
5238 static int
5239 hfs_reclaim_extent(struct hfsmount *hfsmp, const u_long allocLimit, struct hfs_reclaim_extent_info *extent_info, vfs_context_t context)
5240 {
5241 int error = 0;
5242 int index;
5243 struct cnode *cp;
5244 u_int32_t oldStartBlock;
5245 u_int32_t oldBlockCount;
5246 u_int32_t newStartBlock;
5247 u_int32_t newBlockCount;
5248 u_int32_t alloc_flags;
5249 int blocks_allocated = false;
5250
5251 index = extent_info->extent_index;
5252 cp = VTOC(extent_info->vp);
5253
5254 oldStartBlock = extent_info->extents[index].startBlock;
5255 oldBlockCount = extent_info->extents[index].blockCount;
5256
5257 if (0 && hfs_resize_debug) {
5258 printf ("hfs_reclaim_extent: Examine record:%u recStartBlock=%u, %u:(%u,%u)\n", extent_info->overflow_count, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount);
5259 }
5260
5261 /* Check if the current extent lies completely within allocLimit */
5262 if ((oldStartBlock + oldBlockCount) <= allocLimit) {
5263 extent_info->cur_blockCount += oldBlockCount;
5264 return error;
5265 }
5266
5267 /* Every extent should be relocated in its own transaction
5268 * to make sure that we don't overflow the journal buffer.
5269 */
5270 error = hfs_start_transaction(hfsmp);
5271 if (error) {
5272 return error;
5273 }
5274 extent_info->lockflags = hfs_systemfile_lock(hfsmp, extent_info->lockflags, HFS_EXCLUSIVE_LOCK);
5275
5276 /* Check if the extent lies partially in the area to reclaim,
5277 * i.e. it starts before allocLimit and ends beyond allocLimit.
5278 * We have already skipped extents that lie completely within
5279 * allocLimit in the check above, so we only check for the
5280 * startBlock. If it lies partially, split it so that we
5281 * only relocate part of the extent.
5282 */
5283 if (oldStartBlock < allocLimit) {
5284 newBlockCount = allocLimit - oldStartBlock;
5285 error = hfs_split_extent(extent_info, newBlockCount);
5286 if (error == 0) {
5287 /* After successful split, the current extent does not
5288 * need relocation, so just return back.
5289 */
5290 goto out;
5291 }
5292 /* Ignore error and try relocating the entire extent instead */
5293 }
5294
5295 alloc_flags = HFS_ALLOC_FORCECONTIG | HFS_ALLOC_SKIPFREEBLKS;
5296 if (extent_info->is_sysfile) {
5297 alloc_flags |= HFS_ALLOC_METAZONE;
5298 }
5299
5300 error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount, alloc_flags,
5301 &newStartBlock, &newBlockCount);
5302 if ((extent_info->is_sysfile == false) &&
5303 ((error == dskFulErr) || (error == ENOSPC))) {
5304 /* For non-system files, try reallocating space in metadata zone */
5305 alloc_flags |= HFS_ALLOC_METAZONE;
5306 error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount,
5307 alloc_flags, &newStartBlock, &newBlockCount);
5308 }
5309 if ((error == dskFulErr) || (error == ENOSPC)) {
5310 /* We did not find desired contiguous space for this extent.
5311 * So try to allocate the maximum contiguous space available.
5312 */
5313 alloc_flags &= ~HFS_ALLOC_FORCECONTIG;
5314
5315 error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount,
5316 alloc_flags, &newStartBlock, &newBlockCount);
5317 if (error) {
5318 printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) BlockAllocate error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error);
5319 goto out;
5320 }
5321 blocks_allocated = true;
5322
5323 error = hfs_split_extent(extent_info, newBlockCount);
5324 if (error) {
5325 printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) split error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error);
5326 goto out;
5327 }
5328 oldBlockCount = newBlockCount;
5329 }
5330 if (error) {
5331 printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) contig BlockAllocate error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error);
5332 goto out;
5333 }
5334 blocks_allocated = true;
5335
5336 /* Copy data from old location to new location */
5337 error = hfs_copy_extent(hfsmp, extent_info->vp, oldStartBlock,
5338 newStartBlock, newBlockCount, context);
5339 if (error) {
5340 printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u)=>(%u,%u) hfs_copy_extent error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, newStartBlock, newBlockCount, error);
5341 goto out;
5342 }
5343
5344 /* Update the extent record with the new start block information */
5345 extent_info->extents[index].startBlock = newStartBlock;
5346
5347 /* Sync the content back to the disk */
5348 if (extent_info->catalog_fp) {
5349 /* Update the extents in catalog record */
5350 if (extent_info->is_dirlink) {
5351 error = cat_update_dirlink(hfsmp, extent_info->forkType,
5352 extent_info->dirlink_desc, extent_info->dirlink_attr,
5353 &(extent_info->dirlink_fork->ff_data));
5354 } else {
5355 cp->c_flag |= C_MODIFIED;
5356 /* If this is a system file, sync volume headers on disk */
5357 if (extent_info->is_sysfile) {
5358 error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
5359 }
5360 }
5361 } else {
5362 /* Replace record for extents overflow or extents-based xattrs */
5363 error = BTReplaceRecord(extent_info->fcb, extent_info->iterator,
5364 &(extent_info->btdata), extent_info->recordlen);
5365 }
5366 if (error) {
5367 printf ("hfs_reclaim_extent: fileID=%u, update record error=%u\n", extent_info->fileID, error);
5368 goto out;
5369 }
5370
5371 /* Deallocate the old extent */
5372 error = BlockDeallocate(hfsmp, oldStartBlock, oldBlockCount, HFS_ALLOC_SKIPFREEBLKS);
5373 if (error) {
5374 printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) BlockDeallocate error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error);
5375 goto out;
5376 }
5377 extent_info->blocks_relocated += newBlockCount;
5378
5379 if (hfs_resize_debug) {
5380 printf ("hfs_reclaim_extent: Relocated record:%u %u:(%u,%u) to (%u,%u)\n", extent_info->overflow_count, index, oldStartBlock, oldBlockCount, newStartBlock, newBlockCount);
5381 }
5382
5383 out:
5384 if (error != 0) {
5385 if (blocks_allocated == true) {
5386 BlockDeallocate(hfsmp, newStartBlock, newBlockCount, HFS_ALLOC_SKIPFREEBLKS);
5387 }
5388 } else {
5389 /* On success, increment the total allocation blocks processed */
5390 extent_info->cur_blockCount += newBlockCount;
5391 }
5392
5393 hfs_systemfile_unlock(hfsmp, extent_info->lockflags);
5394
5395 /* For a non-system file, if an extent entry from catalog record
5396 * was modified, sync the in-memory changes to the catalog record
5397 * on disk before ending the transaction.
5398 */
5399 if ((error == 0) &&
5400 (extent_info->overflow_count < kHFSPlusExtentDensity) &&
5401 (extent_info->is_sysfile == false)) {
5402 (void) hfs_update(extent_info->vp, MNT_WAIT);
5403 }
5404
5405 hfs_end_transaction(hfsmp);
5406
5407 return error;
5408 }
5409
5410 /* Report intermediate progress during volume resize */
5411 static void
5412 hfs_truncatefs_progress(struct hfsmount *hfsmp)
5413 {
5414 u_int32_t cur_progress;
5415
5416 hfs_resize_progress(hfsmp, &cur_progress);
5417 if (cur_progress > (hfsmp->hfs_resize_progress + 9)) {
5418 printf("hfs_truncatefs: %d%% done...\n", cur_progress);
5419 hfsmp->hfs_resize_progress = cur_progress;
5420 }
5421 return;
5422 }
5423
5424 /*
5425 * Reclaim space at the end of a volume for given file and forktype.
5426 *
5427 * This routine attempts to move any extent which contains allocation blocks
5428 * at or after "allocLimit." A separate transaction is used for every extent
5429 * that needs to be moved. If there is not contiguous space available for
5430 * moving an extent, it can be split into smaller extents. The contents of
5431 * any moved extents are read and written via the volume's device vnode --
5432 * NOT via "vp." During the move, moved blocks which are part of a transaction
5433 * have their physical block numbers invalidated so they will eventually be
5434 * written to their new locations.
5435 *
5436 * This function is also called for directory hard links. Directory hard links
5437 * are regular files with no data fork and resource fork that contains alias
5438 * information for backward compatibility with pre-Leopard systems. However
5439 * non-Mac OS X implementation can add/modify data fork or resource fork
5440 * information to directory hard links, so we check, and if required, relocate
5441 * both data fork and resource fork.
5442 *
5443 * Inputs:
5444 * hfsmp The volume being resized.
5445 * vp The vnode for the system file.
5446 * fileID ID of the catalog record that needs to be relocated
5447 * forktype The type of fork that needs to be relocated,
5448 * kHFSResourceForkType for resource fork,
5449 * kHFSDataForkType for data fork
5450 * allocLimit Allocation limit for the new volume size,
5451 * do not use this block or beyond. All extents
5452 * that use this block or any blocks beyond this limit
5453 * will be relocated.
5454 *
5455 * Side Effects:
5456 * hfsmp->hfs_resize_blocksmoved is incremented by the number of allocation
5457 * blocks that were relocated.
5458 */
5459 static int
5460 hfs_reclaim_file(struct hfsmount *hfsmp, struct vnode *vp, u_int32_t fileID,
5461 u_int8_t forktype, u_long allocLimit, vfs_context_t context)
5462 {
5463 int error = 0;
5464 struct hfs_reclaim_extent_info *extent_info;
5465 int i;
5466 int lockflags = 0;
5467 struct cnode *cp;
5468 struct filefork *fp;
5469 int took_truncate_lock = false;
5470 int release_desc = false;
5471 HFSPlusExtentKey *key;
5472
5473 /* If there is no vnode for this file, then there's nothing to do. */
5474 if (vp == NULL) {
5475 return 0;
5476 }
5477
5478 cp = VTOC(vp);
5479
5480 MALLOC(extent_info, struct hfs_reclaim_extent_info *,
5481 sizeof(struct hfs_reclaim_extent_info), M_TEMP, M_WAITOK);
5482 if (extent_info == NULL) {
5483 return ENOMEM;
5484 }
5485 bzero(extent_info, sizeof(struct hfs_reclaim_extent_info));
5486 extent_info->vp = vp;
5487 extent_info->fileID = fileID;
5488 extent_info->forkType = forktype;
5489 extent_info->is_sysfile = vnode_issystem(vp);
5490 if (vnode_isdir(vp) && (cp->c_flag & C_HARDLINK)) {
5491 extent_info->is_dirlink = true;
5492 }
5493 /* We always need allocation bitmap and extent btree lock */
5494 lockflags = SFL_BITMAP | SFL_EXTENTS;
5495 if ((fileID == kHFSCatalogFileID) || (extent_info->is_dirlink == true)) {
5496 lockflags |= SFL_CATALOG;
5497 } else if (fileID == kHFSAttributesFileID) {
5498 lockflags |= SFL_ATTRIBUTE;
5499 } else if (fileID == kHFSStartupFileID) {
5500 lockflags |= SFL_STARTUP;
5501 }
5502 extent_info->lockflags = lockflags;
5503 extent_info->fcb = VTOF(hfsmp->hfs_extents_vp);
5504
5505 /* Flush data associated with current file on disk.
5506 *
5507 * If the current vnode is directory hard link, no flushing of
5508 * journal or vnode is required. The current kernel does not
5509 * modify data/resource fork of directory hard links, so nothing
5510 * will be in the cache. If a directory hard link is newly created,
5511 * the resource fork data is written directly using devvp and
5512 * the code that actually relocates data (hfs_copy_extent()) also
5513 * uses devvp for its I/O --- so they will see a consistent copy.
5514 */
5515 if (extent_info->is_sysfile) {
5516 /* If the current vnode is system vnode, flush journal
5517 * to make sure that all data is written to the disk.
5518 */
5519 error = hfs_journal_flush(hfsmp, TRUE);
5520 if (error) {
5521 printf ("hfs_reclaim_file: journal_flush returned %d\n", error);
5522 goto out;
5523 }
5524 } else if (extent_info->is_dirlink == false) {
5525 /* Flush all blocks associated with this regular file vnode.
5526 * Normally there should not be buffer cache blocks for regular
5527 * files, but for objects like symlinks, we can have buffer cache
5528 * blocks associated with the vnode. Therefore we call
5529 * buf_flushdirtyblks() also.
5530 */
5531 buf_flushdirtyblks(vp, 0, BUF_SKIP_LOCKED, "hfs_reclaim_file");
5532
5533 hfs_unlock(cp);
5534 hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK);
5535 took_truncate_lock = true;
5536 (void) cluster_push(vp, 0);
5537 error = hfs_lock(cp, HFS_FORCE_LOCK);
5538 if (error) {
5539 goto out;
5540 }
5541
5542 /* If the file no longer exists, nothing left to do */
5543 if (cp->c_flag & C_NOEXISTS) {
5544 error = 0;
5545 goto out;
5546 }
5547
5548 /* Wait for any in-progress writes to this vnode to complete, so that we'll
5549 * be copying consistent bits. (Otherwise, it's possible that an async
5550 * write will complete to the old extent after we read from it. That
5551 * could lead to corruption.)
5552 */
5553 error = vnode_waitforwrites(vp, 0, 0, 0, "hfs_reclaim_file");
5554 if (error) {
5555 goto out;
5556 }
5557 }
5558
5559 if (hfs_resize_debug) {
5560 printf("hfs_reclaim_file: === Start reclaiming %sfork for %sid=%u ===\n", (forktype ? "rsrc" : "data"), (extent_info->is_dirlink ? "dirlink" : "file"), fileID);
5561 }
5562
5563 if (extent_info->is_dirlink) {
5564 MALLOC(extent_info->dirlink_desc, struct cat_desc *,
5565 sizeof(struct cat_desc), M_TEMP, M_WAITOK);
5566 MALLOC(extent_info->dirlink_attr, struct cat_attr *,
5567 sizeof(struct cat_attr), M_TEMP, M_WAITOK);
5568 MALLOC(extent_info->dirlink_fork, struct filefork *,
5569 sizeof(struct filefork), M_TEMP, M_WAITOK);
5570 if ((extent_info->dirlink_desc == NULL) ||
5571 (extent_info->dirlink_attr == NULL) ||
5572 (extent_info->dirlink_fork == NULL)) {
5573 error = ENOMEM;
5574 goto out;
5575 }
5576
5577 /* Lookup catalog record for directory hard link and
5578 * create a fake filefork for the value looked up from
5579 * the disk.
5580 */
5581 fp = extent_info->dirlink_fork;
5582 bzero(extent_info->dirlink_fork, sizeof(struct filefork));
5583 extent_info->dirlink_fork->ff_cp = cp;
5584 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
5585 error = cat_lookup_dirlink(hfsmp, fileID, forktype,
5586 extent_info->dirlink_desc, extent_info->dirlink_attr,
5587 &(extent_info->dirlink_fork->ff_data));
5588 hfs_systemfile_unlock(hfsmp, lockflags);
5589 if (error) {
5590 printf ("hfs_reclaim_file: cat_lookup_dirlink for fileID=%u returned error=%u\n", fileID, error);
5591 goto out;
5592 }
5593 release_desc = true;
5594 } else {
5595 fp = VTOF(vp);
5596 }
5597
5598 extent_info->catalog_fp = fp;
5599 extent_info->recStartBlock = 0;
5600 extent_info->extents = extent_info->catalog_fp->ff_extents;
5601 /* Relocate extents from the catalog record */
5602 for (i = 0; i < kHFSPlusExtentDensity; ++i) {
5603 if (fp->ff_extents[i].blockCount == 0) {
5604 break;
5605 }
5606 extent_info->extent_index = i;
5607 error = hfs_reclaim_extent(hfsmp, allocLimit, extent_info, context);
5608 if (error) {
5609 printf ("hfs_reclaim_file: fileID=%u #%d %u:(%u,%u) hfs_reclaim_extent error=%d\n", fileID, extent_info->overflow_count, i, fp->ff_extents[i].startBlock, fp->ff_extents[i].blockCount, error);
5610 goto out;
5611 }
5612 }
5613
5614 /* If the number of allocation blocks processed for reclaiming
5615 * are less than total number of blocks for the file, continuing
5616 * working on overflow extents record.
5617 */
5618 if (fp->ff_blocks <= extent_info->cur_blockCount) {
5619 if (0 && hfs_resize_debug) {
5620 printf ("hfs_reclaim_file: Nothing more to relocate, offset=%d, ff_blocks=%u, cur_blockCount=%u\n", i, fp->ff_blocks, extent_info->cur_blockCount);
5621 }
5622 goto out;
5623 }
5624
5625 if (hfs_resize_debug) {
5626 printf ("hfs_reclaim_file: Will check overflow records, offset=%d, ff_blocks=%u, cur_blockCount=%u\n", i, fp->ff_blocks, extent_info->cur_blockCount);
5627 }
5628
5629 MALLOC(extent_info->iterator, struct BTreeIterator *, sizeof(struct BTreeIterator), M_TEMP, M_WAITOK);
5630 if (extent_info->iterator == NULL) {
5631 error = ENOMEM;
5632 goto out;
5633 }
5634 bzero(extent_info->iterator, sizeof(struct BTreeIterator));
5635 key = (HFSPlusExtentKey *) &(extent_info->iterator->key);
5636 key->keyLength = kHFSPlusExtentKeyMaximumLength;
5637 key->forkType = forktype;
5638 key->fileID = fileID;
5639 key->startBlock = extent_info->cur_blockCount;
5640
5641 extent_info->btdata.bufferAddress = extent_info->record.overflow;
5642 extent_info->btdata.itemSize = sizeof(HFSPlusExtentRecord);
5643 extent_info->btdata.itemCount = 1;
5644
5645 extent_info->catalog_fp = NULL;
5646
5647 /* Search the first overflow extent with expected startBlock as 'cur_blockCount' */
5648 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
5649 error = BTSearchRecord(extent_info->fcb, extent_info->iterator,
5650 &(extent_info->btdata), &(extent_info->recordlen),
5651 extent_info->iterator);
5652 hfs_systemfile_unlock(hfsmp, lockflags);
5653 while (error == 0) {
5654 extent_info->overflow_count++;
5655 extent_info->recStartBlock = key->startBlock;
5656 extent_info->extents = extent_info->record.overflow;
5657 for (i = 0; i < kHFSPlusExtentDensity; i++) {
5658 if (extent_info->record.overflow[i].blockCount == 0) {
5659 goto out;
5660 }
5661 extent_info->extent_index = i;
5662 error = hfs_reclaim_extent(hfsmp, allocLimit, extent_info, context);
5663 if (error) {
5664 printf ("hfs_reclaim_file: fileID=%u #%d %u:(%u,%u) hfs_reclaim_extent error=%d\n", fileID, extent_info->overflow_count, i, extent_info->record.overflow[i].startBlock, extent_info->record.overflow[i].blockCount, error);
5665 goto out;
5666 }
5667 }
5668
5669 /* Look for more overflow records */
5670 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
5671 error = BTIterateRecord(extent_info->fcb, kBTreeNextRecord,
5672 extent_info->iterator, &(extent_info->btdata),
5673 &(extent_info->recordlen));
5674 hfs_systemfile_unlock(hfsmp, lockflags);
5675 if (error) {
5676 break;
5677 }
5678 /* Stop when we encounter a different file or fork. */
5679 if ((key->fileID != fileID) || (key->forkType != forktype)) {
5680 break;
5681 }
5682 }
5683 if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) {
5684 error = 0;
5685 }
5686
5687 out:
5688 /* If any blocks were relocated, account them and report progress */
5689 if (extent_info->blocks_relocated) {
5690 hfsmp->hfs_resize_blocksmoved += extent_info->blocks_relocated;
5691 hfs_truncatefs_progress(hfsmp);
5692 if (fileID < kHFSFirstUserCatalogNodeID) {
5693 printf ("hfs_reclaim_file: Relocated %u blocks from fileID=%u on \"%s\"\n",
5694 extent_info->blocks_relocated, fileID, hfsmp->vcbVN);
5695 }
5696 }
5697 if (extent_info->iterator) {
5698 FREE(extent_info->iterator, M_TEMP);
5699 }
5700 if (release_desc == true) {
5701 cat_releasedesc(extent_info->dirlink_desc);
5702 }
5703 if (extent_info->dirlink_desc) {
5704 FREE(extent_info->dirlink_desc, M_TEMP);
5705 }
5706 if (extent_info->dirlink_attr) {
5707 FREE(extent_info->dirlink_attr, M_TEMP);
5708 }
5709 if (extent_info->dirlink_fork) {
5710 FREE(extent_info->dirlink_fork, M_TEMP);
5711 }
5712 if ((extent_info->blocks_relocated != 0) && (extent_info->is_sysfile == false)) {
5713 (void) hfs_update(vp, MNT_WAIT);
5714 }
5715 if (took_truncate_lock) {
5716 hfs_unlock_truncate(cp, 0);
5717 }
5718 if (extent_info) {
5719 FREE(extent_info, M_TEMP);
5720 }
5721 if (hfs_resize_debug) {
5722 printf("hfs_reclaim_file: === Finished relocating %sfork for fileid=%u (error=%d) ===\n", (forktype ? "rsrc" : "data"), fileID, error);
5723 }
5724
5725 return error;
5726 }
5727
5728
5729 /*
5730 * This journal_relocate callback updates the journal info block to point
5731 * at the new journal location. This write must NOT be done using the
5732 * transaction. We must write the block immediately. We must also force
5733 * it to get to the media so that the new journal location will be seen by
5734 * the replay code before we can safely let journaled blocks be written
5735 * to their normal locations.
5736 *
5737 * The tests for journal_uses_fua below are mildly hacky. Since the journal
5738 * and the file system are both on the same device, I'm leveraging what
5739 * the journal has decided about FUA.
5740 */
/*
 * Argument bundle passed through journal_relocate() to
 * hfs_journal_relocate_callback() below.
 */
struct hfs_journal_relocate_args {
	struct hfsmount *hfsmp;		/* volume whose journal is being moved */
	vfs_context_t context;		/* caller's context (credential for I/O, ioctl) */
	u_int32_t newStartBlock;	/* journal's new start, in allocation blocks */
};
5746
5747 static errno_t
5748 hfs_journal_relocate_callback(void *_args)
5749 {
5750 int error;
5751 struct hfs_journal_relocate_args *args = _args;
5752 struct hfsmount *hfsmp = args->hfsmp;
5753 buf_t bp;
5754 JournalInfoBlock *jibp;
5755
5756 error = buf_meta_bread(hfsmp->hfs_devvp,
5757 hfsmp->vcbJinfoBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size),
5758 hfsmp->blockSize, vfs_context_ucred(args->context), &bp);
5759 if (error) {
5760 printf("hfs_reclaim_journal_file: failed to read JIB (%d)\n", error);
5761 if (bp) {
5762 buf_brelse(bp);
5763 }
5764 return error;
5765 }
5766 jibp = (JournalInfoBlock*) buf_dataptr(bp);
5767 jibp->offset = SWAP_BE64((u_int64_t)args->newStartBlock * hfsmp->blockSize);
5768 jibp->size = SWAP_BE64(hfsmp->jnl_size);
5769 if (journal_uses_fua(hfsmp->jnl))
5770 buf_markfua(bp);
5771 error = buf_bwrite(bp);
5772 if (error) {
5773 printf("hfs_reclaim_journal_file: failed to write JIB (%d)\n", error);
5774 return error;
5775 }
5776 if (!journal_uses_fua(hfsmp->jnl)) {
5777 error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, args->context);
5778 if (error) {
5779 printf("hfs_reclaim_journal_file: DKIOCSYNCHRONIZECACHE failed (%d)\n", error);
5780 error = 0; /* Don't fail the operation. */
5781 }
5782 }
5783
5784 return error;
5785 }
5786
5787
5788 static int
5789 hfs_reclaim_journal_file(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context)
5790 {
5791 int error;
5792 int journal_err;
5793 int lockflags;
5794 u_int32_t oldStartBlock;
5795 u_int32_t newStartBlock;
5796 u_int32_t oldBlockCount;
5797 u_int32_t newBlockCount;
5798 struct cat_desc journal_desc;
5799 struct cat_attr journal_attr;
5800 struct cat_fork journal_fork;
5801 struct hfs_journal_relocate_args callback_args;
5802
5803 if (hfsmp->jnl_start + (hfsmp->jnl_size / hfsmp->blockSize) <= allocLimit) {
5804 /* The journal does not require relocation */
5805 return 0;
5806 }
5807
5808 error = hfs_start_transaction(hfsmp);
5809 if (error) {
5810 printf("hfs_reclaim_journal_file: hfs_start_transaction returned %d\n", error);
5811 return error;
5812 }
5813 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
5814
5815 oldBlockCount = hfsmp->jnl_size / hfsmp->blockSize;
5816
5817 /* TODO: Allow the journal to change size based on the new volume size. */
5818 error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount,
5819 HFS_ALLOC_METAZONE | HFS_ALLOC_FORCECONTIG | HFS_ALLOC_SKIPFREEBLKS,
5820 &newStartBlock, &newBlockCount);
5821 if (error) {
5822 printf("hfs_reclaim_journal_file: BlockAllocate returned %d\n", error);
5823 goto fail;
5824 }
5825 if (newBlockCount != oldBlockCount) {
5826 printf("hfs_reclaim_journal_file: newBlockCount != oldBlockCount (%u, %u)\n", newBlockCount, oldBlockCount);
5827 goto free_fail;
5828 }
5829
5830 error = BlockDeallocate(hfsmp, hfsmp->jnl_start, oldBlockCount, HFS_ALLOC_SKIPFREEBLKS);
5831 if (error) {
5832 printf("hfs_reclaim_journal_file: BlockDeallocate returned %d\n", error);
5833 goto free_fail;
5834 }
5835
5836 /* Update the catalog record for .journal */
5837 error = cat_idlookup(hfsmp, hfsmp->hfs_jnlfileid, 1, &journal_desc, &journal_attr, &journal_fork);
5838 if (error) {
5839 printf("hfs_reclaim_journal_file: cat_idlookup returned %d\n", error);
5840 goto free_fail;
5841 }
5842 oldStartBlock = journal_fork.cf_extents[0].startBlock;
5843 journal_fork.cf_size = newBlockCount * hfsmp->blockSize;
5844 journal_fork.cf_extents[0].startBlock = newStartBlock;
5845 journal_fork.cf_extents[0].blockCount = newBlockCount;
5846 journal_fork.cf_blocks = newBlockCount;
5847 error = cat_update(hfsmp, &journal_desc, &journal_attr, &journal_fork, NULL);
5848 cat_releasedesc(&journal_desc); /* all done with cat descriptor */
5849 if (error) {
5850 printf("hfs_reclaim_journal_file: cat_update returned %d\n", error);
5851 goto free_fail;
5852 }
5853 callback_args.hfsmp = hfsmp;
5854 callback_args.context = context;
5855 callback_args.newStartBlock = newStartBlock;
5856
5857 error = journal_relocate(hfsmp->jnl, (off_t)newStartBlock*hfsmp->blockSize,
5858 (off_t)newBlockCount*hfsmp->blockSize, 0,
5859 hfs_journal_relocate_callback, &callback_args);
5860 if (error) {
5861 /* NOTE: journal_relocate will mark the journal invalid. */
5862 printf("hfs_reclaim_journal_file: journal_relocate returned %d\n", error);
5863 goto fail;
5864 }
5865 hfsmp->jnl_start = newStartBlock;
5866 hfsmp->jnl_size = (off_t)newBlockCount * hfsmp->blockSize;
5867
5868 hfs_systemfile_unlock(hfsmp, lockflags);
5869 error = hfs_end_transaction(hfsmp);
5870 if (error) {
5871 printf("hfs_reclaim_journal_file: hfs_end_transaction returned %d\n", error);
5872 }
5873
5874 /* Account for the blocks relocated and print progress */
5875 hfsmp->hfs_resize_blocksmoved += oldBlockCount;
5876 hfs_truncatefs_progress(hfsmp);
5877 if (!error) {
5878 printf ("hfs_reclaim_journal_file: Relocated %u blocks from journal on \"%s\"\n",
5879 oldBlockCount, hfsmp->vcbVN);
5880 if (hfs_resize_debug) {
5881 printf ("hfs_reclaim_journal_file: Successfully relocated journal from (%u,%u) to (%u,%u)\n", oldStartBlock, oldBlockCount, newStartBlock, newBlockCount);
5882 }
5883 }
5884 return error;
5885
5886 free_fail:
5887 journal_err = BlockDeallocate(hfsmp, newStartBlock, newBlockCount, HFS_ALLOC_SKIPFREEBLKS);
5888 if (journal_err) {
5889 printf("hfs_reclaim_journal_file: BlockDeallocate returned %d\n", error);
5890 hfs_mark_volume_inconsistent(hfsmp);
5891 }
5892 fail:
5893 hfs_systemfile_unlock(hfsmp, lockflags);
5894 (void) hfs_end_transaction(hfsmp);
5895 if (hfs_resize_debug) {
5896 printf ("hfs_reclaim_journal_file: Error relocating journal file (error=%d)\n", error);
5897 }
5898 return error;
5899 }
5900
5901
5902 /*
5903 * Move the journal info block to a new location. We have to make sure the
5904 * new copy of the journal info block gets to the media first, then change
5905 * the field in the volume header and the catalog record.
5906 */
5907 static int
5908 hfs_reclaim_journal_info_block(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context)
5909 {
5910 int error;
5911 int journal_err;
5912 int lockflags;
5913 u_int32_t oldBlock;
5914 u_int32_t newBlock;
5915 u_int32_t blockCount;
5916 struct cat_desc jib_desc;
5917 struct cat_attr jib_attr;
5918 struct cat_fork jib_fork;
5919 buf_t old_bp, new_bp;
5920
5921 if (hfsmp->vcbJinfoBlock <= allocLimit) {
5922 /* The journal info block does not require relocation */
5923 return 0;
5924 }
5925
5926 error = hfs_start_transaction(hfsmp);
5927 if (error) {
5928 printf("hfs_reclaim_journal_info_block: hfs_start_transaction returned %d\n", error);
5929 return error;
5930 }
5931 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
5932
5933 error = BlockAllocate(hfsmp, 1, 1, 1,
5934 HFS_ALLOC_METAZONE | HFS_ALLOC_FORCECONTIG | HFS_ALLOC_SKIPFREEBLKS,
5935 &newBlock, &blockCount);
5936 if (error) {
5937 printf("hfs_reclaim_journal_info_block: BlockAllocate returned %d\n", error);
5938 goto fail;
5939 }
5940 if (blockCount != 1) {
5941 printf("hfs_reclaim_journal_info_block: blockCount != 1 (%u)\n", blockCount);
5942 goto free_fail;
5943 }
5944 error = BlockDeallocate(hfsmp, hfsmp->vcbJinfoBlock, 1, HFS_ALLOC_SKIPFREEBLKS);
5945 if (error) {
5946 printf("hfs_reclaim_journal_info_block: BlockDeallocate returned %d\n", error);
5947 goto free_fail;
5948 }
5949
5950 /* Copy the old journal info block content to the new location */
5951 error = buf_meta_bread(hfsmp->hfs_devvp,
5952 hfsmp->vcbJinfoBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size),
5953 hfsmp->blockSize, vfs_context_ucred(context), &old_bp);
5954 if (error) {
5955 printf("hfs_reclaim_journal_info_block: failed to read JIB (%d)\n", error);
5956 if (old_bp) {
5957 buf_brelse(old_bp);
5958 }
5959 goto free_fail;
5960 }
5961 new_bp = buf_getblk(hfsmp->hfs_devvp,
5962 newBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size),
5963 hfsmp->blockSize, 0, 0, BLK_META);
5964 bcopy((char*)buf_dataptr(old_bp), (char*)buf_dataptr(new_bp), hfsmp->blockSize);
5965 buf_brelse(old_bp);
5966 if (journal_uses_fua(hfsmp->jnl))
5967 buf_markfua(new_bp);
5968 error = buf_bwrite(new_bp);
5969 if (error) {
5970 printf("hfs_reclaim_journal_info_block: failed to write new JIB (%d)\n", error);
5971 goto free_fail;
5972 }
5973 if (!journal_uses_fua(hfsmp->jnl)) {
5974 error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
5975 if (error) {
5976 printf("hfs_reclaim_journal_info_block: DKIOCSYNCHRONIZECACHE failed (%d)\n", error);
5977 /* Don't fail the operation. */
5978 }
5979 }
5980
5981 /* Update the catalog record for .journal_info_block */
5982 error = cat_idlookup(hfsmp, hfsmp->hfs_jnlinfoblkid, 1, &jib_desc, &jib_attr, &jib_fork);
5983 if (error) {
5984 printf("hfs_reclaim_journal_file: cat_idlookup returned %d\n", error);
5985 goto fail;
5986 }
5987 oldBlock = jib_fork.cf_extents[0].startBlock;
5988 jib_fork.cf_size = hfsmp->blockSize;
5989 jib_fork.cf_extents[0].startBlock = newBlock;
5990 jib_fork.cf_extents[0].blockCount = 1;
5991 jib_fork.cf_blocks = 1;
5992 error = cat_update(hfsmp, &jib_desc, &jib_attr, &jib_fork, NULL);
5993 cat_releasedesc(&jib_desc); /* all done with cat descriptor */
5994 if (error) {
5995 printf("hfs_reclaim_journal_info_block: cat_update returned %d\n", error);
5996 goto fail;
5997 }
5998
5999 /* Update the pointer to the journal info block in the volume header. */
6000 hfsmp->vcbJinfoBlock = newBlock;
6001 error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
6002 if (error) {
6003 printf("hfs_reclaim_journal_info_block: hfs_flushvolumeheader returned %d\n", error);
6004 goto fail;
6005 }
6006 hfs_systemfile_unlock(hfsmp, lockflags);
6007 error = hfs_end_transaction(hfsmp);
6008 if (error) {
6009 printf("hfs_reclaim_journal_info_block: hfs_end_transaction returned %d\n", error);
6010 }
6011 error = hfs_journal_flush(hfsmp, FALSE);
6012 if (error) {
6013 printf("hfs_reclaim_journal_info_block: journal_flush returned %d\n", error);
6014 }
6015
6016 /* Account for the block relocated and print progress */
6017 hfsmp->hfs_resize_blocksmoved += 1;
6018 hfs_truncatefs_progress(hfsmp);
6019 if (!error) {
6020 printf ("hfs_reclaim_journal_info: Relocated 1 block from journal info on \"%s\"\n",
6021 hfsmp->vcbVN);
6022 if (hfs_resize_debug) {
6023 printf ("hfs_reclaim_journal_info_block: Successfully relocated journal info block from (%u,%u) to (%u,%u)\n", oldBlock, blockCount, newBlock, blockCount);
6024 }
6025 }
6026 return error;
6027
6028 free_fail:
6029 journal_err = BlockDeallocate(hfsmp, newBlock, blockCount, HFS_ALLOC_SKIPFREEBLKS);
6030 if (journal_err) {
6031 printf("hfs_reclaim_journal_info_block: BlockDeallocate returned %d\n", error);
6032 hfs_mark_volume_inconsistent(hfsmp);
6033 }
6034
6035 fail:
6036 hfs_systemfile_unlock(hfsmp, lockflags);
6037 (void) hfs_end_transaction(hfsmp);
6038 if (hfs_resize_debug) {
6039 printf ("hfs_reclaim_journal_info_block: Error relocating journal info block (error=%d)\n", error);
6040 }
6041 return error;
6042 }
6043
6044
6045 /*
6046 * This function traverses through all extended attribute records for a given
6047 * fileID, and calls function that reclaims data blocks that exist in the
6048 * area of the disk being reclaimed which in turn is responsible for allocating
6049 * new space, copying extent data, deallocating new space, and if required,
6050 * splitting the extent.
6051 *
6052 * Note: The caller has already acquired the cnode lock on the file. Therefore
6053 * we are assured that no other thread would be creating/deleting/modifying
6054 * extended attributes for this file.
6055 *
6056 * Side Effects:
6057 * hfsmp->hfs_resize_blocksmoved is incremented by the number of allocation
6058 * blocks that were relocated.
6059 *
6060 * Returns:
6061 * 0 on success, non-zero on failure.
6062 */
/* See the block comment above for the full contract.  Briefly: relocate
 * all extent-based EA blocks of fileID that lie at or beyond allocLimit.
 * Caller holds the cnode lock on vp.
 */
static int
hfs_reclaim_xattr(struct hfsmount *hfsmp, struct vnode *vp, u_int32_t fileID, u_int32_t allocLimit, vfs_context_t context)
{
	int error = 0;
	struct hfs_reclaim_extent_info *extent_info;
	int i;
	HFSPlusAttrKey *key;
	/* Alias into extent_info->lockflags so the lock state taken here is
	 * visible to hfs_reclaim_extent() through the same structure.
	 */
	int *lockflags;

	if (hfs_resize_debug) {
		printf("hfs_reclaim_xattr: === Start reclaiming xattr for id=%u ===\n", fileID);
	}

	MALLOC(extent_info, struct hfs_reclaim_extent_info *,
	       sizeof(struct hfs_reclaim_extent_info), M_TEMP, M_WAITOK);
	if (extent_info == NULL) {
		return ENOMEM;
	}
	bzero(extent_info, sizeof(struct hfs_reclaim_extent_info));
	extent_info->vp = vp;
	extent_info->fileID = fileID;
	extent_info->is_xattr = true;
	extent_info->is_sysfile = vnode_issystem(vp);
	/* Extent-based EAs live in the attributes B-tree file. */
	extent_info->fcb = VTOF(hfsmp->hfs_attribute_vp);
	lockflags = &(extent_info->lockflags);
	*lockflags = SFL_ATTRIBUTE | SFL_BITMAP;

	/* Initialize iterator from the extent_info structure */
	MALLOC(extent_info->iterator, struct BTreeIterator *,
	       sizeof(struct BTreeIterator), M_TEMP, M_WAITOK);
	if (extent_info->iterator == NULL) {
		error = ENOMEM;
		goto out;
	}
	bzero(extent_info->iterator, sizeof(struct BTreeIterator));

	/* Build attribute key (fileID with NULL attribute name, i.e. the
	 * smallest possible key for this fileID).
	 */
	key = (HFSPlusAttrKey *)&(extent_info->iterator->key);
	error = hfs_buildattrkey(fileID, NULL, key);
	if (error) {
		goto out;
	}

	/* Initialize btdata from extent_info structure.  Note that the
	 * buffer pointer actually points to the xattr record from the
	 * extent_info structure itself.
	 */
	extent_info->btdata.bufferAddress = &(extent_info->record.xattr);
	extent_info->btdata.itemSize = sizeof(HFSPlusAttrRecord);
	extent_info->btdata.itemCount = 1;

	/*
	 * Sync all extent-based attribute data to the disk.
	 *
	 * All extent-based attribute data I/O is performed via cluster
	 * I/O using a virtual file that spans across entire file system
	 * space.
	 */
	hfs_lock_truncate(VTOC(hfsmp->hfs_attrdata_vp), HFS_EXCLUSIVE_LOCK);
	(void)cluster_push(hfsmp->hfs_attrdata_vp, 0);
	error = vnode_waitforwrites(hfsmp->hfs_attrdata_vp, 0, 0, 0, "hfs_reclaim_xattr");
	hfs_unlock_truncate(VTOC(hfsmp->hfs_attrdata_vp), 0);
	if (error) {
		goto out;
	}

	/* Search for extended attribute for current file.  This
	 * will place the iterator before the first matching record.
	 */
	*lockflags = hfs_systemfile_lock(hfsmp, *lockflags, HFS_EXCLUSIVE_LOCK);
	error = BTSearchRecord(extent_info->fcb, extent_info->iterator,
			&(extent_info->btdata), &(extent_info->recordlen),
			extent_info->iterator);
	hfs_systemfile_unlock(hfsmp, *lockflags);
	if (error) {
		if (error != btNotFound) {
			goto out;
		}
		/* btNotFound is expected here, so just mask it */
		error = 0;
	}

	/* Walk this file's attribute records; locks are dropped between
	 * iterations, which is safe because the caller's cnode lock keeps
	 * the file's EAs from changing underneath us.
	 */
	while (1) {
		/* Iterate to the next record */
		*lockflags = hfs_systemfile_lock(hfsmp, *lockflags, HFS_EXCLUSIVE_LOCK);
		error = BTIterateRecord(extent_info->fcb, kBTreeNextRecord,
				extent_info->iterator, &(extent_info->btdata),
				&(extent_info->recordlen));
		hfs_systemfile_unlock(hfsmp, *lockflags);

		/* Stop the iteration if we encounter end of btree or xattr with different fileID */
		if (error || key->fileID != fileID) {
			if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) {
				error = 0;
			}
			break;
		}

		/* We only care about extent-based EAs */
		if ((extent_info->record.xattr.recordType != kHFSPlusAttrForkData) &&
		    (extent_info->record.xattr.recordType != kHFSPlusAttrExtents)) {
			continue;
		}

		/* A fork-data record starts a new fork (reset overflow count);
		 * an extents record continues the previous fork's overflow.
		 */
		if (extent_info->record.xattr.recordType == kHFSPlusAttrForkData) {
			extent_info->overflow_count = 0;
			extent_info->extents = extent_info->record.xattr.forkData.theFork.extents;
		} else if (extent_info->record.xattr.recordType == kHFSPlusAttrExtents) {
			extent_info->overflow_count++;
			extent_info->extents = extent_info->record.xattr.overflowExtents.extents;
		}

		extent_info->recStartBlock = key->startBlock;
		for (i = 0; i < kHFSPlusExtentDensity; i++) {
			/* A zero blockCount terminates the extent list. */
			if (extent_info->extents[i].blockCount == 0) {
				break;
			}
			extent_info->extent_index = i;
			error = hfs_reclaim_extent(hfsmp, allocLimit, extent_info, context);
			if (error) {
				printf ("hfs_reclaim_xattr: fileID=%u hfs_reclaim_extent error=%d\n", fileID, error);
				goto out;
			}
		}
	}

out:
	/* If any blocks were relocated, account them and report progress */
	if (extent_info->blocks_relocated) {
		hfsmp->hfs_resize_blocksmoved += extent_info->blocks_relocated;
		hfs_truncatefs_progress(hfsmp);
	}
	if (extent_info->iterator) {
		FREE(extent_info->iterator, M_TEMP);
	}
	if (extent_info) {
		FREE(extent_info, M_TEMP);
	}
	if (hfs_resize_debug) {
		printf("hfs_reclaim_xattr: === Finished relocating xattr for fileid=%u (error=%d) ===\n", fileID, error);
	}
	return error;
}
6206
6207 /*
6208 * Reclaim any extent-based extended attributes allocation blocks from
6209 * the area of the disk that is being truncated.
6210 *
6211 * The function traverses the attribute btree to find out the fileIDs
6212 * of the extended attributes that need to be relocated. For every
6213 * file whose large EA requires relocation, it looks up the cnode and
6214 * calls hfs_reclaim_xattr() to do all the work for allocating
6215 * new space, copying data, deallocating old space, and if required,
6216 * splitting the extents.
6217 *
6218 * Inputs:
6219 * allocLimit - starting block of the area being reclaimed
6220 *
6221 * Returns:
6222 * returns 0 on success, non-zero on failure.
6223 */
/* See the block comment above for the full contract.  Briefly: scan the
 * attribute B-tree for extent-based EAs that overlap the reclaim area and
 * relocate them per-file via hfs_reclaim_xattr().
 */
static int
hfs_reclaim_xattrspace(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context)
{
	int error = 0;
	FCB *fcb;
	struct BTreeIterator *iterator = NULL;
	struct FSBufferDescriptor btdata;
	HFSPlusAttrKey *key;
	HFSPlusAttrRecord rec;
	int lockflags = 0;
	/* Last fileID whose EAs we already relocated; records are keyed by
	 * fileID so duplicates for the same file arrive consecutively.
	 */
	cnid_t prev_fileid = 0;
	struct vnode *vp;
	int need_relocate;
	int btree_operation;
	u_int32_t files_moved = 0;
	u_int32_t prev_blocksmoved;
	int i;

	fcb = VTOF(hfsmp->hfs_attribute_vp);
	/* Store the value to print total blocks moved by this function in end */
	prev_blocksmoved = hfsmp->hfs_resize_blocksmoved;

	if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) {
		return ENOMEM;
	}
	bzero(iterator, sizeof(*iterator));
	key = (HFSPlusAttrKey *)&iterator->key;
	btdata.bufferAddress = &rec;
	btdata.itemSize = sizeof(rec);
	btdata.itemCount = 1;

	need_relocate = false;
	btree_operation = kBTreeFirstRecord;
	/* Traverse the attribute btree to find extent-based EAs to reclaim */
	while (1) {
		/* Shared lock only for reading; hfs_reclaim_xattr takes its
		 * own exclusive locks when it actually moves blocks.
		 */
		lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE, HFS_SHARED_LOCK);
		error = BTIterateRecord(fcb, btree_operation, iterator, &btdata, NULL);
		hfs_systemfile_unlock(hfsmp, lockflags);
		if (error) {
			if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) {
				/* Normal end of traversal, not a failure. */
				error = 0;
			}
			break;
		}
		btree_operation = kBTreeNextRecord;

		/* If the extents of current fileID were already relocated, skip it */
		if (prev_fileid == key->fileID) {
			continue;
		}

		/* Check if any of the extents in the current record need to be relocated */
		need_relocate = false;
		switch(rec.recordType) {
		case kHFSPlusAttrForkData:
			for (i = 0; i < kHFSPlusExtentDensity; i++) {
				if (rec.forkData.theFork.extents[i].blockCount == 0) {
					break;
				}
				if ((rec.forkData.theFork.extents[i].startBlock +
				     rec.forkData.theFork.extents[i].blockCount) > allocLimit) {
					need_relocate = true;
					break;
				}
			}
			break;

		case kHFSPlusAttrExtents:
			for (i = 0; i < kHFSPlusExtentDensity; i++) {
				if (rec.overflowExtents.extents[i].blockCount == 0) {
					break;
				}
				if ((rec.overflowExtents.extents[i].startBlock +
				     rec.overflowExtents.extents[i].blockCount) > allocLimit) {
					need_relocate = true;
					break;
				}
			}
			break;
		};

		/* Continue iterating to next attribute record */
		if (need_relocate == false) {
			continue;
		}

		/* Look up the vnode for corresponding file.  The cnode
		 * will be locked which will ensure that no one modifies
		 * the xattrs when we are relocating them.
		 *
		 * We want to allow open-unlinked files to be moved,
		 * so provide allow_deleted == 1 for hfs_vget().
		 */
		if (hfs_vget(hfsmp, key->fileID, &vp, 0, 1) != 0) {
			continue;
		}

		error = hfs_reclaim_xattr(hfsmp, vp, key->fileID, allocLimit, context);
		hfs_unlock(VTOC(vp));
		vnode_put(vp);
		if (error) {
			printf ("hfs_reclaim_xattrspace: Error relocating xattrs for fileid=%u (error=%d)\n", key->fileID, error);
			break;
		}
		prev_fileid = key->fileID;
		files_moved++;
	}

	if (files_moved) {
		printf("hfs_reclaim_xattrspace: Relocated %u xattr blocks from %u files on \"%s\"\n",
				(hfsmp->hfs_resize_blocksmoved - prev_blocksmoved),
				files_moved, hfsmp->vcbVN);
	}

	kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator));
	return error;
}
6341
6342 /*
6343 * Reclaim blocks from regular files.
6344 *
6345 * This function iterates over all the record in catalog btree looking
6346 * for files with extents that overlap into the space we're trying to
6347 * free up. If a file extent requires relocation, it looks up the vnode
6348 * and calls function to relocate the data.
6349 *
6350 * Returns:
6351 * Zero on success, non-zero on failure.
6352 */
/* See the block comment above for the full contract.  Briefly: scan the
 * catalog B-tree for file records whose extents overlap the reclaim area
 * and relocate each affected fork via hfs_reclaim_file().
 */
static int
hfs_reclaim_filespace(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context)
{
	int error;
	FCB *fcb;
	struct BTreeIterator *iterator = NULL;
	struct FSBufferDescriptor btdata;
	int btree_operation;
	int lockflags;
	struct HFSPlusCatalogFile filerec;
	struct vnode *vp;
	struct vnode *rvp;
	struct filefork *datafork;
	u_int32_t files_moved = 0;
	u_int32_t prev_blocksmoved;

	fcb = VTOF(hfsmp->hfs_catalog_vp);
	/* Store the value to print total blocks moved by this function at the end */
	prev_blocksmoved = hfsmp->hfs_resize_blocksmoved;

	if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) {
		return ENOMEM;
	}
	bzero(iterator, sizeof(*iterator));

	btdata.bufferAddress = &filerec;
	btdata.itemSize = sizeof(filerec);
	btdata.itemCount = 1;

	btree_operation = kBTreeFirstRecord;
	while (1) {
		/* Shared lock only for reading the catalog record; the
		 * relocation path takes its own exclusive locks.
		 */
		lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
		error = BTIterateRecord(fcb, btree_operation, iterator, &btdata, NULL);
		hfs_systemfile_unlock(hfsmp, lockflags);
		if (error) {
			if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) {
				/* Normal end of traversal, not a failure. */
				error = 0;
			}
			break;
		}
		btree_operation = kBTreeNextRecord;

		if (filerec.recordType != kHFSPlusFileRecord) {
			continue;
		}

		/* Check if any of the extents require relocation */
		if (hfs_file_extent_overlaps(hfsmp, allocLimit, &filerec) == false) {
			continue;
		}

		/* We want to allow open-unlinked files to be moved, so allow_deleted == 1 */
		if (hfs_vget(hfsmp, filerec.fileID, &vp, 0, 1) != 0) {
			continue;
		}

		/* If data fork exists or item is a directory hard link, relocate blocks */
		datafork = VTOF(vp);
		if ((datafork && datafork->ff_blocks > 0) || vnode_isdir(vp)) {
			error = hfs_reclaim_file(hfsmp, vp, filerec.fileID,
					kHFSDataForkType, allocLimit, context);
			if (error) {
				printf ("hfs_reclaimspace: Error reclaiming datafork blocks of fileid=%u (error=%d)\n", filerec.fileID, error);
				hfs_unlock(VTOC(vp));
				vnode_put(vp);
				break;
			}
		}

		/* If resource fork exists or item is a directory hard link, relocate blocks */
		if (((VTOC(vp)->c_blocks - (datafork ? datafork->ff_blocks : 0)) > 0) || vnode_isdir(vp)) {
			if (vnode_isdir(vp)) {
				/* Resource fork vnode lookup is invalid for directory hard link.
				 * So we fake data fork vnode as resource fork vnode.
				 */
				rvp = vp;
			} else {
				error = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE, FALSE);
				if (error) {
					printf ("hfs_reclaimspace: Error looking up rvp for fileid=%u (error=%d)\n", filerec.fileID, error);
					hfs_unlock(VTOC(vp));
					vnode_put(vp);
					break;
				}
				/* Defer the rsrc vnode_put to cnode reclaim. */
				VTOC(rvp)->c_flag |= C_NEED_RVNODE_PUT;
			}

			error = hfs_reclaim_file(hfsmp, rvp, filerec.fileID,
					kHFSResourceForkType, allocLimit, context);
			if (error) {
				printf ("hfs_reclaimspace: Error reclaiming rsrcfork blocks of fileid=%u (error=%d)\n", filerec.fileID, error);
				hfs_unlock(VTOC(vp));
				vnode_put(vp);
				break;
			}
		}

		/* The file forks were relocated successfully, now drop the
		 * cnode lock and vnode reference, and continue iterating to
		 * next catalog record.
		 */
		hfs_unlock(VTOC(vp));
		vnode_put(vp);
		files_moved++;
	}

	if (files_moved) {
		printf("hfs_reclaim_filespace: Relocated %u blocks from %u files on \"%s\"\n",
				(hfsmp->hfs_resize_blocksmoved - prev_blocksmoved),
				files_moved, hfsmp->vcbVN);
	}

	kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator));
	return error;
}
6468
6469 /*
6470 * Reclaim space at the end of a file system.
6471 *
6472 * Inputs -
6473 * allocLimit - start block of the space being reclaimed
6474 * reclaimblks - number of allocation blocks to reclaim
6475 */
static int
hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t allocLimit, u_int32_t reclaimblks, vfs_context_t context)
{
	int error = 0;

	/*
	 * Driver for the shrink path: relocates every allocated block that
	 * lies at or beyond allocLimit.  System files are moved first, then
	 * the journal, then ordinary file extents, then extent-based xattrs.
	 * Any failure aborts the whole operation and is returned to the caller.
	 *
	 * Preflight the bitmap to find out total number of blocks that need
	 * relocation (stored in hfs_resize_totalblocks; used by
	 * hfs_resize_progress() to report percentage complete).
	 *
	 * Note: Since allocLimit is set to the location of new alternate volume
	 * header, the check below does not account for blocks allocated for old
	 * alternate volume header.
	 */
	error = hfs_count_allocated(hfsmp, allocLimit, reclaimblks, &(hfsmp->hfs_resize_totalblocks));
	if (error) {
		printf ("hfs_reclaimspace: Unable to determine total blocks to reclaim error=%d\n", error);
		return error;
	}
	if (hfs_resize_debug) {
		printf ("hfs_reclaimspace: Total number of blocks to reclaim = %u\n", hfsmp->hfs_resize_totalblocks);
	}

	/* Relocate extents of the Allocation file if they're in the way. */
	error = hfs_reclaim_file(hfsmp, hfsmp->hfs_allocation_vp, kHFSAllocationFileID,
				 kHFSDataForkType, allocLimit, context);
	if (error) {
		printf("hfs_reclaimspace: reclaim allocation file returned %d\n", error);
		return error;
	}

	/* Relocate extents of the Extents B-tree if they're in the way. */
	error = hfs_reclaim_file(hfsmp, hfsmp->hfs_extents_vp, kHFSExtentsFileID,
				 kHFSDataForkType, allocLimit, context);
	if (error) {
		printf("hfs_reclaimspace: reclaim extents b-tree returned %d\n", error);
		return error;
	}

	/* Relocate extents of the Catalog B-tree if they're in the way. */
	error = hfs_reclaim_file(hfsmp, hfsmp->hfs_catalog_vp, kHFSCatalogFileID,
				 kHFSDataForkType, allocLimit, context);
	if (error) {
		printf("hfs_reclaimspace: reclaim catalog b-tree returned %d\n", error);
		return error;
	}

	/* Relocate extents of the Attributes B-tree if they're in the way. */
	error = hfs_reclaim_file(hfsmp, hfsmp->hfs_attribute_vp, kHFSAttributesFileID,
				 kHFSDataForkType, allocLimit, context);
	if (error) {
		printf("hfs_reclaimspace: reclaim attribute b-tree returned %d\n", error);
		return error;
	}

	/* Relocate extents of the Startup File if there is one and they're in the way. */
	error = hfs_reclaim_file(hfsmp, hfsmp->hfs_startup_vp, kHFSStartupFileID,
				 kHFSDataForkType, allocLimit, context);
	if (error) {
		printf("hfs_reclaimspace: reclaim startup file returned %d\n", error);
		return error;
	}

	/*
	 * We need to make sure the alternate volume header gets flushed if we moved
	 * any extents in the volume header. But we need to do that before
	 * shrinking the size of the volume, or else the journal code will panic
	 * with an invalid (too large) block number.
	 *
	 * Note that blks_moved will be set if ANY extent was moved, even
	 * if it was just an overflow extent. In this case, the journal_flush isn't
	 * strictly required, but shouldn't hurt.
	 */
	if (hfsmp->hfs_resize_blocksmoved) {
		hfs_journal_flush(hfsmp, FALSE);
	}

	/* Relocate journal file blocks if they're in the way. */
	error = hfs_reclaim_journal_file(hfsmp, allocLimit, context);
	if (error) {
		printf("hfs_reclaimspace: hfs_reclaim_journal_file failed (%d)\n", error);
		return error;
	}

	/* Relocate journal info block blocks if they're in the way. */
	error = hfs_reclaim_journal_info_block(hfsmp, allocLimit, context);
	if (error) {
		printf("hfs_reclaimspace: hfs_reclaim_journal_info_block failed (%d)\n", error);
		return error;
	}

	/* Reclaim extents from catalog file records */
	error = hfs_reclaim_filespace(hfsmp, allocLimit, context);
	if (error) {
		printf ("hfs_reclaimspace: hfs_reclaim_filespace returned error=%d\n", error);
		return error;
	}

	/* Reclaim extents from extent-based extended attributes, if any */
	error = hfs_reclaim_xattrspace(hfsmp, allocLimit, context);
	if (error) {
		printf ("hfs_reclaimspace: hfs_reclaim_xattrspace returned error=%d\n", error);
		return error;
	}

	return error;
}
6582
6583
6584 /*
6585 * Check if there are any extents (including overflow extents) that overlap
6586 * into the disk space that is being reclaimed.
6587 *
6588 * Output -
6589 * true - One of the extents need to be relocated
6590 * false - No overflow extents need to be relocated, or there was an error
6591 */
static int
hfs_file_extent_overlaps(struct hfsmount *hfsmp, u_int32_t allocLimit, struct HFSPlusCatalogFile *filerec)
{
	struct BTreeIterator * iterator = NULL;
	struct FSBufferDescriptor btdata;
	HFSPlusExtentRecord extrec;
	HFSPlusExtentKey *extkeyptr;
	FCB *fcb;
	int overlapped = false;
	int i, j;
	int error;
	int lockflags = 0;
	u_int32_t endblock;

	/*
	 * First scan the eight inline extent descriptors of each fork in the
	 * catalog record.  An extent whose end block exceeds allocLimit lies
	 * (at least partly) in the space being reclaimed.
	 */

	/* Check if data fork overlaps the target space */
	for (i = 0; i < kHFSPlusExtentDensity; ++i) {
		if (filerec->dataFork.extents[i].blockCount == 0) {
			break;
		}
		endblock = filerec->dataFork.extents[i].startBlock +
			filerec->dataFork.extents[i].blockCount;
		if (endblock > allocLimit) {
			overlapped = true;
			goto out;
		}
	}

	/* Check if resource fork overlaps the target space */
	for (j = 0; j < kHFSPlusExtentDensity; ++j) {
		if (filerec->resourceFork.extents[j].blockCount == 0) {
			break;
		}
		endblock = filerec->resourceFork.extents[j].startBlock +
			filerec->resourceFork.extents[j].blockCount;
		if (endblock > allocLimit) {
			overlapped = true;
			goto out;
		}
	}

	/* Return back if there are no overflow extents for this file.
	 * Only a fork whose inline record is completely full (all eight
	 * descriptors used) can have records in the extents overflow B-tree.
	 */
	if ((i < kHFSPlusExtentDensity) && (j < kHFSPlusExtentDensity)) {
		goto out;
	}

	/* NOTE(review): allocation failure is reported as "no overlap",
	 * consistent with this function's contract of returning false on error.
	 */
	if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) {
		return 0;
	}
	bzero(iterator, sizeof(*iterator));

	/* Build a search key for the first overflow extent record of this
	 * file: fork type 0 (data) and startBlock 0 sort before any real key
	 * with the same fileID.
	 */
	extkeyptr = (HFSPlusExtentKey *)&iterator->key;
	extkeyptr->keyLength = kHFSPlusExtentKeyMaximumLength;
	extkeyptr->forkType = 0;
	extkeyptr->fileID = filerec->fileID;
	extkeyptr->startBlock = 0;

	btdata.bufferAddress = &extrec;
	btdata.itemSize = sizeof(extrec);
	btdata.itemCount = 1;

	fcb = VTOF(hfsmp->hfs_extents_vp);

	/* Shared lock is enough: we only read the extents B-tree here. */
	lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_SHARED_LOCK);

	/* This will position the iterator just before the first overflow
	 * extent record for given fileID. It will always return btNotFound,
	 * so we special case the error code.
	 */
	error = BTSearchRecord(fcb, iterator, &btdata, NULL, iterator);
	if (error && (error != btNotFound)) {
		goto out;
	}

	/* BTIterateRecord() might return error if the btree is empty, and
	 * therefore we return that the extent does not overflow to the caller
	 */
	error = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL);
	while (error == 0) {
		/* Stop when we encounter a different file. */
		if (extkeyptr->fileID != filerec->fileID) {
			break;
		}
		/* Check if any of the forks exist in the target space. */
		for (i = 0; i < kHFSPlusExtentDensity; ++i) {
			if (extrec[i].blockCount == 0) {
				break;
			}
			endblock = extrec[i].startBlock + extrec[i].blockCount;
			if (endblock > allocLimit) {
				overlapped = true;
				goto out;
			}
		}
		/* Look for more records. */
		error = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL);
	}

out:
	/* lockflags is 0 unless the systemfile lock was actually taken. */
	if (lockflags) {
		hfs_systemfile_unlock(hfsmp, lockflags);
	}
	if (iterator) {
		kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator));
	}
	return overlapped;
}
6697
6698
6699 /*
6700 * Calculate the progress of a file system resize operation.
6701 */
6702 __private_extern__
6703 int
6704 hfs_resize_progress(struct hfsmount *hfsmp, u_int32_t *progress)
6705 {
6706 if ((hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) == 0) {
6707 return (ENXIO);
6708 }
6709
6710 if (hfsmp->hfs_resize_totalblocks > 0) {
6711 *progress = (u_int32_t)((hfsmp->hfs_resize_blocksmoved * 100ULL) / hfsmp->hfs_resize_totalblocks);
6712 } else {
6713 *progress = 0;
6714 }
6715
6716 return (0);
6717 }
6718
6719
6720 /*
6721 * Creates a UUID from a unique "name" in the HFS UUID Name space.
6722 * See version 3 UUID.
6723 */
6724 static void
6725 hfs_getvoluuid(struct hfsmount *hfsmp, uuid_t result)
6726 {
6727 MD5_CTX md5c;
6728 uint8_t rawUUID[8];
6729
6730 ((uint32_t *)rawUUID)[0] = hfsmp->vcbFndrInfo[6];
6731 ((uint32_t *)rawUUID)[1] = hfsmp->vcbFndrInfo[7];
6732
6733 MD5Init( &md5c );
6734 MD5Update( &md5c, HFS_UUID_NAMESPACE_ID, sizeof( uuid_t ) );
6735 MD5Update( &md5c, rawUUID, sizeof (rawUUID) );
6736 MD5Final( result, &md5c );
6737
6738 result[6] = 0x30 | ( result[6] & 0x0F );
6739 result[8] = 0x80 | ( result[8] & 0x3F );
6740 }
6741
6742 /*
6743 * Get file system attributes.
6744 */
static int
hfs_vfs_getattr(struct mount *mp, struct vfs_attr *fsap, __unused vfs_context_t context)
{
#define HFS_ATTR_CMN_VALIDMASK (ATTR_CMN_VALIDMASK & ~(ATTR_CMN_NAMEDATTRCOUNT | ATTR_CMN_NAMEDATTRLIST))
#define HFS_ATTR_FILE_VALIDMASK (ATTR_FILE_VALIDMASK & ~(ATTR_FILE_FILETYPE | ATTR_FILE_FORKCOUNT | ATTR_FILE_FORKLIST))
#define HFS_ATTR_CMN_VOL_VALIDMASK (ATTR_CMN_VALIDMASK & ~(ATTR_CMN_NAMEDATTRCOUNT | ATTR_CMN_NAMEDATTRLIST | ATTR_CMN_ACCTIME))

	ExtendedVCB *vcb = VFSTOVCB(mp);
	struct hfsmount *hfsmp = VFSTOHFS(mp);
	u_int32_t freeCNIDs;

	/* CNIDs are allocated sequentially; what's left before 32-bit wrap. */
	freeCNIDs = (u_int32_t)0xFFFFFFFF - (u_int32_t)hfsmp->vcbNxtCNID;

	/* Object, block, and size statistics straight from the VCB. */
	VFSATTR_RETURN(fsap, f_objcount, (u_int64_t)hfsmp->vcbFilCnt + (u_int64_t)hfsmp->vcbDirCnt);
	VFSATTR_RETURN(fsap, f_filecount, (u_int64_t)hfsmp->vcbFilCnt);
	VFSATTR_RETURN(fsap, f_dircount, (u_int64_t)hfsmp->vcbDirCnt);
	VFSATTR_RETURN(fsap, f_maxobjcount, (u_int64_t)0xFFFFFFFF);
	VFSATTR_RETURN(fsap, f_iosize, (size_t)cluster_max_io_size(mp, 0));
	VFSATTR_RETURN(fsap, f_blocks, (u_int64_t)hfsmp->totalBlocks);
	VFSATTR_RETURN(fsap, f_bfree, (u_int64_t)hfs_freeblks(hfsmp, 0));
	VFSATTR_RETURN(fsap, f_bavail, (u_int64_t)hfs_freeblks(hfsmp, 1));
	VFSATTR_RETURN(fsap, f_bsize, (u_int32_t)vcb->blockSize);
	/* XXX needs clarification */
	VFSATTR_RETURN(fsap, f_bused, hfsmp->totalBlocks - hfs_freeblks(hfsmp, 1));
	/* Maximum files is constrained by total blocks. */
	VFSATTR_RETURN(fsap, f_files, (u_int64_t)(hfsmp->totalBlocks - 2));
	VFSATTR_RETURN(fsap, f_ffree, MIN((u_int64_t)freeCNIDs, (u_int64_t)hfs_freeblks(hfsmp, 1)));

	fsap->f_fsid.val[0] = hfsmp->hfs_raw_dev;
	fsap->f_fsid.val[1] = vfs_typenum(mp);
	VFSATTR_SET_SUPPORTED(fsap, f_fsid);

	VFSATTR_RETURN(fsap, f_signature, vcb->vcbSigWord);
	VFSATTR_RETURN(fsap, f_carbon_fsid, 0);

	if (VFSATTR_IS_ACTIVE(fsap, f_capabilities)) {
		vol_capabilities_attr_t *cap;
	
		cap = &fsap->f_capabilities;

		/* capabilities[] = what this mounted volume supports now;
		 * valid[] = which bits of capabilities[] are meaningful.
		 */
		if (hfsmp->hfs_flags & HFS_STANDARD) {
			/* Plain (pre-Plus) HFS: no links, journal, etc. */
			cap->capabilities[VOL_CAPABILITIES_FORMAT] =
				VOL_CAP_FMT_PERSISTENTOBJECTIDS |
				VOL_CAP_FMT_CASE_PRESERVING |
				VOL_CAP_FMT_FAST_STATFS |
				VOL_CAP_FMT_HIDDEN_FILES |
				VOL_CAP_FMT_PATH_FROM_ID;
		} else {
			cap->capabilities[VOL_CAPABILITIES_FORMAT] =
				VOL_CAP_FMT_PERSISTENTOBJECTIDS |
				VOL_CAP_FMT_SYMBOLICLINKS |
				VOL_CAP_FMT_HARDLINKS |
				VOL_CAP_FMT_JOURNAL |
				VOL_CAP_FMT_ZERO_RUNS |
				/* Journal/case-sensitivity reported per-mount. */
				(hfsmp->jnl ? VOL_CAP_FMT_JOURNAL_ACTIVE : 0) |
				(hfsmp->hfs_flags & HFS_CASE_SENSITIVE ? VOL_CAP_FMT_CASE_SENSITIVE : 0) |
				VOL_CAP_FMT_CASE_PRESERVING |
				VOL_CAP_FMT_FAST_STATFS |
				VOL_CAP_FMT_2TB_FILESIZE |
				VOL_CAP_FMT_HIDDEN_FILES |
#if HFS_COMPRESSION
				VOL_CAP_FMT_PATH_FROM_ID |
				VOL_CAP_FMT_DECMPFS_COMPRESSION;
#else
				VOL_CAP_FMT_PATH_FROM_ID;
#endif
		}
		cap->capabilities[VOL_CAPABILITIES_INTERFACES] =
			VOL_CAP_INT_SEARCHFS |
			VOL_CAP_INT_ATTRLIST |
			VOL_CAP_INT_NFSEXPORT |
			VOL_CAP_INT_READDIRATTR |
			VOL_CAP_INT_EXCHANGEDATA |
			VOL_CAP_INT_ALLOCATE |
			VOL_CAP_INT_VOL_RENAME |
			VOL_CAP_INT_ADVLOCK |
			VOL_CAP_INT_FLOCK |
#if NAMEDSTREAMS
			VOL_CAP_INT_EXTENDED_ATTR |
			VOL_CAP_INT_NAMEDSTREAMS;
#else
			VOL_CAP_INT_EXTENDED_ATTR;
#endif
		cap->capabilities[VOL_CAPABILITIES_RESERVED1] = 0;
		cap->capabilities[VOL_CAPABILITIES_RESERVED2] = 0;

		cap->valid[VOL_CAPABILITIES_FORMAT] =
			VOL_CAP_FMT_PERSISTENTOBJECTIDS |
			VOL_CAP_FMT_SYMBOLICLINKS |
			VOL_CAP_FMT_HARDLINKS |
			VOL_CAP_FMT_JOURNAL |
			VOL_CAP_FMT_JOURNAL_ACTIVE |
			VOL_CAP_FMT_NO_ROOT_TIMES |
			VOL_CAP_FMT_SPARSE_FILES |
			VOL_CAP_FMT_ZERO_RUNS |
			VOL_CAP_FMT_CASE_SENSITIVE |
			VOL_CAP_FMT_CASE_PRESERVING |
			VOL_CAP_FMT_FAST_STATFS |
			VOL_CAP_FMT_2TB_FILESIZE |
			VOL_CAP_FMT_OPENDENYMODES |
			VOL_CAP_FMT_HIDDEN_FILES |
#if HFS_COMPRESSION
			VOL_CAP_FMT_PATH_FROM_ID |
			VOL_CAP_FMT_DECMPFS_COMPRESSION;
#else
			VOL_CAP_FMT_PATH_FROM_ID;
#endif
		/* COPYFILE and MANLOCK appear in valid[] but not in
		 * capabilities[] above: declared known-but-unsupported.
		 */
		cap->valid[VOL_CAPABILITIES_INTERFACES] =
			VOL_CAP_INT_SEARCHFS |
			VOL_CAP_INT_ATTRLIST |
			VOL_CAP_INT_NFSEXPORT |
			VOL_CAP_INT_READDIRATTR |
			VOL_CAP_INT_EXCHANGEDATA |
			VOL_CAP_INT_COPYFILE |
			VOL_CAP_INT_ALLOCATE |
			VOL_CAP_INT_VOL_RENAME |
			VOL_CAP_INT_ADVLOCK |
			VOL_CAP_INT_FLOCK |
			VOL_CAP_INT_MANLOCK |
#if NAMEDSTREAMS
			VOL_CAP_INT_EXTENDED_ATTR |
			VOL_CAP_INT_NAMEDSTREAMS;
#else
			VOL_CAP_INT_EXTENDED_ATTR;
#endif
		cap->valid[VOL_CAPABILITIES_RESERVED1] = 0;
		cap->valid[VOL_CAPABILITIES_RESERVED2] = 0;
		VFSATTR_SET_SUPPORTED(fsap, f_capabilities);
	}
	if (VFSATTR_IS_ACTIVE(fsap, f_attributes)) {
		vol_attributes_attr_t *attrp = &fsap->f_attributes;

		attrp->validattr.commonattr = HFS_ATTR_CMN_VOL_VALIDMASK;
		attrp->validattr.volattr = ATTR_VOL_VALIDMASK & ~ATTR_VOL_INFO;
		attrp->validattr.dirattr = ATTR_DIR_VALIDMASK;
		attrp->validattr.fileattr = HFS_ATTR_FILE_VALIDMASK;
		attrp->validattr.forkattr = 0;

		attrp->nativeattr.commonattr = HFS_ATTR_CMN_VOL_VALIDMASK;
		attrp->nativeattr.volattr = ATTR_VOL_VALIDMASK & ~ATTR_VOL_INFO;
		attrp->nativeattr.dirattr = ATTR_DIR_VALIDMASK;
		attrp->nativeattr.fileattr = HFS_ATTR_FILE_VALIDMASK;
		attrp->nativeattr.forkattr = 0;
		VFSATTR_SET_SUPPORTED(fsap, f_attributes);
	}
	/* Volume timestamps (seconds resolution only). */
	fsap->f_create_time.tv_sec = hfsmp->hfs_itime;
	fsap->f_create_time.tv_nsec = 0;
	VFSATTR_SET_SUPPORTED(fsap, f_create_time);
	fsap->f_modify_time.tv_sec = hfsmp->vcbLsMod;
	fsap->f_modify_time.tv_nsec = 0;
	VFSATTR_SET_SUPPORTED(fsap, f_modify_time);

	fsap->f_backup_time.tv_sec = hfsmp->vcbVolBkUp;
	fsap->f_backup_time.tv_nsec = 0;
	VFSATTR_SET_SUPPORTED(fsap, f_backup_time);
	if (VFSATTR_IS_ACTIVE(fsap, f_fssubtype)) {
		u_int16_t subtype = 0;

		/*
		 * Subtypes (flavors) for HFS
		 *	0:	Mac OS Extended
		 *	1:	Mac OS Extended (Journaled)
		 *	2:	Mac OS Extended (Case Sensitive)
		 *	3:	Mac OS Extended (Case Sensitive, Journaled)
		 *	4 - 127:	Reserved
		 *	128:	Mac OS Standard
		 *
		 */
		if (hfsmp->hfs_flags & HFS_STANDARD) {
			subtype = HFS_SUBTYPE_STANDARDHFS;
		} else /* HFS Plus */ {
			if (hfsmp->jnl)
				subtype |= HFS_SUBTYPE_JOURNALED;
			if (hfsmp->hfs_flags & HFS_CASE_SENSITIVE)
				subtype |= HFS_SUBTYPE_CASESENSITIVE;
		}
		fsap->f_fssubtype = subtype;
		VFSATTR_SET_SUPPORTED(fsap, f_fssubtype);
	}

	if (VFSATTR_IS_ACTIVE(fsap, f_vol_name)) {
		strlcpy(fsap->f_vol_name, (char *) hfsmp->vcbVN, MAXPATHLEN);
		VFSATTR_SET_SUPPORTED(fsap, f_vol_name);
	}
	if (VFSATTR_IS_ACTIVE(fsap, f_uuid)) {
		hfs_getvoluuid(hfsmp, fsap->f_uuid);
		VFSATTR_SET_SUPPORTED(fsap, f_uuid);
	}
	return (0);
}
6935
6936 /*
6937 * Perform a volume rename. Requires the FS' root vp.
6938 */
static int
hfs_rename_volume(struct vnode *vp, const char *name, proc_t p)
{
	ExtendedVCB *vcb = VTOVCB(vp);
	struct cnode *cp = VTOC(vp);
	struct hfsmount *hfsmp = VTOHFS(vp);
	struct cat_desc to_desc;
	struct cat_desc todir_desc;
	struct cat_desc new_desc;
	cat_cookie_t cookie;
	int lockflags;
	int error = 0;
	char converted_volname[256];
	size_t volname_length = 0;
	size_t conv_volname_length = 0;


	/*
	 * Ignore attempts to rename a volume to a zero-length name.
	 */
	if (name[0] == 0)
		return(0);

	bzero(&to_desc, sizeof(to_desc));
	bzero(&todir_desc, sizeof(todir_desc));
	bzero(&new_desc, sizeof(new_desc));
	bzero(&cookie, sizeof(cookie));

	/* Destination "directory": the root parent, where the volume's
	 * root folder record (whose name IS the volume name) lives.
	 */
	todir_desc.cd_parentcnid = kHFSRootParentID;
	todir_desc.cd_cnid = kHFSRootFolderID;
	todir_desc.cd_flags = CD_ISDIR;

	to_desc.cd_nameptr = (const u_int8_t *)name;
	to_desc.cd_namelen = strlen(name);
	to_desc.cd_parentcnid = kHFSRootParentID;
	to_desc.cd_cnid = cp->c_cnid;
	to_desc.cd_flags = CD_ISDIR;

	/* Lock order: cnode lock -> transaction -> catalog preflight ->
	 * catalog B-tree lock.  Unwind in reverse on every path.
	 */
	if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK)) == 0) {
		if ((error = hfs_start_transaction(hfsmp)) == 0) {
			if ((error = cat_preflight(hfsmp, CAT_RENAME, &cookie, p)) == 0) {
				lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);

				error = cat_rename(hfsmp, &cp->c_desc, &todir_desc, &to_desc, &new_desc);

				/*
				 * If successful, update the name in the VCB, ensure it's terminated.
				 */
				if (!error) {
					strlcpy((char *)vcb->vcbVN, name, sizeof(vcb->vcbVN));
					volname_length = strlen ((const char*)vcb->vcbVN);
#define DKIOCCSSETLVNAME _IOW('d', 198, char[1024])
					/* Send the volume name down to CoreStorage if necessary */
					error = utf8_normalizestr(vcb->vcbVN, volname_length, (u_int8_t*)converted_volname, &conv_volname_length, 256, UTF_PRECOMPOSED);
					if (error == 0) {
						(void) VNOP_IOCTL (hfsmp->hfs_devvp, DKIOCCSSETLVNAME, converted_volname, 0, vfs_context_current());
					}
					/* The CoreStorage notification is best-effort:
					 * deliberately swallow any normalization/ioctl
					 * error so the rename still succeeds.
					 */
					error = 0;
				}

				hfs_systemfile_unlock(hfsmp, lockflags);
				cat_postflight(hfsmp, &cookie, p);

				/* NOTE(review): marking the VCB dirty only on the
				 * *error* path looks inverted (the success path is
				 * the one that changed vcbVN) -- confirm intent.
				 */
				if (error)
					MarkVCBDirty(vcb);
				(void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
			}
			hfs_end_transaction(hfsmp);
		}
		if (!error) {
			/* Release old allocated name buffer */
			if (cp->c_desc.cd_flags & CD_HASBUF) {
				const char *tmp_name = (const char *)cp->c_desc.cd_nameptr;

				cp->c_desc.cd_nameptr = 0;
				cp->c_desc.cd_namelen = 0;
				cp->c_desc.cd_flags &= ~CD_HASBUF;
				vfs_removename(tmp_name);
			}
			/* Update cnode's catalog descriptor */
			replace_desc(cp, &new_desc);
			vcb->volumeNameEncodingHint = new_desc.cd_encoding;
			cp->c_touch_chgtime = TRUE;
		}

		hfs_unlock(cp);
	}

	return(error);
}
7029
7030 /*
 * Set file system attributes.
7032 */
7033 static int
7034 hfs_vfs_setattr(struct mount *mp, struct vfs_attr *fsap, __unused vfs_context_t context)
7035 {
7036 kauth_cred_t cred = vfs_context_ucred(context);
7037 int error = 0;
7038
7039 /*
7040 * Must be superuser or owner of filesystem to change volume attributes
7041 */
7042 if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(mp)->f_owner))
7043 return(EACCES);
7044
7045 if (VFSATTR_IS_ACTIVE(fsap, f_vol_name)) {
7046 vnode_t root_vp;
7047
7048 error = hfs_vfs_root(mp, &root_vp, context);
7049 if (error)
7050 goto out;
7051
7052 error = hfs_rename_volume(root_vp, fsap->f_vol_name, vfs_context_proc(context));
7053 (void) vnode_put(root_vp);
7054 if (error)
7055 goto out;
7056
7057 VFSATTR_SET_SUPPORTED(fsap, f_vol_name);
7058 }
7059
7060 out:
7061 return error;
7062 }
7063
7064 /* If a runtime corruption is detected, set the volume inconsistent
7065 * bit in the volume attributes. The volume inconsistent bit is a persistent
7066 * bit which represents that the volume is corrupt and needs repair.
7067 * The volume inconsistent bit can be set from the kernel when it detects
7068 * runtime corruption or from file system repair utilities like fsck_hfs when
7069 * a repair operation fails. The bit should be cleared only from file system
7070 * verify/repair utility like fsck_hfs when a verify/repair succeeds.
7071 */
7072 void hfs_mark_volume_inconsistent(struct hfsmount *hfsmp)
7073 {
7074 HFS_MOUNT_LOCK(hfsmp, TRUE);
7075 if ((hfsmp->vcbAtrb & kHFSVolumeInconsistentMask) == 0) {
7076 hfsmp->vcbAtrb |= kHFSVolumeInconsistentMask;
7077 MarkVCBDirty(hfsmp);
7078 }
7079 if ((hfsmp->hfs_flags & HFS_READ_ONLY)==0) {
7080 /* Log information to ASL log */
7081 fslog_fs_corrupt(hfsmp->hfs_mp);
7082 printf("hfs: Runtime corruption detected on %s, fsck will be forced on next mount.\n", hfsmp->vcbVN);
7083 }
7084 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
7085 }
7086
7087 /* Replay the journal on the device node provided. Returns zero if
7088 * journal replay succeeded or no journal was supposed to be replayed.
7089 */
7090 static int hfs_journal_replay(vnode_t devvp, vfs_context_t context)
7091 {
7092 int retval = 0;
7093 struct mount *mp = NULL;
7094 struct hfs_mount_args *args = NULL;
7095
7096 /* Replay allowed only on raw devices */
7097 if (!vnode_ischr(devvp) && !vnode_isblk(devvp)) {
7098 retval = EINVAL;
7099 goto out;
7100 }
7101
7102 /* Create dummy mount structures */
7103 MALLOC(mp, struct mount *, sizeof(struct mount), M_TEMP, M_WAITOK);
7104 if (mp == NULL) {
7105 retval = ENOMEM;
7106 goto out;
7107 }
7108 bzero(mp, sizeof(struct mount));
7109 mount_lock_init(mp);
7110
7111 MALLOC(args, struct hfs_mount_args *, sizeof(struct hfs_mount_args), M_TEMP, M_WAITOK);
7112 if (args == NULL) {
7113 retval = ENOMEM;
7114 goto out;
7115 }
7116 bzero(args, sizeof(struct hfs_mount_args));
7117
7118 retval = hfs_mountfs(devvp, mp, args, 1, context);
7119 buf_flushdirtyblks(devvp, TRUE, 0, "hfs_journal_replay");
7120
7121 /* FSYNC the devnode to be sure all data has been flushed */
7122 retval = VNOP_FSYNC(devvp, MNT_WAIT, context);
7123
7124 out:
7125 if (mp) {
7126 mount_lock_destroy(mp);
7127 FREE(mp, M_TEMP);
7128 }
7129 if (args) {
7130 FREE(args, M_TEMP);
7131 }
7132 return retval;
7133 }
7134
7135 /*
7136 * hfs vfs operations.
7137 */
struct vfsops hfs_vfsops = {
	hfs_mount,		/* mount */
	hfs_start,		/* start */
	hfs_unmount,		/* unmount */
	hfs_vfs_root,		/* root */
	hfs_quotactl,		/* quotactl */
	hfs_vfs_getattr,	/* getattr -- was hfs_statfs */
	hfs_sync,		/* sync */
	hfs_vfs_vget,		/* vget */
	hfs_fhtovp,		/* fhtovp (NFS file handle -> vnode) */
	hfs_vptofh,		/* vptofh (vnode -> NFS file handle) */
	hfs_init,		/* init */
	hfs_sysctl,		/* sysctl */
	hfs_vfs_setattr,	/* setattr */
	{NULL}			/* remaining entries unused */
};