]> git.saurik.com Git - apple/xnu.git/blob - bsd/hfs/hfs_vfsops.c
4e5b76b1457d9b84ebf44a1c14dc11869f6af7ce
[apple/xnu.git] / bsd / hfs / hfs_vfsops.c
1 /*
2 * Copyright (c) 1999-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1991, 1993, 1994
30 * The Regents of the University of California. All rights reserved.
31 * (c) UNIX System Laboratories, Inc.
32 * All or some portions of this file are derived from material licensed
33 * to the University of California by American Telephone and Telegraph
34 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35 * the permission of UNIX System Laboratories, Inc.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * hfs_vfsops.c
66 * derived from @(#)ufs_vfsops.c 8.8 (Berkeley) 5/20/95
67 *
68 * (c) Copyright 1997-2002 Apple Computer, Inc. All rights reserved.
69 *
70 * hfs_vfsops.c -- VFS layer for loadable HFS file system.
71 *
72 */
73 #include <sys/param.h>
74 #include <sys/systm.h>
75 #include <sys/kauth.h>
76
77 #include <sys/ubc.h>
78 #include <sys/ubc_internal.h>
79 #include <sys/vnode_internal.h>
80 #include <sys/mount_internal.h>
81 #include <sys/sysctl.h>
82 #include <sys/malloc.h>
83 #include <sys/stat.h>
84 #include <sys/quota.h>
85 #include <sys/disk.h>
86 #include <sys/paths.h>
87 #include <sys/utfconv.h>
88 #include <sys/kdebug.h>
89 #include <sys/fslog.h>
90 #include <sys/ubc.h>
91
92 #include <kern/locks.h>
93
94 #include <vfs/vfs_journal.h>
95
96 #include <miscfs/specfs/specdev.h>
97 #include <hfs/hfs_mount.h>
98
99 #include <libkern/crypto/md5.h>
100 #include <uuid/uuid.h>
101
102 #include "hfs.h"
103 #include "hfs_catalog.h"
104 #include "hfs_cnode.h"
105 #include "hfs_dbg.h"
106 #include "hfs_endian.h"
107 #include "hfs_hotfiles.h"
108 #include "hfs_quota.h"
109 #include "hfs_btreeio.h"
110
111 #include "hfscommon/headers/FileMgrInternal.h"
112 #include "hfscommon/headers/BTreesInternal.h"
113
114 #if CONFIG_PROTECT
115 #include <sys/cprotect.h>
116 #endif
117
118 #if CONFIG_HFS_ALLOC_RBTREE
119 #include "hfscommon/headers/HybridAllocator.h"
120 #endif
121
122 #define HFS_MOUNT_DEBUG 1
123
124 #if HFS_DIAGNOSTIC
125 int hfs_dbg_all = 0;
126 int hfs_dbg_err = 0;
127 #endif
128
129 /* Enable/disable debugging code for live volume resizing */
130 int hfs_resize_debug = 0;
131
132 lck_grp_attr_t * hfs_group_attr;
133 lck_attr_t * hfs_lock_attr;
134 lck_grp_t * hfs_mutex_group;
135 lck_grp_t * hfs_rwlock_group;
136 lck_grp_t * hfs_spinlock_group;
137
138 extern struct vnodeopv_desc hfs_vnodeop_opv_desc;
139 extern struct vnodeopv_desc hfs_std_vnodeop_opv_desc;
140
141 /* not static so we can re-use in hfs_readwrite.c for build_path calls */
142 int hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context);
143
144 static int hfs_changefs(struct mount *mp, struct hfs_mount_args *args);
145 static int hfs_fhtovp(struct mount *mp, int fhlen, unsigned char *fhp, struct vnode **vpp, vfs_context_t context);
146 static int hfs_flushfiles(struct mount *, int, struct proc *);
147 static int hfs_flushMDB(struct hfsmount *hfsmp, int waitfor, int altflush);
148 static int hfs_getmountpoint(struct vnode *vp, struct hfsmount **hfsmpp);
149 static int hfs_init(struct vfsconf *vfsp);
150 static int hfs_vfs_root(struct mount *mp, struct vnode **vpp, vfs_context_t context);
151 static int hfs_quotactl(struct mount *, int, uid_t, caddr_t, vfs_context_t context);
152 static int hfs_start(struct mount *mp, int flags, vfs_context_t context);
153 static int hfs_vptofh(struct vnode *vp, int *fhlenp, unsigned char *fhp, vfs_context_t context);
154 static int hfs_file_extent_overlaps(struct hfsmount *hfsmp, u_int32_t allocLimit, struct HFSPlusCatalogFile *filerec);
155 static int hfs_journal_replay(vnode_t devvp, vfs_context_t context);
156 static int hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t allocLimit, u_int32_t reclaimblks, vfs_context_t context);
157
158 void hfs_initialize_allocator (struct hfsmount *hfsmp);
159 int hfs_teardown_allocator (struct hfsmount *hfsmp);
160
161 int hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t context);
162 int hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, int journal_replay_only, vfs_context_t context);
163 int hfs_reload(struct mount *mp);
164 int hfs_statfs(struct mount *mp, register struct vfsstatfs *sbp, vfs_context_t context);
165 int hfs_sync(struct mount *mp, int waitfor, vfs_context_t context);
166 int hfs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp,
167 user_addr_t newp, size_t newlen, vfs_context_t context);
168 int hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context);
169
170 /*
171 * Called by vfs_mountroot when mounting HFS Plus as root.
172 */
173
174 int
175 hfs_mountroot(mount_t mp, vnode_t rvp, vfs_context_t context)
176 {
177 struct hfsmount *hfsmp;
178 ExtendedVCB *vcb;
179 struct vfsstatfs *vfsp;
180 int error;
181
182 if ((error = hfs_mountfs(rvp, mp, NULL, 0, context))) {
183 if (HFS_MOUNT_DEBUG) {
184 printf("hfs_mountroot: hfs_mountfs returned %d, rvp (%p) name (%s) \n",
185 error, rvp, (rvp->v_name ? rvp->v_name : "unknown device"));
186 }
187 return (error);
188 }
189
190 /* Init hfsmp */
191 hfsmp = VFSTOHFS(mp);
192
193 hfsmp->hfs_uid = UNKNOWNUID;
194 hfsmp->hfs_gid = UNKNOWNGID;
195 hfsmp->hfs_dir_mask = (S_IRWXU | S_IRGRP|S_IXGRP | S_IROTH|S_IXOTH); /* 0755 */
196 hfsmp->hfs_file_mask = (S_IRWXU | S_IRGRP|S_IXGRP | S_IROTH|S_IXOTH); /* 0755 */
197
198 /* Establish the free block reserve. */
199 vcb = HFSTOVCB(hfsmp);
200 vcb->reserveBlocks = ((u_int64_t)vcb->totalBlocks * HFS_MINFREE) / 100;
201 vcb->reserveBlocks = MIN(vcb->reserveBlocks, HFS_MAXRESERVE / vcb->blockSize);
202
203 vfsp = vfs_statfs(mp);
204 (void)hfs_statfs(mp, vfsp, NULL);
205
206 return (0);
207 }
208
209
/*
 * VFS Operations.
 *
 * mount system call
 *
 * 'data' is a user-space pointer to a struct hfs_mount_args; it is
 * copied in first.  Three cases are handled, selected by the mount
 * command flags:
 *   - MNT_UPDATE | MNT_RELOAD: re-read in-core data after an fsck
 *     (only permitted on a read-only mount).
 *   - MNT_UPDATE: downgrade to read-only and/or upgrade to read-write,
 *     then apply changed mount arguments via hfs_changefs().
 *   - otherwise: a fresh mount via hfs_mountfs().
 *
 * Returns 0 on success or an errno value.
 */

int
hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t context)
{
	struct proc *p = vfs_context_proc(context);
	struct hfsmount *hfsmp = NULL;
	struct hfs_mount_args args;
	int retval = E_NONE;
	u_int32_t cmdflags;

	if ((retval = copyin(data, (caddr_t)&args, sizeof(args)))) {
		if (HFS_MOUNT_DEBUG) {
			printf("hfs_mount: copyin returned %d for fs\n", retval);
		}
		return (retval);
	}
	cmdflags = (u_int32_t)vfs_flags(mp) & MNT_CMDFLAGS;
	if (cmdflags & MNT_UPDATE) {
		hfsmp = VFSTOHFS(mp);

		/* Reload incore data after an fsck. */
		if (cmdflags & MNT_RELOAD) {
			if (vfs_isrdonly(mp)) {
				int error = hfs_reload(mp);
				if (error && HFS_MOUNT_DEBUG) {
					printf("hfs_mount: hfs_reload returned %d on %s \n", error, hfsmp->vcbVN);
				}
				return error;
			}
			else {
				/* Reloading is only safe when nothing can be dirtying the fs. */
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: MNT_RELOAD not supported on rdwr filesystem %s\n", hfsmp->vcbVN);
				}
				return (EINVAL);
			}
		}

		/* Change to a read-only file system. */
		if (((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) &&
		    vfs_isrdonly(mp)) {
			int flags;

			/* Set flag to indicate that a downgrade to read-only
			 * is in progress and therefore block any further
			 * modifications to the file system.
			 */
			hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);
			hfsmp->hfs_flags |= HFS_RDONLY_DOWNGRADE;
			hfsmp->hfs_downgrading_proc = current_thread();
			hfs_unlock_global (hfsmp);

			/* use VFS_SYNC to push out System (btree) files */
			retval = VFS_SYNC(mp, MNT_WAIT, context);
			if (retval && ((cmdflags & MNT_FORCE) == 0)) {
				/* Sync failed and not forced: abandon the downgrade. */
				hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE;
				hfsmp->hfs_downgrading_proc = NULL;
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: VFS_SYNC returned %d during b-tree sync of %s \n", retval, hfsmp->vcbVN);
				}
				goto out;
			}

			flags = WRITECLOSE;
			if (cmdflags & MNT_FORCE)
				flags |= FORCECLOSE;

			if ((retval = hfs_flushfiles(mp, flags, p))) {
				/* Could not close the open-for-write files: abandon the downgrade. */
				hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE;
				hfsmp->hfs_downgrading_proc = NULL;
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: hfs_flushfiles returned %d on %s \n", retval, hfsmp->vcbVN);
				}
				goto out;
			}

			/* mark the volume cleanly unmounted */
			hfsmp->vcbAtrb |= kHFSVolumeUnmountedMask;
			retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
			hfsmp->hfs_flags |= HFS_READ_ONLY;

			/* also get the volume bitmap blocks */
			if (!retval) {
				if (vnode_mount(hfsmp->hfs_devvp) == mp) {
					retval = hfs_fsync(hfsmp->hfs_devvp, MNT_WAIT, 0, p);
				} else {
					/* Device vnode belongs to another mount; take an iocount around the fsync. */
					vnode_get(hfsmp->hfs_devvp);
					retval = VNOP_FSYNC(hfsmp->hfs_devvp, MNT_WAIT, context);
					vnode_put(hfsmp->hfs_devvp);
				}
			}
			if (retval) {
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: FSYNC on devvp returned %d for fs %s\n", retval, hfsmp->vcbVN);
				}
				/* Roll back the downgrade, including the READ_ONLY flag set above. */
				hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE;
				hfsmp->hfs_downgrading_proc = NULL;
				hfsmp->hfs_flags &= ~HFS_READ_ONLY;
				goto out;
			}
			if (hfsmp->jnl) {
				hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);

				journal_close(hfsmp->jnl);
				hfsmp->jnl = NULL;

				// Note: we explicitly don't want to shutdown
				//       access to the jvp because we may need
				//       it later if we go back to being read-write.

				hfs_unlock_global (hfsmp);
			}

#if CONFIG_HFS_ALLOC_RBTREE
			/* The allocator red-black trees are not needed on a read-only mount. */
			(void) hfs_teardown_allocator(hfsmp);
#endif
			hfsmp->hfs_downgrading_proc = NULL;
		}

		/* Change to a writable file system. */
		if (vfs_iswriteupgrade(mp)) {
#if CONFIG_HFS_ALLOC_RBTREE
			thread_t allocator_thread;
#endif

			/*
			 * On inconsistent disks, do not allow read-write mount
			 * unless it is the boot volume being mounted.
			 */
			if (!(vfs_flags(mp) & MNT_ROOTFS) &&
					(hfsmp->vcbAtrb & kHFSVolumeInconsistentMask)) {
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: attempting to mount inconsistent non-root volume %s\n",  (hfsmp->vcbVN));
				}
				retval = EINVAL;
				goto out;
			}

			// If the journal was shut-down previously because we were
			// asked to be read-only, let's start it back up again now

			if (   (HFSTOVCB(hfsmp)->vcbAtrb & kHFSVolumeJournaledMask)
			    && hfsmp->jnl == NULL
			    && hfsmp->jvp != NULL) {
				int jflags;

				if (hfsmp->hfs_flags & HFS_NEED_JNL_RESET) {
					jflags = JOURNAL_RESET;
				} else {
					jflags = 0;
				}

				hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);

				hfsmp->jnl = journal_open(hfsmp->jvp,
						(hfsmp->jnl_start * HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset,
						hfsmp->jnl_size,
						hfsmp->hfs_devvp,
						hfsmp->hfs_logical_block_size,
						jflags,
						0,
						hfs_sync_metadata, hfsmp->hfs_mp);

				/*
				 * Set up the trim callback function so that we can add
				 * recently freed extents to the free extent cache once
				 * the transaction that freed them is written to the
				 * journal on disk.
				 */
				if (hfsmp->jnl)
					journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);

				hfs_unlock_global (hfsmp);

				if (hfsmp->jnl == NULL) {
					if (HFS_MOUNT_DEBUG) {
						printf("hfs_mount: journal_open == NULL; couldn't be opened on %s \n", (hfsmp->vcbVN));
					}
					retval = EINVAL;
					goto out;
				} else {
					hfsmp->hfs_flags &= ~HFS_NEED_JNL_RESET;
				}

			}

			/* See if we need to erase unused Catalog nodes due to <rdar://problem/6947811>. */
			retval = hfs_erase_unused_nodes(hfsmp);
			if (retval != E_NONE) {
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: hfs_erase_unused_nodes returned %d for fs %s\n", retval, hfsmp->vcbVN);
				}
				goto out;
			}

			/* If this mount point was downgraded from read-write
			 * to read-only, clear that information as we are now
			 * moving back to read-write.
			 */
			hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE;
			hfsmp->hfs_downgrading_proc = NULL;

			/* mark the volume dirty (clear clean unmount bit) */
			hfsmp->vcbAtrb &= ~kHFSVolumeUnmountedMask;

			retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
			if (retval != E_NONE) {
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: hfs_flushvolumeheader returned %d for fs %s\n", retval, hfsmp->vcbVN);
				}
				goto out;
			}

			/* Only clear HFS_READ_ONLY after a successful write */
			hfsmp->hfs_flags &= ~HFS_READ_ONLY;


			if (!(hfsmp->hfs_flags & (HFS_READ_ONLY | HFS_STANDARD))) {
				/* Setup private/hidden directories for hardlinks. */
				hfs_privatedir_init(hfsmp, FILE_HARDLINKS);
				hfs_privatedir_init(hfsmp, DIR_HARDLINKS);

				hfs_remove_orphans(hfsmp);

				/*
				 * Allow hot file clustering if conditions allow.
				 */
				if ((hfsmp->hfs_flags & HFS_METADATA_ZONE)  &&
				    ((hfsmp->hfs_flags & HFS_SSD) == 0)) {
					(void) hfs_recording_init(hfsmp);
				}
				/* Force ACLs on HFS+ file systems. */
				if (vfs_extendedsecurity(HFSTOVFS(hfsmp)) == 0) {
					vfs_setextendedsecurity(HFSTOVFS(hfsmp));
				}
			}

#if CONFIG_HFS_ALLOC_RBTREE
			/*
			 * Like the normal mount case, we need to handle creation of the allocation red-black tree
			 * if we're upgrading from read-only to read-write.
			 *
			 * We spawn a thread to create the pair of red-black trees for this volume.
			 * However, in so doing, we must be careful to ensure that if this thread is still
			 * running after mount has finished, it doesn't interfere with an unmount. Specifically,
			 * we'll need to set a bit that indicates we're in progress building the trees here.
			 * Unmount will check for this bit, and then if it's set, mark a corresponding bit that
			 * notifies the tree generation code that an unmount is waiting.  Also, mark the extent
			 * tree flags that the allocator is enabled for use before we spawn the thread that will start
			 * scanning the RB tree.
			 *
			 * Only do this if we're operating on a read-write mount (we wouldn't care for read-only),
			 * which has not previously encountered a bad error on the red-black tree code.  Also, don't
			 * try to re-build a tree that already exists.
			 */

			if (hfsmp->extent_tree_flags == 0) {
				hfsmp->extent_tree_flags |= (HFS_ALLOC_TREEBUILD_INFLIGHT | HFS_ALLOC_RB_ENABLED);
				/* Initialize EOF counter so that the thread can assume it started at initial values */
				hfsmp->offset_block_end = 0;

				InitTree(hfsmp);

				kernel_thread_start ((thread_continue_t) hfs_initialize_allocator , hfsmp, &allocator_thread);
				thread_deallocate(allocator_thread);
			}

#endif
		}

		/* Update file system parameters. */
		retval = hfs_changefs(mp, &args);
		if (retval &&  HFS_MOUNT_DEBUG) {
			printf("hfs_mount: hfs_changefs returned %d for %s\n", retval, hfsmp->vcbVN);
		}

	} else /* not an update request */ {

		/* Set the mount flag to indicate that we support volfs  */
		vfs_setflags(mp, (u_int64_t)((unsigned int)MNT_DOVOLFS));

		retval = hfs_mountfs(devvp, mp, &args, 0, context);
		if (retval && HFS_MOUNT_DEBUG) {
			printf("hfs_mount: hfs_mountfs returned %d\n", retval);
		}
#if CONFIG_PROTECT
		/*
		 * If above mount call was successful, and this mount is content protection
		 * enabled, then verify the on-disk EA on the root to ensure that the filesystem
		 * is of a suitable vintage to allow the mount to proceed.
		 */
		if ((retval == 0) && (cp_fs_protected (mp))) {
			int err = 0;
			struct cp_root_xattr xattr;
			bzero (&xattr, sizeof(struct cp_root_xattr));
			hfsmp = vfs_fsprivate(mp);

			/* go get the EA to get the version information */
			err = cp_getrootxattr (hfsmp, &xattr);
			/* If there was no EA there, then write one out. */
			if (err == ENOATTR) {
				bzero(&xattr, sizeof(struct cp_root_xattr));
				xattr.major_version = CP_CURRENT_MAJOR_VERS;
				xattr.minor_version = CP_CURRENT_MINOR_VERS;
				xattr.flags = 0;

				err = cp_setrootxattr (hfsmp, &xattr);
			}
			/*
			 * For any other error, including having an out of date CP version in the
			 * EA, or for an error out of cp_setrootxattr, deny the mount
			 * and do not proceed further.
			 */
			if (err || xattr.major_version != CP_CURRENT_MAJOR_VERS) {
				/* Deny the mount and tear down. */
				retval = EPERM;
				(void) hfs_unmount (mp, MNT_FORCE, context);
			}
		}
#endif
	}
out:
	if (retval == 0) {
		(void)hfs_statfs(mp, vfs_statfs(mp), context);
	}
	return (retval);
}
541
542
/*
 * Per-mount context handed to hfs_changefs_callback() by vnode_iterate()
 * from hfs_changefs().  The three flags record which mount parameters
 * changed and therefore what per-vnode fixups are needed.
 */
struct hfs_changefs_cargs {
	struct hfsmount *hfsmp;		/* mount whose vnodes are being visited */
	int		namefix;	/* non-zero: text encoding changed, refresh names */
	int		permfix;	/* non-zero: default uid/gid/mask changed */
	int		permswitch;	/* non-zero: MNT_UNKNOWNPERMISSIONS was toggled */
};
549
550 static int
551 hfs_changefs_callback(struct vnode *vp, void *cargs)
552 {
553 ExtendedVCB *vcb;
554 struct cnode *cp;
555 struct cat_desc cndesc;
556 struct cat_attr cnattr;
557 struct hfs_changefs_cargs *args;
558 int lockflags;
559 int error;
560
561 args = (struct hfs_changefs_cargs *)cargs;
562
563 cp = VTOC(vp);
564 vcb = HFSTOVCB(args->hfsmp);
565
566 lockflags = hfs_systemfile_lock(args->hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
567 error = cat_lookup(args->hfsmp, &cp->c_desc, 0, &cndesc, &cnattr, NULL, NULL);
568 hfs_systemfile_unlock(args->hfsmp, lockflags);
569 if (error) {
570 /*
571 * If we couldn't find this guy skip to the next one
572 */
573 if (args->namefix)
574 cache_purge(vp);
575
576 return (VNODE_RETURNED);
577 }
578 /*
579 * Get the real uid/gid and perm mask from disk.
580 */
581 if (args->permswitch || args->permfix) {
582 cp->c_uid = cnattr.ca_uid;
583 cp->c_gid = cnattr.ca_gid;
584 cp->c_mode = cnattr.ca_mode;
585 }
586 /*
587 * If we're switching name converters then...
588 * Remove the existing entry from the namei cache.
589 * Update name to one based on new encoder.
590 */
591 if (args->namefix) {
592 cache_purge(vp);
593 replace_desc(cp, &cndesc);
594
595 if (cndesc.cd_cnid == kHFSRootFolderID) {
596 strlcpy((char *)vcb->vcbVN, (const char *)cp->c_desc.cd_nameptr, NAME_MAX+1);
597 cp->c_desc.cd_encoding = args->hfsmp->hfs_encoding;
598 }
599 } else {
600 cat_releasedesc(&cndesc);
601 }
602 return (VNODE_RETURNED);
603 }
604
/*
 * Change fs mount parameters (the MNT_UPDATE path of hfs_mount).
 *
 * Applies the updatable settings from 'args': timezone, default
 * uid/gid/mask, and (HFS standard volumes only) the text encoding.
 * When permissions defaults or the encoding change, every active vnode
 * is visited via hfs_changefs_callback() to refresh its in-core state.
 * HFS_IN_CHANGEFS is held in hfs_flags for the duration.
 * Returns 0 on success or an errno value.
 */
static int
hfs_changefs(struct mount *mp, struct hfs_mount_args *args)
{
	int retval = 0;
	int namefix, permfix, permswitch;
	struct hfsmount *hfsmp;
	ExtendedVCB *vcb;
	hfs_to_unicode_func_t	get_unicode_func;
	unicode_to_hfs_func_t	get_hfsname_func;
	u_int32_t old_encoding = 0;
	struct hfs_changefs_cargs cargs;
	u_int32_t mount_flags;

	hfsmp = VFSTOHFS(mp);
	vcb = HFSTOVCB(hfsmp);
	mount_flags = (unsigned int)vfs_flags(mp);

	hfsmp->hfs_flags |= HFS_IN_CHANGEFS;

	/* True when MNT_UNKNOWNPERMISSIONS disagrees with the current HFS_UNKNOWN_PERMS state. */
	permswitch = (((hfsmp->hfs_flags & HFS_UNKNOWN_PERMS) &&
	               ((mount_flags & MNT_UNKNOWNPERMISSIONS) == 0)) ||
	              (((hfsmp->hfs_flags & HFS_UNKNOWN_PERMS) == 0) &&
	               (mount_flags & MNT_UNKNOWNPERMISSIONS)));

	/* The root filesystem must operate with actual permissions: */
	if (permswitch && (mount_flags & MNT_ROOTFS) && (mount_flags & MNT_UNKNOWNPERMISSIONS)) {
		vfs_clearflags(mp, (u_int64_t)((unsigned int)MNT_UNKNOWNPERMISSIONS));	/* Just say "No". */
		retval = EINVAL;
		goto exit;
	}
	if (mount_flags & MNT_UNKNOWNPERMISSIONS)
		hfsmp->hfs_flags |= HFS_UNKNOWN_PERMS;
	else
		hfsmp->hfs_flags &= ~HFS_UNKNOWN_PERMS;

	namefix = permfix = 0;

	/*
	 * Tracking of hot files requires up-to-date access times.  So if
	 * access time updates are disabled, we must also disable hot files.
	 */
	if (mount_flags & MNT_NOATIME) {
		(void) hfs_recording_suspend(hfsmp);
	}

	/* Change the timezone (Note: this affects all hfs volumes and hfs+ volume create dates) */
	if (args->hfs_timezone.tz_minuteswest != VNOVAL) {
		gTimeZone = args->hfs_timezone;
	}

	/* Change the default uid, gid and/or mask */
	if ((args->hfs_uid != (uid_t)VNOVAL) && (hfsmp->hfs_uid != args->hfs_uid)) {
		hfsmp->hfs_uid = args->hfs_uid;
		if (vcb->vcbSigWord == kHFSPlusSigWord)
			++permfix;
	}
	if ((args->hfs_gid != (gid_t)VNOVAL) && (hfsmp->hfs_gid != args->hfs_gid)) {
		hfsmp->hfs_gid = args->hfs_gid;
		if (vcb->vcbSigWord == kHFSPlusSigWord)
			++permfix;
	}
	if (args->hfs_mask != (mode_t)VNOVAL) {
		if (hfsmp->hfs_dir_mask != (args->hfs_mask & ALLPERMS)) {
			hfsmp->hfs_dir_mask = args->hfs_mask & ALLPERMS;
			hfsmp->hfs_file_mask = args->hfs_mask & ALLPERMS;
			/* NOXONFILES strips the execute bits from the file mask only. */
			if ((args->flags != VNOVAL) && (args->flags & HFSFSMNT_NOXONFILES))
				hfsmp->hfs_file_mask = (args->hfs_mask & DEFFILEMODE);
			if (vcb->vcbSigWord == kHFSPlusSigWord)
				++permfix;
		}
	}

	/* Change the hfs encoding value (hfs only) */
	if ((vcb->vcbSigWord == kHFSSigWord)	&&
	    (args->hfs_encoding != (u_int32_t)VNOVAL)              &&
	    (hfsmp->hfs_encoding != args->hfs_encoding)) {

		retval = hfs_getconverter(args->hfs_encoding, &get_unicode_func, &get_hfsname_func);
		if (retval)
			goto exit;

		/*
		 * Connect the new hfs_get_unicode converter but leave
		 * the old hfs_get_hfsname converter in place so that
		 * we can lookup existing vnodes to get their correctly
		 * encoded names.
		 *
		 * When we're all finished, we can then connect the new
		 * hfs_get_hfsname converter and release our interest
		 * in the old converters.
		 */
		hfsmp->hfs_get_unicode = get_unicode_func;
		old_encoding = hfsmp->hfs_encoding;
		hfsmp->hfs_encoding = args->hfs_encoding;
		++namefix;
	}

	if (!(namefix || permfix || permswitch))
		goto exit;

	/* XXX 3762912 hack to support HFS filesystem 'owner' */
	if (permfix)
		vfs_setowner(mp,
		    hfsmp->hfs_uid == UNKNOWNUID ? KAUTH_UID_NONE : hfsmp->hfs_uid,
		    hfsmp->hfs_gid == UNKNOWNGID ? KAUTH_GID_NONE : hfsmp->hfs_gid);

	/*
	 * For each active vnode fix things that changed
	 *
	 * Note that we can visit a vnode more than once
	 * and we can race with fsync.
	 *
	 * hfs_changefs_callback will be called for each vnode
	 * hung off of this mount point
	 *
	 * The vnode will be properly referenced and unreferenced
	 * around the callback
	 */
	cargs.hfsmp = hfsmp;
	cargs.namefix = namefix;
	cargs.permfix = permfix;
	cargs.permswitch = permswitch;

	vnode_iterate(mp, 0, hfs_changefs_callback, (void *)&cargs);

	/*
	 * If we're switching name converters we can now
	 * connect the new hfs_get_hfsname converter and
	 * release our interest in the old converters.
	 */
	if (namefix) {
		hfsmp->hfs_get_hfsname = get_hfsname_func;
		vcb->volumeNameEncodingHint = args->hfs_encoding;
		(void) hfs_relconverter(old_encoding);
	}
exit:
	hfsmp->hfs_flags &= ~HFS_IN_CHANGEFS;
	return (retval);
}
745
746
/*
 * Context handed to hfs_reload_callback() by vnode_iterate()
 * from hfs_reload().
 */
struct hfs_reload_cargs {
	struct hfsmount *hfsmp;		/* mount being reloaded */
	int		error;		/* first cat_idlookup error, if any; stops iteration */
};
751
752 static int
753 hfs_reload_callback(struct vnode *vp, void *cargs)
754 {
755 struct cnode *cp;
756 struct hfs_reload_cargs *args;
757 int lockflags;
758
759 args = (struct hfs_reload_cargs *)cargs;
760 /*
761 * flush all the buffers associated with this node
762 */
763 (void) buf_invalidateblks(vp, 0, 0, 0);
764
765 cp = VTOC(vp);
766 /*
767 * Remove any directory hints
768 */
769 if (vnode_isdir(vp))
770 hfs_reldirhints(cp, 0);
771
772 /*
773 * Re-read cnode data for all active vnodes (non-metadata files).
774 */
775 if (!vnode_issystem(vp) && !VNODE_IS_RSRC(vp) && (cp->c_fileid >= kHFSFirstUserCatalogNodeID)) {
776 struct cat_fork *datafork;
777 struct cat_desc desc;
778
779 datafork = cp->c_datafork ? &cp->c_datafork->ff_data : NULL;
780
781 /* lookup by fileID since name could have changed */
782 lockflags = hfs_systemfile_lock(args->hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
783 args->error = cat_idlookup(args->hfsmp, cp->c_fileid, 0, &desc, &cp->c_attr, datafork);
784 hfs_systemfile_unlock(args->hfsmp, lockflags);
785 if (args->error) {
786 return (VNODE_RETURNED_DONE);
787 }
788
789 /* update cnode's catalog descriptor */
790 (void) replace_desc(cp, &desc);
791 }
792 return (VNODE_RETURNED);
793 }
794
/*
 * Reload all incore data for a filesystem (used after running fsck on
 * the root filesystem and finding things to fix).  The filesystem must
 * be mounted read-only.
 *
 * Things to do to update the mount:
 *	invalidate all cached meta-data.
 *	invalidate all inactive vnodes.
 *	invalidate all cached file data.
 *	re-read volume header from disk.
 *	re-load meta-file info (extents, file size).
 *	re-load B-tree header data.
 *	re-read cnode data for all active vnodes.
 *
 * Only supported for HFS Plus volumes; returns EINVAL for HFS standard.
 * Returns 0 on success or an errno value.
 */
int
hfs_reload(struct mount *mountp)
{
	register struct vnode *devvp;
	struct buf *bp;
	int error, i;
	struct hfsmount *hfsmp;
	struct HFSPlusVolumeHeader *vhp;
	ExtendedVCB *vcb;
	struct filefork *forkp;
	struct cat_desc cndesc;
	struct hfs_reload_cargs args;
	daddr64_t priIDSector;

	hfsmp = VFSTOHFS(mountp);
	vcb = HFSTOVCB(hfsmp);

	if (vcb->vcbSigWord == kHFSSigWord)
		return (EINVAL);	/* rooting from HFS is not supported! */

	/*
	 * Invalidate all cached meta-data.
	 */
	devvp = hfsmp->hfs_devvp;
	if (buf_invalidateblks(devvp, 0, 0, 0))
		panic("hfs_reload: dirty1");

	args.hfsmp = hfsmp;
	args.error = 0;
	/*
	 * hfs_reload_callback will be called for each vnode
	 * hung off of this mount point that can't be recycled...
	 * vnode_iterate will recycle those that it can (the VNODE_RELOAD option)
	 * the vnode will be in an 'unbusy' state (VNODE_WAIT) and
	 * properly referenced and unreferenced around the callback
	 */
	vnode_iterate(mountp, VNODE_RELOAD | VNODE_WAIT, hfs_reload_callback, (void *)&args);

	if (args.error)
		return (args.error);

	/*
	 * Re-read VolumeHeader from disk.
	 */
	priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
			HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size));

	error = (int)buf_meta_bread(hfsmp->hfs_devvp,
			HFS_PHYSBLK_ROUNDDOWN(priIDSector, hfsmp->hfs_log_per_phys),
			hfsmp->hfs_physical_block_size, NOCRED, &bp);
	if (error) {
		if (bp != NULL)
			buf_brelse(bp);
		return (error);
	}

	vhp = (HFSPlusVolumeHeader *) (buf_dataptr(bp) + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));

	/* Do a quick sanity check */
	if ((SWAP_BE16(vhp->signature) != kHFSPlusSigWord &&
	     SWAP_BE16(vhp->signature) != kHFSXSigWord) ||
	    (SWAP_BE16(vhp->version) != kHFSPlusVersion &&
	     SWAP_BE16(vhp->version) != kHFSXVersion) ||
	    SWAP_BE32(vhp->blockSize) != vcb->blockSize) {
		buf_brelse(bp);
		return (EIO);
	}

	/* On-disk volume header fields are big-endian; swap into the in-core VCB. */
	vcb->vcbLsMod		= to_bsd_time(SWAP_BE32(vhp->modifyDate));
	vcb->vcbAtrb		= SWAP_BE32 (vhp->attributes);
	vcb->vcbJinfoBlock  = SWAP_BE32(vhp->journalInfoBlock);
	vcb->vcbClpSiz		= SWAP_BE32 (vhp->rsrcClumpSize);
	vcb->vcbNxtCNID		= SWAP_BE32 (vhp->nextCatalogID);
	vcb->vcbVolBkUp		= to_bsd_time(SWAP_BE32(vhp->backupDate));
	vcb->vcbWrCnt		= SWAP_BE32 (vhp->writeCount);
	vcb->vcbFilCnt		= SWAP_BE32 (vhp->fileCount);
	vcb->vcbDirCnt		= SWAP_BE32 (vhp->folderCount);
	HFS_UPDATE_NEXT_ALLOCATION(vcb, SWAP_BE32 (vhp->nextAllocation));
	vcb->totalBlocks	= SWAP_BE32 (vhp->totalBlocks);
	vcb->freeBlocks		= SWAP_BE32 (vhp->freeBlocks);
	vcb->encodingsBitmap	= SWAP_BE64 (vhp->encodingsBitmap);
	bcopy(vhp->finderInfo, vcb->vcbFndrInfo, sizeof(vhp->finderInfo));
	vcb->localCreateDate	= SWAP_BE32 (vhp->createDate); /* hfs+ create date is in local time */

	/*
	 * Re-load meta-file vnode data (extent info, file size, etc).
	 */
	forkp = VTOF((struct vnode *)vcb->extentsRefNum);
	for (i = 0; i < kHFSPlusExtentDensity; i++) {
		forkp->ff_extents[i].startBlock =
			SWAP_BE32 (vhp->extentsFile.extents[i].startBlock);
		forkp->ff_extents[i].blockCount =
			SWAP_BE32 (vhp->extentsFile.extents[i].blockCount);
	}
	forkp->ff_size      = SWAP_BE64 (vhp->extentsFile.logicalSize);
	forkp->ff_blocks    = SWAP_BE32 (vhp->extentsFile.totalBlocks);
	forkp->ff_clumpsize = SWAP_BE32 (vhp->extentsFile.clumpSize);


	forkp = VTOF((struct vnode *)vcb->catalogRefNum);
	for (i = 0; i < kHFSPlusExtentDensity; i++) {
		forkp->ff_extents[i].startBlock	=
			SWAP_BE32 (vhp->catalogFile.extents[i].startBlock);
		forkp->ff_extents[i].blockCount	=
			SWAP_BE32 (vhp->catalogFile.extents[i].blockCount);
	}
	forkp->ff_size      = SWAP_BE64 (vhp->catalogFile.logicalSize);
	forkp->ff_blocks    = SWAP_BE32 (vhp->catalogFile.totalBlocks);
	forkp->ff_clumpsize = SWAP_BE32 (vhp->catalogFile.clumpSize);

	/* The attributes B-tree is optional; only reload it if it is open. */
	if (hfsmp->hfs_attribute_vp) {
		forkp = VTOF(hfsmp->hfs_attribute_vp);
		for (i = 0; i < kHFSPlusExtentDensity; i++) {
			forkp->ff_extents[i].startBlock	=
				SWAP_BE32 (vhp->attributesFile.extents[i].startBlock);
			forkp->ff_extents[i].blockCount	=
				SWAP_BE32 (vhp->attributesFile.extents[i].blockCount);
		}
		forkp->ff_size      = SWAP_BE64 (vhp->attributesFile.logicalSize);
		forkp->ff_blocks    = SWAP_BE32 (vhp->attributesFile.totalBlocks);
		forkp->ff_clumpsize = SWAP_BE32 (vhp->attributesFile.clumpSize);
	}

	forkp = VTOF((struct vnode *)vcb->allocationsRefNum);
	for (i = 0; i < kHFSPlusExtentDensity; i++) {
		forkp->ff_extents[i].startBlock	=
			SWAP_BE32 (vhp->allocationFile.extents[i].startBlock);
		forkp->ff_extents[i].blockCount	=
			SWAP_BE32 (vhp->allocationFile.extents[i].blockCount);
	}
	forkp->ff_size      = SWAP_BE64 (vhp->allocationFile.logicalSize);
	forkp->ff_blocks    = SWAP_BE32 (vhp->allocationFile.totalBlocks);
	forkp->ff_clumpsize = SWAP_BE32 (vhp->allocationFile.clumpSize);

	buf_brelse(bp);
	vhp = NULL;

	/*
	 * Re-load B-tree header data
	 */
	forkp = VTOF((struct vnode *)vcb->extentsRefNum);
	if ( (error = MacToVFSError( BTReloadData((FCB*)forkp) )) )
		return (error);

	forkp = VTOF((struct vnode *)vcb->catalogRefNum);
	if ( (error = MacToVFSError( BTReloadData((FCB*)forkp) )) )
		return (error);

	if (hfsmp->hfs_attribute_vp) {
		forkp = VTOF(hfsmp->hfs_attribute_vp);
		if ( (error = MacToVFSError( BTReloadData((FCB*)forkp) )) )
			return (error);
	}

	/* Reload the volume name */
	if ((error = cat_idlookup(hfsmp, kHFSRootFolderID, 0, &cndesc, NULL, NULL)))
		return (error);
	vcb->volumeNameEncodingHint = cndesc.cd_encoding;
	bcopy(cndesc.cd_nameptr, vcb->vcbVN, min(255, cndesc.cd_namelen));
	cat_releasedesc(&cndesc);

	/* Re-establish private/hidden directories. */
	hfs_privatedir_init(hfsmp, FILE_HARDLINKS);
	hfs_privatedir_init(hfsmp, DIR_HARDLINKS);

	/* In case any volume information changed to trigger a notification */
	hfs_generate_volume_notifications(hfsmp);

	return (0);
}
979
980
981
/*
 * hfs_syncer - thread_call callback that periodically flushes dirty
 * metadata for one mounted HFS volume.
 *
 * arg0 is the volume's struct hfsmount; 'unused' is the second
 * thread_call parameter and is ignored.  Three mutually exclusive
 * cases are handled:
 *   1. Pending write I/O exceeds hfs_max_pending_io: hold off new
 *      transactions, wait for ~2/3 of the backlog to drain, flush,
 *      and re-tune hfs_max_pending_io from the measured drain rate.
 *   2. Enough time has elapsed since the last sync (or a sync was
 *      requested and the volume is quiescent): flush the journal, or
 *      do a full hfs_sync() for non-journaled volumes.
 *   3. Otherwise, if no transactions are active, reschedule ourselves
 *      and return early WITHOUT touching the scheduling counters.
 *
 * On the non-reschedule paths, hfs_sync_scheduled/hfs_sync_incomplete
 * are decremented at the very end so no new callback is queued while a
 * (potentially long) journal_flush() is still running.
 */
static void
hfs_syncer(void *arg0, void *unused)
{
#pragma unused(unused)

	struct hfsmount *hfsmp = arg0;
	clock_sec_t secs;
	clock_usec_t usecs;
	uint32_t delay = HFS_META_DELAY;	/* reschedule interval, in HFS_MILLISEC_SCALE units */
	uint64_t now;				/* wall-clock time in microseconds */
	/* NOTE(review): static, so this single flag is shared by every mounted
	 * volume; it is written below but never read in this function —
	 * TODO confirm whether it is still needed. */
	static int no_max=1;

	clock_get_calendar_microtime(&secs, &usecs);
	now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs;

	//
	// If the amount of pending writes is more than our limit, wait
	// for 2/3 of it to drain and then flush the journal.
	//
	if (hfsmp->hfs_mp->mnt_pending_write_size > hfsmp->hfs_max_pending_io) {
		int counter=0;
		uint64_t pending_io, start, rate = 0;

		no_max = 0;

		hfs_start_transaction(hfsmp);	// so we hold off any new i/o's

		pending_io = hfsmp->hfs_mp->mnt_pending_write_size;

		clock_get_calendar_microtime(&secs, &usecs);
		start = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs;

		/* Poll until the backlog drops below 1/3 of its starting size,
		 * sleeping 10 ticks per iteration, bounded at 500 iterations. */
		while(hfsmp->hfs_mp->mnt_pending_write_size > (pending_io/3) && counter++ < 500) {
			tsleep((caddr_t)hfsmp, PRIBIO, "hfs-wait-for-io-to-drain", 10);
		}

		if (counter >= 500) {
			printf("hfs: timed out waiting for io to drain (%lld)\n", (int64_t)hfsmp->hfs_mp->mnt_pending_write_size);
		}

		if (hfsmp->jnl) {
			journal_flush(hfsmp->jnl, FALSE);
		} else {
			hfs_sync(hfsmp->hfs_mp, MNT_WAIT, vfs_context_kernel());
		}

		clock_get_calendar_microtime(&secs, &usecs);
		now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs;
		hfsmp->hfs_last_sync_time = now;
		if (now != start) {
			/* guard against division by zero when no time elapsed */
			rate = ((pending_io * 1000000ULL) / (now - start));	// yields bytes per second
		}

		hfs_end_transaction(hfsmp);

		//
		// If a reasonable amount of time elapsed then check the
		// i/o rate.  If it's taking less than 1 second or more
		// than 2 seconds, adjust hfs_max_pending_io so that we
		// will allow about 1.5 seconds of i/o to queue up.
		//
		if (((now - start) >= 300000) && (rate != 0)) {
			/* scale = drain time in hundredths of a second */
			uint64_t scale = (pending_io * 100) / rate;

			if (scale < 100 || scale > 200) {
				// set it so that it should take about 1.5 seconds to drain
				hfsmp->hfs_max_pending_io = (rate * 150ULL) / 100ULL;
			}
		}

	} else if ( ((now - hfsmp->hfs_last_sync_time) >= 5000000ULL)
		    || (((now - hfsmp->hfs_last_sync_time) >= 100000LL)
			&& ((now - hfsmp->hfs_last_sync_request_time) >= 100000LL)
			&& (hfsmp->hfs_active_threads == 0)
			&& (hfsmp->hfs_global_lock_nesting == 0))) {

		//
		// Flush the journal if more than 5 seconds elapsed since
		// the last sync OR we have not sync'ed recently and the
		// last sync request time was more than 100 milliseconds
		// ago and no one is in the middle of a transaction right
		// now.  Else we defer the sync and reschedule it.
		//
		if (hfsmp->jnl) {
			/* shared global lock keeps transactions out while we flush */
			hfs_lock_global (hfsmp, HFS_SHARED_LOCK);

			journal_flush(hfsmp->jnl, FALSE);

			hfs_unlock_global (hfsmp);
		} else {
			hfs_sync(hfsmp->hfs_mp, MNT_WAIT, vfs_context_kernel());
		}

		clock_get_calendar_microtime(&secs, &usecs);
		now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs;
		hfsmp->hfs_last_sync_time = now;

	} else if (hfsmp->hfs_active_threads == 0) {
		/* Volume is quiescent but it's too soon to sync: re-arm the timer. */
		uint64_t deadline;

		clock_interval_to_deadline(delay, HFS_MILLISEC_SCALE, &deadline);
		thread_call_enter_delayed(hfsmp->hfs_syncer, deadline);

		// note: we intentionally return early here and do not
		// decrement the sync_scheduled and sync_incomplete
		// variables because we rescheduled the timer.

		return;
	}

	//
	// NOTE: we decrement these *after* we're done the journal_flush() since
	// it can take a significant amount of time and so we don't want more
	// callbacks scheduled until we're done this one.
	//
	OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_scheduled);
	OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_incomplete);
	wakeup((caddr_t)&hfsmp->hfs_sync_incomplete);
}
1101
1102
1103 extern int IOBSDIsMediaEjectable( const char *cdev_name );
1104
1105 /*
1106 * Initialization code for Red-Black Tree Allocator
1107 *
1108 * This function will build the two red-black trees necessary for allocating space
1109 * from the metadata zone as well as normal allocations. Currently, we use
1110 * an advisory read to get most of the data into the buffer cache.
1111 * This function is intended to be run in a separate thread so as not to slow down mount.
1112 *
1113 */
1114
void
hfs_initialize_allocator (struct hfsmount *hfsmp) {

#if CONFIG_HFS_ALLOC_RBTREE
	u_int32_t build_err;
	int lockflags;

	/*
	 * Grab the allocation (bitmap) file lock exclusively; journal
	 * transactions will block until we release it at the bottom.
	 */
	lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);

	/*
	 * GenerateTree expects the bitmap lock to be held on entry.  It may
	 * drop and re-take the lock along the way so other allocations can
	 * proceed, but it always returns with the lock held.  Only one tree
	 * is maintained, so the scan always starts at block 0.
	 */
	build_err = GenerateTree(hfsmp, hfsmp->totalBlocks, &lockflags, 1);
	if (build_err == 0) {
		/* Tree built successfully: mark the offset tree as live. */
		hfsmp->extent_tree_flags |= HFS_ALLOC_RB_ACTIVE;
	}

	/*
	 * The bitmap lock is still held here (see above), so the
	 * TREEBUILD_INFLIGHT bit can be cleared without re-acquiring it.
	 */
	hfsmp->extent_tree_flags &= ~HFS_ALLOC_TREEBUILD_INFLIGHT;
	if (build_err != 0) {
		/* Build failed: wake any waiters on the allocation bitmap lock. */
		wakeup((caddr_t)&hfsmp->extent_tree_flags);
	}

	hfs_systemfile_unlock(hfsmp, lockflags);
#else
#pragma unused (hfsmp)
#endif
}
1157
1158
1159 /*
1160 * Teardown code for the Red-Black Tree allocator.
1161 * This function consolidates the code which serializes with respect
1162 * to a thread that may be potentially still building the tree when we need to begin
1163 * tearing it down. Since the red-black tree may not be live when we enter this function
1164 * we return:
1165 * 1 -> Tree was live.
1166 * 0 -> Tree was not active at time of call.
1167 */
1168
int
hfs_teardown_allocator (struct hfsmount *hfsmp) {
	int tree_was_live = 0;

#if CONFIG_HFS_ALLOC_RBTREE
	int lockflags;

	/*
	 * Serialize against a tree build that may still be in flight:
	 * take the bitmap lock, then sleep until the builder finishes.
	 */
	lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);

	while ((hfsmp->extent_tree_flags & HFS_ALLOC_TREEBUILD_INFLIGHT) != 0) {
		/* Tell the builder a teardown is pending, then wait on the flags. */
		hfsmp->extent_tree_flags |= HFS_ALLOC_TEARDOWN_INFLIGHT;

		lck_rw_sleep(&(VTOC(hfsmp->hfs_allocation_vp))->c_rwlock, LCK_SLEEP_EXCLUSIVE,
		    &hfsmp->extent_tree_flags, THREAD_UNINT);
	}

	if (hfs_isrbtree_active (hfsmp)) {
		/* Tree was live at time of call; tear it down under the bitmap lock. */
		tree_was_live = 1;
		DestroyTrees(hfsmp);
	}

	hfs_systemfile_unlock(hfsmp, lockflags);
#else
#pragma unused (hfsmp)
#endif
	/* 1 -> tree was live, 0 -> tree was not active at time of call */
	return tree_was_live;
}
1207
1208
/*
 * Records whether the root volume was unmounted cleanly on the previous
 * boot.  Set in hfs_mountfs() (from the volume header's
 * kHFSVolumeUnmountedMask bit) when the root volume is mounted, and
 * exported read-only as the vfs.generic.root_unmounted_cleanly sysctl.
 */
static int hfs_root_unmounted_cleanly = 0;

SYSCTL_DECL(_vfs_generic);
SYSCTL_INT(_vfs_generic, OID_AUTO, root_unmounted_cleanly, CTLFLAG_RD, &hfs_root_unmounted_cleanly, 0, "Root filesystem was unmounted cleanly");
1213
1214 /*
1215 * Common code for mount and mountroot
1216 */
1217 int
1218 hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
1219 int journal_replay_only, vfs_context_t context)
1220 {
1221 struct proc *p = vfs_context_proc(context);
1222 int retval = E_NONE;
1223 struct hfsmount *hfsmp = NULL;
1224 struct buf *bp;
1225 dev_t dev;
1226 HFSMasterDirectoryBlock *mdbp = NULL;
1227 int ronly;
1228 #if QUOTA
1229 int i;
1230 #endif
1231 int mntwrapper;
1232 kauth_cred_t cred;
1233 u_int64_t disksize;
1234 daddr64_t log_blkcnt;
1235 u_int32_t log_blksize;
1236 u_int32_t phys_blksize;
1237 u_int32_t minblksize;
1238 u_int32_t iswritable;
1239 daddr64_t mdb_offset;
1240 int isvirtual = 0;
1241 int isroot = 0;
1242 int isssd;
1243 #if CONFIG_HFS_ALLOC_RBTREE
1244 thread_t allocator_thread;
1245 #endif
1246
1247 if (args == NULL) {
1248 /* only hfs_mountroot passes us NULL as the 'args' argument */
1249 isroot = 1;
1250 }
1251
1252 ronly = vfs_isrdonly(mp);
1253 dev = vnode_specrdev(devvp);
1254 cred = p ? vfs_context_ucred(context) : NOCRED;
1255 mntwrapper = 0;
1256
1257 bp = NULL;
1258 hfsmp = NULL;
1259 mdbp = NULL;
1260 minblksize = kHFSBlockSize;
1261
1262 /* Advisory locking should be handled at the VFS layer */
1263 vfs_setlocklocal(mp);
1264
1265 /* Get the logical block size (treated as physical block size everywhere) */
1266 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&log_blksize, 0, context)) {
1267 if (HFS_MOUNT_DEBUG) {
1268 printf("hfs_mountfs: DKIOCGETBLOCKSIZE failed\n");
1269 }
1270 retval = ENXIO;
1271 goto error_exit;
1272 }
1273 if (log_blksize == 0 || log_blksize > 1024*1024*1024) {
1274 printf("hfs: logical block size 0x%x looks bad. Not mounting.\n", log_blksize);
1275 retval = ENXIO;
1276 goto error_exit;
1277 }
1278
1279 /* Get the physical block size. */
1280 retval = VNOP_IOCTL(devvp, DKIOCGETPHYSICALBLOCKSIZE, (caddr_t)&phys_blksize, 0, context);
1281 if (retval) {
1282 if ((retval != ENOTSUP) && (retval != ENOTTY)) {
1283 if (HFS_MOUNT_DEBUG) {
1284 printf("hfs_mountfs: DKIOCGETPHYSICALBLOCKSIZE failed\n");
1285 }
1286 retval = ENXIO;
1287 goto error_exit;
1288 }
1289 /* If device does not support this ioctl, assume that physical
1290 * block size is same as logical block size
1291 */
1292 phys_blksize = log_blksize;
1293 }
1294 if (phys_blksize == 0 || phys_blksize > 1024*1024*1024) {
1295 printf("hfs: physical block size 0x%x looks bad. Not mounting.\n", phys_blksize);
1296 retval = ENXIO;
1297 goto error_exit;
1298 }
1299
1300 /* Switch to 512 byte sectors (temporarily) */
1301 if (log_blksize > 512) {
1302 u_int32_t size512 = 512;
1303
1304 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&size512, FWRITE, context)) {
1305 if (HFS_MOUNT_DEBUG) {
1306 printf("hfs_mountfs: DKIOCSETBLOCKSIZE failed \n");
1307 }
1308 retval = ENXIO;
1309 goto error_exit;
1310 }
1311 }
1312 /* Get the number of 512 byte physical blocks. */
1313 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) {
1314 /* resetting block size may fail if getting block count did */
1315 (void)VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context);
1316 if (HFS_MOUNT_DEBUG) {
1317 printf("hfs_mountfs: DKIOCGETBLOCKCOUNT failed\n");
1318 }
1319 retval = ENXIO;
1320 goto error_exit;
1321 }
1322 /* Compute an accurate disk size (i.e. within 512 bytes) */
1323 disksize = (u_int64_t)log_blkcnt * (u_int64_t)512;
1324
1325 /*
1326 * On Tiger it is not necessary to switch the device
1327 * block size to be 4k if there are more than 31-bits
1328 * worth of blocks but to insure compatibility with
1329 * pre-Tiger systems we have to do it.
1330 *
1331 * If the device size is not a multiple of 4K (8 * 512), then
1332 * switching the logical block size isn't going to help because
1333 * we will be unable to write the alternate volume header.
1334 * In this case, just leave the logical block size unchanged.
1335 */
1336 if (log_blkcnt > 0x000000007fffffff && (log_blkcnt & 7) == 0) {
1337 minblksize = log_blksize = 4096;
1338 if (phys_blksize < log_blksize)
1339 phys_blksize = log_blksize;
1340 }
1341
1342 /*
1343 * The cluster layer is not currently prepared to deal with a logical
1344 * block size larger than the system's page size. (It can handle
1345 * blocks per page, but not multiple pages per block.) So limit the
1346 * logical block size to the page size.
1347 */
1348 if (log_blksize > PAGE_SIZE)
1349 log_blksize = PAGE_SIZE;
1350
1351 /* Now switch to our preferred physical block size. */
1352 if (log_blksize > 512) {
1353 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context)) {
1354 if (HFS_MOUNT_DEBUG) {
1355 printf("hfs_mountfs: DKIOCSETBLOCKSIZE (2) failed\n");
1356 }
1357 retval = ENXIO;
1358 goto error_exit;
1359 }
1360 /* Get the count of physical blocks. */
1361 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) {
1362 if (HFS_MOUNT_DEBUG) {
1363 printf("hfs_mountfs: DKIOCGETBLOCKCOUNT (2) failed\n");
1364 }
1365 retval = ENXIO;
1366 goto error_exit;
1367 }
1368 }
1369 /*
1370 * At this point:
1371 * minblksize is the minimum physical block size
1372 * log_blksize has our preferred physical block size
1373 * log_blkcnt has the total number of physical blocks
1374 */
1375
1376 mdb_offset = (daddr64_t)HFS_PRI_SECTOR(log_blksize);
1377 if ((retval = (int)buf_meta_bread(devvp,
1378 HFS_PHYSBLK_ROUNDDOWN(mdb_offset, (phys_blksize/log_blksize)),
1379 phys_blksize, cred, &bp))) {
1380 if (HFS_MOUNT_DEBUG) {
1381 printf("hfs_mountfs: buf_meta_bread failed with %d\n", retval);
1382 }
1383 goto error_exit;
1384 }
1385 MALLOC(mdbp, HFSMasterDirectoryBlock *, kMDBSize, M_TEMP, M_WAITOK);
1386 if (mdbp == NULL) {
1387 retval = ENOMEM;
1388 if (HFS_MOUNT_DEBUG) {
1389 printf("hfs_mountfs: MALLOC failed\n");
1390 }
1391 goto error_exit;
1392 }
1393 bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize), mdbp, kMDBSize);
1394 buf_brelse(bp);
1395 bp = NULL;
1396
1397 MALLOC(hfsmp, struct hfsmount *, sizeof(struct hfsmount), M_HFSMNT, M_WAITOK);
1398 if (hfsmp == NULL) {
1399 if (HFS_MOUNT_DEBUG) {
1400 printf("hfs_mountfs: MALLOC (2) failed\n");
1401 }
1402 retval = ENOMEM;
1403 goto error_exit;
1404 }
1405 bzero(hfsmp, sizeof(struct hfsmount));
1406
1407 hfs_chashinit_finish(hfsmp);
1408
1409 /*
1410 * See if the disk is a solid state device. We need this to decide what to do about
1411 * hotfiles.
1412 */
1413 if (VNOP_IOCTL(devvp, DKIOCISSOLIDSTATE, (caddr_t)&isssd, 0, context) == 0) {
1414 if (isssd) {
1415 hfsmp->hfs_flags |= HFS_SSD;
1416 }
1417 }
1418
1419
1420 /*
1421 * Init the volume information structure
1422 */
1423
1424 lck_mtx_init(&hfsmp->hfs_mutex, hfs_mutex_group, hfs_lock_attr);
1425 lck_mtx_init(&hfsmp->hfc_mutex, hfs_mutex_group, hfs_lock_attr);
1426 lck_rw_init(&hfsmp->hfs_global_lock, hfs_rwlock_group, hfs_lock_attr);
1427 lck_rw_init(&hfsmp->hfs_insync, hfs_rwlock_group, hfs_lock_attr);
1428 lck_spin_init(&hfsmp->vcbFreeExtLock, hfs_spinlock_group, hfs_lock_attr);
1429
1430 vfs_setfsprivate(mp, hfsmp);
1431 hfsmp->hfs_mp = mp; /* Make VFSTOHFS work */
1432 hfsmp->hfs_raw_dev = vnode_specrdev(devvp);
1433 hfsmp->hfs_devvp = devvp;
1434 vnode_ref(devvp); /* Hold a ref on the device, dropped when hfsmp is freed. */
1435 hfsmp->hfs_logical_block_size = log_blksize;
1436 hfsmp->hfs_logical_block_count = log_blkcnt;
1437 hfsmp->hfs_physical_block_size = phys_blksize;
1438 hfsmp->hfs_log_per_phys = (phys_blksize / log_blksize);
1439 hfsmp->hfs_flags |= HFS_WRITEABLE_MEDIA;
1440 if (ronly)
1441 hfsmp->hfs_flags |= HFS_READ_ONLY;
1442 if (((unsigned int)vfs_flags(mp)) & MNT_UNKNOWNPERMISSIONS)
1443 hfsmp->hfs_flags |= HFS_UNKNOWN_PERMS;
1444
1445 #if QUOTA
1446 for (i = 0; i < MAXQUOTAS; i++)
1447 dqfileinit(&hfsmp->hfs_qfiles[i]);
1448 #endif
1449
1450 if (args) {
1451 hfsmp->hfs_uid = (args->hfs_uid == (uid_t)VNOVAL) ? UNKNOWNUID : args->hfs_uid;
1452 if (hfsmp->hfs_uid == 0xfffffffd) hfsmp->hfs_uid = UNKNOWNUID;
1453 hfsmp->hfs_gid = (args->hfs_gid == (gid_t)VNOVAL) ? UNKNOWNGID : args->hfs_gid;
1454 if (hfsmp->hfs_gid == 0xfffffffd) hfsmp->hfs_gid = UNKNOWNGID;
1455 vfs_setowner(mp, hfsmp->hfs_uid, hfsmp->hfs_gid); /* tell the VFS */
1456 if (args->hfs_mask != (mode_t)VNOVAL) {
1457 hfsmp->hfs_dir_mask = args->hfs_mask & ALLPERMS;
1458 if (args->flags & HFSFSMNT_NOXONFILES) {
1459 hfsmp->hfs_file_mask = (args->hfs_mask & DEFFILEMODE);
1460 } else {
1461 hfsmp->hfs_file_mask = args->hfs_mask & ALLPERMS;
1462 }
1463 } else {
1464 hfsmp->hfs_dir_mask = UNKNOWNPERMISSIONS & ALLPERMS; /* 0777: rwx---rwx */
1465 hfsmp->hfs_file_mask = UNKNOWNPERMISSIONS & DEFFILEMODE; /* 0666: no --x by default? */
1466 }
1467 if ((args->flags != (int)VNOVAL) && (args->flags & HFSFSMNT_WRAPPER))
1468 mntwrapper = 1;
1469 } else {
1470 /* Even w/o explicit mount arguments, MNT_UNKNOWNPERMISSIONS requires setting up uid, gid, and mask: */
1471 if (((unsigned int)vfs_flags(mp)) & MNT_UNKNOWNPERMISSIONS) {
1472 hfsmp->hfs_uid = UNKNOWNUID;
1473 hfsmp->hfs_gid = UNKNOWNGID;
1474 vfs_setowner(mp, hfsmp->hfs_uid, hfsmp->hfs_gid); /* tell the VFS */
1475 hfsmp->hfs_dir_mask = UNKNOWNPERMISSIONS & ALLPERMS; /* 0777: rwx---rwx */
1476 hfsmp->hfs_file_mask = UNKNOWNPERMISSIONS & DEFFILEMODE; /* 0666: no --x by default? */
1477 }
1478 }
1479
1480 /* Find out if disk media is writable. */
1481 if (VNOP_IOCTL(devvp, DKIOCISWRITABLE, (caddr_t)&iswritable, 0, context) == 0) {
1482 if (iswritable)
1483 hfsmp->hfs_flags |= HFS_WRITEABLE_MEDIA;
1484 else
1485 hfsmp->hfs_flags &= ~HFS_WRITEABLE_MEDIA;
1486 }
1487
1488 // record the current time at which we're mounting this volume
1489 struct timeval tv;
1490 microtime(&tv);
1491 hfsmp->hfs_mount_time = tv.tv_sec;
1492
1493 /* Mount a standard HFS disk */
1494 if ((SWAP_BE16(mdbp->drSigWord) == kHFSSigWord) &&
1495 (mntwrapper || (SWAP_BE16(mdbp->drEmbedSigWord) != kHFSPlusSigWord))) {
1496
1497 /* On 10.6 and beyond, non read-only mounts for HFS standard vols get rejected */
1498 if (vfs_isrdwr(mp)) {
1499 retval = EROFS;
1500 goto error_exit;
1501 }
1502
1503 printf("hfs_mountfs: Mounting HFS Standard volumes was deprecated in Mac OS 10.7 \n");
1504
1505 /* Treat it as if it's read-only and not writeable */
1506 hfsmp->hfs_flags |= HFS_READ_ONLY;
1507 hfsmp->hfs_flags &= ~HFS_WRITEABLE_MEDIA;
1508
1509 /* If only journal replay is requested, exit immediately */
1510 if (journal_replay_only) {
1511 retval = 0;
1512 goto error_exit;
1513 }
1514
1515 if ((vfs_flags(mp) & MNT_ROOTFS)) {
1516 retval = EINVAL; /* Cannot root from HFS standard disks */
1517 goto error_exit;
1518 }
1519 /* HFS disks can only use 512 byte physical blocks */
1520 if (log_blksize > kHFSBlockSize) {
1521 log_blksize = kHFSBlockSize;
1522 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context)) {
1523 retval = ENXIO;
1524 goto error_exit;
1525 }
1526 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) {
1527 retval = ENXIO;
1528 goto error_exit;
1529 }
1530 hfsmp->hfs_logical_block_size = log_blksize;
1531 hfsmp->hfs_logical_block_count = log_blkcnt;
1532 hfsmp->hfs_physical_block_size = log_blksize;
1533 hfsmp->hfs_log_per_phys = 1;
1534 }
1535 if (args) {
1536 hfsmp->hfs_encoding = args->hfs_encoding;
1537 HFSTOVCB(hfsmp)->volumeNameEncodingHint = args->hfs_encoding;
1538
1539 /* establish the timezone */
1540 gTimeZone = args->hfs_timezone;
1541 }
1542
1543 retval = hfs_getconverter(hfsmp->hfs_encoding, &hfsmp->hfs_get_unicode,
1544 &hfsmp->hfs_get_hfsname);
1545 if (retval)
1546 goto error_exit;
1547
1548 retval = hfs_MountHFSVolume(hfsmp, mdbp, p);
1549 if (retval)
1550 (void) hfs_relconverter(hfsmp->hfs_encoding);
1551
1552 } else /* Mount an HFS Plus disk */ {
1553 HFSPlusVolumeHeader *vhp;
1554 off_t embeddedOffset;
1555 int jnl_disable = 0;
1556
1557 /* Get the embedded Volume Header */
1558 if (SWAP_BE16(mdbp->drEmbedSigWord) == kHFSPlusSigWord) {
1559 embeddedOffset = SWAP_BE16(mdbp->drAlBlSt) * kHFSBlockSize;
1560 embeddedOffset += (u_int64_t)SWAP_BE16(mdbp->drEmbedExtent.startBlock) *
1561 (u_int64_t)SWAP_BE32(mdbp->drAlBlkSiz);
1562
1563 /*
1564 * If the embedded volume doesn't start on a block
1565 * boundary, then switch the device to a 512-byte
1566 * block size so everything will line up on a block
1567 * boundary.
1568 */
1569 if ((embeddedOffset % log_blksize) != 0) {
1570 printf("hfs_mountfs: embedded volume offset not"
1571 " a multiple of physical block size (%d);"
1572 " switching to 512\n", log_blksize);
1573 log_blksize = 512;
1574 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE,
1575 (caddr_t)&log_blksize, FWRITE, context)) {
1576
1577 if (HFS_MOUNT_DEBUG) {
1578 printf("hfs_mountfs: DKIOCSETBLOCKSIZE (3) failed\n");
1579 }
1580 retval = ENXIO;
1581 goto error_exit;
1582 }
1583 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT,
1584 (caddr_t)&log_blkcnt, 0, context)) {
1585 if (HFS_MOUNT_DEBUG) {
1586 printf("hfs_mountfs: DKIOCGETBLOCKCOUNT (3) failed\n");
1587 }
1588 retval = ENXIO;
1589 goto error_exit;
1590 }
1591 /* Note: relative block count adjustment */
1592 hfsmp->hfs_logical_block_count *=
1593 hfsmp->hfs_logical_block_size / log_blksize;
1594
1595 /* Update logical /physical block size */
1596 hfsmp->hfs_logical_block_size = log_blksize;
1597 hfsmp->hfs_physical_block_size = log_blksize;
1598 phys_blksize = log_blksize;
1599 hfsmp->hfs_log_per_phys = 1;
1600 }
1601
1602 disksize = (u_int64_t)SWAP_BE16(mdbp->drEmbedExtent.blockCount) *
1603 (u_int64_t)SWAP_BE32(mdbp->drAlBlkSiz);
1604
1605 hfsmp->hfs_logical_block_count = disksize / log_blksize;
1606
1607 mdb_offset = (daddr64_t)((embeddedOffset / log_blksize) + HFS_PRI_SECTOR(log_blksize));
1608 retval = (int)buf_meta_bread(devvp, HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
1609 phys_blksize, cred, &bp);
1610 if (retval) {
1611 if (HFS_MOUNT_DEBUG) {
1612 printf("hfs_mountfs: buf_meta_bread (2) failed with %d\n", retval);
1613 }
1614 goto error_exit;
1615 }
1616 bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize), mdbp, 512);
1617 buf_brelse(bp);
1618 bp = NULL;
1619 vhp = (HFSPlusVolumeHeader*) mdbp;
1620
1621 } else /* pure HFS+ */ {
1622 embeddedOffset = 0;
1623 vhp = (HFSPlusVolumeHeader*) mdbp;
1624 }
1625
1626 if (isroot) {
1627 hfs_root_unmounted_cleanly = (SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) != 0;
1628 }
1629
1630 /*
1631 * On inconsistent disks, do not allow read-write mount
1632 * unless it is the boot volume being mounted. We also
1633 * always want to replay the journal if the journal_replay_only
1634 * flag is set because that will (most likely) get the
1635 * disk into a consistent state before fsck_hfs starts
1636 * looking at it.
1637 */
1638 if ( !(vfs_flags(mp) & MNT_ROOTFS)
1639 && (SWAP_BE32(vhp->attributes) & kHFSVolumeInconsistentMask)
1640 && !journal_replay_only
1641 && !(hfsmp->hfs_flags & HFS_READ_ONLY)) {
1642
1643 if (HFS_MOUNT_DEBUG) {
1644 printf("hfs_mountfs: failed to mount non-root inconsistent disk\n");
1645 }
1646 retval = EINVAL;
1647 goto error_exit;
1648 }
1649
1650
1651 // XXXdbg
1652 //
1653 hfsmp->jnl = NULL;
1654 hfsmp->jvp = NULL;
1655 if (args != NULL && (args->flags & HFSFSMNT_EXTENDED_ARGS) &&
1656 args->journal_disable) {
1657 jnl_disable = 1;
1658 }
1659
1660 //
1661 // We only initialize the journal here if the last person
1662 // to mount this volume was journaling aware. Otherwise
1663 // we delay journal initialization until later at the end
1664 // of hfs_MountHFSPlusVolume() because the last person who
1665 // mounted it could have messed things up behind our back
1666 // (so we need to go find the .journal file, make sure it's
1667 // the right size, re-sync up if it was moved, etc).
1668 //
1669 if ( (SWAP_BE32(vhp->lastMountedVersion) == kHFSJMountVersion)
1670 && (SWAP_BE32(vhp->attributes) & kHFSVolumeJournaledMask)
1671 && !jnl_disable) {
1672
1673 // if we're able to init the journal, mark the mount
1674 // point as journaled.
1675 //
1676 if ((retval = hfs_early_journal_init(hfsmp, vhp, args, embeddedOffset, mdb_offset, mdbp, cred)) == 0) {
1677 vfs_setflags(mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
1678 } else {
1679 if (retval == EROFS) {
1680 // EROFS is a special error code that means the volume has an external
1681 // journal which we couldn't find. in that case we do not want to
1682 // rewrite the volume header - we'll just refuse to mount the volume.
1683 if (HFS_MOUNT_DEBUG) {
1684 printf("hfs_mountfs: hfs_early_journal_init indicated external jnl \n");
1685 }
1686 retval = EINVAL;
1687 goto error_exit;
1688 }
1689
1690 // if the journal failed to open, then set the lastMountedVersion
1691 // to be "FSK!" which fsck_hfs will see and force the fsck instead
1692 // of just bailing out because the volume is journaled.
1693 if (!ronly) {
1694 if (HFS_MOUNT_DEBUG) {
1695 printf("hfs_mountfs: hfs_early_journal_init failed, setting to FSK \n");
1696 }
1697
1698 HFSPlusVolumeHeader *jvhp;
1699
1700 hfsmp->hfs_flags |= HFS_NEED_JNL_RESET;
1701
1702 if (mdb_offset == 0) {
1703 mdb_offset = (daddr64_t)((embeddedOffset / log_blksize) + HFS_PRI_SECTOR(log_blksize));
1704 }
1705
1706 bp = NULL;
1707 retval = (int)buf_meta_bread(devvp,
1708 HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
1709 phys_blksize, cred, &bp);
1710 if (retval == 0) {
1711 jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize));
1712
1713 if (SWAP_BE16(jvhp->signature) == kHFSPlusSigWord || SWAP_BE16(jvhp->signature) == kHFSXSigWord) {
1714 printf ("hfs(1): Journal replay fail. Writing lastMountVersion as FSK!\n");
1715 jvhp->lastMountedVersion = SWAP_BE32(kFSKMountVersion);
1716 buf_bwrite(bp);
1717 } else {
1718 buf_brelse(bp);
1719 }
1720 bp = NULL;
1721 } else if (bp) {
1722 buf_brelse(bp);
1723 // clear this so the error exit path won't try to use it
1724 bp = NULL;
1725 }
1726 }
1727
1728 // if this isn't the root device just bail out.
1729 // If it is the root device we just continue on
1730 // in the hopes that fsck_hfs will be able to
1731 // fix any damage that exists on the volume.
1732 if ( !(vfs_flags(mp) & MNT_ROOTFS)) {
1733 if (HFS_MOUNT_DEBUG) {
1734 printf("hfs_mountfs: hfs_early_journal_init failed, erroring out \n");
1735 }
1736 retval = EINVAL;
1737 goto error_exit;
1738 }
1739 }
1740 }
1741 // XXXdbg
1742
1743 /* Either the journal is replayed successfully, or there
1744 * was nothing to replay, or no journal exists. In any case,
1745 * return success.
1746 */
1747 if (journal_replay_only) {
1748 retval = 0;
1749 goto error_exit;
1750 }
1751
1752 (void) hfs_getconverter(0, &hfsmp->hfs_get_unicode, &hfsmp->hfs_get_hfsname);
1753
1754 retval = hfs_MountHFSPlusVolume(hfsmp, vhp, embeddedOffset, disksize, p, args, cred);
1755 /*
1756 * If the backend didn't like our physical blocksize
1757 * then retry with physical blocksize of 512.
1758 */
1759 if ((retval == ENXIO) && (log_blksize > 512) && (log_blksize != minblksize)) {
1760 printf("hfs_mountfs: could not use physical block size "
1761 "(%d) switching to 512\n", log_blksize);
1762 log_blksize = 512;
1763 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context)) {
1764 if (HFS_MOUNT_DEBUG) {
1765 printf("hfs_mountfs: DKIOCSETBLOCKSIZE (4) failed \n");
1766 }
1767 retval = ENXIO;
1768 goto error_exit;
1769 }
1770 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) {
1771 if (HFS_MOUNT_DEBUG) {
1772 printf("hfs_mountfs: DKIOCGETBLOCKCOUNT (4) failed \n");
1773 }
1774 retval = ENXIO;
1775 goto error_exit;
1776 }
1777 devvp->v_specsize = log_blksize;
1778 /* Note: relative block count adjustment (in case this is an embedded volume). */
1779 hfsmp->hfs_logical_block_count *= hfsmp->hfs_logical_block_size / log_blksize;
1780 hfsmp->hfs_logical_block_size = log_blksize;
1781 hfsmp->hfs_log_per_phys = hfsmp->hfs_physical_block_size / log_blksize;
1782
1783 if (hfsmp->jnl && hfsmp->jvp == devvp) {
1784 // close and re-open this with the new block size
1785 journal_close(hfsmp->jnl);
1786 hfsmp->jnl = NULL;
1787 if (hfs_early_journal_init(hfsmp, vhp, args, embeddedOffset, mdb_offset, mdbp, cred) == 0) {
1788 vfs_setflags(mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
1789 } else {
1790 // if the journal failed to open, then set the lastMountedVersion
1791 // to be "FSK!" which fsck_hfs will see and force the fsck instead
1792 // of just bailing out because the volume is journaled.
1793 if (!ronly) {
1794 if (HFS_MOUNT_DEBUG) {
1795 printf("hfs_mountfs: hfs_early_journal_init (2) resetting.. \n");
1796 }
1797 HFSPlusVolumeHeader *jvhp;
1798
1799 hfsmp->hfs_flags |= HFS_NEED_JNL_RESET;
1800
1801 if (mdb_offset == 0) {
1802 mdb_offset = (daddr64_t)((embeddedOffset / log_blksize) + HFS_PRI_SECTOR(log_blksize));
1803 }
1804
1805 bp = NULL;
1806 retval = (int)buf_meta_bread(devvp, HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
1807 phys_blksize, cred, &bp);
1808 if (retval == 0) {
1809 jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize));
1810
1811 if (SWAP_BE16(jvhp->signature) == kHFSPlusSigWord || SWAP_BE16(jvhp->signature) == kHFSXSigWord) {
1812 printf ("hfs(2): Journal replay fail. Writing lastMountVersion as FSK!\n");
1813 jvhp->lastMountedVersion = SWAP_BE32(kFSKMountVersion);
1814 buf_bwrite(bp);
1815 } else {
1816 buf_brelse(bp);
1817 }
1818 bp = NULL;
1819 } else if (bp) {
1820 buf_brelse(bp);
1821 // clear this so the error exit path won't try to use it
1822 bp = NULL;
1823 }
1824 }
1825
1826 // if this isn't the root device just bail out.
1827 // If it is the root device we just continue on
1828 // in the hopes that fsck_hfs will be able to
1829 // fix any damage that exists on the volume.
1830 if ( !(vfs_flags(mp) & MNT_ROOTFS)) {
1831 if (HFS_MOUNT_DEBUG) {
1832 printf("hfs_mountfs: hfs_early_journal_init (2) failed \n");
1833 }
1834 retval = EINVAL;
1835 goto error_exit;
1836 }
1837 }
1838 }
1839
1840 /* Try again with a smaller block size... */
1841 retval = hfs_MountHFSPlusVolume(hfsmp, vhp, embeddedOffset, disksize, p, args, cred);
1842 if (retval && HFS_MOUNT_DEBUG) {
1843 printf("hfs_MountHFSPlusVolume (late) returned %d\n",retval);
1844 }
1845 }
1846 if (retval)
1847 (void) hfs_relconverter(0);
1848 }
1849
1850 // save off a snapshot of the mtime from the previous mount
1851 // (for matador).
1852 hfsmp->hfs_last_mounted_mtime = hfsmp->hfs_mtime;
1853
1854 if ( retval ) {
1855 if (HFS_MOUNT_DEBUG) {
1856 printf("hfs_mountfs: encountered failure %d \n", retval);
1857 }
1858 goto error_exit;
1859 }
1860
1861 mp->mnt_vfsstat.f_fsid.val[0] = (long)dev;
1862 mp->mnt_vfsstat.f_fsid.val[1] = vfs_typenum(mp);
1863 vfs_setmaxsymlen(mp, 0);
1864
1865 mp->mnt_vtable->vfc_vfsflags |= VFC_VFSNATIVEXATTR;
1866 #if NAMEDSTREAMS
1867 mp->mnt_kern_flag |= MNTK_NAMED_STREAMS;
1868 #endif
1869 if (!(hfsmp->hfs_flags & HFS_STANDARD)) {
1870 /* Tell VFS that we support directory hard links. */
1871 mp->mnt_vtable->vfc_vfsflags |= VFC_VFSDIRLINKS;
1872 } else {
1873 /* HFS standard doesn't support extended readdir! */
1874 mount_set_noreaddirext (mp);
1875 }
1876
1877 if (args) {
1878 /*
1879 * Set the free space warning levels for a non-root volume:
1880 *
1881 * Set the "danger" limit to 1% of the volume size or 100MB, whichever
1882 * is less. Set the "warning" limit to 2% of the volume size or 150MB,
1883 * whichever is less. And last, set the "desired" freespace level to
1884 * to 3% of the volume size or 200MB, whichever is less.
1885 */
1886 hfsmp->hfs_freespace_notify_dangerlimit =
1887 MIN(HFS_VERYLOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize,
1888 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_VERYLOWDISKTRIGGERFRACTION);
1889 hfsmp->hfs_freespace_notify_warninglimit =
1890 MIN(HFS_LOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize,
1891 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_LOWDISKTRIGGERFRACTION);
1892 hfsmp->hfs_freespace_notify_desiredlevel =
1893 MIN(HFS_LOWDISKSHUTOFFLEVEL / HFSTOVCB(hfsmp)->blockSize,
1894 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_LOWDISKSHUTOFFFRACTION);
1895 } else {
1896 /*
1897 * Set the free space warning levels for the root volume:
1898 *
1899 * Set the "danger" limit to 5% of the volume size or 512MB, whichever
1900 * is less. Set the "warning" limit to 10% of the volume size or 1GB,
1901 * whichever is less. And last, set the "desired" freespace level to
1902 * to 11% of the volume size or 1.25GB, whichever is less.
1903 */
1904 hfsmp->hfs_freespace_notify_dangerlimit =
1905 MIN(HFS_ROOTVERYLOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize,
1906 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_ROOTVERYLOWDISKTRIGGERFRACTION);
1907 hfsmp->hfs_freespace_notify_warninglimit =
1908 MIN(HFS_ROOTLOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize,
1909 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_ROOTLOWDISKTRIGGERFRACTION);
1910 hfsmp->hfs_freespace_notify_desiredlevel =
1911 MIN(HFS_ROOTLOWDISKSHUTOFFLEVEL / HFSTOVCB(hfsmp)->blockSize,
1912 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_ROOTLOWDISKSHUTOFFFRACTION);
1913 };
1914
1915 /* Check if the file system exists on virtual device, like disk image */
1916 if (VNOP_IOCTL(devvp, DKIOCISVIRTUAL, (caddr_t)&isvirtual, 0, context) == 0) {
1917 if (isvirtual) {
1918 hfsmp->hfs_flags |= HFS_VIRTUAL_DEVICE;
1919 }
1920 }
1921
1922 /* do not allow ejectability checks on the root device */
1923 if (isroot == 0) {
1924 if ((hfsmp->hfs_flags & HFS_VIRTUAL_DEVICE) == 0 &&
1925 IOBSDIsMediaEjectable(mp->mnt_vfsstat.f_mntfromname)) {
1926 hfsmp->hfs_max_pending_io = 4096*1024; // a reasonable value to start with.
1927 hfsmp->hfs_syncer = thread_call_allocate(hfs_syncer, hfsmp);
1928 if (hfsmp->hfs_syncer == NULL) {
1929 printf("hfs: failed to allocate syncer thread callback for %s (%s)\n",
1930 mp->mnt_vfsstat.f_mntfromname, mp->mnt_vfsstat.f_mntonname);
1931 }
1932 }
1933 }
1934
1935 #if CONFIG_HFS_ALLOC_RBTREE
1936 /*
1937 * We spawn a thread to create the pair of red-black trees for this volume.
1938 * However, in so doing, we must be careful to ensure that if this thread is still
1939 * running after mount has finished, it doesn't interfere with an unmount. Specifically,
1940 * we'll need to set a bit that indicates we're in progress building the trees here.
1941 * Unmount will check for this bit, and then if it's set, mark a corresponding bit that
1942 * notifies the tree generation code that an unmount is waiting. Also mark the bit that
1943 * indicates the tree is live and operating.
1944 *
1945 * Only do this if we're operating on a read-write mount (we wouldn't care for read-only).
1946 */
1947
1948 if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) {
1949 hfsmp->extent_tree_flags |= (HFS_ALLOC_TREEBUILD_INFLIGHT | HFS_ALLOC_RB_ENABLED);
1950
1951 /* Initialize EOF counter so that the thread can assume it started at initial values */
1952 hfsmp->offset_block_end = 0;
1953 InitTree(hfsmp);
1954
1955 kernel_thread_start ((thread_continue_t) hfs_initialize_allocator , hfsmp, &allocator_thread);
1956 thread_deallocate(allocator_thread);
1957 }
1958
1959 #endif
1960
1961 /*
1962 * Start looking for free space to drop below this level and generate a
1963 * warning immediately if needed:
1964 */
1965 hfsmp->hfs_notification_conditions = 0;
1966 hfs_generate_volume_notifications(hfsmp);
1967
1968 if (ronly == 0) {
1969 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
1970 }
1971 FREE(mdbp, M_TEMP);
1972 return (0);
1973
1974 error_exit:
1975 if (bp)
1976 buf_brelse(bp);
1977 if (mdbp)
1978 FREE(mdbp, M_TEMP);
1979
1980 if (hfsmp && hfsmp->jvp && hfsmp->jvp != hfsmp->hfs_devvp) {
1981 vnode_clearmountedon(hfsmp->jvp);
1982 (void)VNOP_CLOSE(hfsmp->jvp, ronly ? FREAD : FREAD|FWRITE, vfs_context_kernel());
1983 hfsmp->jvp = NULL;
1984 }
1985 if (hfsmp) {
1986 if (hfsmp->hfs_devvp) {
1987 vnode_rele(hfsmp->hfs_devvp);
1988 }
1989 hfs_delete_chash(hfsmp);
1990
1991 FREE(hfsmp, M_HFSMNT);
1992 vfs_setfsprivate(mp, NULL);
1993 }
1994 return (retval);
1995 }
1996
1997
1998 /*
1999 * Make a filesystem operational.
2000 * Nothing to do at the moment.
2001 */
2002 /* ARGSUSED */
2003 static int
2004 hfs_start(__unused struct mount *mp, __unused int flags, __unused vfs_context_t context)
2005 {
2006 return (0);
2007 }
2008
2009
/*
 * unmount system call
 *
 * Flushes user files, stops the background syncer, syncs every metadata
 * B-tree file, writes the volume header, closes the journal, and finally
 * releases the device vnode and the hfsmount itself.  With MNT_FORCE the
 * teardown continues past individual flush failures.
 */
int
hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context)
{
	struct proc *p = vfs_context_proc(context);
	struct hfsmount *hfsmp = VFSTOHFS(mp);
	int retval = E_NONE;
	int flags;		/* vflush()/hfs_flushfiles() flags */
	int force;		/* non-zero for MNT_FORCE: ignore flush errors */
	int started_tr = 0;	/* non-zero once a journal transaction is open */
	int rb_used = 0;	/* non-zero if the red-black allocator tree was live */

	flags = 0;
	force = 0;
	if (mntflags & MNT_FORCE) {
		flags |= FORCECLOSE;
		force = 1;
	}

	/* Push out user file data first; on failure bail unless forced. */
	if ((retval = hfs_flushfiles(mp, flags, p)) && !force)
		return (retval);

	if (hfsmp->hfs_flags & HFS_METADATA_ZONE)
		(void) hfs_recording_suspend(hfsmp);

	/*
	 * Cancel any pending timers for this volume.  Then wait for any timers
	 * which have fired, but whose callbacks have not yet completed.
	 */
	if (hfsmp->hfs_syncer)
	{
		struct timespec ts = {0, 100000000};	/* 0.1 seconds */

		/*
		 * Cancel any timers that have been scheduled, but have not
		 * fired yet.  NOTE: The kernel considers a timer complete as
		 * soon as it starts your callback, so the kernel does not
		 * keep track of the number of callbacks in progress.
		 */
		if (thread_call_cancel(hfsmp->hfs_syncer))
			OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_incomplete);
		thread_call_free(hfsmp->hfs_syncer);
		hfsmp->hfs_syncer = NULL;

		/*
		 * This waits for all of the callbacks that were entered before
		 * we did thread_call_cancel above, but have not completed yet.
		 * Callbacks wake this channel when they finish.
		 */
		while(hfsmp->hfs_sync_incomplete > 0)
		{
			msleep((caddr_t)&hfsmp->hfs_sync_incomplete, NULL, PWAIT, "hfs_unmount", &ts);
		}

		if (hfsmp->hfs_sync_incomplete < 0)
			panic("hfs_unmount: pm_sync_incomplete underflow!\n");
	}

#if CONFIG_HFS_ALLOC_RBTREE
	/* Tear down the in-memory allocator trees; remember if they were live. */
	rb_used = hfs_teardown_allocator(hfsmp);
#endif

	/*
	 * Flush out the b-trees, volume bitmap and Volume Header
	 * (read-write mounts only).
	 */
	if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) {
		retval = hfs_start_transaction(hfsmp);
		if (retval == 0) {
			started_tr = 1;
		} else if (!force) {
			goto err_exit;
		}

		/* Each system file: take the cnode lock, fsync, drop the lock. */
		if (hfsmp->hfs_startup_vp) {
			(void) hfs_lock(VTOC(hfsmp->hfs_startup_vp), HFS_EXCLUSIVE_LOCK);
			retval = hfs_fsync(hfsmp->hfs_startup_vp, MNT_WAIT, 0, p);
			hfs_unlock(VTOC(hfsmp->hfs_startup_vp));
			if (retval && !force)
				goto err_exit;
		}

		if (hfsmp->hfs_attribute_vp) {
			(void) hfs_lock(VTOC(hfsmp->hfs_attribute_vp), HFS_EXCLUSIVE_LOCK);
			retval = hfs_fsync(hfsmp->hfs_attribute_vp, MNT_WAIT, 0, p);
			hfs_unlock(VTOC(hfsmp->hfs_attribute_vp));
			if (retval && !force)
				goto err_exit;
		}

		(void) hfs_lock(VTOC(hfsmp->hfs_catalog_vp), HFS_EXCLUSIVE_LOCK);
		retval = hfs_fsync(hfsmp->hfs_catalog_vp, MNT_WAIT, 0, p);
		hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
		if (retval && !force)
			goto err_exit;

		(void) hfs_lock(VTOC(hfsmp->hfs_extents_vp), HFS_EXCLUSIVE_LOCK);
		retval = hfs_fsync(hfsmp->hfs_extents_vp, MNT_WAIT, 0, p);
		hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
		if (retval && !force)
			goto err_exit;

		if (hfsmp->hfs_allocation_vp) {
			(void) hfs_lock(VTOC(hfsmp->hfs_allocation_vp), HFS_EXCLUSIVE_LOCK);
			retval = hfs_fsync(hfsmp->hfs_allocation_vp, MNT_WAIT, 0, p);
			hfs_unlock(VTOC(hfsmp->hfs_allocation_vp));
			if (retval && !force)
				goto err_exit;
		}

		/* Hot-files B-tree, if present and still a system vnode. */
		if (hfsmp->hfc_filevp && vnode_issystem(hfsmp->hfc_filevp)) {
			retval = hfs_fsync(hfsmp->hfc_filevp, MNT_WAIT, 0, p);
			if (retval && !force)
				goto err_exit;
		}

		/* If runtime corruption was detected, indicate that the volume
		 * was not unmounted cleanly.
		 */
		if (hfsmp->vcbAtrb & kHFSVolumeInconsistentMask) {
			HFSTOVCB(hfsmp)->vcbAtrb &= ~kHFSVolumeUnmountedMask;
		} else {
			HFSTOVCB(hfsmp)->vcbAtrb |= kHFSVolumeUnmountedMask;
		}


		if (rb_used) {
			/* If the rb-tree was live, just set min_start to 0 */
			hfsmp->nextAllocation = 0;
		}
		else {
			if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
				int i;
				u_int32_t min_start = hfsmp->totalBlocks;

				// set the nextAllocation pointer to the smallest free block number
				// we've seen so on the next mount we won't rescan unnecessarily
				lck_spin_lock(&hfsmp->vcbFreeExtLock);
				for(i=0; i < (int)hfsmp->vcbFreeExtCnt; i++) {
					if (hfsmp->vcbFreeExt[i].startBlock < min_start) {
						min_start = hfsmp->vcbFreeExt[i].startBlock;
					}
				}
				lck_spin_unlock(&hfsmp->vcbFreeExtLock);
				if (min_start < hfsmp->nextAllocation) {
					hfsmp->nextAllocation = min_start;
				}
			}
		}


		retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
		if (retval) {
			/* Header didn't make it out: volume is not cleanly unmounted. */
			HFSTOVCB(hfsmp)->vcbAtrb &= ~kHFSVolumeUnmountedMask;
			if (!force)
				goto err_exit;	/* could not flush everything */
		}

		if (started_tr) {
			hfs_end_transaction(hfsmp);
			started_tr = 0;
		}
	}

	if (hfsmp->jnl) {
		hfs_journal_flush(hfsmp, FALSE);
	}

	/*
	 * Invalidate our caches and release metadata vnodes
	 */
	(void) hfsUnmount(hfsmp, p);

	/* Plain HFS (not HFS+) used a text-encoding converter; release it. */
	if (HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord)
		(void) hfs_relconverter(hfsmp->hfs_encoding);

	// XXXdbg
	if (hfsmp->jnl) {
		journal_close(hfsmp->jnl);
		hfsmp->jnl = NULL;
	}

	VNOP_FSYNC(hfsmp->hfs_devvp, MNT_WAIT, context);

	/* Close a journal that lived on a separate device from the volume. */
	if (hfsmp->jvp && hfsmp->jvp != hfsmp->hfs_devvp) {
		vnode_clearmountedon(hfsmp->jvp);
		retval = VNOP_CLOSE(hfsmp->jvp,
		                    hfsmp->hfs_flags & HFS_READ_ONLY ? FREAD : FREAD|FWRITE,
		                    vfs_context_kernel());
		vnode_put(hfsmp->jvp);
		hfsmp->jvp = NULL;
	}
	// XXXdbg

	/*
	 * Last chance to dump unreferenced system files.
	 */
	(void) vflush(mp, NULLVP, FORCECLOSE);

#if HFS_SPARSE_DEV
	/* Drop our reference on the backing fs (if any). */
	if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) && hfsmp->hfs_backingfs_rootvp) {
		struct vnode * tmpvp;

		hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
		tmpvp = hfsmp->hfs_backingfs_rootvp;
		hfsmp->hfs_backingfs_rootvp = NULLVP;
		vnode_rele(tmpvp);
	}
#endif /* HFS_SPARSE_DEV */
	lck_mtx_destroy(&hfsmp->hfc_mutex, hfs_mutex_group);
	lck_spin_destroy(&hfsmp->vcbFreeExtLock, hfs_spinlock_group);
	vnode_rele(hfsmp->hfs_devvp);

	hfs_delete_chash(hfsmp);
	FREE(hfsmp, M_HFSMNT);

	return (0);

err_exit:
	/* Error path: close any open transaction before returning the error. */
	if (started_tr) {
		hfs_end_transaction(hfsmp);
	}
	return retval;
}
2235
2236
2237 /*
2238 * Return the root of a filesystem.
2239 */
2240 static int
2241 hfs_vfs_root(struct mount *mp, struct vnode **vpp, __unused vfs_context_t context)
2242 {
2243 return hfs_vget(VFSTOHFS(mp), (cnid_t)kHFSRootFolderID, vpp, 1, 0);
2244 }
2245
2246
/*
 * Do operations associated with quotas
 *
 * When the kernel is built without QUOTA support this is a stub
 * returning ENOTSUP; otherwise it decodes the composite command word
 * and dispatches to the hfs_quota* helpers.
 */
#if !QUOTA
static int
hfs_quotactl(__unused struct mount *mp, __unused int cmds, __unused uid_t uid, __unused caddr_t datap, __unused vfs_context_t context)
{
	return (ENOTSUP);
}
#else
static int
hfs_quotactl(struct mount *mp, int cmds, uid_t uid, caddr_t datap, vfs_context_t context)
{
	struct proc *p = vfs_context_proc(context);
	int cmd, type, error;

	/* ~0U means "the calling user" — substitute the caller's uid. */
	if (uid == ~0U)
		uid = kauth_cred_getuid(vfs_context_ucred(context));
	cmd = cmds >> SUBCMDSHIFT;

	/*
	 * Permission check: Q_SYNC and Q_QUOTASTAT are unprivileged;
	 * Q_GETQUOTA on one's own uid is allowed; everything else
	 * requires superuser.
	 */
	switch (cmd) {
	case Q_SYNC:
	case Q_QUOTASTAT:
		break;
	case Q_GETQUOTA:
		if (uid == kauth_cred_getuid(vfs_context_ucred(context)))
			break;
		/* fall through */
	default:
		if ( (error = vfs_context_suser(context)) )
			return (error);
	}

	/* Low bits of cmds select the quota type (user/group). */
	type = cmds & SUBCMDMASK;
	if ((u_int)type >= MAXQUOTAS)
		return (EINVAL);
	/* Note: a busy mount quietly returns success without doing anything. */
	if (vfs_busy(mp, LK_NOWAIT))
		return (0);

	switch (cmd) {

	case Q_QUOTAON:
		error = hfs_quotaon(p, mp, type, datap);
		break;

	case Q_QUOTAOFF:
		error = hfs_quotaoff(p, mp, type);
		break;

	case Q_SETQUOTA:
		error = hfs_setquota(mp, uid, type, datap);
		break;

	case Q_SETUSE:
		error = hfs_setuse(mp, uid, type, datap);
		break;

	case Q_GETQUOTA:
		error = hfs_getquota(mp, uid, type, datap);
		break;

	case Q_SYNC:
		error = hfs_qsync(mp);
		break;

	case Q_QUOTASTAT:
		error = hfs_quotastat(mp, type, datap);
		break;

	default:
		error = EINVAL;
		break;
	}
	vfs_unbusy(mp);

	return (error);
}
#endif /* QUOTA */
2325
/* Subtype is composite of bits */
/* These values are combined into sbp->f_fssubtype by hfs_statfs() below. */
#define HFS_SUBTYPE_JOURNALED 0x01
#define HFS_SUBTYPE_CASESENSITIVE 0x02
/* bits 2 - 6 reserved */
#define HFS_SUBTYPE_STANDARDHFS 0x80
2331
2332 /*
2333 * Get file system statistics.
2334 */
2335 int
2336 hfs_statfs(struct mount *mp, register struct vfsstatfs *sbp, __unused vfs_context_t context)
2337 {
2338 ExtendedVCB *vcb = VFSTOVCB(mp);
2339 struct hfsmount *hfsmp = VFSTOHFS(mp);
2340 u_int32_t freeCNIDs;
2341 u_int16_t subtype = 0;
2342
2343 freeCNIDs = (u_int32_t)0xFFFFFFFF - (u_int32_t)vcb->vcbNxtCNID;
2344
2345 sbp->f_bsize = (u_int32_t)vcb->blockSize;
2346 sbp->f_iosize = (size_t)cluster_max_io_size(mp, 0);
2347 sbp->f_blocks = (u_int64_t)((u_int32_t)vcb->totalBlocks);
2348 sbp->f_bfree = (u_int64_t)((u_int32_t )hfs_freeblks(hfsmp, 0));
2349 sbp->f_bavail = (u_int64_t)((u_int32_t )hfs_freeblks(hfsmp, 1));
2350 sbp->f_files = (u_int64_t)((u_int32_t )(vcb->totalBlocks - 2)); /* max files is constrained by total blocks */
2351 sbp->f_ffree = (u_int64_t)((u_int32_t )(MIN(freeCNIDs, sbp->f_bavail)));
2352
2353 /*
2354 * Subtypes (flavors) for HFS
2355 * 0: Mac OS Extended
2356 * 1: Mac OS Extended (Journaled)
2357 * 2: Mac OS Extended (Case Sensitive)
2358 * 3: Mac OS Extended (Case Sensitive, Journaled)
2359 * 4 - 127: Reserved
2360 * 128: Mac OS Standard
2361 *
2362 */
2363 if (hfsmp->hfs_flags & HFS_STANDARD) {
2364 subtype = HFS_SUBTYPE_STANDARDHFS;
2365 } else /* HFS Plus */ {
2366 if (hfsmp->jnl)
2367 subtype |= HFS_SUBTYPE_JOURNALED;
2368 if (hfsmp->hfs_flags & HFS_CASE_SENSITIVE)
2369 subtype |= HFS_SUBTYPE_CASESENSITIVE;
2370 }
2371 sbp->f_fssubtype = subtype;
2372
2373 return (0);
2374 }
2375
2376
//
// XXXdbg -- this is a callback to be used by the journal to
//           get meta data blocks flushed out to disk.
//
// XXXdbg -- be smarter and don't flush *every* block on each
//           call.  try to only flush some so we don't wind up
//           being too synchronous.
//
__private_extern__
void
hfs_sync_metadata(void *arg)
{
	struct mount *mp = (struct mount *)arg;	/* journal passes the mount as an opaque arg */
	struct hfsmount *hfsmp;
	ExtendedVCB *vcb;
	buf_t bp;
	int retval;
	daddr64_t priIDSector;	/* logical block of the primary volume header */
	hfsmp = VFSTOHFS(mp);
	vcb = HFSTOVCB(hfsmp);

	// now make sure the super block is flushed
	priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
				  HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size));

	retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
			HFS_PHYSBLK_ROUNDDOWN(priIDSector, hfsmp->hfs_log_per_phys),
			hfsmp->hfs_physical_block_size, NOCRED, &bp);
	/* ENXIO (device gone) is expected during e.g. forced ejects — stay quiet. */
	if ((retval != 0 ) && (retval != ENXIO)) {
		printf("hfs_sync_metadata: can't read volume header at %d! (retval 0x%x)\n",
		       (int)priIDSector, retval);
	}

	/* Write the buffer only if it is dirty (B_DELWRI) and not locked. */
	if (retval == 0 && ((buf_flags(bp) & (B_DELWRI | B_LOCKED)) == B_DELWRI)) {
		buf_bwrite(bp);
	} else if (bp) {
		buf_brelse(bp);
	}

	// the alternate super block...
	// XXXdbg - we probably don't need to do this each and every time.
	//          hfs_btreeio.c:FlushAlternate() should flag when it was
	//          written...
	if (hfsmp->hfs_alt_id_sector) {
		retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
				HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys),
				hfsmp->hfs_physical_block_size, NOCRED, &bp);
		/* Same dirty-and-unlocked test as the primary header above. */
		if (retval == 0 && ((buf_flags(bp) & (B_DELWRI | B_LOCKED)) == B_DELWRI)) {
			buf_bwrite(bp);
		} else if (bp) {
			buf_brelse(bp);
		}
	}
}
2431
2432
/*
 * Per-call context handed to hfs_sync_callback() for each vnode
 * visited by vnode_iterate() in hfs_sync().
 */
struct hfs_sync_cargs {
	kauth_cred_t cred;	/* caller's credential (set by hfs_sync; not read by the callback in this file) */
	struct proc *p;		/* process on whose behalf the sync runs */
	int waitfor;		/* MNT_WAIT/MNT_NOWAIT, forwarded to hfs_fsync() */
	int error;		/* last non-zero error returned by hfs_fsync(), if any */
};
2439
2440
2441 static int
2442 hfs_sync_callback(struct vnode *vp, void *cargs)
2443 {
2444 struct cnode *cp;
2445 struct hfs_sync_cargs *args;
2446 int error;
2447
2448 args = (struct hfs_sync_cargs *)cargs;
2449
2450 if (hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK) != 0) {
2451 return (VNODE_RETURNED);
2452 }
2453 cp = VTOC(vp);
2454
2455 if ((cp->c_flag & C_MODIFIED) ||
2456 (cp->c_touch_acctime | cp->c_touch_chgtime | cp->c_touch_modtime) ||
2457 vnode_hasdirtyblks(vp)) {
2458 error = hfs_fsync(vp, args->waitfor, 0, args->p);
2459
2460 if (error)
2461 args->error = error;
2462 }
2463 hfs_unlock(cp);
2464 return (VNODE_RETURNED);
2465 }
2466
2467
2468
2469 /*
2470 * Go through the disk queues to initiate sandbagged IO;
2471 * go through the inodes to write those that have been modified;
2472 * initiate the writing of the super block if it has been modified.
2473 *
2474 * Note: we are always called with the filesystem marked `MPBUSY'.
2475 */
2476 int
2477 hfs_sync(struct mount *mp, int waitfor, vfs_context_t context)
2478 {
2479 struct proc *p = vfs_context_proc(context);
2480 struct cnode *cp;
2481 struct hfsmount *hfsmp;
2482 ExtendedVCB *vcb;
2483 struct vnode *meta_vp[4];
2484 int i;
2485 int error, allerror = 0;
2486 struct hfs_sync_cargs args;
2487
2488 hfsmp = VFSTOHFS(mp);
2489
2490 /*
2491 * hfs_changefs might be manipulating vnodes so back off
2492 */
2493 if (hfsmp->hfs_flags & HFS_IN_CHANGEFS)
2494 return (0);
2495
2496 if (hfsmp->hfs_flags & HFS_READ_ONLY)
2497 return (EROFS);
2498
2499 /* skip over frozen volumes */
2500 if (!lck_rw_try_lock_shared(&hfsmp->hfs_insync))
2501 return 0;
2502
2503 args.cred = kauth_cred_get();
2504 args.waitfor = waitfor;
2505 args.p = p;
2506 args.error = 0;
2507 /*
2508 * hfs_sync_callback will be called for each vnode
2509 * hung off of this mount point... the vnode will be
2510 * properly referenced and unreferenced around the callback
2511 */
2512 vnode_iterate(mp, 0, hfs_sync_callback, (void *)&args);
2513
2514 if (args.error)
2515 allerror = args.error;
2516
2517 vcb = HFSTOVCB(hfsmp);
2518
2519 meta_vp[0] = vcb->extentsRefNum;
2520 meta_vp[1] = vcb->catalogRefNum;
2521 meta_vp[2] = vcb->allocationsRefNum; /* This is NULL for standard HFS */
2522 meta_vp[3] = hfsmp->hfs_attribute_vp; /* Optional file */
2523
2524 /* Now sync our three metadata files */
2525 for (i = 0; i < 4; ++i) {
2526 struct vnode *btvp;
2527
2528 btvp = meta_vp[i];;
2529 if ((btvp==0) || (vnode_mount(btvp) != mp))
2530 continue;
2531
2532 /* XXX use hfs_systemfile_lock instead ? */
2533 (void) hfs_lock(VTOC(btvp), HFS_EXCLUSIVE_LOCK);
2534 cp = VTOC(btvp);
2535
2536 if (((cp->c_flag & C_MODIFIED) == 0) &&
2537 (cp->c_touch_acctime == 0) &&
2538 (cp->c_touch_chgtime == 0) &&
2539 (cp->c_touch_modtime == 0) &&
2540 vnode_hasdirtyblks(btvp) == 0) {
2541 hfs_unlock(VTOC(btvp));
2542 continue;
2543 }
2544 error = vnode_get(btvp);
2545 if (error) {
2546 hfs_unlock(VTOC(btvp));
2547 continue;
2548 }
2549 if ((error = hfs_fsync(btvp, waitfor, 0, p)))
2550 allerror = error;
2551
2552 hfs_unlock(cp);
2553 vnode_put(btvp);
2554 };
2555
2556 /*
2557 * Force stale file system control information to be flushed.
2558 */
2559 if (vcb->vcbSigWord == kHFSSigWord) {
2560 if ((error = VNOP_FSYNC(hfsmp->hfs_devvp, waitfor, context))) {
2561 allerror = error;
2562 }
2563 }
2564 #if QUOTA
2565 hfs_qsync(mp);
2566 #endif /* QUOTA */
2567
2568 hfs_hotfilesync(hfsmp, vfs_context_kernel());
2569
2570 /*
2571 * Write back modified superblock.
2572 */
2573 if (IsVCBDirty(vcb)) {
2574 error = hfs_flushvolumeheader(hfsmp, waitfor, 0);
2575 if (error)
2576 allerror = error;
2577 }
2578
2579 if (hfsmp->jnl) {
2580 hfs_journal_flush(hfsmp, FALSE);
2581 }
2582
2583 {
2584 clock_sec_t secs;
2585 clock_usec_t usecs;
2586 uint64_t now;
2587
2588 clock_get_calendar_microtime(&secs, &usecs);
2589 now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs;
2590 hfsmp->hfs_last_sync_time = now;
2591 }
2592
2593 lck_rw_unlock_shared(&hfsmp->hfs_insync);
2594 return (allerror);
2595 }
2596
2597
2598 /*
2599 * File handle to vnode
2600 *
2601 * Have to be really careful about stale file handles:
2602 * - check that the cnode id is valid
2603 * - call hfs_vget() to get the locked cnode
2604 * - check for an unallocated cnode (i_mode == 0)
2605 * - check that the given client host has export rights and return
2606 * those rights via. exflagsp and credanonp
2607 */
2608 static int
2609 hfs_fhtovp(struct mount *mp, int fhlen, unsigned char *fhp, struct vnode **vpp, __unused vfs_context_t context)
2610 {
2611 struct hfsfid *hfsfhp;
2612 struct vnode *nvp;
2613 int result;
2614
2615 *vpp = NULL;
2616 hfsfhp = (struct hfsfid *)fhp;
2617
2618 if (fhlen < (int)sizeof(struct hfsfid))
2619 return (EINVAL);
2620
2621 result = hfs_vget(VFSTOHFS(mp), ntohl(hfsfhp->hfsfid_cnid), &nvp, 0, 0);
2622 if (result) {
2623 if (result == ENOENT)
2624 result = ESTALE;
2625 return result;
2626 }
2627
2628 /*
2629 * We used to use the create time as the gen id of the file handle,
2630 * but it is not static enough because it can change at any point
2631 * via system calls. We still don't have another volume ID or other
2632 * unique identifier to use for a generation ID across reboots that
2633 * persists until the file is removed. Using only the CNID exposes
2634 * us to the potential wrap-around case, but as of 2/2008, it would take
2635 * over 2 months to wrap around if the machine did nothing but allocate
2636 * CNIDs. Using some kind of wrap counter would only be effective if
2637 * each file had the wrap counter associated with it. For now,
2638 * we use only the CNID to identify the file as it's good enough.
2639 */
2640
2641 *vpp = nvp;
2642
2643 hfs_unlock(VTOC(nvp));
2644 return (0);
2645 }
2646
2647
2648 /*
2649 * Vnode pointer to File handle
2650 */
2651 /* ARGSUSED */
2652 static int
2653 hfs_vptofh(struct vnode *vp, int *fhlenp, unsigned char *fhp, __unused vfs_context_t context)
2654 {
2655 struct cnode *cp;
2656 struct hfsfid *hfsfhp;
2657
2658 if (ISHFS(VTOVCB(vp)))
2659 return (ENOTSUP); /* hfs standard is not exportable */
2660
2661 if (*fhlenp < (int)sizeof(struct hfsfid))
2662 return (EOVERFLOW);
2663
2664 cp = VTOC(vp);
2665 hfsfhp = (struct hfsfid *)fhp;
2666 /* only the CNID is used to identify the file now */
2667 hfsfhp->hfsfid_cnid = htonl(cp->c_fileid);
2668 hfsfhp->hfsfid_gen = htonl(cp->c_fileid);
2669 *fhlenp = sizeof(struct hfsfid);
2670
2671 return (0);
2672 }
2673
2674
/*
 * Initial HFS filesystems, done only once.
 *
 * Sets up the cnode hash, text-encoding converters, B-tree reserve
 * bookkeeping, and the lock groups used throughout HFS.  Guarded by a
 * static flag so repeat calls are no-ops.
 * NOTE(review): the `done` guard is not itself synchronized — presumably
 * this is only invoked once during VFS registration; confirm with caller.
 */
static int
hfs_init(__unused struct vfsconf *vfsp)
{
	static int done = 0;

	if (done)
		return (0);
	done = 1;
	hfs_chashinit();
	hfs_converterinit();

	BTReserveSetup();


	/* Lock attributes/groups must exist before any hfsmount locks are made. */
	hfs_lock_attr = lck_attr_alloc_init();
	hfs_group_attr = lck_grp_attr_alloc_init();
	hfs_mutex_group = lck_grp_alloc_init("hfs-mutex", hfs_group_attr);
	hfs_rwlock_group = lck_grp_alloc_init("hfs-rwlock", hfs_group_attr);
	hfs_spinlock_group = lck_grp_alloc_init("hfs-spinlock", hfs_group_attr);

#if HFS_COMPRESSION
	decmpfs_init();
#endif

	return (0);
}
2704
2705 static int
2706 hfs_getmountpoint(struct vnode *vp, struct hfsmount **hfsmpp)
2707 {
2708 struct hfsmount * hfsmp;
2709 char fstypename[MFSNAMELEN];
2710
2711 if (vp == NULL)
2712 return (EINVAL);
2713
2714 if (!vnode_isvroot(vp))
2715 return (EINVAL);
2716
2717 vnode_vfsname(vp, fstypename);
2718 if (strncmp(fstypename, "hfs", sizeof(fstypename)) != 0)
2719 return (EINVAL);
2720
2721 hfsmp = VTOHFS(vp);
2722
2723 if (HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord)
2724 return (EINVAL);
2725
2726 *hfsmpp = hfsmp;
2727
2728 return (0);
2729 }
2730
2731 // XXXdbg
2732 #include <sys/filedesc.h>
2733
2734 /*
2735 * HFS filesystem related variables.
2736 */
2737 int
2738 hfs_sysctl(int *name, __unused u_int namelen, user_addr_t oldp, size_t *oldlenp,
2739 user_addr_t newp, size_t newlen, vfs_context_t context)
2740 {
2741 struct proc *p = vfs_context_proc(context);
2742 int error;
2743 struct hfsmount *hfsmp;
2744
2745 /* all sysctl names at this level are terminal */
2746
2747 if (name[0] == HFS_ENCODINGBIAS) {
2748 int bias;
2749
2750 bias = hfs_getencodingbias();
2751 error = sysctl_int(oldp, oldlenp, newp, newlen, &bias);
2752 if (error == 0 && newp)
2753 hfs_setencodingbias(bias);
2754 return (error);
2755
2756 } else if (name[0] == HFS_EXTEND_FS) {
2757 u_int64_t newsize;
2758 vnode_t vp = vfs_context_cwd(context);
2759
2760 if (newp == USER_ADDR_NULL || vp == NULLVP)
2761 return (EINVAL);
2762 if ((error = hfs_getmountpoint(vp, &hfsmp)))
2763 return (error);
2764 error = sysctl_quad(oldp, oldlenp, newp, newlen, (quad_t *)&newsize);
2765 if (error)
2766 return (error);
2767
2768 error = hfs_extendfs(hfsmp, newsize, context);
2769 return (error);
2770
2771 } else if (name[0] == HFS_ENCODINGHINT) {
2772 size_t bufsize;
2773 size_t bytes;
2774 u_int32_t hint;
2775 u_int16_t *unicode_name = NULL;
2776 char *filename = NULL;
2777
2778 if ((newlen <= 0) || (newlen > MAXPATHLEN))
2779 return (EINVAL);
2780
2781 bufsize = MAX(newlen * 3, MAXPATHLEN);
2782 MALLOC(filename, char *, newlen, M_TEMP, M_WAITOK);
2783 if (filename == NULL) {
2784 error = ENOMEM;
2785 goto encodinghint_exit;
2786 }
2787 MALLOC(unicode_name, u_int16_t *, bufsize, M_TEMP, M_WAITOK);
2788 if (filename == NULL) {
2789 error = ENOMEM;
2790 goto encodinghint_exit;
2791 }
2792
2793 error = copyin(newp, (caddr_t)filename, newlen);
2794 if (error == 0) {
2795 error = utf8_decodestr((u_int8_t *)filename, newlen - 1, unicode_name,
2796 &bytes, bufsize, 0, UTF_DECOMPOSED);
2797 if (error == 0) {
2798 hint = hfs_pickencoding(unicode_name, bytes / 2);
2799 error = sysctl_int(oldp, oldlenp, USER_ADDR_NULL, 0, (int32_t *)&hint);
2800 }
2801 }
2802
2803 encodinghint_exit:
2804 if (unicode_name)
2805 FREE(unicode_name, M_TEMP);
2806 if (filename)
2807 FREE(filename, M_TEMP);
2808 return (error);
2809
2810 } else if (name[0] == HFS_ENABLE_JOURNALING) {
2811 // make the file system journaled...
2812 vnode_t vp = vfs_context_cwd(context);
2813 vnode_t jvp;
2814 ExtendedVCB *vcb;
2815 struct cat_attr jnl_attr, jinfo_attr;
2816 struct cat_fork jnl_fork, jinfo_fork;
2817 void *jnl = NULL;
2818 int lockflags;
2819
2820 /* Only root can enable journaling */
2821 if (!is_suser()) {
2822 return (EPERM);
2823 }
2824 if (vp == NULLVP)
2825 return EINVAL;
2826
2827 hfsmp = VTOHFS(vp);
2828 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2829 return EROFS;
2830 }
2831 if (HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord) {
2832 printf("hfs: can't make a plain hfs volume journaled.\n");
2833 return EINVAL;
2834 }
2835
2836 if (hfsmp->jnl) {
2837 printf("hfs: volume @ mp %p is already journaled!\n", vnode_mount(vp));
2838 return EAGAIN;
2839 }
2840
2841 vcb = HFSTOVCB(hfsmp);
2842 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_EXTENTS, HFS_EXCLUSIVE_LOCK);
2843 if (BTHasContiguousNodes(VTOF(vcb->catalogRefNum)) == 0 ||
2844 BTHasContiguousNodes(VTOF(vcb->extentsRefNum)) == 0) {
2845
2846 printf("hfs: volume has a btree w/non-contiguous nodes. can not enable journaling.\n");
2847 hfs_systemfile_unlock(hfsmp, lockflags);
2848 return EINVAL;
2849 }
2850 hfs_systemfile_unlock(hfsmp, lockflags);
2851
2852 // make sure these both exist!
2853 if ( GetFileInfo(vcb, kHFSRootFolderID, ".journal_info_block", &jinfo_attr, &jinfo_fork) == 0
2854 || GetFileInfo(vcb, kHFSRootFolderID, ".journal", &jnl_attr, &jnl_fork) == 0) {
2855
2856 return EINVAL;
2857 }
2858
2859 hfs_sync(hfsmp->hfs_mp, MNT_WAIT, context);
2860
2861 printf("hfs: Initializing the journal (joffset 0x%llx sz 0x%llx)...\n",
2862 (off_t)name[2], (off_t)name[3]);
2863
2864 //
2865 // XXXdbg - note that currently (Sept, 08) hfs_util does not support
2866 // enabling the journal on a separate device so it is safe
2867 // to just copy hfs_devvp here. If hfs_util gets the ability
2868 // to dynamically enable the journal on a separate device then
2869 // we will have to do the same thing as hfs_early_journal_init()
2870 // to locate and open the journal device.
2871 //
2872 jvp = hfsmp->hfs_devvp;
2873 jnl = journal_create(jvp,
2874 (off_t)name[2] * (off_t)HFSTOVCB(hfsmp)->blockSize
2875 + HFSTOVCB(hfsmp)->hfsPlusIOPosOffset,
2876 (off_t)((unsigned)name[3]),
2877 hfsmp->hfs_devvp,
2878 hfsmp->hfs_logical_block_size,
2879 0,
2880 0,
2881 hfs_sync_metadata, hfsmp->hfs_mp);
2882
2883 /*
2884 * Set up the trim callback function so that we can add
2885 * recently freed extents to the free extent cache once
2886 * the transaction that freed them is written to the
2887 * journal on disk.
2888 */
2889 if (jnl)
2890 journal_trim_set_callback(jnl, hfs_trim_callback, hfsmp);
2891
2892 if (jnl == NULL) {
2893 printf("hfs: FAILED to create the journal!\n");
2894 if (jvp && jvp != hfsmp->hfs_devvp) {
2895 vnode_clearmountedon(jvp);
2896 VNOP_CLOSE(jvp, hfsmp->hfs_flags & HFS_READ_ONLY ? FREAD : FREAD|FWRITE, vfs_context_kernel());
2897 }
2898 jvp = NULL;
2899
2900 return EINVAL;
2901 }
2902
2903 hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);
2904
2905 /*
2906 * Flush all dirty metadata buffers.
2907 */
2908 buf_flushdirtyblks(hfsmp->hfs_devvp, TRUE, 0, "hfs_sysctl");
2909 buf_flushdirtyblks(hfsmp->hfs_extents_vp, TRUE, 0, "hfs_sysctl");
2910 buf_flushdirtyblks(hfsmp->hfs_catalog_vp, TRUE, 0, "hfs_sysctl");
2911 buf_flushdirtyblks(hfsmp->hfs_allocation_vp, TRUE, 0, "hfs_sysctl");
2912 if (hfsmp->hfs_attribute_vp)
2913 buf_flushdirtyblks(hfsmp->hfs_attribute_vp, TRUE, 0, "hfs_sysctl");
2914
2915 HFSTOVCB(hfsmp)->vcbJinfoBlock = name[1];
2916 HFSTOVCB(hfsmp)->vcbAtrb |= kHFSVolumeJournaledMask;
2917 hfsmp->jvp = jvp;
2918 hfsmp->jnl = jnl;
2919
2920 // save this off for the hack-y check in hfs_remove()
2921 hfsmp->jnl_start = (u_int32_t)name[2];
2922 hfsmp->jnl_size = (off_t)((unsigned)name[3]);
2923 hfsmp->hfs_jnlinfoblkid = jinfo_attr.ca_fileid;
2924 hfsmp->hfs_jnlfileid = jnl_attr.ca_fileid;
2925
2926 vfs_setflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
2927
2928 hfs_unlock_global (hfsmp);
2929 hfs_flushvolumeheader(hfsmp, MNT_WAIT, 1);
2930
2931 {
2932 fsid_t fsid;
2933
2934 fsid.val[0] = (int32_t)hfsmp->hfs_raw_dev;
2935 fsid.val[1] = (int32_t)vfs_typenum(HFSTOVFS(hfsmp));
2936 vfs_event_signal(&fsid, VQ_UPDATE, (intptr_t)NULL);
2937 }
2938 return 0;
2939 } else if (name[0] == HFS_DISABLE_JOURNALING) {
2940 // clear the journaling bit
2941 vnode_t vp = vfs_context_cwd(context);
2942
2943 /* Only root can disable journaling */
2944 if (!is_suser()) {
2945 return (EPERM);
2946 }
2947 if (vp == NULLVP)
2948 return EINVAL;
2949
2950 hfsmp = VTOHFS(vp);
2951
2952 /*
2953 * Disabling journaling is disallowed on volumes with directory hard links
2954 * because we have not tested the relevant code path.
2955 */
2956 if (hfsmp->hfs_private_attr[DIR_HARDLINKS].ca_entries != 0){
2957 printf("hfs: cannot disable journaling on volumes with directory hardlinks\n");
2958 return EPERM;
2959 }
2960
2961 printf("hfs: disabling journaling for mount @ %p\n", vnode_mount(vp));
2962
2963 hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);
2964
2965 // Lights out for you buddy!
2966 journal_close(hfsmp->jnl);
2967 hfsmp->jnl = NULL;
2968
2969 if (hfsmp->jvp && hfsmp->jvp != hfsmp->hfs_devvp) {
2970 vnode_clearmountedon(hfsmp->jvp);
2971 VNOP_CLOSE(hfsmp->jvp, hfsmp->hfs_flags & HFS_READ_ONLY ? FREAD : FREAD|FWRITE, vfs_context_kernel());
2972 vnode_put(hfsmp->jvp);
2973 }
2974 hfsmp->jvp = NULL;
2975 vfs_clearflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
2976 hfsmp->jnl_start = 0;
2977 hfsmp->hfs_jnlinfoblkid = 0;
2978 hfsmp->hfs_jnlfileid = 0;
2979
2980 HFSTOVCB(hfsmp)->vcbAtrb &= ~kHFSVolumeJournaledMask;
2981
2982 hfs_unlock_global (hfsmp);
2983
2984 hfs_flushvolumeheader(hfsmp, MNT_WAIT, 1);
2985
2986 {
2987 fsid_t fsid;
2988
2989 fsid.val[0] = (int32_t)hfsmp->hfs_raw_dev;
2990 fsid.val[1] = (int32_t)vfs_typenum(HFSTOVFS(hfsmp));
2991 vfs_event_signal(&fsid, VQ_UPDATE, (intptr_t)NULL);
2992 }
2993 return 0;
2994 } else if (name[0] == HFS_GET_JOURNAL_INFO) {
2995 vnode_t vp = vfs_context_cwd(context);
2996 off_t jnl_start, jnl_size;
2997
2998 if (vp == NULLVP)
2999 return EINVAL;
3000
3001 /* 64-bit processes won't work with this sysctl -- can't fit a pointer into an int! */
3002 if (proc_is64bit(current_proc()))
3003 return EINVAL;
3004
3005 hfsmp = VTOHFS(vp);
3006 if (hfsmp->jnl == NULL) {
3007 jnl_start = 0;
3008 jnl_size = 0;
3009 } else {
3010 jnl_start = (off_t)(hfsmp->jnl_start * HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset;
3011 jnl_size = (off_t)hfsmp->jnl_size;
3012 }
3013
3014 if ((error = copyout((caddr_t)&jnl_start, CAST_USER_ADDR_T(name[1]), sizeof(off_t))) != 0) {
3015 return error;
3016 }
3017 if ((error = copyout((caddr_t)&jnl_size, CAST_USER_ADDR_T(name[2]), sizeof(off_t))) != 0) {
3018 return error;
3019 }
3020
3021 return 0;
3022 } else if (name[0] == HFS_SET_PKG_EXTENSIONS) {
3023
3024 return set_package_extensions_table((user_addr_t)((unsigned)name[1]), name[2], name[3]);
3025
3026 } else if (name[0] == VFS_CTL_QUERY) {
3027 struct sysctl_req *req;
3028 union union_vfsidctl vc;
3029 struct mount *mp;
3030 struct vfsquery vq;
3031
3032 req = CAST_DOWN(struct sysctl_req *, oldp); /* we're new style vfs sysctl. */
3033
3034 error = SYSCTL_IN(req, &vc, proc_is64bit(p)? sizeof(vc.vc64):sizeof(vc.vc32));
3035 if (error) return (error);
3036
3037 mp = vfs_getvfs(&vc.vc32.vc_fsid); /* works for 32 and 64 */
3038 if (mp == NULL) return (ENOENT);
3039
3040 hfsmp = VFSTOHFS(mp);
3041 bzero(&vq, sizeof(vq));
3042 vq.vq_flags = hfsmp->hfs_notification_conditions;
3043 return SYSCTL_OUT(req, &vq, sizeof(vq));;
3044 } else if (name[0] == HFS_REPLAY_JOURNAL) {
3045 vnode_t devvp = NULL;
3046 int device_fd;
3047 if (namelen != 2) {
3048 return (EINVAL);
3049 }
3050 device_fd = name[1];
3051 error = file_vnode(device_fd, &devvp);
3052 if (error) {
3053 return error;
3054 }
3055 error = vnode_getwithref(devvp);
3056 if (error) {
3057 file_drop(device_fd);
3058 return error;
3059 }
3060 error = hfs_journal_replay(devvp, context);
3061 file_drop(device_fd);
3062 vnode_put(devvp);
3063 return error;
3064 } else if (name[0] == HFS_ENABLE_RESIZE_DEBUG) {
3065 hfs_resize_debug = 1;
3066 printf ("hfs_sysctl: Enabled volume resize debugging.\n");
3067 return 0;
3068 }
3069
3070 return (ENOTSUP);
3071 }
3072
3073 /*
3074 * hfs_vfs_vget is not static since it is used in hfs_readwrite.c to support
3075 * the build_path ioctl. We use it to leverage the code below that updates
3076 * the origin list cache if necessary
3077 */
3078
3079 int
3080 hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, __unused vfs_context_t context)
3081 {
3082 int error;
3083 int lockflags;
3084 struct hfsmount *hfsmp;
3085
3086 hfsmp = VFSTOHFS(mp);
3087
3088 error = hfs_vget(hfsmp, (cnid_t)ino, vpp, 1, 0);
3089 if (error)
3090 return (error);
3091
3092 /*
3093 * ADLs may need to have their origin state updated
3094 * since build_path needs a valid parent. The same is true
3095 * for hardlinked files as well. There isn't a race window here
3096 * in re-acquiring the cnode lock since we aren't pulling any data
3097 * out of the cnode; instead, we're going to the catalog.
3098 */
3099 if ((VTOC(*vpp)->c_flag & C_HARDLINK) &&
3100 (hfs_lock(VTOC(*vpp), HFS_EXCLUSIVE_LOCK) == 0)) {
3101 cnode_t *cp = VTOC(*vpp);
3102 struct cat_desc cdesc;
3103
3104 if (!hfs_haslinkorigin(cp)) {
3105 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
3106 error = cat_findname(hfsmp, (cnid_t)ino, &cdesc);
3107 hfs_systemfile_unlock(hfsmp, lockflags);
3108 if (error == 0) {
3109 if ((cdesc.cd_parentcnid != hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) &&
3110 (cdesc.cd_parentcnid != hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid)) {
3111 hfs_savelinkorigin(cp, cdesc.cd_parentcnid);
3112 }
3113 cat_releasedesc(&cdesc);
3114 }
3115 }
3116 hfs_unlock(cp);
3117 }
3118 return (0);
3119 }
3120
3121
/*
 * Look up an HFS object by catalog node ID.
 *
 * The object is returned with an iocount reference and the cnode locked
 * (unless 'skiplock' is nonzero, in which case the cnode is unlocked
 * before returning).
 *
 * If the object is a file then it will represent the data fork.
 *
 * hfsmp         - mount to search
 * cnid          - catalog node ID of the object
 * vpp           - (out) the resulting vnode, or NULL on error
 * skiplock      - if nonzero, return the cnode unlocked
 * allow_deleted - passed to the cnode-hash lookup; permits finding a
 *                 vnode in the open-unlinked state
 *
 * Returns 0 on success, ENOENT for unexported/private/unlinked IDs,
 * or another errno from the catalog/vnode layers.
 */
int
hfs_vget(struct hfsmount *hfsmp, cnid_t cnid, struct vnode **vpp, int skiplock, int allow_deleted)
{
	struct vnode *vp = NULLVP;
	struct cat_desc cndesc;
	struct cat_attr cnattr;
	struct cat_fork cnfork;
	u_int32_t linkref = 0;
	int error;

	/* Check for cnids that shouldn't be exported. */
	if ((cnid < kHFSFirstUserCatalogNodeID) &&
	    (cnid != kHFSRootFolderID && cnid != kHFSRootParentID)) {
		return (ENOENT);
	}
	/* Don't export our private directories. */
	if (cnid == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid ||
	    cnid == hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) {
		return (ENOENT);
	}
	/*
	 * Check the cnode hash first; a hit avoids the catalog lookup entirely.
	 */
	vp = hfs_chash_getvnode(hfsmp, cnid, 0, skiplock, allow_deleted);
	if (vp) {
		*vpp = vp;
		return(0);
	}

	bzero(&cndesc, sizeof(cndesc));
	bzero(&cnattr, sizeof(cnattr));
	bzero(&cnfork, sizeof(cnfork));

	/*
	 * Not in hash, lookup in catalog.
	 *
	 * kHFSRootParentID has no catalog record; synthesize a descriptor
	 * and attributes for the root folder instead.
	 */
	if (cnid == kHFSRootParentID) {
		static char hfs_rootname[] = "/";

		cndesc.cd_nameptr = (const u_int8_t *)&hfs_rootname[0];
		cndesc.cd_namelen = 1;
		cndesc.cd_parentcnid = kHFSRootParentID;
		cndesc.cd_cnid = kHFSRootFolderID;
		cndesc.cd_flags = CD_ISDIR;

		cnattr.ca_fileid = kHFSRootFolderID;
		cnattr.ca_linkcount = 1;
		cnattr.ca_entries = 1;
		cnattr.ca_dircount = 1;
		cnattr.ca_mode = (S_IFDIR | S_IRWXU | S_IRWXG | S_IRWXO);
	} else {
		int lockflags;
		cnid_t pid;
		const char *nameptr;

		lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
		error = cat_idlookup(hfsmp, cnid, 0, &cndesc, &cnattr, &cnfork);
		hfs_systemfile_unlock(hfsmp, lockflags);

		if (error) {
			*vpp = NULL;
			return (error);
		}

		/*
		 * Check for a raw hardlink inode and save its linkref.
		 * Inodes live in the private metadata directories with names
		 * of the form "<prefix><linkref>"; the decimal suffix is the
		 * link reference number.
		 */
		pid = cndesc.cd_parentcnid;
		nameptr = (const char *)cndesc.cd_nameptr;

		if ((pid == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid) &&
		    (bcmp(nameptr, HFS_INODE_PREFIX, HFS_INODE_PREFIX_LEN) == 0)) {
			linkref = strtoul(&nameptr[HFS_INODE_PREFIX_LEN], NULL, 10);

		} else if ((pid == hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) &&
		           (bcmp(nameptr, HFS_DIRINODE_PREFIX, HFS_DIRINODE_PREFIX_LEN) == 0)) {
			linkref = strtoul(&nameptr[HFS_DIRINODE_PREFIX_LEN], NULL, 10);

		} else if ((pid == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid) &&
		           (bcmp(nameptr, HFS_DELETE_PREFIX, HFS_DELETE_PREFIX_LEN) == 0)) {
			*vpp = NULL;
			cat_releasedesc(&cndesc);
			return (ENOENT);	/* open unlinked file */
		}
	}

	/*
	 * Finish initializing cnode descriptor for hardlinks.
	 *
	 * We need a valid name and parent for reverse lookups.
	 */
	if (linkref) {
		cnid_t nextlinkid;
		cnid_t prevlinkid;
		struct cat_desc linkdesc;
		int lockflags;

		cnattr.ca_linkref = linkref;

		/*
		 * Pick up the first link in the chain and get a descriptor for it.
		 * This allows blind volfs paths to work for hardlinks.
		 */
		if ((hfs_lookup_siblinglinks(hfsmp, linkref, &prevlinkid, &nextlinkid) == 0) &&
		    (nextlinkid != 0)) {
			lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
			error = cat_findname(hfsmp, nextlinkid, &linkdesc);
			hfs_systemfile_unlock(hfsmp, lockflags);
			if (error == 0) {
				/* Replace the inode descriptor with the link's. */
				cat_releasedesc(&cndesc);
				bcopy(&linkdesc, &cndesc, sizeof(linkdesc));
			}
		}
	}

	if (linkref) {
		int newvnode_flags = 0;

		/* Hardlink: create the vnode without a component name. */
		error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr,
								&cnfork, &vp, &newvnode_flags);
		if (error == 0) {
			VTOC(vp)->c_flag |= C_HARDLINK;
			vnode_setmultipath(vp);
		}
	} else {
		struct componentname cn;
		int newvnode_flags = 0;

		/* Supply hfs_getnewvnode with a component name. */
		MALLOC_ZONE(cn.cn_pnbuf, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK);
		cn.cn_nameiop = LOOKUP;
		cn.cn_flags = ISLASTCN | HASBUF;
		cn.cn_context = NULL;
		cn.cn_pnlen = MAXPATHLEN;
		cn.cn_nameptr = cn.cn_pnbuf;
		cn.cn_namelen = cndesc.cd_namelen;
		cn.cn_hash = 0;
		cn.cn_consume = 0;
		/* +1 copies the NUL terminator along with the name. */
		bcopy(cndesc.cd_nameptr, cn.cn_nameptr, cndesc.cd_namelen + 1);

		error = hfs_getnewvnode(hfsmp, NULLVP, &cn, &cndesc, 0, &cnattr,
								&cnfork, &vp, &newvnode_flags);

		if (error == 0 && (VTOC(vp)->c_flag & C_HARDLINK)) {
			hfs_savelinkorigin(VTOC(vp), cndesc.cd_parentcnid);
		}
		FREE_ZONE(cn.cn_pnbuf, cn.cn_pnlen, M_NAMEI);
	}
	cat_releasedesc(&cndesc);

	*vpp = vp;
	/* Caller asked for an unlocked cnode; drop the lock before returning. */
	if (vp && skiplock) {
		hfs_unlock(VTOC(vp));
	}
	return (error);
}
3285
3286
/*
 * Flush out all the files in a filesystem.
 *
 * Performs a vflush() over the mount (skipping system and, on the first
 * pass, swap vnodes).  When quotas are enabled, the open quota files hold
 * indirect references on the root directory vnode, so the root vnode is
 * skipped on the initial passes, quota files are closed, and a final
 * vflush() reclaims what remains.
 *
 * Returns 0 on success, EBUSY if the root directory is still open (with
 * quotas), or the error from vflush().
 */
static int
#if QUOTA
hfs_flushfiles(struct mount *mp, int flags, struct proc *p)
#else
hfs_flushfiles(struct mount *mp, int flags, __unused struct proc *p)
#endif /* QUOTA */
{
	struct hfsmount *hfsmp;
	struct vnode *skipvp = NULLVP;
	int error;
#if QUOTA
	int quotafilecnt;
	int i;
#endif

	hfsmp = VFSTOHFS(mp);

#if QUOTA
	/*
	 * The open quota files have an indirect reference on
	 * the root directory vnode.  We must account for this
	 * extra reference when doing the intial vflush.
	 */
	quotafilecnt = 0;
	if (((unsigned int)vfs_flags(mp)) & MNT_QUOTA) {

		/* Find out how many quota files we have open. */
		for (i = 0; i < MAXQUOTAS; i++) {
			if (hfsmp->hfs_qfiles[i].qf_vp != NULLVP)
				++quotafilecnt;
		}

		/* Obtain the root vnode so we can skip over it. */
		skipvp = hfs_chash_getvnode(hfsmp, kHFSRootFolderID, 0, 0, 0);
	}
#endif /* QUOTA */

	/* First pass: also skip swap files. */
	error = vflush(mp, skipvp, SKIPSYSTEM | SKIPSWAP | flags);
	if (error != 0)
		return(error);

	/* Second pass: swap files included this time. */
	error = vflush(mp, skipvp, SKIPSYSTEM | flags);

#if QUOTA
	if (((unsigned int)vfs_flags(mp)) & MNT_QUOTA) {
		if (skipvp) {
			/*
			 * See if there are additional references on the
			 * root vp besides the ones obtained from the open
			 * quota files and the hfs_chash_getvnode call above.
			 */
			if ((error == 0) &&
			    (vnode_isinuse(skipvp, quotafilecnt))) {
				error = EBUSY;  /* root directory is still open */
			}
			hfs_unlock(VTOC(skipvp));
			vnode_put(skipvp);
		}
		if (error && (flags & FORCECLOSE) == 0)
			return (error);

		/* Close the quota files so the root vnode can be reclaimed. */
		for (i = 0; i < MAXQUOTAS; i++) {
			if (hfsmp->hfs_qfiles[i].qf_vp == NULLVP)
				continue;
			hfs_quotaoff(p, mp, i);
		}
		/* Final pass now that the quota references are gone. */
		error = vflush(mp, NULLVP, SKIPSYSTEM | flags);
	}
#endif /* QUOTA */

	return (error);
}
3362
3363 /*
3364 * Update volume encoding bitmap (HFS Plus only)
3365 */
3366 __private_extern__
3367 void
3368 hfs_setencodingbits(struct hfsmount *hfsmp, u_int32_t encoding)
3369 {
3370 #define kIndexMacUkrainian 48 /* MacUkrainian encoding is 152 */
3371 #define kIndexMacFarsi 49 /* MacFarsi encoding is 140 */
3372
3373 u_int32_t index;
3374
3375 switch (encoding) {
3376 case kTextEncodingMacUkrainian:
3377 index = kIndexMacUkrainian;
3378 break;
3379 case kTextEncodingMacFarsi:
3380 index = kIndexMacFarsi;
3381 break;
3382 default:
3383 index = encoding;
3384 break;
3385 }
3386
3387 if (index < 64 && (hfsmp->encodingsBitmap & (u_int64_t)(1ULL << index)) == 0) {
3388 HFS_MOUNT_LOCK(hfsmp, TRUE)
3389 hfsmp->encodingsBitmap |= (u_int64_t)(1ULL << index);
3390 MarkVCBDirty(hfsmp);
3391 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
3392 }
3393 }
3394
3395 /*
3396 * Update volume stats
3397 *
3398 * On journal volumes this will cause a volume header flush
3399 */
3400 int
3401 hfs_volupdate(struct hfsmount *hfsmp, enum volop op, int inroot)
3402 {
3403 struct timeval tv;
3404
3405 microtime(&tv);
3406
3407 lck_mtx_lock(&hfsmp->hfs_mutex);
3408
3409 MarkVCBDirty(hfsmp);
3410 hfsmp->hfs_mtime = tv.tv_sec;
3411
3412 switch (op) {
3413 case VOL_UPDATE:
3414 break;
3415 case VOL_MKDIR:
3416 if (hfsmp->hfs_dircount != 0xFFFFFFFF)
3417 ++hfsmp->hfs_dircount;
3418 if (inroot && hfsmp->vcbNmRtDirs != 0xFFFF)
3419 ++hfsmp->vcbNmRtDirs;
3420 break;
3421 case VOL_RMDIR:
3422 if (hfsmp->hfs_dircount != 0)
3423 --hfsmp->hfs_dircount;
3424 if (inroot && hfsmp->vcbNmRtDirs != 0xFFFF)
3425 --hfsmp->vcbNmRtDirs;
3426 break;
3427 case VOL_MKFILE:
3428 if (hfsmp->hfs_filecount != 0xFFFFFFFF)
3429 ++hfsmp->hfs_filecount;
3430 if (inroot && hfsmp->vcbNmFls != 0xFFFF)
3431 ++hfsmp->vcbNmFls;
3432 break;
3433 case VOL_RMFILE:
3434 if (hfsmp->hfs_filecount != 0)
3435 --hfsmp->hfs_filecount;
3436 if (inroot && hfsmp->vcbNmFls != 0xFFFF)
3437 --hfsmp->vcbNmFls;
3438 break;
3439 }
3440
3441 lck_mtx_unlock(&hfsmp->hfs_mutex);
3442
3443 if (hfsmp->jnl) {
3444 hfs_flushvolumeheader(hfsmp, 0, 0);
3445 }
3446
3447 return (0);
3448 }
3449
3450
/*
 * Flush the in-memory VCB state out to the on-disk HFS (standard)
 * Master Directory Block, byte-swapping each field to big-endian.
 * If 'altflush' is set, the alternate MDB is rewritten as well.
 * 'waitfor' selects synchronous (MNT_WAIT) vs. asynchronous write.
 *
 * Returns 0 on success or an errno from the buffer-cache read/write.
 */
static int
hfs_flushMDB(struct hfsmount *hfsmp, int waitfor, int altflush)
{
	ExtendedVCB *vcb = HFSTOVCB(hfsmp);
	struct filefork *fp;
	HFSMasterDirectoryBlock	*mdb;
	struct buf *bp = NULL;
	int retval;
	int sectorsize;
	ByteCount namelen;

	/* Read the sector containing the on-disk MDB. */
	sectorsize = hfsmp->hfs_logical_block_size;
	retval = (int)buf_bread(hfsmp->hfs_devvp, (daddr64_t)HFS_PRI_SECTOR(sectorsize), sectorsize, NOCRED, &bp);
	if (retval) {
		if (bp)
			buf_brelse(bp);
		return retval;
	}

	/* Hold the mount lock while copying VCB fields into the buffer. */
	lck_mtx_lock(&hfsmp->hfs_mutex);

	mdb = (HFSMasterDirectoryBlock *)(buf_dataptr(bp) + HFS_PRI_OFFSET(sectorsize));

	/* On-disk HFS dates are local time; in-memory dates are UTC. */
	mdb->drCrDate	= SWAP_BE32 (UTCToLocal(to_hfs_time(vcb->hfs_itime)));
	mdb->drLsMod	= SWAP_BE32 (UTCToLocal(to_hfs_time(vcb->vcbLsMod)));
	mdb->drAtrb	= SWAP_BE16 (vcb->vcbAtrb);
	mdb->drNmFls	= SWAP_BE16 (vcb->vcbNmFls);
	mdb->drAllocPtr	= SWAP_BE16 (vcb->nextAllocation);
	mdb->drClpSiz	= SWAP_BE32 (vcb->vcbClpSiz);
	mdb->drNxtCNID	= SWAP_BE32 (vcb->vcbNxtCNID);
	mdb->drFreeBks	= SWAP_BE16 (vcb->freeBlocks);

	/* Volume name is stored UTF-8 in memory, HFS encoding on disk. */
	namelen = strlen((char *)vcb->vcbVN);
	retval = utf8_to_hfs(vcb, namelen, vcb->vcbVN, mdb->drVN);
	/* Retry with MacRoman in case that's how it was exported. */
	if (retval)
		retval = utf8_to_mac_roman(namelen, vcb->vcbVN, mdb->drVN);

	mdb->drVolBkUp	= SWAP_BE32 (UTCToLocal(to_hfs_time(vcb->vcbVolBkUp)));
	mdb->drWrCnt	= SWAP_BE32 (vcb->vcbWrCnt);
	mdb->drNmRtDirs	= SWAP_BE16 (vcb->vcbNmRtDirs);
	mdb->drFilCnt	= SWAP_BE32 (vcb->vcbFilCnt);
	mdb->drDirCnt	= SWAP_BE32 (vcb->vcbDirCnt);

	bcopy(vcb->vcbFndrInfo, mdb->drFndrInfo, sizeof(mdb->drFndrInfo));

	/* Sync the extents overflow B-tree file's metadata. */
	fp = VTOF(vcb->extentsRefNum);
	mdb->drXTExtRec[0].startBlock = SWAP_BE16 (fp->ff_extents[0].startBlock);
	mdb->drXTExtRec[0].blockCount = SWAP_BE16 (fp->ff_extents[0].blockCount);
	mdb->drXTExtRec[1].startBlock = SWAP_BE16 (fp->ff_extents[1].startBlock);
	mdb->drXTExtRec[1].blockCount = SWAP_BE16 (fp->ff_extents[1].blockCount);
	mdb->drXTExtRec[2].startBlock = SWAP_BE16 (fp->ff_extents[2].startBlock);
	mdb->drXTExtRec[2].blockCount = SWAP_BE16 (fp->ff_extents[2].blockCount);
	mdb->drXTFlSize	= SWAP_BE32 (fp->ff_blocks * vcb->blockSize);
	mdb->drXTClpSiz	= SWAP_BE32 (fp->ff_clumpsize);
	FTOC(fp)->c_flag &= ~C_MODIFIED;

	/* Sync the catalog B-tree file's metadata. */
	fp = VTOF(vcb->catalogRefNum);
	mdb->drCTExtRec[0].startBlock = SWAP_BE16 (fp->ff_extents[0].startBlock);
	mdb->drCTExtRec[0].blockCount = SWAP_BE16 (fp->ff_extents[0].blockCount);
	mdb->drCTExtRec[1].startBlock = SWAP_BE16 (fp->ff_extents[1].startBlock);
	mdb->drCTExtRec[1].blockCount = SWAP_BE16 (fp->ff_extents[1].blockCount);
	mdb->drCTExtRec[2].startBlock = SWAP_BE16 (fp->ff_extents[2].startBlock);
	mdb->drCTExtRec[2].blockCount = SWAP_BE16 (fp->ff_extents[2].blockCount);
	mdb->drCTFlSize	= SWAP_BE32 (fp->ff_blocks * vcb->blockSize);
	mdb->drCTClpSiz	= SWAP_BE32 (fp->ff_clumpsize);
	FTOC(fp)->c_flag &= ~C_MODIFIED;

	MarkVCBClean( vcb );

	lck_mtx_unlock(&hfsmp->hfs_mutex);

	/* If requested, flush out the alternate MDB */
	if (altflush) {
		struct buf *alt_bp = NULL;

		if (buf_meta_bread(hfsmp->hfs_devvp, hfsmp->hfs_alt_id_sector, sectorsize, NOCRED, &alt_bp) == 0) {
			bcopy(mdb, (char *)buf_dataptr(alt_bp) + HFS_ALT_OFFSET(sectorsize), kMDBSize);

			(void) VNOP_BWRITE(alt_bp);
		} else if (alt_bp)
			buf_brelse(alt_bp);
	}

	/* Write the primary MDB sync or async per the caller's request. */
	if (waitfor != MNT_WAIT)
		buf_bawrite(bp);
	else
		retval = VNOP_BWRITE(bp);

	return (retval);
}
3542
3543 /*
3544 * Flush any dirty in-memory mount data to the on-disk
3545 * volume header.
3546 *
3547 * Note: the on-disk volume signature is intentionally
3548 * not flushed since the on-disk "H+" and "HX" signatures
3549 * are always stored in-memory as "H+".
3550 */
3551 int
3552 hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush)
3553 {
3554 ExtendedVCB *vcb = HFSTOVCB(hfsmp);
3555 struct filefork *fp;
3556 HFSPlusVolumeHeader *volumeHeader, *altVH;
3557 int retval;
3558 struct buf *bp, *alt_bp;
3559 int i;
3560 daddr64_t priIDSector;
3561 int critical;
3562 u_int16_t signature;
3563 u_int16_t hfsversion;
3564
3565 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
3566 return(0);
3567 }
3568 if (hfsmp->hfs_flags & HFS_STANDARD) {
3569 return hfs_flushMDB(hfsmp, waitfor, altflush);
3570 }
3571 critical = altflush;
3572 priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
3573 HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size));
3574
3575 if (hfs_start_transaction(hfsmp) != 0) {
3576 return EINVAL;
3577 }
3578
3579 bp = NULL;
3580 alt_bp = NULL;
3581
3582 retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
3583 HFS_PHYSBLK_ROUNDDOWN(priIDSector, hfsmp->hfs_log_per_phys),
3584 hfsmp->hfs_physical_block_size, NOCRED, &bp);
3585 if (retval) {
3586 printf("hfs: err %d reading VH blk (%s)\n", retval, vcb->vcbVN);
3587 goto err_exit;
3588 }
3589
3590 volumeHeader = (HFSPlusVolumeHeader *)((char *)buf_dataptr(bp) +
3591 HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));
3592
3593 /*
3594 * Sanity check what we just read. If it's bad, try the alternate
3595 * instead.
3596 */
3597 signature = SWAP_BE16 (volumeHeader->signature);
3598 hfsversion = SWAP_BE16 (volumeHeader->version);
3599 if ((signature != kHFSPlusSigWord && signature != kHFSXSigWord) ||
3600 (hfsversion < kHFSPlusVersion) || (hfsversion > 100) ||
3601 (SWAP_BE32 (volumeHeader->blockSize) != vcb->blockSize)) {
3602 printf("hfs: corrupt VH on %s, sig 0x%04x, ver %d, blksize %d%s\n",
3603 vcb->vcbVN, signature, hfsversion,
3604 SWAP_BE32 (volumeHeader->blockSize),
3605 hfsmp->hfs_alt_id_sector ? "; trying alternate" : "");
3606 hfs_mark_volume_inconsistent(hfsmp);
3607
3608 if (hfsmp->hfs_alt_id_sector) {
3609 retval = buf_meta_bread(hfsmp->hfs_devvp,
3610 HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys),
3611 hfsmp->hfs_physical_block_size, NOCRED, &alt_bp);
3612 if (retval) {
3613 printf("hfs: err %d reading alternate VH (%s)\n", retval, vcb->vcbVN);
3614 goto err_exit;
3615 }
3616
3617 altVH = (HFSPlusVolumeHeader *)((char *)buf_dataptr(alt_bp) +
3618 HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size));
3619 signature = SWAP_BE16(altVH->signature);
3620 hfsversion = SWAP_BE16(altVH->version);
3621
3622 if ((signature != kHFSPlusSigWord && signature != kHFSXSigWord) ||
3623 (hfsversion < kHFSPlusVersion) || (kHFSPlusVersion > 100) ||
3624 (SWAP_BE32(altVH->blockSize) != vcb->blockSize)) {
3625 printf("hfs: corrupt alternate VH on %s, sig 0x%04x, ver %d, blksize %d\n",
3626 vcb->vcbVN, signature, hfsversion,
3627 SWAP_BE32(altVH->blockSize));
3628 retval = EIO;
3629 goto err_exit;
3630 }
3631
3632 /* The alternate is plausible, so use it. */
3633 bcopy(altVH, volumeHeader, kMDBSize);
3634 buf_brelse(alt_bp);
3635 alt_bp = NULL;
3636 } else {
3637 /* No alternate VH, nothing more we can do. */
3638 retval = EIO;
3639 goto err_exit;
3640 }
3641 }
3642
3643 if (hfsmp->jnl) {
3644 journal_modify_block_start(hfsmp->jnl, bp);
3645 }
3646
3647 /*
3648 * For embedded HFS+ volumes, update create date if it changed
3649 * (ie from a setattrlist call)
3650 */
3651 if ((vcb->hfsPlusIOPosOffset != 0) &&
3652 (SWAP_BE32 (volumeHeader->createDate) != vcb->localCreateDate)) {
3653 struct buf *bp2;
3654 HFSMasterDirectoryBlock *mdb;
3655
3656 retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
3657 HFS_PHYSBLK_ROUNDDOWN(HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size), hfsmp->hfs_log_per_phys),
3658 hfsmp->hfs_physical_block_size, NOCRED, &bp2);
3659 if (retval) {
3660 if (bp2)
3661 buf_brelse(bp2);
3662 retval = 0;
3663 } else {
3664 mdb = (HFSMasterDirectoryBlock *)(buf_dataptr(bp2) +
3665 HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));
3666
3667 if ( SWAP_BE32 (mdb->drCrDate) != vcb->localCreateDate )
3668 {
3669 if (hfsmp->jnl) {
3670 journal_modify_block_start(hfsmp->jnl, bp2);
3671 }
3672
3673 mdb->drCrDate = SWAP_BE32 (vcb->localCreateDate); /* pick up the new create date */
3674
3675 if (hfsmp->jnl) {
3676 journal_modify_block_end(hfsmp->jnl, bp2, NULL, NULL);
3677 } else {
3678 (void) VNOP_BWRITE(bp2); /* write out the changes */
3679 }
3680 }
3681 else
3682 {
3683 buf_brelse(bp2); /* just release it */
3684 }
3685 }
3686 }
3687
3688 lck_mtx_lock(&hfsmp->hfs_mutex);
3689
3690 /* Note: only update the lower 16 bits worth of attributes */
3691 volumeHeader->attributes = SWAP_BE32 (vcb->vcbAtrb);
3692 volumeHeader->journalInfoBlock = SWAP_BE32 (vcb->vcbJinfoBlock);
3693 if (hfsmp->jnl) {
3694 volumeHeader->lastMountedVersion = SWAP_BE32 (kHFSJMountVersion);
3695 } else {
3696 volumeHeader->lastMountedVersion = SWAP_BE32 (kHFSPlusMountVersion);
3697 }
3698 volumeHeader->createDate = SWAP_BE32 (vcb->localCreateDate); /* volume create date is in local time */
3699 volumeHeader->modifyDate = SWAP_BE32 (to_hfs_time(vcb->vcbLsMod));
3700 volumeHeader->backupDate = SWAP_BE32 (to_hfs_time(vcb->vcbVolBkUp));
3701 volumeHeader->fileCount = SWAP_BE32 (vcb->vcbFilCnt);
3702 volumeHeader->folderCount = SWAP_BE32 (vcb->vcbDirCnt);
3703 volumeHeader->totalBlocks = SWAP_BE32 (vcb->totalBlocks);
3704 volumeHeader->freeBlocks = SWAP_BE32 (vcb->freeBlocks);
3705 volumeHeader->nextAllocation = SWAP_BE32 (vcb->nextAllocation);
3706 volumeHeader->rsrcClumpSize = SWAP_BE32 (vcb->vcbClpSiz);
3707 volumeHeader->dataClumpSize = SWAP_BE32 (vcb->vcbClpSiz);
3708 volumeHeader->nextCatalogID = SWAP_BE32 (vcb->vcbNxtCNID);
3709 volumeHeader->writeCount = SWAP_BE32 (vcb->vcbWrCnt);
3710 volumeHeader->encodingsBitmap = SWAP_BE64 (vcb->encodingsBitmap);
3711
3712 if (bcmp(vcb->vcbFndrInfo, volumeHeader->finderInfo, sizeof(volumeHeader->finderInfo)) != 0) {
3713 bcopy(vcb->vcbFndrInfo, volumeHeader->finderInfo, sizeof(volumeHeader->finderInfo));
3714 critical = 1;
3715 }
3716
3717 /*
3718 * System files are only dirty when altflush is set.
3719 */
3720 if (altflush == 0) {
3721 goto done;
3722 }
3723
3724 /* Sync Extents over-flow file meta data */
3725 fp = VTOF(vcb->extentsRefNum);
3726 if (FTOC(fp)->c_flag & C_MODIFIED) {
3727 for (i = 0; i < kHFSPlusExtentDensity; i++) {
3728 volumeHeader->extentsFile.extents[i].startBlock =
3729 SWAP_BE32 (fp->ff_extents[i].startBlock);
3730 volumeHeader->extentsFile.extents[i].blockCount =
3731 SWAP_BE32 (fp->ff_extents[i].blockCount);
3732 }
3733 volumeHeader->extentsFile.logicalSize = SWAP_BE64 (fp->ff_size);
3734 volumeHeader->extentsFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
3735 volumeHeader->extentsFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
3736 FTOC(fp)->c_flag &= ~C_MODIFIED;
3737 }
3738
3739 /* Sync Catalog file meta data */
3740 fp = VTOF(vcb->catalogRefNum);
3741 if (FTOC(fp)->c_flag & C_MODIFIED) {
3742 for (i = 0; i < kHFSPlusExtentDensity; i++) {
3743 volumeHeader->catalogFile.extents[i].startBlock =
3744 SWAP_BE32 (fp->ff_extents[i].startBlock);
3745 volumeHeader->catalogFile.extents[i].blockCount =
3746 SWAP_BE32 (fp->ff_extents[i].blockCount);
3747 }
3748 volumeHeader->catalogFile.logicalSize = SWAP_BE64 (fp->ff_size);
3749 volumeHeader->catalogFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
3750 volumeHeader->catalogFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
3751 FTOC(fp)->c_flag &= ~C_MODIFIED;
3752 }
3753
3754 /* Sync Allocation file meta data */
3755 fp = VTOF(vcb->allocationsRefNum);
3756 if (FTOC(fp)->c_flag & C_MODIFIED) {
3757 for (i = 0; i < kHFSPlusExtentDensity; i++) {
3758 volumeHeader->allocationFile.extents[i].startBlock =
3759 SWAP_BE32 (fp->ff_extents[i].startBlock);
3760 volumeHeader->allocationFile.extents[i].blockCount =
3761 SWAP_BE32 (fp->ff_extents[i].blockCount);
3762 }
3763 volumeHeader->allocationFile.logicalSize = SWAP_BE64 (fp->ff_size);
3764 volumeHeader->allocationFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
3765 volumeHeader->allocationFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
3766 FTOC(fp)->c_flag &= ~C_MODIFIED;
3767 }
3768
3769 /* Sync Attribute file meta data */
3770 if (hfsmp->hfs_attribute_vp) {
3771 fp = VTOF(hfsmp->hfs_attribute_vp);
3772 for (i = 0; i < kHFSPlusExtentDensity; i++) {
3773 volumeHeader->attributesFile.extents[i].startBlock =
3774 SWAP_BE32 (fp->ff_extents[i].startBlock);
3775 volumeHeader->attributesFile.extents[i].blockCount =
3776 SWAP_BE32 (fp->ff_extents[i].blockCount);
3777 }
3778 FTOC(fp)->c_flag &= ~C_MODIFIED;
3779 volumeHeader->attributesFile.logicalSize = SWAP_BE64 (fp->ff_size);
3780 volumeHeader->attributesFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
3781 volumeHeader->attributesFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
3782 }
3783
3784 /* Sync Startup file meta data */
3785 if (hfsmp->hfs_startup_vp) {
3786 fp = VTOF(hfsmp->hfs_startup_vp);
3787 if (FTOC(fp)->c_flag & C_MODIFIED) {
3788 for (i = 0; i < kHFSPlusExtentDensity; i++) {
3789 volumeHeader->startupFile.extents[i].startBlock =
3790 SWAP_BE32 (fp->ff_extents[i].startBlock);
3791 volumeHeader->startupFile.extents[i].blockCount =
3792 SWAP_BE32 (fp->ff_extents[i].blockCount);
3793 }
3794 volumeHeader->startupFile.logicalSize = SWAP_BE64 (fp->ff_size);
3795 volumeHeader->startupFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
3796 volumeHeader->startupFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
3797 FTOC(fp)->c_flag &= ~C_MODIFIED;
3798 }
3799 }
3800
3801 done:
3802 MarkVCBClean(hfsmp);
3803 lck_mtx_unlock(&hfsmp->hfs_mutex);
3804
3805 /* If requested, flush out the alternate volume header */
3806 if (altflush && hfsmp->hfs_alt_id_sector) {
3807 if (buf_meta_bread(hfsmp->hfs_devvp,
3808 HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys),
3809 hfsmp->hfs_physical_block_size, NOCRED, &alt_bp) == 0) {
3810 if (hfsmp->jnl) {
3811 journal_modify_block_start(hfsmp->jnl, alt_bp);
3812 }
3813
3814 bcopy(volumeHeader, (char *)buf_dataptr(alt_bp) +
3815 HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size),
3816 kMDBSize);
3817
3818 if (hfsmp->jnl) {
3819 journal_modify_block_end(hfsmp->jnl, alt_bp, NULL, NULL);
3820 } else {
3821 (void) VNOP_BWRITE(alt_bp);
3822 }
3823 } else if (alt_bp)
3824 buf_brelse(alt_bp);
3825 }
3826
3827 if (hfsmp->jnl) {
3828 journal_modify_block_end(hfsmp->jnl, bp, NULL, NULL);
3829 } else {
3830 if (waitfor != MNT_WAIT)
3831 buf_bawrite(bp);
3832 else {
3833 retval = VNOP_BWRITE(bp);
3834 /* When critical data changes, flush the device cache */
3835 if (critical && (retval == 0)) {
3836 (void) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE,
3837 NULL, FWRITE, NULL);
3838 }
3839 }
3840 }
3841 hfs_end_transaction(hfsmp);
3842
3843 return (retval);
3844
3845 err_exit:
3846 if (alt_bp)
3847 buf_brelse(alt_bp);
3848 if (bp)
3849 buf_brelse(bp);
3850 hfs_end_transaction(hfsmp);
3851 return retval;
3852 }
3853
3854
3855 /*
3856 * Extend a file system.
3857 */
/*
 * hfs_extendfs - grow a mounted HFS Plus volume to 'newsize' bytes.
 *
 * Requirements: journaled HFS Plus only, no embedded (wrapper) volumes.
 * Non-root callers must own the volume root and have write access to
 * both the root directory and the underlying device.
 *
 * Returns 0 on success, or a VFS errno (EPERM, ENXIO, ENOSPC, EINVAL,
 * EOVERFLOW, EALREADY, or a mapped Mac OS error) on failure.
 */
int
hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
{
	struct proc *p = vfs_context_proc(context);
	kauth_cred_t cred = vfs_context_ucred(context);
	struct vnode *vp;
	struct vnode *devvp;
	struct buf *bp;
	struct filefork *fp = NULL;
	ExtendedVCB *vcb;
	struct cat_fork forkdata;		/* snapshot of bitmap fork for rollback */
	u_int64_t oldsize;
	u_int64_t newblkcnt;
	u_int64_t prev_phys_block_count;
	u_int32_t addblks;
	u_int64_t sectorcnt;
	u_int32_t sectorsize;
	u_int32_t phys_sectorsize;
	daddr64_t prev_alt_sector;
	daddr_t bitmapblks;
	int lockflags = 0;
	int error;
	int64_t oldBitmapSize;
	Boolean usedExtendFileC = false;	/* true if bitmap grew from existing FS space */
	int transaction_begun = 0;

	devvp = hfsmp->hfs_devvp;
	vcb = HFSTOVCB(hfsmp);

	/*
	 * - HFS Plus file systems only.
	 * - Journaling must be enabled.
	 * - No embedded volumes.
	 */
	if ((vcb->vcbSigWord == kHFSSigWord) ||
	    (hfsmp->jnl == NULL) ||
	    (vcb->hfsPlusIOPosOffset != 0)) {
		return (EPERM);
	}
	/*
	 * If extending file system by non-root, then verify
	 * ownership and check permissions.
	 */
	if (suser(cred, NULL)) {
		error = hfs_vget(hfsmp, kHFSRootFolderID, &vp, 0, 0);

		if (error)
			return (error);
		error = hfs_owner_rights(hfsmp, VTOC(vp)->c_uid, cred, p, 0);
		if (error == 0) {
			error = hfs_write_access(vp, cred, p, false);
		}
		hfs_unlock(VTOC(vp));
		vnode_put(vp);
		if (error)
			return (error);

		/* Caller must also be allowed to read/write the raw device. */
		error = vnode_authorize(devvp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, context);
		if (error)
			return (error);
	}
	if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&sectorsize, 0, context)) {
		return (ENXIO);
	}
	/* The device's sector size must still match what we mounted with. */
	if (sectorsize != hfsmp->hfs_logical_block_size) {
		return (ENXIO);
	}
	if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&sectorcnt, 0, context)) {
		return (ENXIO);
	}
	/* The device itself must be big enough to hold the new size. */
	if ((sectorsize * sectorcnt) < newsize) {
		printf("hfs_extendfs: not enough space on device\n");
		return (ENOSPC);
	}
	error = VNOP_IOCTL(devvp, DKIOCGETPHYSICALBLOCKSIZE, (caddr_t)&phys_sectorsize, 0, context);
	if (error) {
		if ((error != ENOTSUP) && (error != ENOTTY)) {
			return (ENXIO);
		}
		/* If ioctl is not supported, force physical and logical sector size to be same */
		phys_sectorsize = sectorsize;
	}
	oldsize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;

	/*
	 * Validate new size.
	 */
	if ((newsize <= oldsize) || (newsize % sectorsize) || (newsize % phys_sectorsize)) {
		printf("hfs_extendfs: invalid size\n");
		return (EINVAL);
	}
	newblkcnt = newsize / vcb->blockSize;
	/* totalBlocks is a 32-bit field on disk; refuse sizes that overflow it. */
	if (newblkcnt > (u_int64_t)0xFFFFFFFF)
		return (EOVERFLOW);

	addblks = newblkcnt - vcb->totalBlocks;

	if (hfs_resize_debug) {
		printf ("hfs_extendfs: old: size=%qu, blkcnt=%u\n", oldsize, hfsmp->totalBlocks);
		printf ("hfs_extendfs: new: size=%qu, blkcnt=%u, addblks=%u\n", newsize, (u_int32_t)newblkcnt, addblks);
	}
	printf("hfs_extendfs: will extend \"%s\" by %d blocks\n", vcb->vcbVN, addblks);

	/* Only one resize may be in flight at a time. */
	HFS_MOUNT_LOCK(hfsmp, TRUE);
	if (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) {
		HFS_MOUNT_UNLOCK(hfsmp, TRUE);
		error = EALREADY;
		goto out;
	}
	hfsmp->hfs_flags |= HFS_RESIZE_IN_PROGRESS;
	HFS_MOUNT_UNLOCK(hfsmp, TRUE);

	/* Start with a clean journal. */
	hfs_journal_flush(hfsmp, TRUE);

	/*
	 * Enclose changes inside a transaction.
	 */
	if (hfs_start_transaction(hfsmp) != 0) {
		error = EINVAL;
		goto out;
	}
	transaction_begun = 1;

	/*
	 * Note: we take the attributes lock in case we have an attribute data vnode
	 * which needs to change size.
	 */
	lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
	vp = vcb->allocationsRefNum;
	fp = VTOF(vp);
	/* Save the allocation-bitmap fork so we can restore it on failure. */
	bcopy(&fp->ff_data, &forkdata, sizeof(forkdata));

	/*
	 * Calculate additional space required (if any) by allocation bitmap.
	 */
	oldBitmapSize = fp->ff_size;
	bitmapblks = roundup((newblkcnt+7) / 8, vcb->vcbVBMIOSize) / vcb->blockSize;
	if (bitmapblks > (daddr_t)fp->ff_blocks)
		bitmapblks -= fp->ff_blocks;
	else
		bitmapblks = 0;

	/*
	 * The allocation bitmap can contain unused bits that are beyond end of
	 * current volume's allocation blocks.  Usually they are supposed to be
	 * zero'ed out but there can be cases where they might be marked as used.
	 * After extending the file system, those bits can represent valid
	 * allocation blocks, so we mark all the bits from the end of current
	 * volume to end of allocation bitmap as "free".
	 */
	BlockMarkFreeUnused(vcb, vcb->totalBlocks,
			(fp->ff_blocks * vcb->blockSize * 8) - vcb->totalBlocks);

	if (bitmapblks > 0) {
		daddr64_t blkno;
		daddr_t blkcnt;
		off_t bytesAdded;

		/*
		 * Get the bitmap's current size (in allocation blocks) so we know
		 * where to start zero filling once the new space is added.  We've
		 * got to do this before the bitmap is grown.
		 */
		blkno  = (daddr64_t)fp->ff_blocks;

		/*
		 * Try to grow the allocation file in the normal way, using allocation
		 * blocks already existing in the file system.  This way, we might be
		 * able to grow the bitmap contiguously, or at least in the metadata
		 * zone.
		 */
		error = ExtendFileC(vcb, fp, bitmapblks * vcb->blockSize, 0,
				kEFAllMask | kEFNoClumpMask | kEFReserveMask
				| kEFMetadataMask | kEFContigMask, &bytesAdded);

		if (error == 0) {
			usedExtendFileC = true;
		} else {
			/*
			 * If the above allocation failed, fall back to allocating the new
			 * extent of the bitmap from the space we're going to add.  Since those
			 * blocks don't yet belong to the file system, we have to update the
			 * extent list directly, and manually adjust the file size.
			 */
			bytesAdded = 0;
			error = AddFileExtent(vcb, fp, vcb->totalBlocks, bitmapblks);
			if (error) {
				printf("hfs_extendfs: error %d adding extents\n", error);
				goto out;
			}
			fp->ff_blocks += bitmapblks;
			VTOC(vp)->c_blocks = fp->ff_blocks;
			VTOC(vp)->c_flag |= C_MODIFIED;
		}

		/*
		 * Update the allocation file's size to include the newly allocated
		 * blocks.  Note that ExtendFileC doesn't do this, which is why this
		 * statement is outside the above "if" statement.
		 */
		fp->ff_size += (u_int64_t)bitmapblks * (u_int64_t)vcb->blockSize;

		/*
		 * Zero out the new bitmap blocks.
		 */
		{

			bp = NULL;
			blkcnt = bitmapblks;
			while (blkcnt > 0) {
				error = (int)buf_meta_bread(vp, blkno, vcb->blockSize, NOCRED, &bp);
				if (error) {
					if (bp) {
						buf_brelse(bp);
					}
					break;
				}
				bzero((char *)buf_dataptr(bp), vcb->blockSize);
				buf_markaged(bp);
				error = (int)buf_bwrite(bp);
				if (error)
					break;
				--blkcnt;
				++blkno;
			}
		}
		if (error) {
			printf("hfs_extendfs: error %d clearing blocks\n", error);
			goto out;
		}
		/*
		 * Mark the new bitmap space as allocated.
		 *
		 * Note that ExtendFileC will have marked any blocks it allocated, so
		 * this is only needed if we used AddFileExtent.  Also note that this
		 * has to come *after* the zero filling of new blocks in the case where
		 * we used AddFileExtent (since the part of the bitmap we're touching
		 * is in those newly allocated blocks).
		 */
		if (!usedExtendFileC) {
			error = BlockMarkAllocated(vcb, vcb->totalBlocks, bitmapblks);
			if (error) {
				printf("hfs_extendfs: error %d setting bitmap\n", error);
				goto out;
			}
			vcb->freeBlocks -= bitmapblks;
		}
	}
	/*
	 * Mark the new alternate VH as allocated.
	 * (With 512-byte blocks the alternate volume header spans two blocks.)
	 */
	if (vcb->blockSize == 512)
		error = BlockMarkAllocated(vcb, vcb->totalBlocks + addblks - 2, 2);
	else
		error = BlockMarkAllocated(vcb, vcb->totalBlocks + addblks - 1, 1);
	if (error) {
		printf("hfs_extendfs: error %d setting bitmap (VH)\n", error);
		goto out;
	}
	/*
	 * Mark the old alternate VH as free.
	 */
	if (vcb->blockSize == 512)
		(void) BlockMarkFree(vcb, vcb->totalBlocks - 2, 2);
	else
		(void) BlockMarkFree(vcb, vcb->totalBlocks - 1, 1);
	/*
	 * Adjust file system variables for new space.
	 */
	prev_phys_block_count = hfsmp->hfs_logical_block_count;
	prev_alt_sector = hfsmp->hfs_alt_id_sector;

	vcb->totalBlocks += addblks;
	vcb->freeBlocks += addblks;
	hfsmp->hfs_logical_block_count = newsize / sectorsize;
	hfsmp->hfs_alt_id_sector = (hfsmp->hfsPlusIOPosOffset / sectorsize) +
	                          HFS_ALT_SECTOR(sectorsize, hfsmp->hfs_logical_block_count);
	MarkVCBDirty(vcb);
	error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
	if (error) {
		printf("hfs_extendfs: couldn't flush volume headers (%d)", error);
		/*
		 * Restore to old state.
		 */
		if (usedExtendFileC) {
			(void) TruncateFileC(vcb, fp, oldBitmapSize, 0, FORK_IS_RSRC(fp),
								 FTOC(fp)->c_fileid, false);
		} else {
			fp->ff_blocks -= bitmapblks;
			fp->ff_size -= (u_int64_t)bitmapblks * (u_int64_t)vcb->blockSize;
			/*
			 * No need to mark the excess blocks free since those bitmap blocks
			 * are no longer part of the bitmap.  But we do need to undo the
			 * effect of the "vcb->freeBlocks -= bitmapblks" above.
			 */
			vcb->freeBlocks += bitmapblks;
		}
		vcb->totalBlocks -= addblks;
		vcb->freeBlocks -= addblks;
		hfsmp->hfs_logical_block_count = prev_phys_block_count;
		hfsmp->hfs_alt_id_sector = prev_alt_sector;
		MarkVCBDirty(vcb);
		/* Re-mark the (restored) alternate VH blocks as allocated. */
		if (vcb->blockSize == 512) {
			if (BlockMarkAllocated(vcb, vcb->totalBlocks - 2, 2)) {
				hfs_mark_volume_inconsistent(hfsmp);
			}
		} else {
			if (BlockMarkAllocated(vcb, vcb->totalBlocks - 1, 1)) {
				hfs_mark_volume_inconsistent(hfsmp);
			}
		}
		goto out;
	}
	/*
	 * Invalidate the old alternate volume header.
	 */
	bp = NULL;
	if (prev_alt_sector) {
		if (buf_meta_bread(hfsmp->hfs_devvp,
				HFS_PHYSBLK_ROUNDDOWN(prev_alt_sector, hfsmp->hfs_log_per_phys),
				hfsmp->hfs_physical_block_size, NOCRED, &bp) == 0) {
			journal_modify_block_start(hfsmp->jnl, bp);

			bzero((char *)buf_dataptr(bp) + HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size), kMDBSize);

			journal_modify_block_end(hfsmp->jnl, bp, NULL, NULL);
		} else if (bp) {
			buf_brelse(bp);
		}
	}

	/*
	 * Update the metadata zone size based on current volume size
	 */
	hfs_metadatazone_init(hfsmp, false);

	/*
	 * Adjust the size of hfsmp->hfs_attrdata_vp
	 */
	if (hfsmp->hfs_attrdata_vp) {
		struct cnode *attr_cp;
		struct filefork *attr_fp;

		if (vnode_get(hfsmp->hfs_attrdata_vp) == 0) {
			attr_cp = VTOC(hfsmp->hfs_attrdata_vp);
			attr_fp = VTOF(hfsmp->hfs_attrdata_vp);

			attr_cp->c_blocks = newblkcnt;
			attr_fp->ff_blocks = newblkcnt;
			attr_fp->ff_extents[0].blockCount = newblkcnt;
			attr_fp->ff_size = (off_t) newblkcnt * hfsmp->blockSize;
			ubc_setsize(hfsmp->hfs_attrdata_vp, attr_fp->ff_size);
			vnode_put(hfsmp->hfs_attrdata_vp);
		}
	}

	/*
	 * Update the R/B Tree if necessary.  Since we don't have to drop the systemfile
	 * locks in the middle of these operations like we do in the truncate case
	 * where we have to relocate files, we can only update the red-black tree
	 * if there were actual changes made to the bitmap.  Also, we can't really scan the
	 * new portion of the bitmap before it has been allocated. The BlockMarkAllocated
	 * routines are smart enough to avoid the r/b tree if the portion they are manipulating is
	 * not currently controlled by the tree.
	 *
	 * We only update hfsmp->allocLimit if totalBlocks actually increased.
	 */

	if (error == 0) {
		UpdateAllocLimit(hfsmp, hfsmp->totalBlocks);
	}

	/* Log successful extending */
	printf("hfs_extendfs: extended \"%s\" to %d blocks (was %d blocks)\n",
	       hfsmp->vcbVN, hfsmp->totalBlocks, (u_int32_t)(oldsize/hfsmp->blockSize));

out:
	if (error && fp) {
		/* Restore allocation fork. */
		bcopy(&forkdata, &fp->ff_data, sizeof(forkdata));
		VTOC(vp)->c_blocks = fp->ff_blocks;

	}

	HFS_MOUNT_LOCK(hfsmp, TRUE);
	hfsmp->hfs_flags &= ~HFS_RESIZE_IN_PROGRESS;
	HFS_MOUNT_UNLOCK(hfsmp, TRUE);
	if (lockflags) {
		hfs_systemfile_unlock(hfsmp, lockflags);
	}
	if (transaction_begun) {
		hfs_end_transaction(hfsmp);
		hfs_journal_flush(hfsmp, FALSE);
		/* Just to be sure, sync all data to the disk */
		(void) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
	}

	return MacToVFSError(error);
}
4258
/* Smallest volume size (32 MB) that hfs_truncatefs will shrink to. */
#define HFS_MIN_SIZE (32LL * 1024LL * 1024LL)
4260
4261 /*
4262 * Truncate a file system (while still mounted).
4263 */
4264 int
4265 hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
4266 {
4267 struct buf *bp = NULL;
4268 u_int64_t oldsize;
4269 u_int32_t newblkcnt;
4270 u_int32_t reclaimblks = 0;
4271 int lockflags = 0;
4272 int transaction_begun = 0;
4273 Boolean updateFreeBlocks = false;
4274 Boolean disable_sparse = false;
4275 int error = 0;
4276
4277 lck_mtx_lock(&hfsmp->hfs_mutex);
4278 if (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) {
4279 lck_mtx_unlock(&hfsmp->hfs_mutex);
4280 return (EALREADY);
4281 }
4282 hfsmp->hfs_flags |= HFS_RESIZE_IN_PROGRESS;
4283 hfsmp->hfs_resize_blocksmoved = 0;
4284 hfsmp->hfs_resize_totalblocks = 0;
4285 hfsmp->hfs_resize_progress = 0;
4286 lck_mtx_unlock(&hfsmp->hfs_mutex);
4287
4288 /*
4289 * - Journaled HFS Plus volumes only.
4290 * - No embedded volumes.
4291 */
4292 if ((hfsmp->jnl == NULL) ||
4293 (hfsmp->hfsPlusIOPosOffset != 0)) {
4294 error = EPERM;
4295 goto out;
4296 }
4297 oldsize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
4298 newblkcnt = newsize / hfsmp->blockSize;
4299 reclaimblks = hfsmp->totalBlocks - newblkcnt;
4300
4301 if (hfs_resize_debug) {
4302 printf ("hfs_truncatefs: old: size=%qu, blkcnt=%u, freeblks=%u\n", oldsize, hfsmp->totalBlocks, hfs_freeblks(hfsmp, 1));
4303 printf ("hfs_truncatefs: new: size=%qu, blkcnt=%u, reclaimblks=%u\n", newsize, newblkcnt, reclaimblks);
4304 }
4305
4306 /* Make sure new size is valid. */
4307 if ((newsize < HFS_MIN_SIZE) ||
4308 (newsize >= oldsize) ||
4309 (newsize % hfsmp->hfs_logical_block_size) ||
4310 (newsize % hfsmp->hfs_physical_block_size)) {
4311 printf ("hfs_truncatefs: invalid size (newsize=%qu, oldsize=%qu)\n", newsize, oldsize);
4312 error = EINVAL;
4313 goto out;
4314 }
4315
4316 /*
4317 * Make sure that the file system has enough free blocks reclaim.
4318 *
4319 * Before resize, the disk is divided into four zones -
4320 * A. Allocated_Stationary - These are allocated blocks that exist
4321 * before the new end of disk. These blocks will not be
4322 * relocated or modified during resize.
4323 * B. Free_Stationary - These are free blocks that exist before the
4324 * new end of disk. These blocks can be used for any new
4325 * allocations during resize, including allocation for relocating
4326 * data from the area of disk being reclaimed.
4327 * C. Allocated_To-Reclaim - These are allocated blocks that exist
4328 * beyond the new end of disk. These blocks need to be reclaimed
4329 * during resize by allocating equal number of blocks in Free
4330 * Stationary zone and copying the data.
4331 * D. Free_To-Reclaim - These are free blocks that exist beyond the
4332 * new end of disk. Nothing special needs to be done to reclaim
4333 * them.
4334 *
4335 * Total number of blocks on the disk before resize:
4336 * ------------------------------------------------
4337 * Total Blocks = Allocated_Stationary + Free_Stationary +
4338 * Allocated_To-Reclaim + Free_To-Reclaim
4339 *
4340 * Total number of blocks that need to be reclaimed:
4341 * ------------------------------------------------
4342 * Blocks to Reclaim = Allocated_To-Reclaim + Free_To-Reclaim
4343 *
4344 * Note that the check below also makes sure that we have enough space
4345 * to relocate data from Allocated_To-Reclaim to Free_Stationary.
4346 * Therefore we do not need to check total number of blocks to relocate
4347 * later in the code.
4348 *
4349 * The condition below gets converted to:
4350 *
4351 * Allocated To-Reclaim + Free To-Reclaim >= Free Stationary + Free To-Reclaim
4352 *
4353 * which is equivalent to:
4354 *
4355 * Allocated To-Reclaim >= Free Stationary
4356 */
4357 if (reclaimblks >= hfs_freeblks(hfsmp, 1)) {
4358 printf("hfs_truncatefs: insufficient space (need %u blocks; have %u free blocks)\n", reclaimblks, hfs_freeblks(hfsmp, 1));
4359 error = ENOSPC;
4360 goto out;
4361 }
4362
4363 /* Start with a clean journal. */
4364 hfs_journal_flush(hfsmp, TRUE);
4365
4366 if (hfs_start_transaction(hfsmp) != 0) {
4367 error = EINVAL;
4368 goto out;
4369 }
4370 transaction_begun = 1;
4371
4372 /* Take the bitmap lock to update the alloc limit field */
4373 lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
4374
4375 /*
4376 * Prevent new allocations from using the part we're trying to truncate.
4377 *
4378 * NOTE: allocLimit is set to the allocation block number where the new
4379 * alternate volume header will be. That way there will be no files to
4380 * interfere with allocating the new alternate volume header, and no files
4381 * in the allocation blocks beyond (i.e. the blocks we're trying to
4382 * truncate away.
4383 *
4384 * Also shrink the red-black tree if needed.
4385 */
4386 if (hfsmp->blockSize == 512) {
4387 error = UpdateAllocLimit (hfsmp, newblkcnt - 2);
4388 }
4389 else {
4390 error = UpdateAllocLimit (hfsmp, newblkcnt - 1);
4391 }
4392
4393 /* Sparse devices use first fit allocation which is not ideal
4394 * for volume resize which requires best fit allocation. If a
4395 * sparse device is being truncated, disable the sparse device
4396 * property temporarily for the duration of resize. Also reset
4397 * the free extent cache so that it is rebuilt as sorted by
4398 * totalBlocks instead of startBlock.
4399 *
4400 * Note that this will affect all allocations on the volume and
4401 * ideal fix would be just to modify resize-related allocations,
4402 * but it will result in complexity like handling of two free
4403 * extent caches sorted differently, etc. So we stick to this
4404 * solution for now.
4405 */
4406 HFS_MOUNT_LOCK(hfsmp, TRUE);
4407 if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
4408 hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
4409 ResetVCBFreeExtCache(hfsmp);
4410 disable_sparse = true;
4411 }
4412
4413 /*
4414 * Update the volume free block count to reflect the total number
4415 * of free blocks that will exist after a successful resize.
4416 * Relocation of extents will result in no net change in the total
4417 * free space on the disk. Therefore the code that allocates
4418 * space for new extent and deallocates the old extent explicitly
4419 * prevents updating the volume free block count. It will also
4420 * prevent false disk full error when the number of blocks in
4421 * an extent being relocated is more than the free blocks that
4422 * will exist after the volume is resized.
4423 */
4424 hfsmp->freeBlocks -= reclaimblks;
4425 updateFreeBlocks = true;
4426 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
4427
4428 if (lockflags) {
4429 hfs_systemfile_unlock(hfsmp, lockflags);
4430 lockflags = 0;
4431 }
4432
4433 /*
4434 * Update the metadata zone size to match the new volume size,
4435 * and if it too less, metadata zone might be disabled.
4436 */
4437 hfs_metadatazone_init(hfsmp, false);
4438
4439 /*
4440 * If some files have blocks at or beyond the location of the
4441 * new alternate volume header, recalculate free blocks and
4442 * reclaim blocks. Otherwise just update free blocks count.
4443 *
4444 * The current allocLimit is set to the location of new alternate
4445 * volume header, and reclaimblks are the total number of blocks
4446 * that need to be reclaimed. So the check below is really
4447 * ignoring the blocks allocated for old alternate volume header.
4448 */
4449 if (hfs_isallocated(hfsmp, hfsmp->allocLimit, reclaimblks)) {
4450 /*
4451 * hfs_reclaimspace will use separate transactions when
4452 * relocating files (so we don't overwhelm the journal).
4453 */
4454 hfs_end_transaction(hfsmp);
4455 transaction_begun = 0;
4456
4457 /* Attempt to reclaim some space. */
4458 error = hfs_reclaimspace(hfsmp, hfsmp->allocLimit, reclaimblks, context);
4459 if (error != 0) {
4460 printf("hfs_truncatefs: couldn't reclaim space on %s (error=%d)\n", hfsmp->vcbVN, error);
4461 error = ENOSPC;
4462 goto out;
4463 }
4464 if (hfs_start_transaction(hfsmp) != 0) {
4465 error = EINVAL;
4466 goto out;
4467 }
4468 transaction_begun = 1;
4469
4470 /* Check if we're clear now. */
4471 error = hfs_isallocated(hfsmp, hfsmp->allocLimit, reclaimblks);
4472 if (error != 0) {
4473 printf("hfs_truncatefs: didn't reclaim enough space on %s (error=%d)\n", hfsmp->vcbVN, error);
4474 error = EAGAIN; /* tell client to try again */
4475 goto out;
4476 }
4477 }
4478
4479 /*
4480 * Note: we take the attributes lock in case we have an attribute data vnode
4481 * which needs to change size.
4482 */
4483 lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
4484
4485 /*
4486 * Allocate last 1KB for alternate volume header.
4487 */
4488 error = BlockMarkAllocated(hfsmp, hfsmp->allocLimit, (hfsmp->blockSize == 512) ? 2 : 1);
4489 if (error) {
4490 printf("hfs_truncatefs: Error %d allocating new alternate volume header\n", error);
4491 goto out;
4492 }
4493
4494 /*
4495 * Mark the old alternate volume header as free.
4496 * We don't bother shrinking allocation bitmap file.
4497 */
4498 if (hfsmp->blockSize == 512)
4499 (void) BlockMarkFree(hfsmp, hfsmp->totalBlocks - 2, 2);
4500 else
4501 (void) BlockMarkFree(hfsmp, hfsmp->totalBlocks - 1, 1);
4502
4503 /*
4504 * Invalidate the existing alternate volume header.
4505 *
4506 * Don't include this in a transaction (don't call journal_modify_block)
4507 * since this block will be outside of the truncated file system!
4508 */
4509 if (hfsmp->hfs_alt_id_sector) {
4510 error = buf_meta_bread(hfsmp->hfs_devvp,
4511 HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys),
4512 hfsmp->hfs_physical_block_size, NOCRED, &bp);
4513 if (error == 0) {
4514 bzero((void*)((char *)buf_dataptr(bp) + HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size)), kMDBSize);
4515 (void) VNOP_BWRITE(bp);
4516 } else {
4517 if (bp) {
4518 buf_brelse(bp);
4519 }
4520 }
4521 bp = NULL;
4522 }
4523
4524 /* Log successful shrinking. */
4525 printf("hfs_truncatefs: shrank \"%s\" to %d blocks (was %d blocks)\n",
4526 hfsmp->vcbVN, newblkcnt, hfsmp->totalBlocks);
4527
4528 /*
4529 * Adjust file system variables and flush them to disk.
4530 */
4531 hfsmp->totalBlocks = newblkcnt;
4532 hfsmp->hfs_logical_block_count = newsize / hfsmp->hfs_logical_block_size;
4533 hfsmp->hfs_alt_id_sector = HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, hfsmp->hfs_logical_block_count);
4534 MarkVCBDirty(hfsmp);
4535 error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
4536 if (error)
4537 panic("hfs_truncatefs: unexpected error flushing volume header (%d)\n", error);
4538
4539 /*
4540 * Adjust the size of hfsmp->hfs_attrdata_vp
4541 */
4542 if (hfsmp->hfs_attrdata_vp) {
4543 struct cnode *cp;
4544 struct filefork *fp;
4545
4546 if (vnode_get(hfsmp->hfs_attrdata_vp) == 0) {
4547 cp = VTOC(hfsmp->hfs_attrdata_vp);
4548 fp = VTOF(hfsmp->hfs_attrdata_vp);
4549
4550 cp->c_blocks = newblkcnt;
4551 fp->ff_blocks = newblkcnt;
4552 fp->ff_extents[0].blockCount = newblkcnt;
4553 fp->ff_size = (off_t) newblkcnt * hfsmp->blockSize;
4554 ubc_setsize(hfsmp->hfs_attrdata_vp, fp->ff_size);
4555 vnode_put(hfsmp->hfs_attrdata_vp);
4556 }
4557 }
4558
4559 out:
4560 /*
4561 * Update the allocLimit to acknowledge the last one or two blocks now.
4562 * Add it to the tree as well if necessary.
4563 */
4564 UpdateAllocLimit (hfsmp, hfsmp->totalBlocks);
4565
4566 HFS_MOUNT_LOCK(hfsmp, TRUE);
4567 if (disable_sparse == true) {
4568 /* Now that resize is completed, set the volume to be sparse
4569 * device again so that all further allocations will be first
4570 * fit instead of best fit. Reset free extent cache so that
4571 * it is rebuilt.
4572 */
4573 hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
4574 ResetVCBFreeExtCache(hfsmp);
4575 }
4576
4577 if (error && (updateFreeBlocks == true)) {
4578 hfsmp->freeBlocks += reclaimblks;
4579 }
4580
4581 if (hfsmp->nextAllocation >= hfsmp->allocLimit) {
4582 hfsmp->nextAllocation = hfsmp->hfs_metazone_end + 1;
4583 }
4584 hfsmp->hfs_flags &= ~HFS_RESIZE_IN_PROGRESS;
4585 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
4586
4587 /* On error, reset the metadata zone for original volume size */
4588 if (error && (updateFreeBlocks == true)) {
4589 hfs_metadatazone_init(hfsmp, false);
4590 }
4591
4592 if (lockflags) {
4593 hfs_systemfile_unlock(hfsmp, lockflags);
4594 }
4595 if (transaction_begun) {
4596 hfs_end_transaction(hfsmp);
4597 hfs_journal_flush(hfsmp, FALSE);
4598 /* Just to be sure, sync all data to the disk */
4599 (void) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
4600 }
4601
4602 return MacToVFSError(error);
4603 }
4604
4605
4606 /*
4607 * Invalidate the physical block numbers associated with buffer cache blocks
4608 * in the given extent of the given vnode.
4609 */
4610 struct hfs_inval_blk_no {
4611 daddr64_t sectorStart;
4612 daddr64_t sectorCount;
4613 };
4614 static int
4615 hfs_invalidate_block_numbers_callback(buf_t bp, void *args_in)
4616 {
4617 daddr64_t blkno;
4618 struct hfs_inval_blk_no *args;
4619
4620 blkno = buf_blkno(bp);
4621 args = args_in;
4622
4623 if (blkno >= args->sectorStart && blkno < args->sectorStart+args->sectorCount)
4624 buf_setblkno(bp, buf_lblkno(bp));
4625
4626 return BUF_RETURNED;
4627 }
4628 static void
4629 hfs_invalidate_sectors(struct vnode *vp, daddr64_t sectorStart, daddr64_t sectorCount)
4630 {
4631 struct hfs_inval_blk_no args;
4632 args.sectorStart = sectorStart;
4633 args.sectorCount = sectorCount;
4634
4635 buf_iterate(vp, hfs_invalidate_block_numbers_callback, BUF_SCAN_DIRTY|BUF_SCAN_CLEAN, &args);
4636 }
4637
4638
4639 /*
4640 * Copy the contents of an extent to a new location. Also invalidates the
4641 * physical block number of any buffer cache block in the copied extent
4642 * (so that if the block is written, it will go through VNOP_BLOCKMAP to
4643 * determine the new physical block number).
4644 */
4645 static int
4646 hfs_copy_extent(
4647 struct hfsmount *hfsmp,
4648 struct vnode *vp, /* The file whose extent is being copied. */
4649 u_int32_t oldStart, /* The start of the source extent. */
4650 u_int32_t newStart, /* The start of the destination extent. */
4651 u_int32_t blockCount, /* The number of allocation blocks to copy. */
4652 vfs_context_t context)
4653 {
4654 int err = 0;
4655 size_t bufferSize;
4656 void *buffer = NULL;
4657 struct vfsioattr ioattr;
4658 buf_t bp = NULL;
4659 off_t resid;
4660 size_t ioSize;
4661 u_int32_t ioSizeSectors; /* Device sectors in this I/O */
4662 daddr64_t srcSector, destSector;
4663 u_int32_t sectorsPerBlock = hfsmp->blockSize / hfsmp->hfs_logical_block_size;
4664 #if CONFIG_PROTECT
4665 int cpenabled = 0;
4666 #endif
4667
4668 /*
4669 * Sanity check that we have locked the vnode of the file we're copying.
4670 *
4671 * But since hfs_systemfile_lock() doesn't actually take the lock on
4672 * the allocation file if a journal is active, ignore the check if the
4673 * file being copied is the allocation file.
4674 */
4675 struct cnode *cp = VTOC(vp);
4676 if (cp != hfsmp->hfs_allocation_cp && cp->c_lockowner != current_thread())
4677 panic("hfs_copy_extent: vp=%p (cp=%p) not owned?\n", vp, cp);
4678
4679 #if CONFIG_PROTECT
4680 /* Prepare the CP blob and get it ready for use */
4681 if (!vnode_issystem (vp) && vnode_isreg(vp) &&
4682 cp_fs_protected (hfsmp->hfs_mp)) {
4683 int cp_err = 0;
4684 cp_err = cp_handle_relocate (cp);
4685 if (cp_err) {
4686 /*
4687 * can't copy the file because we couldn't set up keys.
4688 * bail out
4689 */
4690 return cp_err;
4691 }
4692 else {
4693 cpenabled = 1;
4694 }
4695 }
4696 #endif
4697
4698 /*
4699 * Determine the I/O size to use
4700 *
4701 * NOTE: Many external drives will result in an ioSize of 128KB.
4702 * TODO: Should we use a larger buffer, doing several consecutive
4703 * reads, then several consecutive writes?
4704 */
4705 vfs_ioattr(hfsmp->hfs_mp, &ioattr);
4706 bufferSize = MIN(ioattr.io_maxreadcnt, ioattr.io_maxwritecnt);
4707 if (kmem_alloc(kernel_map, (vm_offset_t*) &buffer, bufferSize))
4708 return ENOMEM;
4709
4710 /* Get a buffer for doing the I/O */
4711 bp = buf_alloc(hfsmp->hfs_devvp);
4712 buf_setdataptr(bp, (uintptr_t)buffer);
4713
4714 resid = (off_t) blockCount * (off_t) hfsmp->blockSize;
4715 srcSector = (daddr64_t) oldStart * hfsmp->blockSize / hfsmp->hfs_logical_block_size;
4716 destSector = (daddr64_t) newStart * hfsmp->blockSize / hfsmp->hfs_logical_block_size;
4717 while (resid > 0) {
4718 ioSize = MIN(bufferSize, (size_t) resid);
4719 ioSizeSectors = ioSize / hfsmp->hfs_logical_block_size;
4720
4721 /* Prepare the buffer for reading */
4722 buf_reset(bp, B_READ);
4723 buf_setsize(bp, ioSize);
4724 buf_setcount(bp, ioSize);
4725 buf_setblkno(bp, srcSector);
4726 buf_setlblkno(bp, srcSector);
4727
4728 /* Attach the CP to the buffer */
4729 #if CONFIG_PROTECT
4730 if (cpenabled) {
4731 buf_setcpaddr (bp, cp->c_cpentry);
4732 }
4733 #endif
4734
4735 /* Do the read */
4736 err = VNOP_STRATEGY(bp);
4737 if (!err)
4738 err = buf_biowait(bp);
4739 if (err) {
4740 printf("hfs_copy_extent: Error %d from VNOP_STRATEGY (read)\n", err);
4741 break;
4742 }
4743
4744 /* Prepare the buffer for writing */
4745 buf_reset(bp, B_WRITE);
4746 buf_setsize(bp, ioSize);
4747 buf_setcount(bp, ioSize);
4748 buf_setblkno(bp, destSector);
4749 buf_setlblkno(bp, destSector);
4750 if (vnode_issystem(vp) && journal_uses_fua(hfsmp->jnl))
4751 buf_markfua(bp);
4752
4753 #if CONFIG_PROTECT
4754 /* Attach the CP to the buffer */
4755 if (cpenabled) {
4756 buf_setcpaddr (bp, cp->c_cpentry);
4757 }
4758 #endif
4759
4760 /* Do the write */
4761 vnode_startwrite(hfsmp->hfs_devvp);
4762 err = VNOP_STRATEGY(bp);
4763 if (!err)
4764 err = buf_biowait(bp);
4765 if (err) {
4766 printf("hfs_copy_extent: Error %d from VNOP_STRATEGY (write)\n", err);
4767 break;
4768 }
4769
4770 resid -= ioSize;
4771 srcSector += ioSizeSectors;
4772 destSector += ioSizeSectors;
4773 }
4774 if (bp)
4775 buf_free(bp);
4776 if (buffer)
4777 kmem_free(kernel_map, (vm_offset_t)buffer, bufferSize);
4778
4779 /* Make sure all writes have been flushed to disk. */
4780 if (vnode_issystem(vp) && !journal_uses_fua(hfsmp->jnl)) {
4781 err = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
4782 if (err) {
4783 printf("hfs_copy_extent: DKIOCSYNCHRONIZECACHE failed (%d)\n", err);
4784 err = 0; /* Don't fail the copy. */
4785 }
4786 }
4787
4788 if (!err)
4789 hfs_invalidate_sectors(vp, (daddr64_t)oldStart*sectorsPerBlock, (daddr64_t)blockCount*sectorsPerBlock);
4790
4791 return err;
4792 }
4793
4794
/* Structure to store state of reclaiming extents from a
 * given file.  hfs_reclaim_file()/hfs_reclaim_xattr()
 * initializes the values in this structure which are then
 * used by code that reclaims and splits the extents.
 */
struct hfs_reclaim_extent_info {
	struct vnode *vp;		/* Vnode of the file whose extents are being reclaimed */
	u_int32_t fileID;		/* Catalog node ID of the file */
	u_int8_t forkType;		/* Data or resource fork */
	u_int8_t is_dirlink;             /* Extent belongs to directory hard link */
	u_int8_t is_sysfile;             /* Extent belongs to system file */
	u_int8_t is_xattr;               /* Extent belongs to extent-based xattr */
	u_int8_t extent_index;		/* Index of current extent within 'extents' below */
	int lockflags;                   /* Locks that reclaim and split code should grab before modifying the extent record */
	u_int32_t blocks_relocated;      /* Total blocks relocated for this file till now */
	u_int32_t recStartBlock;         /* File allocation block number (FABN) for current extent record */
	u_int32_t cur_blockCount;        /* Number of allocation blocks that have been checked for reclaim */
	struct filefork *catalog_fp;     /* If non-NULL, extent is from catalog record */
	union record {
		HFSPlusExtentRecord overflow;/* Extent record from overflow extents btree */
		HFSPlusAttrRecord xattr;     /* Attribute record for large EAs */
	} record;
	HFSPlusExtentDescriptor *extents;    /* Pointer to current extent record being processed.
	                                      * For catalog extent record, points to the correct
	                                      * extent information in filefork.  For overflow extent
	                                      * record, or xattr record, points to extent record
	                                      * in the structure above
	                                      */
	struct cat_desc *dirlink_desc;	/* Catalog descriptor for directory hard link */
	struct cat_attr *dirlink_attr;	/* Catalog attributes for directory hard link */
	struct filefork *dirlink_fork;	     /* For directory hard links, fp points actually to this */
	struct BTreeIterator *iterator;      /* Shared read/write iterator, hfs_reclaim_file/xattr()
	                                      * use it for reading and hfs_reclaim_extent()/hfs_split_extent()
	                                      * use it for writing updated extent record
	                                      */
	struct FSBufferDescriptor btdata;    /* Shared btdata for reading/writing extent record, same as iterator above */
	u_int16_t recordlen;		/* Length (bytes) of the record read into 'record' above */
	int overflow_count;                  /* For debugging, counter for overflow extent record */
	FCB *fcb;                            /* Pointer to the current btree being traversed */
};
4835
4836 /*
4837 * Split the current extent into two extents, with first extent
4838 * to contain given number of allocation blocks. Splitting of
4839 * extent creates one new extent entry which can result in
4840 * shifting of many entries through all the extent records of a
4841 * file, and/or creating a new extent record in the overflow
4842 * extent btree.
4843 *
4844 * Example:
4845 * The diagram below represents two consecutive extent records,
4846 * for simplicity, lets call them record X and X+1 respectively.
4847 * Interesting extent entries have been denoted by letters.
4848 * If the letter is unchanged before and after split, it means
4849 * that the extent entry was not modified during the split.
4850 * A '.' means that the entry remains unchanged after the split
4851 * and is not relevant for our example. A '0' means that the
4852 * extent entry is empty.
4853 *
4854 * If there isn't sufficient contiguous free space to relocate
4855 * an extent (extent "C" below), we will have to break the one
4856 * extent into multiple smaller extents, and relocate each of
4857 * the smaller extents individually. The way we do this is by
4858 * finding the largest contiguous free space that is currently
4859 * available (N allocation blocks), and then convert extent "C"
4860 * into two extents, C1 and C2, that occupy exactly the same
4861 * allocation blocks as extent C. Extent C1 is the first
4862 * N allocation blocks of extent C, and extent C2 is the remainder
4863 * of extent C. Then we can relocate extent C1 since we know
4864 * we have enough contiguous free space to relocate it in its
4865 * entirety. We then repeat the process starting with extent C2.
4866 *
4867 * In record X, only the entries following entry C are shifted, and
4868 * the original entry C is replaced with two entries C1 and C2 which
4869 * are actually two extent entries for contiguous allocation blocks.
4870 *
4871 * Note that the entry E from record X is shifted into record X+1 as
4872 * the new first entry. Since the first entry of record X+1 is updated,
4873 * the FABN will also get updated with the blockCount of entry E.
4874 * This also results in shifting of all extent entries in record X+1.
4875 * Note that the number of empty entries after the split has been
4876 * changed from 3 to 2.
4877 *
4878 * Before:
4879 * record X record X+1
4880 * ---------------------===--------- ---------------------------------
4881 * | A | . | . | . | B | C | D | E | | F | . | . | . | G | 0 | 0 | 0 |
4882 * ---------------------===--------- ---------------------------------
4883 *
4884 * After:
4885 * ---------------------=======----- ---------------------------------
4886 * | A | . | . | . | B | C1| C2| D | | E | F | . | . | . | G | 0 | 0 |
4887 * ---------------------=======----- ---------------------------------
4888 *
4889 * C1.startBlock = C.startBlock
4890 * C1.blockCount = N
4891 *
4892 * C2.startBlock = C.startBlock + N
4893 * C2.blockCount = C.blockCount - N
4894 *
4895 * FABN = old FABN - E.blockCount
4896 *
4897 * Inputs:
4898 * extent_info - This is the structure that contains state about
4899 * the current file, extent, and extent record that
4900 * is being relocated. This structure is shared
4901 * among code that traverses through all the extents
4902 * of the file, code that relocates extents, and
4903 * code that splits the extent.
4904 * Output:
4905 * Zero on success, non-zero on failure.
4906 */
static int
hfs_split_extent(struct hfs_reclaim_extent_info *extent_info, uint32_t newBlockCount)
{
	int error = 0;
	int index = extent_info->extent_index;
	int i;
	HFSPlusExtentDescriptor shift_extent; /* Extent entry that should be shifted into next extent record */
	HFSPlusExtentDescriptor last_extent;
	HFSPlusExtentDescriptor *extents; /* Pointer to current extent record being manipulated */
	HFSPlusExtentRecord *extents_rec = NULL;
	HFSPlusExtentKey *extents_key = NULL;
	HFSPlusAttrRecord *xattr_rec = NULL;
	HFSPlusAttrKey *xattr_key = NULL;
	/* NOTE(review): BTreeIterator is a sizable structure and lives on the
	 * kernel stack here — confirm stack headroom on this call path.
	 */
	struct BTreeIterator iterator;
	struct FSBufferDescriptor btdata;
	uint16_t reclen;
	uint32_t read_recStartBlock;	/* Starting allocation block number to read old extent record */
	uint32_t write_recStartBlock;	/* Starting allocation block number to insert newly updated extent record */
	Boolean create_record = false;
	Boolean is_xattr;
	struct cnode *cp;

	/* Cache frequently-used state from the shared reclaim structure.
	 * 'extents' initially points at the record containing the extent to
	 * split; later it is re-pointed at a locally allocated buffer used
	 * to walk the following overflow records.
	 */
	is_xattr = extent_info->is_xattr;
	extents = extent_info->extents;
	cp = VTOC(extent_info->vp);

	if (hfs_resize_debug) {
		printf ("hfs_split_extent: Split record:%u recStartBlock=%u %u:(%u,%u) for %u blocks\n", extent_info->overflow_count, extent_info->recStartBlock, index, extents[index].startBlock, extents[index].blockCount, newBlockCount);
	}

	/* Extents overflow btree can not have more than 8 extents.
	 * No split allowed if the 8th extent is already used.
	 */
	if ((extent_info->fileID == kHFSExtentsFileID) && (extents[kHFSPlusExtentDensity - 1].blockCount != 0)) {
		printf ("hfs_split_extent: Maximum 8 extents allowed for extents overflow btree, cannot split further.\n");
		error = ENOSPC;
		goto out;
	}

	/* Determine the starting allocation block number for the following
	 * overflow extent record, if any, before the current record
	 * gets modified.
	 */
	read_recStartBlock = extent_info->recStartBlock;
	for (i = 0; i < kHFSPlusExtentDensity; i++) {
		if (extents[i].blockCount == 0) {
			break;
		}
		read_recStartBlock += extents[i].blockCount;
	}

	/* Shift and split */
	if (index == kHFSPlusExtentDensity-1) {
		/* The new extent created after split will go into following overflow extent record */
		shift_extent.startBlock = extents[index].startBlock + newBlockCount;
		shift_extent.blockCount = extents[index].blockCount - newBlockCount;

		/* Last extent in the record will be split, so nothing to shift */
	} else {
		/* Splitting of extents can result in at most of one
		 * extent entry to be shifted into following overflow extent
		 * record.  So, store the last extent entry for later.
		 */
		shift_extent = extents[kHFSPlusExtentDensity-1];
		if ((hfs_resize_debug) && (shift_extent.blockCount != 0)) {
			printf ("hfs_split_extent: Save 7:(%u,%u) to shift into overflow record\n", shift_extent.startBlock, shift_extent.blockCount);
		}

		/* Start shifting extent information from the end of the extent
		 * record to the index where we want to insert the new extent.
		 * Note that kHFSPlusExtentDensity-1 is already saved above, and
		 * does not need to be shifted.  The extent entry that is being
		 * split does not get shifted.
		 */
		for (i = kHFSPlusExtentDensity-2; i > index; i--) {
			if (hfs_resize_debug) {
				if (extents[i].blockCount) {
					printf ("hfs_split_extent: Shift %u:(%u,%u) to %u:(%u,%u)\n", i, extents[i].startBlock, extents[i].blockCount, i+1, extents[i].startBlock, extents[i].blockCount);
				}
			}
			extents[i+1] = extents[i];
		}
	}

	if (index == kHFSPlusExtentDensity-1) {
		/* The second half of the extent being split will be the overflow
		 * entry that will go into following overflow extent record.  The
		 * value has been stored in 'shift_extent' above, so there is
		 * nothing to be done here.
		 */
	} else {
		/* Update the values in the second half of the extent being split
		 * before updating the first half of the split.  Note that the
		 * extent to split or first half of the split is at index 'index'
		 * and a new extent or second half of the split will be inserted at
		 * 'index+1' or into following overflow extent record.
		 */
		extents[index+1].startBlock = extents[index].startBlock + newBlockCount;
		extents[index+1].blockCount = extents[index].blockCount - newBlockCount;
	}
	/* Update the extent being split, only the block count will change */
	extents[index].blockCount = newBlockCount;

	if (hfs_resize_debug) {
		printf ("hfs_split_extent: Split %u:(%u,%u) and ", index, extents[index].startBlock, extents[index].blockCount);
		if (index != kHFSPlusExtentDensity-1) {
			printf ("%u:(%u,%u)\n", index+1, extents[index+1].startBlock, extents[index+1].blockCount);
		} else {
			printf ("overflow:(%u,%u)\n", shift_extent.startBlock, shift_extent.blockCount);
		}
	}

	/* Write out information about the newly split extent to the disk */
	if (extent_info->catalog_fp) {
		/* (extent_info->catalog_fp != NULL) means the newly split
		 * extent exists in the catalog record.  This means that
		 * the cnode was updated.  Therefore, to write out the changes,
		 * mark the cnode as modified.   We cannot call hfs_update()
		 * in this function because the caller hfs_reclaim_extent()
		 * is holding the catalog lock currently.
		 */
		cp->c_flag |= C_MODIFIED;
	} else {
		/* The newly split extent is for large EAs or is in overflow
		 * extent record, so update it directly in the btree using the
		 * iterator information from the shared extent_info structure
		 */
		error = BTReplaceRecord(extent_info->fcb, extent_info->iterator,
				&(extent_info->btdata), extent_info->recordlen);
		if (error) {
			printf ("hfs_split_extent: fileID=%u BTReplaceRecord returned error=%d\n", extent_info->fileID, error);
			goto out;
		}
	}

	/* No extent entry to be shifted into another extent overflow record */
	if (shift_extent.blockCount == 0) {
		if (hfs_resize_debug) {
			printf ("hfs_split_extent: No extent entry to be shifted into overflow records\n");
		}
		error = 0;
		goto out;
	}

	/* The overflow extent entry has to be shifted into an extent
	 * overflow record.  This means that we might have to shift
	 * extent entries from all subsequent overflow records by one.
	 * We start iteration from the first record to the last record,
	 * and shift the extent entry from one record to another.
	 * We might have to create a new extent record for the last
	 * extent entry for the file.
	 */

	/* Initialize iterator to search the next record */
	bzero(&iterator, sizeof(iterator));
	if (is_xattr) {
		/* Copy the key from the iterator that was used to update the modified attribute record. */
		xattr_key = (HFSPlusAttrKey *)&(iterator.key);
		bcopy((HFSPlusAttrKey *)&(extent_info->iterator->key), xattr_key, sizeof(HFSPlusAttrKey));
		/* Note: xattr_key->startBlock will be initialized later in the iteration loop */

		MALLOC(xattr_rec, HFSPlusAttrRecord *,
				sizeof(HFSPlusAttrRecord), M_TEMP, M_WAITOK);
		if (xattr_rec == NULL) {
			error = ENOMEM;
			goto out;
		}
		btdata.bufferAddress = xattr_rec;
		btdata.itemSize = sizeof(HFSPlusAttrRecord);
		btdata.itemCount = 1;
		extents = xattr_rec->overflowExtents.extents;
	} else {
		/* Initialize the extent key for the current file */
		extents_key = (HFSPlusExtentKey *) &(iterator.key);
		extents_key->keyLength = kHFSPlusExtentKeyMaximumLength;
		extents_key->forkType = extent_info->forkType;
		extents_key->fileID = extent_info->fileID;
		/* Note: extents_key->startBlock will be initialized later in the iteration loop */

		MALLOC(extents_rec, HFSPlusExtentRecord *,
				sizeof(HFSPlusExtentRecord), M_TEMP, M_WAITOK);
		if (extents_rec == NULL) {
			error = ENOMEM;
			goto out;
		}
		btdata.bufferAddress = extents_rec;
		btdata.itemSize = sizeof(HFSPlusExtentRecord);
		btdata.itemCount = 1;
		extents = extents_rec[0];
	}

	/* The overflow extent entry has to be shifted into an extent
	 * overflow record.  This means that we might have to shift
	 * extent entries from all subsequent overflow records by one.
	 * We start iteration from the first record to the last record,
	 * examine one extent record in each iteration and shift one
	 * extent entry from one record to another.  We might have to
	 * create a new extent record for the last extent entry for the
	 * file.
	 *
	 * If shift_extent.blockCount is non-zero, it means that there is
	 * an extent entry that needs to be shifted into the next
	 * overflow extent record.  We keep on going till there are no such
	 * entries left to be shifted.  This will also change the starting
	 * allocation block number of the extent record which is part of
	 * the key for the extent record in each iteration.  Note that
	 * because the extent record key is changing while we are searching,
	 * the record can not be updated directly, instead it has to be
	 * deleted and inserted again.
	 */
	while (shift_extent.blockCount) {
		if (hfs_resize_debug) {
			printf ("hfs_split_extent: Will shift (%u,%u) into overflow record with startBlock=%u\n", shift_extent.startBlock, shift_extent.blockCount, read_recStartBlock);
		}

		/* Search if there is any existing overflow extent record
		 * that matches the current file and the logical start block
		 * number.
		 *
		 * For this, the logical start block number in the key is
		 * the value calculated based on the logical start block
		 * number of the current extent record and the total number
		 * of blocks existing in the current extent record.
		 */
		if (is_xattr) {
			xattr_key->startBlock = read_recStartBlock;
		} else {
			extents_key->startBlock = read_recStartBlock;
		}
		error = BTSearchRecord(extent_info->fcb, &iterator, &btdata, &reclen, &iterator);
		if (error) {
			if (error != btNotFound) {
				printf ("hfs_split_extent: fileID=%u startBlock=%u BTSearchRecord error=%d\n", extent_info->fileID, read_recStartBlock, error);
				goto out;
			}
			/* No matching record was found, so create a new extent record.
			 * Note: Since no record was found, we can't rely on the
			 * btree key in the iterator any longer.  This will be initialized
			 * later before we insert the record.
			 */
			create_record = true;
		}

		/* The extra extent entry from the previous record is being inserted
		 * as the first entry in the current extent record.  This will change
		 * the file allocation block number (FABN) of the current extent
		 * record, which is the startBlock value from the extent record key.
		 * Since one extra entry is being inserted in the record, the new
		 * FABN for the record will less than old FABN by the number of blocks
		 * in the new extent entry being inserted at the start.  We have to
		 * do this before we update read_recStartBlock to point at the
		 * startBlock of the following record.
		 */
		write_recStartBlock = read_recStartBlock - shift_extent.blockCount;
		if (hfs_resize_debug) {
			if (create_record) {
				printf ("hfs_split_extent: No records found for startBlock=%u, will create new with startBlock=%u\n", read_recStartBlock, write_recStartBlock);
			}
		}

		/* Now update the read_recStartBlock to account for total number
		 * of blocks in this extent record.  It will now point to the
		 * starting allocation block number for the next extent record.
		 *
		 * NOTE(review): when BTSearchRecord returned btNotFound above,
		 * the record buffer was not filled, so this loop walks
		 * uninitialized data; it appears harmless only because the
		 * create_record branch below zeroes shift_extent and terminates
		 * the loop, leaving read_recStartBlock unused — confirm.
		 */
		for (i = 0; i < kHFSPlusExtentDensity; i++) {
			if (extents[i].blockCount == 0) {
				break;
			}
			read_recStartBlock += extents[i].blockCount;
		}

		if (create_record == true) {
			/* Initialize new record content with only one extent entry */
			bzero(extents, sizeof(HFSPlusExtentRecord));
			/* The new record will contain only one extent entry */
			extents[0] = shift_extent;
			/* There are no more overflow extents to be shifted */
			shift_extent.startBlock = shift_extent.blockCount = 0;

			if (is_xattr) {
				/* BTSearchRecord above returned btNotFound,
				 * but since the attribute btree is never empty
				 * if we are trying to insert new overflow
				 * record for the xattrs, the extents_key will
				 * contain correct data.  So we don't need to
				 * re-initialize it again like below.
				 */

				/* Initialize the new xattr record */
				xattr_rec->recordType = kHFSPlusAttrExtents;
				xattr_rec->overflowExtents.reserved = 0;
				reclen = sizeof(HFSPlusAttrExtents);
			} else {
				/* BTSearchRecord above returned btNotFound,
				 * which means that extents_key content might
				 * not correspond to the record that we are
				 * trying to create, especially when the extents
				 * overflow btree is empty.  So we reinitialize
				 * the extents_key again always.
				 */
				extents_key->keyLength = kHFSPlusExtentKeyMaximumLength;
				extents_key->forkType = extent_info->forkType;
				extents_key->fileID = extent_info->fileID;

				/* Initialize the new extent record */
				reclen = sizeof(HFSPlusExtentRecord);
			}
		} else {
			/* The overflow extent entry from previous record will be
			 * the first entry in this extent record.  If the last
			 * extent entry in this record is valid, it will be shifted
			 * into the following extent record as its first entry.  So
			 * save the last entry before shifting entries in current
			 * record.
			 */
			last_extent = extents[kHFSPlusExtentDensity-1];

			/* Shift all entries by one index towards the end */
			for (i = kHFSPlusExtentDensity-2; i >= 0; i--) {
				extents[i+1] = extents[i];
			}

			/* Overflow extent entry saved from previous record
			 * is now the first entry in the current record.
			 */
			extents[0] = shift_extent;

			if (hfs_resize_debug) {
				printf ("hfs_split_extent: Shift overflow=(%u,%u) to record with updated startBlock=%u\n", shift_extent.startBlock, shift_extent.blockCount, write_recStartBlock);
			}

			/* The last entry from current record will be the
			 * overflow entry which will be the first entry for
			 * the following extent record.
			 */
			shift_extent = last_extent;

			/* Since the key->startBlock is being changed for this record,
			 * it should be deleted and inserted with the new key.
			 */
			error = BTDeleteRecord(extent_info->fcb, &iterator);
			if (error) {
				printf ("hfs_split_extent: fileID=%u startBlock=%u BTDeleteRecord error=%d\n", extent_info->fileID, read_recStartBlock, error);
				goto out;
			}
			if (hfs_resize_debug) {
				printf ("hfs_split_extent: Deleted record with startBlock=%u\n", (is_xattr ? xattr_key->startBlock : extents_key->startBlock));
			}
		}

		/* Insert the newly created or modified extent record */
		bzero(&iterator.hint, sizeof(iterator.hint));
		if (is_xattr) {
			xattr_key->startBlock = write_recStartBlock;
		} else {
			extents_key->startBlock = write_recStartBlock;
		}
		error = BTInsertRecord(extent_info->fcb, &iterator, &btdata, reclen);
		if (error) {
			printf ("hfs_split_extent: fileID=%u, startBlock=%u BTInsertRecord error=%d\n", extent_info->fileID, write_recStartBlock, error);
			goto out;
		}
		if (hfs_resize_debug) {
			printf ("hfs_split_extent: Inserted extent record with startBlock=%u\n", write_recStartBlock);
		}
	}
	BTFlushPath(extent_info->fcb);
out:
	if (extents_rec) {
		FREE (extents_rec, M_TEMP);
	}
	if (xattr_rec) {
		FREE (xattr_rec, M_TEMP);
	}
	return error;
}
5283
5284
5285 /*
5286 * Relocate an extent if it lies beyond the expected end of volume.
5287 *
5288 * This function is called for every extent of the file being relocated.
 * It allocates space for relocation, copies the data, deallocates
 * the old extent, and updates the corresponding on-disk extent.  If the
 * function does not find contiguous space to relocate an extent, it splits
 * the extent into smaller extents so that it can be relocated out of the
 * area of the disk being reclaimed.  As an optimization, if an extent lies partially
5294 * in the area of the disk being reclaimed, it is split so that we only
5295 * have to relocate the area that was overlapping with the area of disk
5296 * being reclaimed.
5297 *
5298 * Note that every extent is relocated in its own transaction so that
5299 * they do not overwhelm the journal. This function handles the extent
5300 * record that exists in the catalog record, extent record from overflow
5301 * extents btree, and extents for large EAs.
5302 *
5303 * Inputs:
5304 * extent_info - This is the structure that contains state about
5305 * the current file, extent, and extent record that
5306 * is being relocated. This structure is shared
5307 * among code that traverses through all the extents
5308 * of the file, code that relocates extents, and
5309 * code that splits the extent.
5310 */
5311 static int
5312 hfs_reclaim_extent(struct hfsmount *hfsmp, const u_long allocLimit, struct hfs_reclaim_extent_info *extent_info, vfs_context_t context)
5313 {
5314 int error = 0;
5315 int index;
5316 struct cnode *cp;
5317 u_int32_t oldStartBlock;
5318 u_int32_t oldBlockCount;
5319 u_int32_t newStartBlock;
5320 u_int32_t newBlockCount;
5321 u_int32_t roundedBlockCount;
5322 uint16_t node_size;
5323 uint32_t remainder_blocks;
5324 u_int32_t alloc_flags;
5325 int blocks_allocated = false;
5326
5327 index = extent_info->extent_index;
5328 cp = VTOC(extent_info->vp);
5329
5330 oldStartBlock = extent_info->extents[index].startBlock;
5331 oldBlockCount = extent_info->extents[index].blockCount;
5332
5333 if (0 && hfs_resize_debug) {
5334 printf ("hfs_reclaim_extent: Examine record:%u recStartBlock=%u, %u:(%u,%u)\n", extent_info->overflow_count, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount);
5335 }
5336
5337 /* If the current extent lies completely within allocLimit,
5338 * it does not require any relocation.
5339 */
5340 if ((oldStartBlock + oldBlockCount) <= allocLimit) {
5341 extent_info->cur_blockCount += oldBlockCount;
5342 return error;
5343 }
5344
5345 /* Every extent should be relocated in its own transaction
5346 * to make sure that we don't overflow the journal buffer.
5347 */
5348 error = hfs_start_transaction(hfsmp);
5349 if (error) {
5350 return error;
5351 }
5352 extent_info->lockflags = hfs_systemfile_lock(hfsmp, extent_info->lockflags, HFS_EXCLUSIVE_LOCK);
5353
5354 /* Check if the extent lies partially in the area to reclaim,
5355 * i.e. it starts before allocLimit and ends beyond allocLimit.
5356 * We have already skipped extents that lie completely within
5357 * allocLimit in the check above, so we only check for the
5358 * startBlock. If it lies partially, split it so that we
5359 * only relocate part of the extent.
5360 */
5361 if (oldStartBlock < allocLimit) {
5362 newBlockCount = allocLimit - oldStartBlock;
5363
5364 /* If the extent belongs to a btree, check and trim
5365 * it to be multiple of the node size.
5366 */
5367 if (extent_info->is_sysfile) {
5368 node_size = get_btree_nodesize(extent_info->vp);
5369 /* If the btree node size is less than the block size,
5370 * splitting this extent will not split a node across
5371 * different extents. So we only check and trim if
5372 * node size is more than the allocation block size.
5373 */
5374 if (node_size > hfsmp->blockSize) {
5375 remainder_blocks = newBlockCount % (node_size / hfsmp->blockSize);
5376 if (remainder_blocks) {
5377 newBlockCount -= remainder_blocks;
5378 if (hfs_resize_debug) {
5379 printf ("hfs_reclaim_extent: Fixing extent block count, node_blks=%u, old=%u, new=%u\n", node_size/hfsmp->blockSize, newBlockCount + remainder_blocks, newBlockCount);
5380 }
5381 }
5382 }
5383 }
5384
5385 if (hfs_resize_debug) {
5386 int idx = extent_info->extent_index;
5387 printf ("hfs_reclaim_extent: Split straddling extent %u:(%u,%u) for %u blocks\n", idx, extent_info->extents[idx].startBlock, extent_info->extents[idx].blockCount, newBlockCount);
5388 }
5389
5390 /* Split the extents into two parts --- the first extent lies
5391 * completely within allocLimit and therefore does not require
5392 * relocation. The second extent will require relocation which
5393 * will be handled when the caller calls this function again
5394 * for the next extent.
5395 */
5396 error = hfs_split_extent(extent_info, newBlockCount);
5397 if (error == 0) {
5398 /* Split success, no relocation required */
5399 goto out;
5400 }
5401 /* Split failed, so try to relocate entire extent */
5402 if (hfs_resize_debug) {
5403 printf ("hfs_reclaim_extent: Split straddling extent failed, reclocate full extent\n");
5404 }
5405 }
5406
5407 /* At this point, the current extent requires relocation.
5408 * We will try to allocate space equal to the size of the extent
5409 * being relocated first to try to relocate it without splitting.
5410 * If the allocation fails, we will try to allocate contiguous
5411 * blocks out of metadata zone. If that allocation also fails,
5412 * then we will take a whatever contiguous block run is returned
5413 * by the allocation, split the extent into two parts, and then
5414 * relocate the first splitted extent.
5415 */
5416 alloc_flags = HFS_ALLOC_FORCECONTIG | HFS_ALLOC_SKIPFREEBLKS;
5417 if (extent_info->is_sysfile) {
5418 alloc_flags |= HFS_ALLOC_METAZONE;
5419 }
5420
5421 error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount, alloc_flags,
5422 &newStartBlock, &newBlockCount);
5423 if ((extent_info->is_sysfile == false) &&
5424 ((error == dskFulErr) || (error == ENOSPC))) {
5425 /* For non-system files, try reallocating space in metadata zone */
5426 alloc_flags |= HFS_ALLOC_METAZONE;
5427 error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount,
5428 alloc_flags, &newStartBlock, &newBlockCount);
5429 }
5430 if ((error == dskFulErr) || (error == ENOSPC)) {
5431 /* We did not find desired contiguous space for this extent.
5432 * So try to allocate the maximum contiguous space available.
5433 */
5434 alloc_flags &= ~HFS_ALLOC_FORCECONTIG;
5435
5436 error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount,
5437 alloc_flags, &newStartBlock, &newBlockCount);
5438 if (error) {
5439 printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) BlockAllocate error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error);
5440 goto out;
5441 }
5442 blocks_allocated = true;
5443
5444 /* The number of blocks allocated is less than the requested
5445 * number of blocks. For btree extents, check and trim the
5446 * extent to be multiple of the node size.
5447 */
5448 if (extent_info->is_sysfile) {
5449 node_size = get_btree_nodesize(extent_info->vp);
5450 if (node_size > hfsmp->blockSize) {
5451 remainder_blocks = newBlockCount % (node_size / hfsmp->blockSize);
5452 if (remainder_blocks) {
5453 roundedBlockCount = newBlockCount - remainder_blocks;
5454 /* Free tail-end blocks of the newly allocated extent */
5455 BlockDeallocate(hfsmp, newStartBlock + roundedBlockCount,
5456 newBlockCount - roundedBlockCount,
5457 HFS_ALLOC_SKIPFREEBLKS);
5458 newBlockCount = roundedBlockCount;
5459 if (hfs_resize_debug) {
5460 printf ("hfs_reclaim_extent: Fixing extent block count, node_blks=%u, old=%u, new=%u\n", node_size/hfsmp->blockSize, newBlockCount + remainder_blocks, newBlockCount);
5461 }
5462 if (newBlockCount == 0) {
5463 printf ("hfs_reclaim_extent: Not enough contiguous blocks available to relocate fileID=%d\n", extent_info->fileID);
5464 error = ENOSPC;
5465 goto out;
5466 }
5467 }
5468 }
5469 }
5470
5471 /* The number of blocks allocated is less than the number of
5472 * blocks requested, so split this extent --- the first extent
5473 * will be relocated as part of this function call and the caller
5474 * will handle relocating the second extent by calling this
5475 * function again for the second extent.
5476 */
5477 error = hfs_split_extent(extent_info, newBlockCount);
5478 if (error) {
5479 printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) split error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error);
5480 goto out;
5481 }
5482 oldBlockCount = newBlockCount;
5483 }
5484 if (error) {
5485 printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) contig BlockAllocate error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error);
5486 goto out;
5487 }
5488 blocks_allocated = true;
5489
5490 /* Copy data from old location to new location */
5491 error = hfs_copy_extent(hfsmp, extent_info->vp, oldStartBlock,
5492 newStartBlock, newBlockCount, context);
5493 if (error) {
5494 printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u)=>(%u,%u) hfs_copy_extent error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, newStartBlock, newBlockCount, error);
5495 goto out;
5496 }
5497
5498 /* Update the extent record with the new start block information */
5499 extent_info->extents[index].startBlock = newStartBlock;
5500
5501 /* Sync the content back to the disk */
5502 if (extent_info->catalog_fp) {
5503 /* Update the extents in catalog record */
5504 if (extent_info->is_dirlink) {
5505 error = cat_update_dirlink(hfsmp, extent_info->forkType,
5506 extent_info->dirlink_desc, extent_info->dirlink_attr,
5507 &(extent_info->dirlink_fork->ff_data));
5508 } else {
5509 cp->c_flag |= C_MODIFIED;
5510 /* If this is a system file, sync volume headers on disk */
5511 if (extent_info->is_sysfile) {
5512 error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
5513 }
5514 }
5515 } else {
5516 /* Replace record for extents overflow or extents-based xattrs */
5517 error = BTReplaceRecord(extent_info->fcb, extent_info->iterator,
5518 &(extent_info->btdata), extent_info->recordlen);
5519 }
5520 if (error) {
5521 printf ("hfs_reclaim_extent: fileID=%u, update record error=%u\n", extent_info->fileID, error);
5522 goto out;
5523 }
5524
5525 /* Deallocate the old extent */
5526 error = BlockDeallocate(hfsmp, oldStartBlock, oldBlockCount, HFS_ALLOC_SKIPFREEBLKS);
5527 if (error) {
5528 printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) BlockDeallocate error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error);
5529 goto out;
5530 }
5531 extent_info->blocks_relocated += newBlockCount;
5532
5533 if (hfs_resize_debug) {
5534 printf ("hfs_reclaim_extent: Relocated record:%u %u:(%u,%u) to (%u,%u)\n", extent_info->overflow_count, index, oldStartBlock, oldBlockCount, newStartBlock, newBlockCount);
5535 }
5536
5537 out:
5538 if (error != 0) {
5539 if (blocks_allocated == true) {
5540 BlockDeallocate(hfsmp, newStartBlock, newBlockCount, HFS_ALLOC_SKIPFREEBLKS);
5541 }
5542 } else {
5543 /* On success, increment the total allocation blocks processed */
5544 extent_info->cur_blockCount += newBlockCount;
5545 }
5546
5547 hfs_systemfile_unlock(hfsmp, extent_info->lockflags);
5548
5549 /* For a non-system file, if an extent entry from catalog record
5550 * was modified, sync the in-memory changes to the catalog record
5551 * on disk before ending the transaction.
5552 */
5553 if ((extent_info->catalog_fp) &&
5554 (extent_info->is_sysfile == false)) {
5555 (void) hfs_update(extent_info->vp, MNT_WAIT);
5556 }
5557
5558 hfs_end_transaction(hfsmp);
5559
5560 return error;
5561 }
5562
5563 /* Report intermediate progress during volume resize */
5564 static void
5565 hfs_truncatefs_progress(struct hfsmount *hfsmp)
5566 {
5567 u_int32_t cur_progress;
5568
5569 hfs_resize_progress(hfsmp, &cur_progress);
5570 if (cur_progress > (hfsmp->hfs_resize_progress + 9)) {
5571 printf("hfs_truncatefs: %d%% done...\n", cur_progress);
5572 hfsmp->hfs_resize_progress = cur_progress;
5573 }
5574 return;
5575 }
5576
5577 /*
5578 * Reclaim space at the end of a volume for given file and forktype.
5579 *
5580 * This routine attempts to move any extent which contains allocation blocks
5581 * at or after "allocLimit." A separate transaction is used for every extent
5582 * that needs to be moved. If there is not contiguous space available for
5583 * moving an extent, it can be split into smaller extents. The contents of
5584 * any moved extents are read and written via the volume's device vnode --
5585 * NOT via "vp." During the move, moved blocks which are part of a transaction
5586 * have their physical block numbers invalidated so they will eventually be
5587 * written to their new locations.
5588 *
 * This function is also called for directory hard links. Directory hard links
 * are regular files with no data fork, and a resource fork that contains alias
 * information for backward compatibility with pre-Leopard systems. However,
 * non-Mac OS X implementations can add/modify data fork or resource fork
 * information to directory hard links, so we check, and if required, relocate
 * both the data fork and the resource fork.
5595 *
5596 * Inputs:
5597 * hfsmp The volume being resized.
5598 * vp The vnode for the system file.
5599 * fileID ID of the catalog record that needs to be relocated
 * forktype	The type of fork that needs to be relocated,
5601 * kHFSResourceForkType for resource fork,
5602 * kHFSDataForkType for data fork
5603 * allocLimit Allocation limit for the new volume size,
5604 * do not use this block or beyond. All extents
5605 * that use this block or any blocks beyond this limit
5606 * will be relocated.
5607 *
5608 * Side Effects:
5609 * hfsmp->hfs_resize_blocksmoved is incremented by the number of allocation
5610 * blocks that were relocated.
5611 */
static int
hfs_reclaim_file(struct hfsmount *hfsmp, struct vnode *vp, u_int32_t fileID,
		u_int8_t forktype, u_long allocLimit, vfs_context_t context)
{
	int error = 0;
	struct hfs_reclaim_extent_info *extent_info;
	int i;
	int lockflags = 0;
	struct cnode *cp;
	struct filefork *fp;
	int took_truncate_lock = false;
	int release_desc = false;
	HFSPlusExtentKey *key;

	/* If there is no vnode for this file, then there's nothing to do. */
	if (vp == NULL) {
		return 0;
	}

	cp = VTOC(vp);

	/* All per-file relocation state lives in this heap-allocated context,
	 * which hfs_reclaim_extent() reads and updates as extents are moved.
	 */
	MALLOC(extent_info, struct hfs_reclaim_extent_info *,
	       sizeof(struct hfs_reclaim_extent_info), M_TEMP, M_WAITOK);
	if (extent_info == NULL) {
		return ENOMEM;
	}
	bzero(extent_info, sizeof(struct hfs_reclaim_extent_info));
	extent_info->vp = vp;
	extent_info->fileID = fileID;
	extent_info->forkType = forktype;
	extent_info->is_sysfile = vnode_issystem(vp);
	/* A directory vnode with C_HARDLINK set is a directory hard link;
	 * its fork data must be looked up from the catalog (see below).
	 */
	if (vnode_isdir(vp) && (cp->c_flag & C_HARDLINK)) {
		extent_info->is_dirlink = true;
	}
	/* We always need allocation bitmap and extent btree lock */
	lockflags = SFL_BITMAP | SFL_EXTENTS;
	if ((fileID == kHFSCatalogFileID) || (extent_info->is_dirlink == true)) {
		lockflags |= SFL_CATALOG;
	} else if (fileID == kHFSAttributesFileID) {
		lockflags |= SFL_ATTRIBUTE;
	} else if (fileID == kHFSStartupFileID) {
		lockflags |= SFL_STARTUP;
	}
	extent_info->lockflags = lockflags;
	extent_info->fcb = VTOF(hfsmp->hfs_extents_vp);

	/* Flush data associated with current file on disk.
	 *
	 * If the current vnode is directory hard link, no flushing of
	 * journal or vnode is required. The current kernel does not
	 * modify data/resource fork of directory hard links, so nothing
	 * will be in the cache. If a directory hard link is newly created,
	 * the resource fork data is written directly using devvp and
	 * the code that actually relocates data (hfs_copy_extent()) also
	 * uses devvp for its I/O --- so they will see a consistent copy.
	 */
	if (extent_info->is_sysfile) {
		/* If the current vnode is system vnode, flush journal
		 * to make sure that all data is written to the disk.
		 */
		error = hfs_journal_flush(hfsmp, TRUE);
		if (error) {
			printf ("hfs_reclaim_file: journal_flush returned %d\n", error);
			goto out;
		}
	} else if (extent_info->is_dirlink == false) {
		/* Flush all blocks associated with this regular file vnode.
		 * Normally there should not be buffer cache blocks for regular
		 * files, but for objects like symlinks, we can have buffer cache
		 * blocks associated with the vnode. Therefore we call
		 * buf_flushdirtyblks() also.
		 */
		buf_flushdirtyblks(vp, 0, BUF_SKIP_LOCKED, "hfs_reclaim_file");

		/* Drop the cnode lock before taking the truncate lock to honor
		 * the lock ordering (truncate lock before cnode lock), then
		 * re-acquire the cnode lock afterwards.
		 */
		hfs_unlock(cp);
		hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK);
		took_truncate_lock = true;
		(void) cluster_push(vp, 0);
		error = hfs_lock(cp, HFS_FORCE_LOCK);
		if (error) {
			goto out;
		}

		/* If the file no longer exists, nothing left to do */
		if (cp->c_flag & C_NOEXISTS) {
			error = 0;
			goto out;
		}

		/* Wait for any in-progress writes to this vnode to complete, so that we'll
		 * be copying consistent bits. (Otherwise, it's possible that an async
		 * write will complete to the old extent after we read from it. That
		 * could lead to corruption.)
		 */
		error = vnode_waitforwrites(vp, 0, 0, 0, "hfs_reclaim_file");
		if (error) {
			goto out;
		}
	}

	if (hfs_resize_debug) {
		printf("hfs_reclaim_file: === Start reclaiming %sfork for %sid=%u ===\n", (forktype ? "rsrc" : "data"), (extent_info->is_dirlink ? "dirlink" : "file"), fileID);
	}

	if (extent_info->is_dirlink) {
		MALLOC(extent_info->dirlink_desc, struct cat_desc *,
		       sizeof(struct cat_desc), M_TEMP, M_WAITOK);
		MALLOC(extent_info->dirlink_attr, struct cat_attr *,
		       sizeof(struct cat_attr), M_TEMP, M_WAITOK);
		MALLOC(extent_info->dirlink_fork, struct filefork *,
		       sizeof(struct filefork), M_TEMP, M_WAITOK);
		if ((extent_info->dirlink_desc == NULL) ||
		    (extent_info->dirlink_attr == NULL) ||
		    (extent_info->dirlink_fork == NULL)) {
			error = ENOMEM;
			goto out;
		}

		/* Lookup catalog record for directory hard link and
		 * create a fake filefork for the value looked up from
		 * the disk.
		 */
		fp = extent_info->dirlink_fork;
		bzero(extent_info->dirlink_fork, sizeof(struct filefork));
		extent_info->dirlink_fork->ff_cp = cp;
		lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
		error = cat_lookup_dirlink(hfsmp, fileID, forktype,
				extent_info->dirlink_desc, extent_info->dirlink_attr,
				&(extent_info->dirlink_fork->ff_data));
		hfs_systemfile_unlock(hfsmp, lockflags);
		if (error) {
			printf ("hfs_reclaim_file: cat_lookup_dirlink for fileID=%u returned error=%u\n", fileID, error);
			goto out;
		}
		release_desc = true;
	} else {
		fp = VTOF(vp);
	}

	extent_info->catalog_fp = fp;
	extent_info->recStartBlock = 0;
	extent_info->extents = extent_info->catalog_fp->ff_extents;
	/* Relocate extents from the catalog record (first eight extents) */
	for (i = 0; i < kHFSPlusExtentDensity; ++i) {
		if (fp->ff_extents[i].blockCount == 0) {
			break;
		}
		extent_info->extent_index = i;
		error = hfs_reclaim_extent(hfsmp, allocLimit, extent_info, context);
		if (error) {
			printf ("hfs_reclaim_file: fileID=%u #%d %u:(%u,%u) hfs_reclaim_extent error=%d\n", fileID, extent_info->overflow_count, i, fp->ff_extents[i].startBlock, fp->ff_extents[i].blockCount, error);
			goto out;
		}
	}

	/* If the number of allocation blocks processed for reclaiming
	 * are less than total number of blocks for the file, continue
	 * working on overflow extents records.
	 */
	if (fp->ff_blocks <= extent_info->cur_blockCount) {
		/* NOTE(review): "0 &&" deliberately disables this debug print;
		 * left as-is to preserve behavior.
		 */
		if (0 && hfs_resize_debug) {
			printf ("hfs_reclaim_file: Nothing more to relocate, offset=%d, ff_blocks=%u, cur_blockCount=%u\n", i, fp->ff_blocks, extent_info->cur_blockCount);
		}
		goto out;
	}

	if (hfs_resize_debug) {
		printf ("hfs_reclaim_file: Will check overflow records, offset=%d, ff_blocks=%u, cur_blockCount=%u\n", i, fp->ff_blocks, extent_info->cur_blockCount);
	}

	MALLOC(extent_info->iterator, struct BTreeIterator *, sizeof(struct BTreeIterator), M_TEMP, M_WAITOK);
	if (extent_info->iterator == NULL) {
		error = ENOMEM;
		goto out;
	}
	bzero(extent_info->iterator, sizeof(struct BTreeIterator));
	key = (HFSPlusExtentKey *) &(extent_info->iterator->key);
	key->keyLength = kHFSPlusExtentKeyMaximumLength;
	key->forkType = forktype;
	key->fileID = fileID;
	key->startBlock = extent_info->cur_blockCount;

	extent_info->btdata.bufferAddress = extent_info->record.overflow;
	extent_info->btdata.itemSize = sizeof(HFSPlusExtentRecord);
	extent_info->btdata.itemCount = 1;

	/* From here on we are working on overflow records, not the catalog copy */
	extent_info->catalog_fp = NULL;

	/* Search the first overflow extent with expected startBlock as 'cur_blockCount' */
	lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
	error = BTSearchRecord(extent_info->fcb, extent_info->iterator,
			&(extent_info->btdata), &(extent_info->recordlen),
			extent_info->iterator);
	hfs_systemfile_unlock(hfsmp, lockflags);
	while (error == 0) {
		extent_info->overflow_count++;
		extent_info->recStartBlock = key->startBlock;
		extent_info->extents = extent_info->record.overflow;
		for (i = 0; i < kHFSPlusExtentDensity; i++) {
			/* A zero-length extent terminates the record */
			if (extent_info->record.overflow[i].blockCount == 0) {
				goto out;
			}
			extent_info->extent_index = i;
			error = hfs_reclaim_extent(hfsmp, allocLimit, extent_info, context);
			if (error) {
				printf ("hfs_reclaim_file: fileID=%u #%d %u:(%u,%u) hfs_reclaim_extent error=%d\n", fileID, extent_info->overflow_count, i, extent_info->record.overflow[i].startBlock, extent_info->record.overflow[i].blockCount, error);
				goto out;
			}
		}

		/* Look for more overflow records */
		lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
		error = BTIterateRecord(extent_info->fcb, kBTreeNextRecord,
				extent_info->iterator, &(extent_info->btdata),
				&(extent_info->recordlen));
		hfs_systemfile_unlock(hfsmp, lockflags);
		if (error) {
			break;
		}
		/* Stop when we encounter a different file or fork. */
		if ((key->fileID != fileID) || (key->forkType != forktype)) {
			break;
		}
	}
	/* Running off the end of the btree / past the last record is expected */
	if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) {
		error = 0;
	}

out:
	/* If any blocks were relocated, account them and report progress */
	if (extent_info->blocks_relocated) {
		hfsmp->hfs_resize_blocksmoved += extent_info->blocks_relocated;
		hfs_truncatefs_progress(hfsmp);
		/* Only log system/metadata files unconditionally */
		if (fileID < kHFSFirstUserCatalogNodeID) {
			printf ("hfs_reclaim_file: Relocated %u blocks from fileID=%u on \"%s\"\n",
					extent_info->blocks_relocated, fileID, hfsmp->vcbVN);
		}
	}
	if (extent_info->iterator) {
		FREE(extent_info->iterator, M_TEMP);
	}
	/* Release the catalog descriptor contents before freeing its memory */
	if (release_desc == true) {
		cat_releasedesc(extent_info->dirlink_desc);
	}
	if (extent_info->dirlink_desc) {
		FREE(extent_info->dirlink_desc, M_TEMP);
	}
	if (extent_info->dirlink_attr) {
		FREE(extent_info->dirlink_attr, M_TEMP);
	}
	if (extent_info->dirlink_fork) {
		FREE(extent_info->dirlink_fork, M_TEMP);
	}
	/* Sync the updated extent locations for regular files to disk */
	if ((extent_info->blocks_relocated != 0) && (extent_info->is_sysfile == false)) {
		(void) hfs_update(vp, MNT_WAIT);
	}
	if (took_truncate_lock) {
		hfs_unlock_truncate(cp, 0);
	}
	if (extent_info) {
		FREE(extent_info, M_TEMP);
	}
	if (hfs_resize_debug) {
		printf("hfs_reclaim_file: === Finished relocating %sfork for fileid=%u (error=%d) ===\n", (forktype ? "rsrc" : "data"), fileID, error);
	}

	return error;
}
5880
5881
5882 /*
5883 * This journal_relocate callback updates the journal info block to point
5884 * at the new journal location. This write must NOT be done using the
5885 * transaction. We must write the block immediately. We must also force
5886 * it to get to the media so that the new journal location will be seen by
5887 * the replay code before we can safely let journaled blocks be written
5888 * to their normal locations.
5889 *
5890 * The tests for journal_uses_fua below are mildly hacky. Since the journal
5891 * and the file system are both on the same device, I'm leveraging what
5892 * the journal has decided about FUA.
5893 */
/*
 * Argument bundle handed to hfs_journal_relocate_callback() through
 * journal_relocate(); carries everything the callback needs to rewrite
 * the journal info block at its (unchanged) location on disk.
 */
struct hfs_journal_relocate_args {
	struct hfsmount *hfsmp;		/* volume being resized */
	vfs_context_t context;		/* caller's VFS context (credentials for I/O) */
	u_int32_t newStartBlock;	/* new first allocation block of the journal */
};
5899
5900 static errno_t
5901 hfs_journal_relocate_callback(void *_args)
5902 {
5903 int error;
5904 struct hfs_journal_relocate_args *args = _args;
5905 struct hfsmount *hfsmp = args->hfsmp;
5906 buf_t bp;
5907 JournalInfoBlock *jibp;
5908
5909 error = buf_meta_bread(hfsmp->hfs_devvp,
5910 hfsmp->vcbJinfoBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size),
5911 hfsmp->blockSize, vfs_context_ucred(args->context), &bp);
5912 if (error) {
5913 printf("hfs_reclaim_journal_file: failed to read JIB (%d)\n", error);
5914 if (bp) {
5915 buf_brelse(bp);
5916 }
5917 return error;
5918 }
5919 jibp = (JournalInfoBlock*) buf_dataptr(bp);
5920 jibp->offset = SWAP_BE64((u_int64_t)args->newStartBlock * hfsmp->blockSize);
5921 jibp->size = SWAP_BE64(hfsmp->jnl_size);
5922 if (journal_uses_fua(hfsmp->jnl))
5923 buf_markfua(bp);
5924 error = buf_bwrite(bp);
5925 if (error) {
5926 printf("hfs_reclaim_journal_file: failed to write JIB (%d)\n", error);
5927 return error;
5928 }
5929 if (!journal_uses_fua(hfsmp->jnl)) {
5930 error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, args->context);
5931 if (error) {
5932 printf("hfs_reclaim_journal_file: DKIOCSYNCHRONIZECACHE failed (%d)\n", error);
5933 error = 0; /* Don't fail the operation. */
5934 }
5935 }
5936
5937 return error;
5938 }
5939
5940
5941 static int
5942 hfs_reclaim_journal_file(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context)
5943 {
5944 int error;
5945 int journal_err;
5946 int lockflags;
5947 u_int32_t oldStartBlock;
5948 u_int32_t newStartBlock;
5949 u_int32_t oldBlockCount;
5950 u_int32_t newBlockCount;
5951 struct cat_desc journal_desc;
5952 struct cat_attr journal_attr;
5953 struct cat_fork journal_fork;
5954 struct hfs_journal_relocate_args callback_args;
5955
5956 if (hfsmp->jnl_start + (hfsmp->jnl_size / hfsmp->blockSize) <= allocLimit) {
5957 /* The journal does not require relocation */
5958 return 0;
5959 }
5960
5961 error = hfs_start_transaction(hfsmp);
5962 if (error) {
5963 printf("hfs_reclaim_journal_file: hfs_start_transaction returned %d\n", error);
5964 return error;
5965 }
5966 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
5967
5968 oldBlockCount = hfsmp->jnl_size / hfsmp->blockSize;
5969
5970 /* TODO: Allow the journal to change size based on the new volume size. */
5971 error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount,
5972 HFS_ALLOC_METAZONE | HFS_ALLOC_FORCECONTIG | HFS_ALLOC_SKIPFREEBLKS,
5973 &newStartBlock, &newBlockCount);
5974 if (error) {
5975 printf("hfs_reclaim_journal_file: BlockAllocate returned %d\n", error);
5976 goto fail;
5977 }
5978 if (newBlockCount != oldBlockCount) {
5979 printf("hfs_reclaim_journal_file: newBlockCount != oldBlockCount (%u, %u)\n", newBlockCount, oldBlockCount);
5980 goto free_fail;
5981 }
5982
5983 error = BlockDeallocate(hfsmp, hfsmp->jnl_start, oldBlockCount, HFS_ALLOC_SKIPFREEBLKS);
5984 if (error) {
5985 printf("hfs_reclaim_journal_file: BlockDeallocate returned %d\n", error);
5986 goto free_fail;
5987 }
5988
5989 /* Update the catalog record for .journal */
5990 error = cat_idlookup(hfsmp, hfsmp->hfs_jnlfileid, 1, &journal_desc, &journal_attr, &journal_fork);
5991 if (error) {
5992 printf("hfs_reclaim_journal_file: cat_idlookup returned %d\n", error);
5993 goto free_fail;
5994 }
5995 oldStartBlock = journal_fork.cf_extents[0].startBlock;
5996 journal_fork.cf_size = newBlockCount * hfsmp->blockSize;
5997 journal_fork.cf_extents[0].startBlock = newStartBlock;
5998 journal_fork.cf_extents[0].blockCount = newBlockCount;
5999 journal_fork.cf_blocks = newBlockCount;
6000 error = cat_update(hfsmp, &journal_desc, &journal_attr, &journal_fork, NULL);
6001 cat_releasedesc(&journal_desc); /* all done with cat descriptor */
6002 if (error) {
6003 printf("hfs_reclaim_journal_file: cat_update returned %d\n", error);
6004 goto free_fail;
6005 }
6006 callback_args.hfsmp = hfsmp;
6007 callback_args.context = context;
6008 callback_args.newStartBlock = newStartBlock;
6009
6010 error = journal_relocate(hfsmp->jnl, (off_t)newStartBlock*hfsmp->blockSize,
6011 (off_t)newBlockCount*hfsmp->blockSize, 0,
6012 hfs_journal_relocate_callback, &callback_args);
6013 if (error) {
6014 /* NOTE: journal_relocate will mark the journal invalid. */
6015 printf("hfs_reclaim_journal_file: journal_relocate returned %d\n", error);
6016 goto fail;
6017 }
6018 hfsmp->jnl_start = newStartBlock;
6019 hfsmp->jnl_size = (off_t)newBlockCount * hfsmp->blockSize;
6020
6021 hfs_systemfile_unlock(hfsmp, lockflags);
6022 error = hfs_end_transaction(hfsmp);
6023 if (error) {
6024 printf("hfs_reclaim_journal_file: hfs_end_transaction returned %d\n", error);
6025 }
6026
6027 /* Account for the blocks relocated and print progress */
6028 hfsmp->hfs_resize_blocksmoved += oldBlockCount;
6029 hfs_truncatefs_progress(hfsmp);
6030 if (!error) {
6031 printf ("hfs_reclaim_journal_file: Relocated %u blocks from journal on \"%s\"\n",
6032 oldBlockCount, hfsmp->vcbVN);
6033 if (hfs_resize_debug) {
6034 printf ("hfs_reclaim_journal_file: Successfully relocated journal from (%u,%u) to (%u,%u)\n", oldStartBlock, oldBlockCount, newStartBlock, newBlockCount);
6035 }
6036 }
6037 return error;
6038
6039 free_fail:
6040 journal_err = BlockDeallocate(hfsmp, newStartBlock, newBlockCount, HFS_ALLOC_SKIPFREEBLKS);
6041 if (journal_err) {
6042 printf("hfs_reclaim_journal_file: BlockDeallocate returned %d\n", error);
6043 hfs_mark_volume_inconsistent(hfsmp);
6044 }
6045 fail:
6046 hfs_systemfile_unlock(hfsmp, lockflags);
6047 (void) hfs_end_transaction(hfsmp);
6048 if (hfs_resize_debug) {
6049 printf ("hfs_reclaim_journal_file: Error relocating journal file (error=%d)\n", error);
6050 }
6051 return error;
6052 }
6053
6054
6055 /*
6056 * Move the journal info block to a new location. We have to make sure the
6057 * new copy of the journal info block gets to the media first, then change
6058 * the field in the volume header and the catalog record.
6059 */
6060 static int
6061 hfs_reclaim_journal_info_block(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context)
6062 {
6063 int error;
6064 int journal_err;
6065 int lockflags;
6066 u_int32_t oldBlock;
6067 u_int32_t newBlock;
6068 u_int32_t blockCount;
6069 struct cat_desc jib_desc;
6070 struct cat_attr jib_attr;
6071 struct cat_fork jib_fork;
6072 buf_t old_bp, new_bp;
6073
6074 if (hfsmp->vcbJinfoBlock <= allocLimit) {
6075 /* The journal info block does not require relocation */
6076 return 0;
6077 }
6078
6079 error = hfs_start_transaction(hfsmp);
6080 if (error) {
6081 printf("hfs_reclaim_journal_info_block: hfs_start_transaction returned %d\n", error);
6082 return error;
6083 }
6084 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
6085
6086 error = BlockAllocate(hfsmp, 1, 1, 1,
6087 HFS_ALLOC_METAZONE | HFS_ALLOC_FORCECONTIG | HFS_ALLOC_SKIPFREEBLKS,
6088 &newBlock, &blockCount);
6089 if (error) {
6090 printf("hfs_reclaim_journal_info_block: BlockAllocate returned %d\n", error);
6091 goto fail;
6092 }
6093 if (blockCount != 1) {
6094 printf("hfs_reclaim_journal_info_block: blockCount != 1 (%u)\n", blockCount);
6095 goto free_fail;
6096 }
6097 error = BlockDeallocate(hfsmp, hfsmp->vcbJinfoBlock, 1, HFS_ALLOC_SKIPFREEBLKS);
6098 if (error) {
6099 printf("hfs_reclaim_journal_info_block: BlockDeallocate returned %d\n", error);
6100 goto free_fail;
6101 }
6102
6103 /* Copy the old journal info block content to the new location */
6104 error = buf_meta_bread(hfsmp->hfs_devvp,
6105 hfsmp->vcbJinfoBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size),
6106 hfsmp->blockSize, vfs_context_ucred(context), &old_bp);
6107 if (error) {
6108 printf("hfs_reclaim_journal_info_block: failed to read JIB (%d)\n", error);
6109 if (old_bp) {
6110 buf_brelse(old_bp);
6111 }
6112 goto free_fail;
6113 }
6114 new_bp = buf_getblk(hfsmp->hfs_devvp,
6115 newBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size),
6116 hfsmp->blockSize, 0, 0, BLK_META);
6117 bcopy((char*)buf_dataptr(old_bp), (char*)buf_dataptr(new_bp), hfsmp->blockSize);
6118 buf_brelse(old_bp);
6119 if (journal_uses_fua(hfsmp->jnl))
6120 buf_markfua(new_bp);
6121 error = buf_bwrite(new_bp);
6122 if (error) {
6123 printf("hfs_reclaim_journal_info_block: failed to write new JIB (%d)\n", error);
6124 goto free_fail;
6125 }
6126 if (!journal_uses_fua(hfsmp->jnl)) {
6127 error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
6128 if (error) {
6129 printf("hfs_reclaim_journal_info_block: DKIOCSYNCHRONIZECACHE failed (%d)\n", error);
6130 /* Don't fail the operation. */
6131 }
6132 }
6133
6134 /* Update the catalog record for .journal_info_block */
6135 error = cat_idlookup(hfsmp, hfsmp->hfs_jnlinfoblkid, 1, &jib_desc, &jib_attr, &jib_fork);
6136 if (error) {
6137 printf("hfs_reclaim_journal_file: cat_idlookup returned %d\n", error);
6138 goto fail;
6139 }
6140 oldBlock = jib_fork.cf_extents[0].startBlock;
6141 jib_fork.cf_size = hfsmp->blockSize;
6142 jib_fork.cf_extents[0].startBlock = newBlock;
6143 jib_fork.cf_extents[0].blockCount = 1;
6144 jib_fork.cf_blocks = 1;
6145 error = cat_update(hfsmp, &jib_desc, &jib_attr, &jib_fork, NULL);
6146 cat_releasedesc(&jib_desc); /* all done with cat descriptor */
6147 if (error) {
6148 printf("hfs_reclaim_journal_info_block: cat_update returned %d\n", error);
6149 goto fail;
6150 }
6151
6152 /* Update the pointer to the journal info block in the volume header. */
6153 hfsmp->vcbJinfoBlock = newBlock;
6154 error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
6155 if (error) {
6156 printf("hfs_reclaim_journal_info_block: hfs_flushvolumeheader returned %d\n", error);
6157 goto fail;
6158 }
6159 hfs_systemfile_unlock(hfsmp, lockflags);
6160 error = hfs_end_transaction(hfsmp);
6161 if (error) {
6162 printf("hfs_reclaim_journal_info_block: hfs_end_transaction returned %d\n", error);
6163 }
6164 error = hfs_journal_flush(hfsmp, FALSE);
6165 if (error) {
6166 printf("hfs_reclaim_journal_info_block: journal_flush returned %d\n", error);
6167 }
6168
6169 /* Account for the block relocated and print progress */
6170 hfsmp->hfs_resize_blocksmoved += 1;
6171 hfs_truncatefs_progress(hfsmp);
6172 if (!error) {
6173 printf ("hfs_reclaim_journal_info: Relocated 1 block from journal info on \"%s\"\n",
6174 hfsmp->vcbVN);
6175 if (hfs_resize_debug) {
6176 printf ("hfs_reclaim_journal_info_block: Successfully relocated journal info block from (%u,%u) to (%u,%u)\n", oldBlock, blockCount, newBlock, blockCount);
6177 }
6178 }
6179 return error;
6180
6181 free_fail:
6182 journal_err = BlockDeallocate(hfsmp, newBlock, blockCount, HFS_ALLOC_SKIPFREEBLKS);
6183 if (journal_err) {
6184 printf("hfs_reclaim_journal_info_block: BlockDeallocate returned %d\n", error);
6185 hfs_mark_volume_inconsistent(hfsmp);
6186 }
6187
6188 fail:
6189 hfs_systemfile_unlock(hfsmp, lockflags);
6190 (void) hfs_end_transaction(hfsmp);
6191 if (hfs_resize_debug) {
6192 printf ("hfs_reclaim_journal_info_block: Error relocating journal info block (error=%d)\n", error);
6193 }
6194 return error;
6195 }
6196
6197
6198 /*
6199 * This function traverses through all extended attribute records for a given
6200 * fileID, and calls function that reclaims data blocks that exist in the
6201 * area of the disk being reclaimed which in turn is responsible for allocating
6202 * new space, copying extent data, deallocating new space, and if required,
6203 * splitting the extent.
6204 *
6205 * Note: The caller has already acquired the cnode lock on the file. Therefore
6206 * we are assured that no other thread would be creating/deleting/modifying
6207 * extended attributes for this file.
6208 *
6209 * Side Effects:
6210 * hfsmp->hfs_resize_blocksmoved is incremented by the number of allocation
6211 * blocks that were relocated.
6212 *
6213 * Returns:
6214 * 0 on success, non-zero on failure.
6215 */
static int
hfs_reclaim_xattr(struct hfsmount *hfsmp, struct vnode *vp, u_int32_t fileID, u_int32_t allocLimit, vfs_context_t context)
{
	int error = 0;
	struct hfs_reclaim_extent_info *extent_info;
	int i;
	HFSPlusAttrKey *key;
	int *lockflags;

	if (hfs_resize_debug) {
		printf("hfs_reclaim_xattr: === Start reclaiming xattr for id=%u ===\n", fileID);
	}

	/* Per-file relocation state shared with hfs_reclaim_extent() */
	MALLOC(extent_info, struct hfs_reclaim_extent_info *,
	       sizeof(struct hfs_reclaim_extent_info), M_TEMP, M_WAITOK);
	if (extent_info == NULL) {
		return ENOMEM;
	}
	bzero(extent_info, sizeof(struct hfs_reclaim_extent_info));
	extent_info->vp = vp;
	extent_info->fileID = fileID;
	extent_info->is_xattr = true;
	extent_info->is_sysfile = vnode_issystem(vp);
	extent_info->fcb = VTOF(hfsmp->hfs_attribute_vp);
	/* Alias the lockflags field so hfs_reclaim_extent() sees updates too */
	lockflags = &(extent_info->lockflags);
	*lockflags = SFL_ATTRIBUTE | SFL_BITMAP;

	/* Initialize iterator from the extent_info structure */
	MALLOC(extent_info->iterator, struct BTreeIterator *,
	       sizeof(struct BTreeIterator), M_TEMP, M_WAITOK);
	if (extent_info->iterator == NULL) {
		error = ENOMEM;
		goto out;
	}
	bzero(extent_info->iterator, sizeof(struct BTreeIterator));

	/* Build attribute key (NULL name => positions before this file's xattrs) */
	key = (HFSPlusAttrKey *)&(extent_info->iterator->key);
	error = hfs_buildattrkey(fileID, NULL, key);
	if (error) {
		goto out;
	}

	/* Initialize btdata from extent_info structure. Note that the
	 * buffer pointer actually points to the xattr record from the
	 * extent_info structure itself.
	 */
	extent_info->btdata.bufferAddress = &(extent_info->record.xattr);
	extent_info->btdata.itemSize = sizeof(HFSPlusAttrRecord);
	extent_info->btdata.itemCount = 1;

	/*
	 * Sync all extent-based attribute data to the disk.
	 *
	 * All extent-based attribute data I/O is performed via cluster
	 * I/O using a virtual file that spans across entire file system
	 * space.
	 */
	hfs_lock_truncate(VTOC(hfsmp->hfs_attrdata_vp), HFS_EXCLUSIVE_LOCK);
	(void)cluster_push(hfsmp->hfs_attrdata_vp, 0);
	error = vnode_waitforwrites(hfsmp->hfs_attrdata_vp, 0, 0, 0, "hfs_reclaim_xattr");
	hfs_unlock_truncate(VTOC(hfsmp->hfs_attrdata_vp), 0);
	if (error) {
		goto out;
	}

	/* Search for extended attribute for current file. This
	 * will place the iterator before the first matching record.
	 */
	*lockflags = hfs_systemfile_lock(hfsmp, *lockflags, HFS_EXCLUSIVE_LOCK);
	error = BTSearchRecord(extent_info->fcb, extent_info->iterator,
			&(extent_info->btdata), &(extent_info->recordlen),
			extent_info->iterator);
	hfs_systemfile_unlock(hfsmp, *lockflags);
	if (error) {
		if (error != btNotFound) {
			goto out;
		}
		/* btNotFound is expected here, so just mask it */
		error = 0;
	}

	while (1) {
		/* Iterate to the next record; the first iteration yields the
		 * first record for this fileID because the search above left
		 * the iterator positioned just before it.
		 */
		*lockflags = hfs_systemfile_lock(hfsmp, *lockflags, HFS_EXCLUSIVE_LOCK);
		error = BTIterateRecord(extent_info->fcb, kBTreeNextRecord,
				extent_info->iterator, &(extent_info->btdata),
				&(extent_info->recordlen));
		hfs_systemfile_unlock(hfsmp, *lockflags);

		/* Stop the iteration if we encounter end of btree or xattr with different fileID */
		if (error || key->fileID != fileID) {
			if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) {
				error = 0;
			}
			break;
		}

		/* We only care about extent-based EAs */
		if ((extent_info->record.xattr.recordType != kHFSPlusAttrForkData) &&
		    (extent_info->record.xattr.recordType != kHFSPlusAttrExtents)) {
			continue;
		}

		/* Point extent_info at whichever extent array this record carries */
		if (extent_info->record.xattr.recordType == kHFSPlusAttrForkData) {
			extent_info->overflow_count = 0;
			extent_info->extents = extent_info->record.xattr.forkData.theFork.extents;
		} else if (extent_info->record.xattr.recordType == kHFSPlusAttrExtents) {
			extent_info->overflow_count++;
			extent_info->extents = extent_info->record.xattr.overflowExtents.extents;
		}

		extent_info->recStartBlock = key->startBlock;
		for (i = 0; i < kHFSPlusExtentDensity; i++) {
			/* A zero-length extent terminates the record */
			if (extent_info->extents[i].blockCount == 0) {
				break;
			}
			extent_info->extent_index = i;
			error = hfs_reclaim_extent(hfsmp, allocLimit, extent_info, context);
			if (error) {
				printf ("hfs_reclaim_xattr: fileID=%u hfs_reclaim_extent error=%d\n", fileID, error);
				goto out;
			}
		}
	}

out:
	/* If any blocks were relocated, account them and report progress */
	if (extent_info->blocks_relocated) {
		hfsmp->hfs_resize_blocksmoved += extent_info->blocks_relocated;
		hfs_truncatefs_progress(hfsmp);
	}
	if (extent_info->iterator) {
		FREE(extent_info->iterator, M_TEMP);
	}
	if (extent_info) {
		FREE(extent_info, M_TEMP);
	}
	if (hfs_resize_debug) {
		printf("hfs_reclaim_xattr: === Finished relocating xattr for fileid=%u (error=%d) ===\n", fileID, error);
	}
	return error;
}
6359
6360 /*
6361 * Reclaim any extent-based extended attributes allocation blocks from
6362 * the area of the disk that is being truncated.
6363 *
6364 * The function traverses the attribute btree to find out the fileIDs
6365 * of the extended attributes that need to be relocated. For every
6366 * file whose large EA requires relocation, it looks up the cnode and
6367 * calls hfs_reclaim_xattr() to do all the work for allocating
6368 * new space, copying data, deallocating old space, and if required,
6369 * splitting the extents.
6370 *
6371 * Inputs:
6372 * allocLimit - starting block of the area being reclaimed
6373 *
6374 * Returns:
6375 * returns 0 on success, non-zero on failure.
6376 */
static int
hfs_reclaim_xattrspace(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context)
{
	int error = 0;
	FCB *fcb;
	struct BTreeIterator *iterator = NULL;
	struct FSBufferDescriptor btdata;
	HFSPlusAttrKey *key;
	HFSPlusAttrRecord rec;
	int lockflags = 0;
	cnid_t prev_fileid = 0;
	struct vnode *vp;
	int need_relocate;
	int btree_operation;
	u_int32_t files_moved = 0;
	u_int32_t prev_blocksmoved;
	int i;

	fcb = VTOF(hfsmp->hfs_attribute_vp);
	/* Store the value to print total blocks moved by this function in end */
	prev_blocksmoved = hfsmp->hfs_resize_blocksmoved;

	if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) {
		return ENOMEM;
	}
	bzero(iterator, sizeof(*iterator));
	key = (HFSPlusAttrKey *)&iterator->key;
	btdata.bufferAddress = &rec;
	btdata.itemSize = sizeof(rec);
	btdata.itemCount = 1;

	need_relocate = false;
	btree_operation = kBTreeFirstRecord;
	/* Traverse the attribute btree to find extent-based EAs to reclaim.
	 * The shared lock is dropped between records; the per-file cnode lock
	 * taken below is what keeps each file's xattrs stable while moving.
	 */
	while (1) {
		lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE, HFS_SHARED_LOCK);
		error = BTIterateRecord(fcb, btree_operation, iterator, &btdata, NULL);
		hfs_systemfile_unlock(hfsmp, lockflags);
		if (error) {
			/* Exhausting the btree is the normal termination, not a failure */
			if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) {
				error = 0;
			}
			break;
		}
		btree_operation = kBTreeNextRecord;

		/* If the extents of current fileID were already relocated, skip it */
		if (prev_fileid == key->fileID) {
			continue;
		}

		/* Check if any of the extents in the current record need to be relocated */
		need_relocate = false;
		switch(rec.recordType) {
		case kHFSPlusAttrForkData:
			for (i = 0; i < kHFSPlusExtentDensity; i++) {
				if (rec.forkData.theFork.extents[i].blockCount == 0) {
					break;
				}
				/* Extent reaches into the region being truncated away */
				if ((rec.forkData.theFork.extents[i].startBlock +
				     rec.forkData.theFork.extents[i].blockCount) > allocLimit) {
					need_relocate = true;
					break;
				}
			}
			break;

		case kHFSPlusAttrExtents:
			for (i = 0; i < kHFSPlusExtentDensity; i++) {
				if (rec.overflowExtents.extents[i].blockCount == 0) {
					break;
				}
				if ((rec.overflowExtents.extents[i].startBlock +
				     rec.overflowExtents.extents[i].blockCount) > allocLimit) {
					need_relocate = true;
					break;
				}
			}
			break;
		};

		/* Continue iterating to next attribute record */
		if (need_relocate == false) {
			continue;
		}

		/* Look up the vnode for corresponding file. The cnode
		 * will be locked which will ensure that no one modifies
		 * the xattrs when we are relocating them.
		 *
		 * We want to allow open-unlinked files to be moved,
		 * so provide allow_deleted == 1 for hfs_vget().
		 */
		if (hfs_vget(hfsmp, key->fileID, &vp, 0, 1) != 0) {
			continue;
		}

		error = hfs_reclaim_xattr(hfsmp, vp, key->fileID, allocLimit, context);
		hfs_unlock(VTOC(vp));
		vnode_put(vp);
		if (error) {
			printf ("hfs_reclaim_xattrspace: Error relocating xattrs for fileid=%u (error=%d)\n", key->fileID, error);
			break;
		}
		/* Remember this file so later records for it are skipped above */
		prev_fileid = key->fileID;
		files_moved++;
	}

	if (files_moved) {
		printf("hfs_reclaim_xattrspace: Relocated %u xattr blocks from %u files on \"%s\"\n",
			(hfsmp->hfs_resize_blocksmoved - prev_blocksmoved),
			files_moved, hfsmp->vcbVN);
	}

	kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator));
	return error;
}
6494
6495 /*
6496 * Reclaim blocks from regular files.
6497 *
6498 * This function iterates over all the record in catalog btree looking
6499 * for files with extents that overlap into the space we're trying to
6500 * free up. If a file extent requires relocation, it looks up the vnode
6501 * and calls function to relocate the data.
6502 *
6503 * Returns:
6504 * Zero on success, non-zero on failure.
6505 */
static int
hfs_reclaim_filespace(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context)
{
	int error;
	FCB *fcb;
	struct BTreeIterator *iterator = NULL;	/* heap allocated: too large for the kernel stack */
	struct FSBufferDescriptor btdata;
	int btree_operation;
	int lockflags;
	struct HFSPlusCatalogFile filerec;
	struct vnode *vp;
	struct vnode *rvp;
	struct filefork *datafork;
	u_int32_t files_moved = 0;
	u_int32_t prev_blocksmoved;

	fcb = VTOF(hfsmp->hfs_catalog_vp);
	/* Store the value to print total blocks moved by this function at the end */
	prev_blocksmoved = hfsmp->hfs_resize_blocksmoved;

	if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) {
		return ENOMEM;
	}
	bzero(iterator, sizeof(*iterator));

	btdata.bufferAddress = &filerec;
	btdata.itemSize = sizeof(filerec);
	btdata.itemCount = 1;

	btree_operation = kBTreeFirstRecord;
	while (1) {
		/* Hold the catalog lock only for the lookup itself; it is dropped
		 * before the cnode lock is taken via hfs_vget() below.
		 */
		lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
		error = BTIterateRecord(fcb, btree_operation, iterator, &btdata, NULL);
		hfs_systemfile_unlock(hfsmp, lockflags);
		if (error) {
			/* Record-not-found / end-of-iteration simply terminate the scan. */
			if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) {
				error = 0;
			}
			break;
		}
		btree_operation = kBTreeNextRecord;

		/* Only file records carry fork extents; skip everything else. */
		if (filerec.recordType != kHFSPlusFileRecord) {
			continue;
		}

		/* Check if any of the extents require relocation */
		if (hfs_file_extent_overlaps(hfsmp, allocLimit, &filerec) == false) {
			continue;
		}

		/* We want to allow open-unlinked files to be moved, so allow_deleted == 1 */
		if (hfs_vget(hfsmp, filerec.fileID, &vp, 0, 1) != 0) {
			/* Lookup failure is non-fatal; move on to the next record. */
			continue;
		}

		/* If data fork exists or item is a directory hard link, relocate blocks */
		datafork = VTOF(vp);
		if ((datafork && datafork->ff_blocks > 0) || vnode_isdir(vp)) {
			error = hfs_reclaim_file(hfsmp, vp, filerec.fileID,
					kHFSDataForkType, allocLimit, context);
			if (error)  {
				printf ("hfs_reclaimspace: Error reclaiming datafork blocks of fileid=%u (error=%d)\n", filerec.fileID, error);
				hfs_unlock(VTOC(vp));
				vnode_put(vp);
				break;
			}
		}

		/* If resource fork exists or item is a directory hard link, relocate blocks.
		 * c_blocks minus the data fork's blocks is what the resource fork holds.
		 */
		if (((VTOC(vp)->c_blocks - (datafork ? datafork->ff_blocks : 0)) > 0) || vnode_isdir(vp)) {
			if (vnode_isdir(vp)) {
				/* Resource fork vnode lookup is invalid for directory hard link.
				 * So we fake data fork vnode as resource fork vnode.
				 */
				rvp = vp;
			} else {
				error = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE, FALSE);
				if (error) {
					printf ("hfs_reclaimspace: Error looking up rvp for fileid=%u (error=%d)\n", filerec.fileID, error);
					hfs_unlock(VTOC(vp));
					vnode_put(vp);
					break;
				}
				/* NOTE(review): rvp's iocount is not dropped in this loop;
				 * C_NEED_RVNODE_PUT appears to defer that to cnode
				 * teardown — TODO confirm against cnode reclaim code.
				 */
				VTOC(rvp)->c_flag |= C_NEED_RVNODE_PUT;
			}

			error = hfs_reclaim_file(hfsmp, rvp, filerec.fileID,
					kHFSResourceForkType, allocLimit, context);
			if (error) {
				printf ("hfs_reclaimspace: Error reclaiming rsrcfork blocks of fileid=%u (error=%d)\n", filerec.fileID, error);
				hfs_unlock(VTOC(vp));
				vnode_put(vp);
				break;
			}
		}

		/* The file forks were relocated successfully, now drop the
		 * cnode lock and vnode reference, and continue iterating to
		 * next catalog record.
		 */
		hfs_unlock(VTOC(vp));
		vnode_put(vp);
		files_moved++;
	}

	if (files_moved) {
		printf("hfs_reclaim_filespace: Relocated %u blocks from %u files on \"%s\"\n",
			(hfsmp->hfs_resize_blocksmoved - prev_blocksmoved),
			files_moved, hfsmp->vcbVN);
	}

	kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator));
	return error;
}
6621
6622 /*
6623 * Reclaim space at the end of a file system.
6624 *
6625 * Inputs -
6626 * allocLimit - start block of the space being reclaimed
6627 * reclaimblks - number of allocation blocks to reclaim
6628 */
6629 static int
6630 hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t allocLimit, u_int32_t reclaimblks, vfs_context_t context)
6631 {
6632 int error = 0;
6633
6634 /*
6635 * Preflight the bitmap to find out total number of blocks that need
6636 * relocation.
6637 *
6638 * Note: Since allocLimit is set to the location of new alternate volume
6639 * header, the check below does not account for blocks allocated for old
6640 * alternate volume header.
6641 */
6642 error = hfs_count_allocated(hfsmp, allocLimit, reclaimblks, &(hfsmp->hfs_resize_totalblocks));
6643 if (error) {
6644 printf ("hfs_reclaimspace: Unable to determine total blocks to reclaim error=%d\n", error);
6645 return error;
6646 }
6647 if (hfs_resize_debug) {
6648 printf ("hfs_reclaimspace: Total number of blocks to reclaim = %u\n", hfsmp->hfs_resize_totalblocks);
6649 }
6650
6651 /* Just to be safe, sync the content of the journal to the disk before we proceed */
6652 hfs_journal_flush(hfsmp, TRUE);
6653
6654 /* First, relocate journal file blocks if they're in the way.
6655 * Doing this first will make sure that journal relocate code
6656 * gets access to contiguous blocks on disk first. The journal
6657 * file has to be contiguous on the disk, otherwise resize will
6658 * fail.
6659 */
6660 error = hfs_reclaim_journal_file(hfsmp, allocLimit, context);
6661 if (error) {
6662 printf("hfs_reclaimspace: hfs_reclaim_journal_file failed (%d)\n", error);
6663 return error;
6664 }
6665
6666 /* Relocate journal info block blocks if they're in the way. */
6667 error = hfs_reclaim_journal_info_block(hfsmp, allocLimit, context);
6668 if (error) {
6669 printf("hfs_reclaimspace: hfs_reclaim_journal_info_block failed (%d)\n", error);
6670 return error;
6671 }
6672
6673 /* Relocate extents of the Extents B-tree if they're in the way.
6674 * Relocating extents btree before other btrees is important as
6675 * this will provide access to largest contiguous block range on
6676 * the disk for relocating extents btree. Note that extents btree
6677 * can only have maximum of 8 extents.
6678 */
6679 error = hfs_reclaim_file(hfsmp, hfsmp->hfs_extents_vp, kHFSExtentsFileID,
6680 kHFSDataForkType, allocLimit, context);
6681 if (error) {
6682 printf("hfs_reclaimspace: reclaim extents b-tree returned %d\n", error);
6683 return error;
6684 }
6685
6686 /* Relocate extents of the Allocation file if they're in the way. */
6687 error = hfs_reclaim_file(hfsmp, hfsmp->hfs_allocation_vp, kHFSAllocationFileID,
6688 kHFSDataForkType, allocLimit, context);
6689 if (error) {
6690 printf("hfs_reclaimspace: reclaim allocation file returned %d\n", error);
6691 return error;
6692 }
6693
6694 /* Relocate extents of the Catalog B-tree if they're in the way. */
6695 error = hfs_reclaim_file(hfsmp, hfsmp->hfs_catalog_vp, kHFSCatalogFileID,
6696 kHFSDataForkType, allocLimit, context);
6697 if (error) {
6698 printf("hfs_reclaimspace: reclaim catalog b-tree returned %d\n", error);
6699 return error;
6700 }
6701
6702 /* Relocate extents of the Attributes B-tree if they're in the way. */
6703 error = hfs_reclaim_file(hfsmp, hfsmp->hfs_attribute_vp, kHFSAttributesFileID,
6704 kHFSDataForkType, allocLimit, context);
6705 if (error) {
6706 printf("hfs_reclaimspace: reclaim attribute b-tree returned %d\n", error);
6707 return error;
6708 }
6709
6710 /* Relocate extents of the Startup File if there is one and they're in the way. */
6711 error = hfs_reclaim_file(hfsmp, hfsmp->hfs_startup_vp, kHFSStartupFileID,
6712 kHFSDataForkType, allocLimit, context);
6713 if (error) {
6714 printf("hfs_reclaimspace: reclaim startup file returned %d\n", error);
6715 return error;
6716 }
6717
6718 /*
6719 * We need to make sure the alternate volume header gets flushed if we moved
6720 * any extents in the volume header. But we need to do that before
6721 * shrinking the size of the volume, or else the journal code will panic
6722 * with an invalid (too large) block number.
6723 *
6724 * Note that blks_moved will be set if ANY extent was moved, even
6725 * if it was just an overflow extent. In this case, the journal_flush isn't
6726 * strictly required, but shouldn't hurt.
6727 */
6728 if (hfsmp->hfs_resize_blocksmoved) {
6729 hfs_journal_flush(hfsmp, TRUE);
6730 }
6731
6732 /* Reclaim extents from catalog file records */
6733 error = hfs_reclaim_filespace(hfsmp, allocLimit, context);
6734 if (error) {
6735 printf ("hfs_reclaimspace: hfs_reclaim_filespace returned error=%d\n", error);
6736 return error;
6737 }
6738
6739 /* Reclaim extents from extent-based extended attributes, if any */
6740 error = hfs_reclaim_xattrspace(hfsmp, allocLimit, context);
6741 if (error) {
6742 printf ("hfs_reclaimspace: hfs_reclaim_xattrspace returned error=%d\n", error);
6743 return error;
6744 }
6745
6746 return error;
6747 }
6748
6749
6750 /*
6751 * Check if there are any extents (including overflow extents) that overlap
6752 * into the disk space that is being reclaimed.
6753 *
6754 * Output -
6755 * true - One of the extents need to be relocated
6756 * false - No overflow extents need to be relocated, or there was an error
6757 */
static int
hfs_file_extent_overlaps(struct hfsmount *hfsmp, u_int32_t allocLimit, struct HFSPlusCatalogFile *filerec)
{
	struct BTreeIterator * iterator = NULL;
	struct FSBufferDescriptor btdata;
	HFSPlusExtentRecord extrec;
	HFSPlusExtentKey *extkeyptr;
	FCB *fcb;
	int overlapped = false;
	int i, j;
	int error;
	int lockflags = 0;
	u_int32_t endblock;

	/* Check if data fork overlaps the target space */
	for (i = 0; i < kHFSPlusExtentDensity; ++i) {
		/* A zero blockCount terminates the in-record extent list. */
		if (filerec->dataFork.extents[i].blockCount == 0) {
			break;
		}
		endblock = filerec->dataFork.extents[i].startBlock +
			filerec->dataFork.extents[i].blockCount;
		if (endblock > allocLimit) {
			overlapped = true;
			goto out;
		}
	}

	/* Check if resource fork overlaps the target space */
	for (j = 0; j < kHFSPlusExtentDensity; ++j) {
		if (filerec->resourceFork.extents[j].blockCount == 0) {
			break;
		}
		endblock = filerec->resourceFork.extents[j].startBlock +
			filerec->resourceFork.extents[j].blockCount;
		if (endblock > allocLimit) {
			overlapped = true;
			goto out;
		}
	}

	/* Return back if there are no overflow extents for this file:
	 * neither fork filled all eight in-record extent slots, so nothing
	 * can have spilled into the extents overflow b-tree.
	 */
	if ((i < kHFSPlusExtentDensity) && (j < kHFSPlusExtentDensity)) {
		goto out;
	}

	/* Allocation failure reports "no overlap", per the header comment's
	 * "false ... or there was an error" contract.  No lock is held yet,
	 * so a direct return is safe here.
	 */
	if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) {
		return 0;
	}
	bzero(iterator, sizeof(*iterator));
	extkeyptr = (HFSPlusExtentKey *)&iterator->key;
	extkeyptr->keyLength = kHFSPlusExtentKeyMaximumLength;
	extkeyptr->forkType = 0;
	extkeyptr->fileID = filerec->fileID;
	extkeyptr->startBlock = 0;

	btdata.bufferAddress = &extrec;
	btdata.itemSize = sizeof(extrec);
	btdata.itemCount = 1;

	fcb = VTOF(hfsmp->hfs_extents_vp);

	lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_SHARED_LOCK);

	/* This will position the iterator just before the first overflow
	 * extent record for given fileID.  It will always return btNotFound,
	 * so we special case the error code.
	 */
	error = BTSearchRecord(fcb, iterator, &btdata, NULL, iterator);
	if (error && (error != btNotFound)) {
		/* Any real search error also falls out as "no overlap". */
		goto out;
	}

	/* BTIterateRecord() might return error if the btree is empty, and
	 * therefore we return that the extent does not overflow to the caller
	 */
	error = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL);
	while (error == 0) {
		/* Stop when we encounter a different file. */
		if (extkeyptr->fileID != filerec->fileID) {
			break;
		}
		/* Check if any of the forks exist in the target space. */
		for (i = 0; i < kHFSPlusExtentDensity; ++i) {
			if (extrec[i].blockCount == 0) {
				break;
			}
			endblock = extrec[i].startBlock + extrec[i].blockCount;
			if (endblock > allocLimit) {
				overlapped = true;
				goto out;
			}
		}
		/* Look for more records. */
		error = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL);
	}

out:
	/* lockflags / iterator are only set on the overflow path; the early
	 * in-record exits arrive here with both still zero/NULL. */
	if (lockflags) {
		hfs_systemfile_unlock(hfsmp, lockflags);
	}
	if (iterator) {
		kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator));
	}
	return overlapped;
}
6863
6864
6865 /*
6866 * Calculate the progress of a file system resize operation.
6867 */
6868 __private_extern__
6869 int
6870 hfs_resize_progress(struct hfsmount *hfsmp, u_int32_t *progress)
6871 {
6872 if ((hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) == 0) {
6873 return (ENXIO);
6874 }
6875
6876 if (hfsmp->hfs_resize_totalblocks > 0) {
6877 *progress = (u_int32_t)((hfsmp->hfs_resize_blocksmoved * 100ULL) / hfsmp->hfs_resize_totalblocks);
6878 } else {
6879 *progress = 0;
6880 }
6881
6882 return (0);
6883 }
6884
6885
6886 /*
6887 * Creates a UUID from a unique "name" in the HFS UUID Name space.
6888 * See version 3 UUID.
6889 */
6890 static void
6891 hfs_getvoluuid(struct hfsmount *hfsmp, uuid_t result)
6892 {
6893 MD5_CTX md5c;
6894 uint8_t rawUUID[8];
6895
6896 ((uint32_t *)rawUUID)[0] = hfsmp->vcbFndrInfo[6];
6897 ((uint32_t *)rawUUID)[1] = hfsmp->vcbFndrInfo[7];
6898
6899 MD5Init( &md5c );
6900 MD5Update( &md5c, HFS_UUID_NAMESPACE_ID, sizeof( uuid_t ) );
6901 MD5Update( &md5c, rawUUID, sizeof (rawUUID) );
6902 MD5Final( result, &md5c );
6903
6904 result[6] = 0x30 | ( result[6] & 0x0F );
6905 result[8] = 0x80 | ( result[8] & 0x3F );
6906 }
6907
6908 /*
6909 * Get file system attributes.
6910 */
static int
hfs_vfs_getattr(struct mount *mp, struct vfs_attr *fsap, __unused vfs_context_t context)
{
	/* Attribute masks trimmed to what HFS actually supports. */
#define HFS_ATTR_CMN_VALIDMASK (ATTR_CMN_VALIDMASK & ~(ATTR_CMN_NAMEDATTRCOUNT | ATTR_CMN_NAMEDATTRLIST))
#define HFS_ATTR_FILE_VALIDMASK (ATTR_FILE_VALIDMASK & ~(ATTR_FILE_FILETYPE | ATTR_FILE_FORKCOUNT | ATTR_FILE_FORKLIST))
#define HFS_ATTR_CMN_VOL_VALIDMASK (ATTR_CMN_VALIDMASK & ~(ATTR_CMN_NAMEDATTRCOUNT | ATTR_CMN_NAMEDATTRLIST | ATTR_CMN_ACCTIME))

	ExtendedVCB *vcb = VFSTOVCB(mp);
	struct hfsmount *hfsmp = VFSTOHFS(mp);
	u_int32_t freeCNIDs;

	/* CNIDs remaining before the 32-bit catalog-node-id space runs out. */
	freeCNIDs = (u_int32_t)0xFFFFFFFF - (u_int32_t)hfsmp->vcbNxtCNID;

	VFSATTR_RETURN(fsap, f_objcount, (u_int64_t)hfsmp->vcbFilCnt + (u_int64_t)hfsmp->vcbDirCnt);
	VFSATTR_RETURN(fsap, f_filecount, (u_int64_t)hfsmp->vcbFilCnt);
	VFSATTR_RETURN(fsap, f_dircount, (u_int64_t)hfsmp->vcbDirCnt);
	VFSATTR_RETURN(fsap, f_maxobjcount, (u_int64_t)0xFFFFFFFF);
	VFSATTR_RETURN(fsap, f_iosize, (size_t)cluster_max_io_size(mp, 0));
	VFSATTR_RETURN(fsap, f_blocks, (u_int64_t)hfsmp->totalBlocks);
	/* f_bfree ignores reserves (flag 0); f_bavail honors them (flag 1). */
	VFSATTR_RETURN(fsap, f_bfree, (u_int64_t)hfs_freeblks(hfsmp, 0));
	VFSATTR_RETURN(fsap, f_bavail, (u_int64_t)hfs_freeblks(hfsmp, 1));
	VFSATTR_RETURN(fsap, f_bsize, (u_int32_t)vcb->blockSize);
	/* XXX needs clarification */
	VFSATTR_RETURN(fsap, f_bused, hfsmp->totalBlocks - hfs_freeblks(hfsmp, 1));
	/* Maximum files is constrained by total blocks. */
	VFSATTR_RETURN(fsap, f_files, (u_int64_t)(hfsmp->totalBlocks - 2));
	/* Free-file count is bounded by both free CNIDs and free blocks. */
	VFSATTR_RETURN(fsap, f_ffree, MIN((u_int64_t)freeCNIDs, (u_int64_t)hfs_freeblks(hfsmp, 1)));

	fsap->f_fsid.val[0] = hfsmp->hfs_raw_dev;
	fsap->f_fsid.val[1] = vfs_typenum(mp);
	VFSATTR_SET_SUPPORTED(fsap, f_fsid);

	VFSATTR_RETURN(fsap, f_signature, vcb->vcbSigWord);
	VFSATTR_RETURN(fsap, f_carbon_fsid, 0);

	if (VFSATTR_IS_ACTIVE(fsap, f_capabilities)) {
		vol_capabilities_attr_t *cap;
	
		cap = &fsap->f_capabilities;

		/* HFS Standard advertises far fewer format features than HFS Plus. */
		if (hfsmp->hfs_flags & HFS_STANDARD) {
			cap->capabilities[VOL_CAPABILITIES_FORMAT] =
				VOL_CAP_FMT_PERSISTENTOBJECTIDS |
				VOL_CAP_FMT_CASE_PRESERVING |
				VOL_CAP_FMT_FAST_STATFS |
				VOL_CAP_FMT_HIDDEN_FILES |
				VOL_CAP_FMT_PATH_FROM_ID;
		} else {
			/* HFS Plus: journal-active and case-sensitivity bits
			 * reflect this particular mount's state. */
			cap->capabilities[VOL_CAPABILITIES_FORMAT] =
				VOL_CAP_FMT_PERSISTENTOBJECTIDS |
				VOL_CAP_FMT_SYMBOLICLINKS |
				VOL_CAP_FMT_HARDLINKS |
				VOL_CAP_FMT_JOURNAL |
				VOL_CAP_FMT_ZERO_RUNS |
				(hfsmp->jnl ? VOL_CAP_FMT_JOURNAL_ACTIVE : 0) |
				(hfsmp->hfs_flags & HFS_CASE_SENSITIVE ? VOL_CAP_FMT_CASE_SENSITIVE : 0) |
				VOL_CAP_FMT_CASE_PRESERVING |
				VOL_CAP_FMT_FAST_STATFS |
				VOL_CAP_FMT_2TB_FILESIZE |
				VOL_CAP_FMT_HIDDEN_FILES |
#if HFS_COMPRESSION
				VOL_CAP_FMT_PATH_FROM_ID |
				VOL_CAP_FMT_DECMPFS_COMPRESSION;
#else
				VOL_CAP_FMT_PATH_FROM_ID;
#endif
		}
		cap->capabilities[VOL_CAPABILITIES_INTERFACES] =
			VOL_CAP_INT_SEARCHFS |
			VOL_CAP_INT_ATTRLIST |
			VOL_CAP_INT_NFSEXPORT |
			VOL_CAP_INT_READDIRATTR |
			VOL_CAP_INT_EXCHANGEDATA |
			VOL_CAP_INT_ALLOCATE |
			VOL_CAP_INT_VOL_RENAME |
			VOL_CAP_INT_ADVLOCK |
			VOL_CAP_INT_FLOCK |
#if NAMEDSTREAMS
			VOL_CAP_INT_EXTENDED_ATTR |
			VOL_CAP_INT_NAMEDSTREAMS;
#else
			VOL_CAP_INT_EXTENDED_ATTR;
#endif
		cap->capabilities[VOL_CAPABILITIES_RESERVED1] = 0;
		cap->capabilities[VOL_CAPABILITIES_RESERVED2] = 0;

		/* The valid[] masks list every bit whose state capabilities[]
		 * reports.  Note VOL_CAP_INT_COPYFILE and VOL_CAP_INT_MANLOCK
		 * appear in valid[] but not capabilities[] above, i.e. they
		 * are recognized but reported as unsupported. */
		cap->valid[VOL_CAPABILITIES_FORMAT] =
			VOL_CAP_FMT_PERSISTENTOBJECTIDS |
			VOL_CAP_FMT_SYMBOLICLINKS |
			VOL_CAP_FMT_HARDLINKS |
			VOL_CAP_FMT_JOURNAL |
			VOL_CAP_FMT_JOURNAL_ACTIVE |
			VOL_CAP_FMT_NO_ROOT_TIMES |
			VOL_CAP_FMT_SPARSE_FILES |
			VOL_CAP_FMT_ZERO_RUNS |
			VOL_CAP_FMT_CASE_SENSITIVE |
			VOL_CAP_FMT_CASE_PRESERVING |
			VOL_CAP_FMT_FAST_STATFS |
			VOL_CAP_FMT_2TB_FILESIZE |
			VOL_CAP_FMT_OPENDENYMODES |
			VOL_CAP_FMT_HIDDEN_FILES |
#if HFS_COMPRESSION
			VOL_CAP_FMT_PATH_FROM_ID |
			VOL_CAP_FMT_DECMPFS_COMPRESSION;
#else
			VOL_CAP_FMT_PATH_FROM_ID;
#endif
		cap->valid[VOL_CAPABILITIES_INTERFACES] =
			VOL_CAP_INT_SEARCHFS |
			VOL_CAP_INT_ATTRLIST |
			VOL_CAP_INT_NFSEXPORT |
			VOL_CAP_INT_READDIRATTR |
			VOL_CAP_INT_EXCHANGEDATA |
			VOL_CAP_INT_COPYFILE |
			VOL_CAP_INT_ALLOCATE |
			VOL_CAP_INT_VOL_RENAME |
			VOL_CAP_INT_ADVLOCK |
			VOL_CAP_INT_FLOCK |
			VOL_CAP_INT_MANLOCK |
#if NAMEDSTREAMS
			VOL_CAP_INT_EXTENDED_ATTR |
			VOL_CAP_INT_NAMEDSTREAMS;
#else
			VOL_CAP_INT_EXTENDED_ATTR;
#endif
		cap->valid[VOL_CAPABILITIES_RESERVED1] = 0;
		cap->valid[VOL_CAPABILITIES_RESERVED2] = 0;
		VFSATTR_SET_SUPPORTED(fsap, f_capabilities);
	}
	if (VFSATTR_IS_ACTIVE(fsap, f_attributes)) {
		vol_attributes_attr_t *attrp = &fsap->f_attributes;

		attrp->validattr.commonattr = HFS_ATTR_CMN_VOL_VALIDMASK;
		attrp->validattr.volattr = ATTR_VOL_VALIDMASK & ~ATTR_VOL_INFO;
		attrp->validattr.dirattr = ATTR_DIR_VALIDMASK;
		attrp->validattr.fileattr = HFS_ATTR_FILE_VALIDMASK;
		attrp->validattr.forkattr = 0;

		attrp->nativeattr.commonattr = HFS_ATTR_CMN_VOL_VALIDMASK;
		attrp->nativeattr.volattr = ATTR_VOL_VALIDMASK & ~ATTR_VOL_INFO;
		attrp->nativeattr.dirattr = ATTR_DIR_VALIDMASK;
		attrp->nativeattr.fileattr = HFS_ATTR_FILE_VALIDMASK;
		attrp->nativeattr.forkattr = 0;
		VFSATTR_SET_SUPPORTED(fsap, f_attributes);
	}
	fsap->f_create_time.tv_sec = hfsmp->hfs_itime;
	fsap->f_create_time.tv_nsec = 0;
	VFSATTR_SET_SUPPORTED(fsap, f_create_time);
	fsap->f_modify_time.tv_sec = hfsmp->vcbLsMod;
	fsap->f_modify_time.tv_nsec = 0;
	VFSATTR_SET_SUPPORTED(fsap, f_modify_time);

	fsap->f_backup_time.tv_sec = hfsmp->vcbVolBkUp;
	fsap->f_backup_time.tv_nsec = 0;
	VFSATTR_SET_SUPPORTED(fsap, f_backup_time);
	if (VFSATTR_IS_ACTIVE(fsap, f_fssubtype)) {
		u_int16_t subtype = 0;

		/*
		 * Subtypes (flavors) for HFS
		 *	0:	Mac OS Extended
		 *	1:	Mac OS Extended (Journaled)
		 *	2:	Mac OS Extended (Case Sensitive)
		 *	3:	Mac OS Extended (Case Sensitive, Journaled)
		 *	4 - 127:	Reserved
		 *	128:	Mac OS Standard
		 *
		 */
		if (hfsmp->hfs_flags & HFS_STANDARD) {
			subtype = HFS_SUBTYPE_STANDARDHFS;
		} else /* HFS Plus */ {
			if (hfsmp->jnl)
				subtype |= HFS_SUBTYPE_JOURNALED;
			if (hfsmp->hfs_flags & HFS_CASE_SENSITIVE)
				subtype |= HFS_SUBTYPE_CASESENSITIVE;
		}
		fsap->f_fssubtype = subtype;
		VFSATTR_SET_SUPPORTED(fsap, f_fssubtype);
	}

	if (VFSATTR_IS_ACTIVE(fsap, f_vol_name)) {
		strlcpy(fsap->f_vol_name, (char *) hfsmp->vcbVN, MAXPATHLEN);
		VFSATTR_SET_SUPPORTED(fsap, f_vol_name);
	}
	if (VFSATTR_IS_ACTIVE(fsap, f_uuid)) {
		hfs_getvoluuid(hfsmp, fsap->f_uuid);
		VFSATTR_SET_SUPPORTED(fsap, f_uuid);
	}
	return (0);
}
7101
7102 /*
7103 * Perform a volume rename. Requires the FS' root vp.
7104 */
static int
hfs_rename_volume(struct vnode *vp, const char *name, proc_t p)
{
	ExtendedVCB *vcb = VTOVCB(vp);
	struct cnode *cp = VTOC(vp);
	struct hfsmount *hfsmp = VTOHFS(vp);
	struct cat_desc to_desc;
	struct cat_desc todir_desc;
	struct cat_desc new_desc;
	cat_cookie_t cookie;
	int lockflags;
	int error = 0;
	char converted_volname[256];
	size_t volname_length = 0;
	size_t conv_volname_length = 0;

	
	/*
	 * Ignore attempts to rename a volume to a zero-length name.
	 */
	if (name[0] == 0)
		return(0);

	bzero(&to_desc, sizeof(to_desc));
	bzero(&todir_desc, sizeof(todir_desc));
	bzero(&new_desc, sizeof(new_desc));
	bzero(&cookie, sizeof(cookie));

	/* Destination directory: the root's parent, where the volume's
	 * name lives as the root folder's catalog entry. */
	todir_desc.cd_parentcnid = kHFSRootParentID;
	todir_desc.cd_cnid = kHFSRootFolderID;
	todir_desc.cd_flags = CD_ISDIR;

	/* Target descriptor: same location, new name. */
	to_desc.cd_nameptr = (const u_int8_t *)name;
	to_desc.cd_namelen = strlen(name);
	to_desc.cd_parentcnid = kHFSRootParentID;
	to_desc.cd_cnid = cp->c_cnid;
	to_desc.cd_flags = CD_ISDIR;

	/* Order: cnode lock -> transaction -> preflight -> catalog lock. */
	if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK)) == 0) {
		if ((error = hfs_start_transaction(hfsmp)) == 0) {
			if ((error = cat_preflight(hfsmp, CAT_RENAME, &cookie, p)) == 0) {
				lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);

				error = cat_rename(hfsmp, &cp->c_desc, &todir_desc, &to_desc, &new_desc);

				/*
				 * If successful, update the name in the VCB, ensure it's terminated.
				 */
				if (!error) {
					strlcpy((char *)vcb->vcbVN, name, sizeof(vcb->vcbVN));
					volname_length = strlen ((const char*)vcb->vcbVN);
#define DKIOCCSSETLVNAME _IOW('d', 198, char[1024])
					/* Send the volume name down to CoreStorage if necessary */
					error = utf8_normalizestr(vcb->vcbVN, volname_length, (u_int8_t*)converted_volname, &conv_volname_length, 256, UTF_PRECOMPOSED);
					if (error == 0) {
						(void) VNOP_IOCTL (hfsmp->hfs_devvp, DKIOCCSSETLVNAME, converted_volname, 0, vfs_context_current());
					}
					/* The CoreStorage notification is best-effort:
					 * its failure must not fail the rename. */
					error = 0;
				}

				hfs_systemfile_unlock(hfsmp, lockflags);
				cat_postflight(hfsmp, &cookie, p);

				/* NOTE(review): the VCB is marked dirty only on the
				 * error path here, which looks inverted; preserved
				 * as-is.  The flush below runs in both cases —
				 * TODO confirm intent. */
				if (error)
					MarkVCBDirty(vcb);
				(void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
			}
			hfs_end_transaction(hfsmp);
		}			
		if (!error) {
			/* Release old allocated name buffer */
			if (cp->c_desc.cd_flags & CD_HASBUF) {
				const char *tmp_name = (const char *)cp->c_desc.cd_nameptr;
		
				cp->c_desc.cd_nameptr = 0;
				cp->c_desc.cd_namelen = 0;
				cp->c_desc.cd_flags &= ~CD_HASBUF;
				vfs_removename(tmp_name);
			}			
			/* Update cnode's catalog descriptor */
			replace_desc(cp, &new_desc);
			vcb->volumeNameEncodingHint = new_desc.cd_encoding;
			cp->c_touch_chgtime = TRUE;
		}

		hfs_unlock(cp);
	}
	
	return(error);
}
7195
7196 /*
7197 * Get file system attributes.
7198 */
7199 static int
7200 hfs_vfs_setattr(struct mount *mp, struct vfs_attr *fsap, __unused vfs_context_t context)
7201 {
7202 kauth_cred_t cred = vfs_context_ucred(context);
7203 int error = 0;
7204
7205 /*
7206 * Must be superuser or owner of filesystem to change volume attributes
7207 */
7208 if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(mp)->f_owner))
7209 return(EACCES);
7210
7211 if (VFSATTR_IS_ACTIVE(fsap, f_vol_name)) {
7212 vnode_t root_vp;
7213
7214 error = hfs_vfs_root(mp, &root_vp, context);
7215 if (error)
7216 goto out;
7217
7218 error = hfs_rename_volume(root_vp, fsap->f_vol_name, vfs_context_proc(context));
7219 (void) vnode_put(root_vp);
7220 if (error)
7221 goto out;
7222
7223 VFSATTR_SET_SUPPORTED(fsap, f_vol_name);
7224 }
7225
7226 out:
7227 return error;
7228 }
7229
7230 /* If a runtime corruption is detected, set the volume inconsistent
7231 * bit in the volume attributes. The volume inconsistent bit is a persistent
7232 * bit which represents that the volume is corrupt and needs repair.
7233 * The volume inconsistent bit can be set from the kernel when it detects
7234 * runtime corruption or from file system repair utilities like fsck_hfs when
7235 * a repair operation fails. The bit should be cleared only from file system
7236 * verify/repair utility like fsck_hfs when a verify/repair succeeds.
7237 */
7238 void hfs_mark_volume_inconsistent(struct hfsmount *hfsmp)
7239 {
7240 HFS_MOUNT_LOCK(hfsmp, TRUE);
7241 if ((hfsmp->vcbAtrb & kHFSVolumeInconsistentMask) == 0) {
7242 hfsmp->vcbAtrb |= kHFSVolumeInconsistentMask;
7243 MarkVCBDirty(hfsmp);
7244 }
7245 if ((hfsmp->hfs_flags & HFS_READ_ONLY)==0) {
7246 /* Log information to ASL log */
7247 fslog_fs_corrupt(hfsmp->hfs_mp);
7248 printf("hfs: Runtime corruption detected on %s, fsck will be forced on next mount.\n", hfsmp->vcbVN);
7249 }
7250 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
7251 }
7252
7253 /* Replay the journal on the device node provided. Returns zero if
7254 * journal replay succeeded or no journal was supposed to be replayed.
7255 */
7256 static int hfs_journal_replay(vnode_t devvp, vfs_context_t context)
7257 {
7258 int retval = 0;
7259 struct mount *mp = NULL;
7260 struct hfs_mount_args *args = NULL;
7261
7262 /* Replay allowed only on raw devices */
7263 if (!vnode_ischr(devvp) && !vnode_isblk(devvp)) {
7264 retval = EINVAL;
7265 goto out;
7266 }
7267
7268 /* Create dummy mount structures */
7269 MALLOC(mp, struct mount *, sizeof(struct mount), M_TEMP, M_WAITOK);
7270 if (mp == NULL) {
7271 retval = ENOMEM;
7272 goto out;
7273 }
7274 bzero(mp, sizeof(struct mount));
7275 mount_lock_init(mp);
7276
7277 MALLOC(args, struct hfs_mount_args *, sizeof(struct hfs_mount_args), M_TEMP, M_WAITOK);
7278 if (args == NULL) {
7279 retval = ENOMEM;
7280 goto out;
7281 }
7282 bzero(args, sizeof(struct hfs_mount_args));
7283
7284 retval = hfs_mountfs(devvp, mp, args, 1, context);
7285 buf_flushdirtyblks(devvp, TRUE, 0, "hfs_journal_replay");
7286
7287 /* FSYNC the devnode to be sure all data has been flushed */
7288 retval = VNOP_FSYNC(devvp, MNT_WAIT, context);
7289
7290 out:
7291 if (mp) {
7292 mount_lock_destroy(mp);
7293 FREE(mp, M_TEMP);
7294 }
7295 if (args) {
7296 FREE(args, M_TEMP);
7297 }
7298 return retval;
7299 }
7300
7301 /*
7302 * hfs vfs operations.
7303 */
/* Positional initializer: entries must remain in the order the
 * struct vfsops declaration defines. */
struct vfsops hfs_vfsops = {
	hfs_mount,		/* mount */
	hfs_start,		/* start */
	hfs_unmount,		/* unmount */
	hfs_vfs_root,		/* root */
	hfs_quotactl,		/* quotactl */
	hfs_vfs_getattr,	/* was hfs_statfs */
	hfs_sync,		/* sync */
	hfs_vfs_vget,		/* vget */
	hfs_fhtovp,		/* fhtovp: NFS file handle -> vnode */
	hfs_vptofh,		/* vptofh: vnode -> NFS file handle */
	hfs_init,		/* init */
	hfs_sysctl,		/* sysctl */
	hfs_vfs_setattr,	/* setattr */
	{NULL}			/* reserved slots */
};